# 10 Minutes to pandas
## Object Creation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# A Series built from a plain Python list. The np.nan entry is a float,
# so the values are stored as float64 (see the dtype in the output).
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2017-11-22 (daily frequency).
dates = pd.date_range(start='20171122', periods=6, freq='D')

In [4]:
# Echo the index to confirm its values, dtype and frequency.
dates

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Seeded so that Restart & Run All rebuilds the same frame every time.
# A private RandomState is used rather than np.random.seed(), so the global
# NumPy random state is not reseeded for the rest of the notebook.
# NOTE(review): the outputs saved in this notebook came from an unseeded run
# and will be replaced by the seeded values on the next execution.
df = pd.DataFrame(
    np.random.RandomState(0).randn(6, 4),  # 6 rows x 4 columns, standard normal
    index=dates,                           # one row per day in `dates`
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2017-11-22,0.650628,0.522711,0.202438,0.001545
2017-11-23,-1.836835,0.361807,-1.333353,-0.154992
2017-11-24,-0.23736,-1.225819,-1.257283,0.351367
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-26,0.272317,1.102574,-0.601809,2.172882
2017-11-27,0.440767,1.100296,-1.114614,-0.426112


In [6]:
# Build a frame from a dict of columns. The scalar entries ('A', 'B', 'F')
# are repeated on every row, matching the length of the array-like columns.
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20171122'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2017-11-22,1.0,3,test,foo
1,1.0,2017-11-22,1.0,3,train,foo
2,1.0,2017-11-22,1.0,3,test,foo
3,1.0,2017-11-22,1.0,3,train,foo


In [7]:
# Each column keeps its own dtype (float32, int32, category, ... below).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
# Try it interactively: type `df2.` and press <TAB> to see tab completion
# of the frame's column names and public attributes.

## Viewing Data

In [9]:
# First five rows; head() is called without an explicit count here.
df.head()

Unnamed: 0,A,B,C,D
2017-11-22,0.650628,0.522711,0.202438,0.001545
2017-11-23,-1.836835,0.361807,-1.333353,-0.154992
2017-11-24,-0.23736,-1.225819,-1.257283,0.351367
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-26,0.272317,1.102574,-0.601809,2.172882


In [10]:
# Last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-26,0.272317,1.102574,-0.601809,2.172882
2017-11-27,0.440767,1.100296,-1.114614,-0.426112


In [11]:
# Row labels: the DatetimeIndex passed as index= when df was built.
df.index

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# The underlying data as a plain NumPy array (row/column labels are dropped).
# NOTE(review): newer pandas releases recommend df.to_numpy() for this;
# confirm the target pandas version before switching.
df.values

array([[  6.50628239e-01,   5.22711167e-01,   2.02438049e-01,
          1.54464203e-03],
       [ -1.83683521e+00,   3.61807436e-01,  -1.33335289e+00,
         -1.54992461e-01],
       [ -2.37359917e-01,  -1.22581925e+00,  -1.25728307e+00,
          3.51367436e-01],
       [  1.20118423e+00,  -6.39024895e-01,  -2.11236154e-01,
          5.69988767e-02],
       [  2.72317024e-01,   1.10257373e+00,  -6.01809425e-01,
          2.17288178e+00],
       [  4.40767214e-01,   1.10029590e+00,  -1.11461410e+00,
         -4.26112213e-01]])

In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.081784,0.203757,-0.71931,0.333615
std,1.051167,0.947875,0.623585,0.936582
min,-1.836835,-1.225819,-1.333353,-0.426112
25%,-0.109941,-0.388817,-1.221616,-0.115858
50%,0.356542,0.442259,-0.858212,0.029272
75%,0.598163,0.9559,-0.308879,0.277775
max,1.201184,1.102574,0.202438,2.172882


In [15]:
# Transpose: column names become the index and the dates become columns.
df.T

Unnamed: 0,2017-11-22 00:00:00,2017-11-23 00:00:00,2017-11-24 00:00:00,2017-11-25 00:00:00,2017-11-26 00:00:00,2017-11-27 00:00:00
A,0.650628,-1.836835,-0.23736,1.201184,0.272317,0.440767
B,0.522711,0.361807,-1.225819,-0.639025,1.102574,1.100296
C,0.202438,-1.333353,-1.257283,-0.211236,-0.601809,-1.114614
D,0.001545,-0.154992,0.351367,0.056999,2.172882,-0.426112


In [16]:
# Sort along axis 1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2017-11-22,0.001545,0.202438,0.522711,0.650628
2017-11-23,-0.154992,-1.333353,0.361807,-1.836835
2017-11-24,0.351367,-1.257283,-1.225819,-0.23736
2017-11-25,0.056999,-0.211236,-0.639025,1.201184
2017-11-26,2.172882,-0.601809,1.102574,0.272317
2017-11-27,-0.426112,-1.114614,1.100296,0.440767


In [17]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2017-11-24,-0.23736,-1.225819,-1.257283,0.351367
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-23,-1.836835,0.361807,-1.333353,-0.154992
2017-11-22,0.650628,0.522711,0.202438,0.001545
2017-11-27,0.440767,1.100296,-1.114614,-0.426112
2017-11-26,0.272317,1.102574,-0.601809,2.172882


## Selection

In [18]:
# A bare slice with .loc selects every row, so the whole frame is shown.
df.loc[:]

Unnamed: 0,A,B,C,D
2017-11-22,0.650628,0.522711,0.202438,0.001545
2017-11-23,-1.836835,0.361807,-1.333353,-0.154992
2017-11-24,-0.23736,-1.225819,-1.257283,0.351367
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-26,0.272317,1.102574,-0.601809,2.172882
2017-11-27,0.440767,1.100296,-1.114614,-0.426112


### Getting

In [19]:
# Selecting a single column by name returns a Series named after the column.
df['A']

2017-11-22    0.650628
2017-11-23   -1.836835
2017-11-24   -0.237360
2017-11-25    1.201184
2017-11-26    0.272317
2017-11-27    0.440767
Freq: D, Name: A, dtype: float64

In [20]:
# An integer slice inside [] selects rows by position: rows 0, 1 and 2.
df[0:3]

Unnamed: 0,A,B,C,D
2017-11-22,0.650628,0.522711,0.202438,0.001545
2017-11-23,-1.836835,0.361807,-1.333353,-0.154992
2017-11-24,-0.23736,-1.225819,-1.257283,0.351367


In [21]:
# Date strings inside [] slice rows by label, starting at 2017-11-25.
# The end label (20171128) lies past the last row, so the result simply
# stops at 2017-11-27.
df['20171125':'20171128']

Unnamed: 0,A,B,C,D
2017-11-25,1.201184,-0.639025,-0.211236,0.056999
2017-11-26,0.272317,1.102574,-0.601809,2.172882
2017-11-27,0.440767,1.100296,-1.114614,-0.426112


### Selection by Label

In [22]:
# Look up one row by its label; the result is a Series indexed by column name.
df.loc[dates[0]]

A    0.650628
B    0.522711
C    0.202438
D    0.001545
Name: 2017-11-22 00:00:00, dtype: float64

In [23]:
# All rows, restricted to columns A and B.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2017-11-22,0.650628,0.522711
2017-11-23,-1.836835,0.361807
2017-11-24,-0.23736,-1.225819
2017-11-25,1.201184,-0.639025
2017-11-26,0.272317,1.102574
2017-11-27,0.440767,1.100296


In [24]:
# Label slice on the rows combined with a list of column labels.
df.loc['20171125':'20171128', ['A', 'B']]

Unnamed: 0,A,B
2017-11-25,1.201184,-0.639025
2017-11-26,0.272317,1.102574
2017-11-27,0.440767,1.100296


In [25]:
# A single row label plus a column list returns a Series for that row.
df.loc['20171125', ['A', 'B']]

A    1.201184
B   -0.639025
Name: 2017-11-25 00:00:00, dtype: float64

In [26]:
# Row selected with a Timestamp label (dates[0]), restricted to columns A and B.
df.loc[dates[0], ['A', 'B']]

A    0.650628
B    0.522711
Name: 2017-11-22 00:00:00, dtype: float64

In [27]:
# Scalar access: one value addressed by row label and column label.
df.at[dates[0], 'A']

0.65062823886941035

### Selection by Position