# 10 Minutes to pandas
## Object Creation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build a Series from a list of values; pandas assigns a default integer index.
# The np.nan entry is a missing value, and the resulting dtype is float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting 2017-11-22; used below as the row index of df.
dates = pd.date_range(start='20171122', periods=6, freq='D')

In [4]:
# Show the six-day DatetimeIndex created in the previous cell.
dates

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# A 6x4 frame of standard-normal draws: one row per entry of `dates`,
# columns labelled A-D.
# NOTE: the random generator is not seeded, so the values change on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


In [6]:
# Build a frame from a dict of column-like objects. Scalars ('A', 'B', 'F')
# are broadcast to the length of the array-like columns (4 rows), and each
# column keeps the dtype of the object it came from.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20171122'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'train', 'test', 'train']),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2017-11-22,1.0,3,test,foo
1,1.0,2017-11-22,1.0,3,train,foo
2,1.0,2017-11-22,1.0,3,test,foo
3,1.0,2017-11-22,1.0,3,train,foo


In [7]:
# Unlike df, the columns of df2 have different dtypes; list them per column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
# In an interactive session, typing `df2.` and pressing <TAB> lists the
# attributes available on df2 (tab completion). Nothing is executed here.
# df2.<TAB>

## Viewing Data

In [9]:
# head() with no argument shows the first five rows.
df.head()

Unnamed: 0,A,B,C,D
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605


In [10]:
# tail(3) shows the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


In [11]:
# Row labels: the DatetimeIndex passed as index= when df was created.
df.index

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# The underlying data as a 2-D NumPy array; row and column labels are not included.
# NOTE(review): pandas 0.24+ documents DataFrame.to_numpy() as the preferred
# accessor. Consider switching if the minimum supported pandas version allows it.
df.values

array([[-1.88214326, -1.02191323, -0.78332785,  1.23650166],
       [ 1.57974313, -2.11179367,  0.31844992,  0.84401135],
       [-0.35740295, -0.8425704 , -0.72115188, -0.42904937],
       [-2.01102026,  0.49349753,  0.07270639,  1.04232504],
       [ 1.86495006, -0.4007882 , -0.87748713, -0.38605029],
       [-0.57540804, -0.84800939,  1.27643763, -0.17640824]])

In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.230214,-0.788596,-0.119062,0.355222
std,1.655363,0.849222,0.843158,0.766143
min,-2.01102,-2.111794,-0.877487,-0.429049
25%,-1.555459,-0.978437,-0.767784,-0.33364
50%,-0.466405,-0.84529,-0.324223,0.333802
75%,1.095457,-0.511234,0.257014,0.992747
max,1.86495,0.493498,1.276438,1.236502


In [15]:
# Transpose: the columns become rows and the date index becomes the column labels.
df.T

Unnamed: 0,2017-11-22 00:00:00,2017-11-23 00:00:00,2017-11-24 00:00:00,2017-11-25 00:00:00,2017-11-26 00:00:00,2017-11-27 00:00:00
A,-1.882143,1.579743,-0.357403,-2.01102,1.86495,-0.575408
B,-1.021913,-2.111794,-0.84257,0.493498,-0.400788,-0.848009
C,-0.783328,0.31845,-0.721152,0.072706,-0.877487,1.276438
D,1.236502,0.844011,-0.429049,1.042325,-0.38605,-0.176408


In [16]:
# Sort along axis 1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2017-11-22,1.236502,-0.783328,-1.021913,-1.882143
2017-11-23,0.844011,0.31845,-2.111794,1.579743
2017-11-24,-0.429049,-0.721152,-0.84257,-0.357403
2017-11-25,1.042325,0.072706,0.493498,-2.01102
2017-11-26,-0.38605,-0.877487,-0.400788,1.86495
2017-11-27,-0.176408,1.276438,-0.848009,-0.575408


In [17]:
# Sort the rows by the values in column 'B' (ascending by default).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-25,-2.01102,0.493498,0.072706,1.042325


## Selection

In [18]:
# A full slice with .loc selects every row (and every column).
df.loc[:]

Unnamed: 0,A,B,C,D
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


### Getting

In [19]:
# Selecting a single column by name returns it as a Series.
df['A']

2017-11-22   -1.882143
2017-11-23    1.579743
2017-11-24   -0.357403
2017-11-25   -2.011020
2017-11-26    1.864950
2017-11-27   -0.575408
Freq: D, Name: A, dtype: float64

In [20]:
# Slicing with [] operates on rows: positions 0, 1 and 2 (the end is exclusive).
df[0:3]

Unnamed: 0,A,B,C,D
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049


In [21]:
# Slicing [] with date strings selects rows by label. The start label is
# included; '20171128' lies past the last date in the index, so the slice
# simply runs to the final row (2017-11-27).
df['20171125':'20171128']

Unnamed: 0,A,B,C,D
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


### Selection by Label

In [22]:
# Select one row by its index label; the result is a Series keyed by column name.
df.loc[dates[0]]

A   -1.882143
B   -1.021913
C   -0.783328
D    1.236502
Name: 2017-11-22 00:00:00, dtype: float64

In [23]:
# All rows, restricted to columns 'A' and 'B' (selection by label on both axes).
df.loc[:,['A', 'B']]

Unnamed: 0,A,B
2017-11-22,-1.882143,-1.021913
2017-11-23,1.579743,-2.111794
2017-11-24,-0.357403,-0.84257
2017-11-25,-2.01102,0.493498
2017-11-26,1.86495,-0.400788
2017-11-27,-0.575408,-0.848009


In [24]:
# Label slice on the rows combined with a list of columns. The end date is
# beyond the index, so rows through 2017-11-27 are returned.
df.loc['20171125':'20171128',['A', 'B']]

Unnamed: 0,A,B
2017-11-25,-2.01102,0.493498
2017-11-26,1.86495,-0.400788
2017-11-27,-0.575408,-0.848009


In [25]:
# A single date label plus a column list reduces the result to a Series.
df.loc['20171125',['A', 'B']]

A   -2.011020
B    0.493498
Name: 2017-11-25 00:00:00, dtype: float64

In [26]:
# Same kind of selection, using a Timestamp taken from `dates` as the row
# label instead of a date string.
df.loc[dates[0],['A', 'B']]

A   -1.882143
B   -1.021913
Name: 2017-11-22 00:00:00, dtype: float64

In [27]:
# .at returns a single scalar for one row-label / column-label pair.
df.at[dates[0],'A']

-1.8821432622900076

### Selection by Position

In [28]:
# Select by integer position: the fourth row (position 3), returned as a Series.
df.iloc[3]

A   -2.011020
B    0.493498
C    0.072706
D    1.042325
Name: 2017-11-25 00:00:00, dtype: float64

In [29]:
# Positional slices: rows 3-4 and columns 0-1 (end positions are exclusive).
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2017-11-25,-2.01102,0.493498
2017-11-26,1.86495,-0.400788


In [30]:
# Lists of integer positions: rows 1, 2, 4 and columns 0, 2 (A and C).
df.iloc[[1, 2, 4],[0, 2]]

Unnamed: 0,A,C
2017-11-23,1.579743,0.31845
2017-11-24,-0.357403,-0.721152
2017-11-26,1.86495,-0.877487


In [31]:
# Rows at positions 1-2, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049


In [32]:
# All rows, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2017-11-22,-1.021913,-0.783328
2017-11-23,-2.111794,0.31845
2017-11-24,-0.84257,-0.721152
2017-11-25,0.493498,0.072706
2017-11-26,-0.400788,-0.877487
2017-11-27,-0.848009,1.276438


In [33]:
# A single scalar, addressed by row and column position.
df.iloc[1, 1]

-2.1117936733987057

In [34]:
# .iat returns the same scalar as the .iloc[1, 1] lookup in the previous cell.
df.iat[1, 1]

-2.1117936733987057

### Boolean Indexing

In [35]:
# Keep only the rows where column A is positive.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605


In [36]:
# Element-wise mask over the whole frame: entries that are not > 0 become NaN.
df[df > 0]

Unnamed: 0,A,B,C,D
2017-11-22,,,,1.236502
2017-11-23,1.579743,,0.31845,0.844011
2017-11-24,,,,
2017-11-25,,0.493498,0.072706,1.042325
2017-11-26,1.86495,,,
2017-11-27,,,1.276438,


In [37]:
# Work on a copy so that adding a column in the next cell leaves df untouched.
# Note: this rebinds df2, replacing the mixed-dtype frame built in Object Creation.
df2 = df.copy()

In [38]:
# Add a string column 'E' to the copy (one value per row), then display it.
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2017-11-22,-1.882143,-1.021913,-0.783328,1.236502,one
2017-11-23,1.579743,-2.111794,0.31845,0.844011,one
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049,two
2017-11-25,-2.01102,0.493498,0.072706,1.042325,three
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605,four
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408,three


In [39]:
# isin() builds a boolean mask; keep the rows whose 'E' value is 'two' or 'four'.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049,two
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605,four


### Setting

In [40]:
# Integer Series 1..6 indexed by six consecutive days starting 2017-12-01.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start='20171201', periods=6),
)
s1

2017-12-01    1
2017-12-02    2
2017-12-03    3
2017-12-04    4
2017-12-05    5
2017-12-06    6
Freq: D, dtype: int64

In [41]:
# Set a single value by label. This mutates df in place, so frames displayed
# by earlier cells no longer reflect its current contents.
df.at[dates[0], 'A'] = 0
df

Unnamed: 0,A,B,C,D
2017-11-22,0.0,-1.021913,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


In [42]:
# Set a single value by position (row 0, column 1, i.e. column B), again in place.
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D
2017-11-22,0.0,0.0,-0.783328,1.236502
2017-11-23,1.579743,-2.111794,0.31845,0.844011
2017-11-24,-0.357403,-0.84257,-0.721152,-0.429049
2017-11-25,-2.01102,0.493498,0.072706,1.042325
2017-11-26,1.86495,-0.400788,-0.877487,-0.38605
2017-11-27,-0.575408,-0.848009,1.276438,-0.176408


In [43]:
# Replace column 'D' wholesale with an integer array of 5s (one per row).
# Plain column assignment is used instead of `df.loc[:, 'D'] = ...`: since
# pandas 2.0 the `.loc[:, col]` form writes into the existing float64 column
# in place (D would display as 5.0), whereas `df['D'] = ...` swaps in the new
# array and takes its integer dtype on every pandas version.
df['D'] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D
2017-11-22,0.0,0.0,-0.783328,5
2017-11-23,1.579743,-2.111794,0.31845,5
2017-11-24,-0.357403,-0.84257,-0.721152,5
2017-11-25,-2.01102,0.493498,0.072706,5
2017-11-26,1.86495,-0.400788,-0.877487,5
2017-11-27,-0.575408,-0.848009,1.276438,5


In [44]:
# Rebind df2 to a fresh copy of df, then flip the sign of every positive entry
# via a boolean mask, so no value in the result is greater than zero.
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D
2017-11-22,0.0,0.0,-0.783328,-5
2017-11-23,-1.579743,-2.111794,-0.31845,-5
2017-11-24,-0.357403,-0.84257,-0.721152,-5
2017-11-25,-2.01102,-0.493498,-0.072706,-5
2017-11-26,-1.86495,-0.400788,-0.877487,-5
2017-11-27,-0.575408,-0.848009,-1.276438,-5


## Missing Data

In [45]:
# Reindex to the first four dates and append a new column label 'E'.
# 'E' has no existing data, so it is filled with NaN (missing) in every row.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1

Unnamed: 0,A,B,C,D,E
2017-11-22,0.0,0.0,-0.783328,5,
2017-11-23,1.579743,-2.111794,0.31845,5,
2017-11-24,-0.357403,-0.84257,-0.721152,5,
2017-11-25,-2.01102,0.493498,0.072706,5,


In [46]:
# Set 'E' to 1 for the rows from dates[0] through dates[1]. .loc label slices
# include both endpoints, so two rows are filled and the other two stay NaN.
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,E
2017-11-22,0.0,0.0,-0.783328,5,1.0
2017-11-23,1.579743,-2.111794,0.31845,5,1.0
2017-11-24,-0.357403,-0.84257,-0.721152,5,
2017-11-25,-2.01102,0.493498,0.072706,5,


In [47]:
# Drop every row that contains at least one missing value. The result is a
# new frame; df1 itself still has four rows, as the following cells show.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,E
2017-11-22,0.0,0.0,-0.783328,5,1.0
2017-11-23,1.579743,-2.111794,0.31845,5,1.0


In [48]:
# Replace missing values with 5 in the returned frame. df1 is not modified:
# the isna check in the next cell still reports its missing entries.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,E
2017-11-22,0.0,0.0,-0.783328,5,1.0
2017-11-23,1.579743,-2.111794,0.31845,5,1.0
2017-11-24,-0.357403,-0.84257,-0.721152,5,5.0
2017-11-25,-2.01102,0.493498,0.072706,5,5.0


In [49]:
# Boolean mask that is True wherever df1 holds a missing value.
pd.isna(df1)

Unnamed: 0,A,B,C,D,E
2017-11-22,False,False,False,False,False
2017-11-23,False,False,False,False,False
2017-11-24,False,False,False,False,True
2017-11-25,False,False,False,False,True
