In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [2]:
# Four-element string Series; position 2 is deliberately missing (NaN)
# so the null-handling calls that follow have something to act on.
data = pd.Series(data=['one', 'two', np.nan, 'four'])
data

0     one
1     two
2     NaN
3    four
dtype: object

In [3]:
# Boolean mask: True wherever an entry of `data` is missing.
pd.isnull(data)

0    False
1    False
2     True
3    False
dtype: bool

In [4]:
# Return a new Series without the missing entries; `data` itself is not modified.
data.dropna(axis=0)

0     one
1     two
3    four
dtype: object

In [5]:
# 4x3 frame with NaNs placed so each row differs in completeness:
# row 0 is complete, rows 1 and 2 each miss one value, row 3 is entirely missing.
df = pd.DataFrame(data=[
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, 9],
    [np.nan, np.nan, np.nan],
])
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [8]:
# Default dropna spelled out: work row-wise and discard any row holding at least one NaN.
clean_dframe = df.dropna(axis=0, how='any')
clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [9]:
# Drop only the rows in which every value is missing; partially filled rows are kept.
df.dropna(axis=0, how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [10]:
 # Column-wise drop: remove every column that contains at least one NaN.
 df.dropna(axis='columns')

0
1
2
3


In [12]:
# Short alias for NaN so the literal rows below stay readable.
npn = np.nan
# 4x4 frame whose rows hold 3, 3, 2 and 1 non-missing values respectively.
df2 = pd.DataFrame(data=[
    [1, 2, 3, npn],
    [2, npn, 5, 6],
    [npn, 7, npn, 9],
    [1, npn, npn, npn],
])
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [13]:
# Keep only the rows that have at least 2 non-missing values.
df2.dropna(axis=0, thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


In [14]:
# Stricter threshold: a row must have at least 3 non-missing values to be kept.
df2.dropna(axis=0, thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [15]:
# Re-display df2: the dropna calls above were not assigned back, so it still holds all four rows.
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [17]:
# Replace every missing value with the same scalar (here 1); returns a new frame.
df2.fillna(value=1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,1.0
1,2.0,1.0,5.0,6.0
2,1.0,7.0,1.0,9.0
3,1.0,1.0,1.0,1.0


In [18]:
# Re-display df2: fillna(1) above was not assigned back, so the NaNs are still present.
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [20]:
# Per-column fill values: a dict maps each column label to the value used for its NaNs.
# Here column k is filled with k (0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3).
df2.fillna(value={col: col for col in range(4)})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [21]:
# inplace=True modifies df2 itself instead of returning a filled copy,
# so the zeros persist in every later cell that uses df2.
df2.fillna(value=0, inplace=True)
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0


# Index Hierarchy

In [22]:
from numpy.random import randn

In [25]:
# Six random values under a two-level index: outer labels (1, 2), inner labels ('a', 'b', 'c').
# A fixed-seed local RandomState replaces the unseeded `randn` call so that
# Restart & Run All produces the same values every time, without touching
# NumPy's global random state.
ser = Series(
    np.random.RandomState(0).randn(6),
    index=[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']],
)
ser

1  a   -1.282766
   b   -0.474097
   c    1.106253
2  a   -1.237255
   b   -1.904958
   c    0.608825
dtype: float64

In [26]:
# Inspect the index built from the two parallel label lists passed to Series(index=...).
ser.index

MultiIndex(levels=[[1, 2], ['a', 'b', 'c']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [27]:
# Select by outer-level label 1 (label-based, not positional); the result is indexed by the inner level.
ser.loc[1]

a   -1.282766
b   -0.474097
c    1.106253
dtype: float64

In [28]:
# Same selection for outer-level label 2.
ser.loc[2]

a   -1.237255
b   -1.904958
c    0.608825
dtype: float64

In [29]:
# Cross-section on the inner level: every outer label, inner label 'a' only.
ser.loc[:, 'a']

1   -1.282766
2   -1.237255
dtype: float64

In [30]:
# unstack pivots one index level into columns; level=-1 (the default) picks the
# innermost level, so 'a'/'b'/'c' become the column labels and 1/2 remain the rows.
df = ser.unstack(level=-1)
df

Unnamed: 0,a,b,c
1,-1.282766,-0.474097,1.106253
2,-1.237255,-1.904958,0.608825


In [31]:
# 4x4 integer grid (0..15) with two-level labels on both axes:
# rows are (letter, number) pairs, columns are (city, temperature) pairs.
df2 = pd.DataFrame(
    data=np.arange(16).reshape((4, 4)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['NY', 'NY', 'LA', 'SF'], ['cold', 'hot', 'hot', 'cold']],
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,NY,NY,LA,SF
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,hot,cold
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [32]:
# Give each level of both axes a name so later calls can refer to levels by name
# rather than by position. Column levels first, then row levels.
df2.columns.names = ['Cities', 'Temp']
df2.index.names = ['INDEX_1', 'INDEX_2']
df2

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [33]:
# Exchange the two column levels so 'Temp' becomes the outer level and 'Cities' the inner one.
# Returns a new frame; the data and the row index are unchanged.
df2.swaplevel(i='Cities', j='Temp', axis='columns')

Unnamed: 0_level_0,Temp,cold,hot,hot,cold
Unnamed: 0_level_1,Cities,NY,NY,LA,SF
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [40]:
# Sort the rows by the second index level (position 1, named 'INDEX_2').
# `DataFrame.sortlevel` was deprecated in pandas 0.20 and removed in 1.0;
# `sort_index(level=...)` is the supported replacement and returns a new frame.
df2.sort_index(level=1)

  if __name__ == '__main__':


Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
b,1,8,9,10,11
a,2,4,5,6,7
b,2,12,13,14,15


In [41]:
# Row by row, add up the columns that share the same 'Temp' label (cold / hot).
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in 2.0; the supported
# form is a groupby on that level. The groupby runs on the transpose because
# `groupby(axis=1)` is itself deprecated, and the result is transposed back.
# sort=False keeps the group labels in their order of first appearance.
df2.T.groupby(level='Temp', sort=False).sum().T

Unnamed: 0_level_0,Temp,cold,hot
INDEX_1,INDEX_2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,3,3
a,2,11,11
b,1,19,19
b,2,27,27
