In [3]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [5]:
# Sample Series of strings; the third entry is deliberately missing (NaN)
data = Series(data=['one', 'two', np.nan, 'four'])
data

0     one
1     two
2     NaN
3    four
dtype: object

In [6]:
# boolean mask: True wherever the Series holds a missing value
data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [7]:
# drop null values from the Series; the surviving entries keep their
# original index labels (0, 1, 3)
data.dropna()

0     one
1     two
3    four
dtype: object

In [8]:
# Missing data in data frames: a 4x3 frame in which rows 1 and 2 each have
# one NaN and row 3 is NaN throughout
dframe = DataFrame([
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, 9],
    [np.nan, np.nan, np.nan],
])
dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [12]:
# Called with no extra arguments, dropna() removes every row that holds
# at least one NaN; how='any' just spells out that default
clean_dframe = dframe.dropna(how='any')
clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [13]:
# how='all' drops only the rows in which every value is NaN
dframe.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [14]:
# Columns can be dropped too: each column here contains at least one NaN,
# so none of them survive
dframe.dropna(axis='columns')

0
1
2
3


In [16]:
# Rows can also be filtered by how many real (non-NaN) values they hold,
# e.g. keeping only rows with at least 2 data points
npn = np.nan
dframe2 = DataFrame([
    [1, 2, 3, npn],
    [2, npn, 5, 6],
    [npn, 7, npn, 9],
    [1, npn, npn, npn],
])
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [18]:
# thresh is the minimum number of non-NaN values a row needs in order to be kept;
# with thresh=2 only row 3 (a single real value) is dropped
dframe2.dropna(thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


In [19]:
# dropna returned a new frame above; dframe2 itself still has all four rows
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [20]:
# fillna() swaps every missing entry for the supplied value (1 here)
dframe.fillna(value=1)

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,1.0,5.0,6.0
2,7.0,1.0,9.0
3,1.0,1.0,1.0


In [21]:
# next: fill different values for different columns
# (dframe2 shown once more for reference before filling)
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [22]:
# A dict maps column label -> fill value, so each column's NaNs get their own replacement
dframe2.fillna(value={0: 0, 1: 1, 2: 2, 3: 3})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [23]:
# fillna() without inplace returns a filled copy and leaves dframe2 untouched;
# modifying the data frame in place (no reassignment) needs the inplace argument
dframe2.fillna(0)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0


In [24]:
# dframe2 still contains its NaNs: the previous fillna(0) call did not modify it
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [27]:
# inplace=True makes fillna modify dframe2 itself, so no reassignment is needed
dframe2.fillna(0, inplace=True)
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0


Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0
