In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [3]:
# Four-element Series of strings whose third entry is missing (NaN).
data = pd.Series(data=['One', 'Two', np.nan, 'Four'])

data

0     One
1     Two
2     NaN
3    Four
dtype: object

In [4]:
# Locate missing values: the result is a boolean Series that is True
# wherever the corresponding entry of `data` is null (NaN).

data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [8]:
# Return a copy of the Series with every null entry removed;
# the surviving entries keep their original index labels.

data.dropna(axis=0)

0     One
1     Two
3    Four
dtype: object

In [9]:
# 4x3 frame: one complete row, two rows with a single NaN each,
# and a final row that is entirely NaN.
dframe = pd.DataFrame(
    [
        [1, 2, 3],
        [np.nan, 5, 6],
        [7, np.nan, 9],
        [np.nan, np.nan, np.nan],
    ]
)

dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [10]:
# Calling dropna without further options removes every row that contains
# at least one null value (how='any' is the default, spelled out here).

clean_dframe = dframe.dropna(how='any')

clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [12]:
# how='all' is stricter: a row is removed only when every value in it is null.

dframe.dropna(axis=0, how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [13]:
# Switch the axis to drop columns instead of rows.
# Every column of dframe holds at least one NaN, so no columns survive.

dframe.dropna(axis='columns')

0
1
2
3


In [14]:
# Drop only columns that are entirely null; dframe has none, so it is returned unchanged.
dframe.dropna(axis='columns', how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [17]:
# Short alias for NaN so the literal rows below stay readable.
npn = np.nan

# 4x4 frame whose rows hold 3, 3, 2 and 1 non-null values respectively.
dframe2 = pd.DataFrame(
    [
        [1, 2, 3, npn],
        [2, npn, 5, 6],
        [npn, 7, npn, 9],
        [1, npn, npn, npn],
    ]
)

dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [18]:
# thresh sets the minimum number of non-null values a row needs to be kept:
# here, rows with fewer than 2 actual data points are dropped.

dframe.dropna(axis=0, thresh=2)

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [20]:
# Keep only the rows of dframe2 that have at least 3 non-null values.
dframe2.dropna(axis=0, thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [23]:
# Substitute 0 for every null value; this returns a new frame and
# leaves dframe2 itself untouched.

dframe2.fillna(value=0)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0


In [26]:
# Use a different fill value per column by passing a dictionary
# that maps each column label to its replacement value.

dframe2.fillna(value={0: 0, 1: 1, 2: 2, 3: 3})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [28]:
# fillna returns a filled copy, so rebind the name to keep the result.
# From here on dframe2 contains no null values.

dframe2 = dframe2.fillna(value={0: 0, 1: 1, 2: 2, 3: 3})

dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [29]:
# Filling with 0 again changes nothing here: dframe2 no longer contains
# any null values, and fillna never overwrites existing data.

dframe2.fillna(value=0)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [31]:
# Alternative to reassigning the result: inplace=True applies the fill
# directly to dframe2 instead of producing a filled copy.
# dframe2 has no nulls left at this point, so the displayed frame is unchanged.

dframe2.fillna(value=0, inplace=True)

dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0
