In [1]:
import numpy as np
import pandas as pd

from pandas import DataFrame, Series

In [3]:
# A four-element Series of strings whose third entry (label 2) is missing.
data = Series(
    data=['one', 'two', np.nan, 'four'],
)
data

0     one
1     two
2     NaN
3    four
dtype: object

In [4]:
# Element-wise missing-value mask: True where the entry is NaN, False otherwise.
# (`isna` and `isnull` are two names for the same method.)
data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [6]:
# Drop every NaN entry. The result is returned as a new Series (nothing is
# assigned back to `data` here), and the surviving entries keep their original
# index labels, which is why label 2 is absent from the output below.
data.dropna()

0     one
1     two
3    four
dtype: object

In [8]:
# 4x3 frame with progressively more missing data:
# row 0 is complete, rows 1 and 2 each miss one value, row 3 is entirely NaN.
dframe = DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, np.nan],
        [7, np.nan, 8],
        [np.nan, np.nan, np.nan],
    ]
)
dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,
2,7.0,,8.0
3,,,


In [11]:
# With its defaults (spelled out here: axis=0, how='any') dropna removes every
# row that contains at least one NaN. Only row 0 is fully populated, so it is
# the only row that survives.
clean_dframe = dframe.dropna(axis=0, how='any')
clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [12]:
# how='all' drops a row only when *every* value in it is NaN (row 3 here).
# Rows that are only partially missing are kept, NaN values included.
dframe.dropna(axis=0, how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,
2,7.0,,8.0


In [13]:
# axis='columns' (equivalent to axis=1) applies the NaN check to columns
# instead of rows. Every column of dframe holds at least one NaN (row 3 is
# entirely NaN), so all columns are dropped and only the index remains.
dframe.dropna(axis='columns')

0
1
2
3


In [15]:
# Short alias for NaN so the rows below stay readable.
npn = np.nan

# 4x4 frame whose rows hold 3, 3, 2 and 1 non-NaN values respectively.
dframe2 = DataFrame(
    data=[
        [1, 2, 3, npn],
        [2, npn, 5, 6],
        [npn, 7, npn, 9],
        [1, npn, npn, npn],
    ]
)
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [17]:
# thresh sets the minimum number of non-NaN values a row must contain to be
# kept. With thresh=2, row 3 (a single non-NaN value) is dropped while rows
# 0-2 (at least two non-NaN values each) are retained.
dframe2.dropna(axis=0, thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


In [19]:
# Raising the threshold to 3 keeps only the rows with at least three non-NaN
# values, i.e. rows 0 and 1.
dframe2.dropna(axis=0, thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [20]:
# Redisplay dframe before filling: its contents match the frame as first
# constructed, since none of the dropna calls above were assigned back to it.
dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,
2,7.0,,8.0
3,,,


In [23]:
# Replace every NaN in the frame with the scalar 1. The filled frame is the
# cell's result; it is not assigned back to dframe.
dframe.fillna(value=1)

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,1.0
2,7.0,1.0,8.0
3,1.0,1.0,1.0


In [25]:
# Redisplay dframe2 (still containing its NaN values) as the starting point
# for the fillna examples that follow.
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [24]:
# Same scalar fill applied to dframe2: every NaN becomes 1.
dframe2.fillna(value=1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,1.0
1,2.0,1.0,5.0,6.0
2,1.0,7.0,1.0,9.0
3,1.0,1.0,1.0,1.0


In [28]:
# A dict maps column label -> fill value, so each column gets its own
# replacement: NaNs in column 0 become 0, in column 1 become 1, and so on.
dframe2.fillna(
    value={
        0: 0,
        1: 1,
        2: 2,
        3: 3,
    }
)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [29]:
# Persist the per-column fill on dframe2.
# fillna(..., inplace=True) returns None, so a cell ending in that call displays
# nothing; it also mutates the frame behind the reader's back. Rebinding the
# name to the returned frame leaves dframe2 filled for any later cells, and the
# trailing expression displays the result.
dframe2 = dframe2.fillna(value={0: 0, 1: 1, 2: 2, 3: 3})
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0
