## Checking and Dropping Missing Values

In [26]:
import numpy as np
import pandas as pd
# Sample Series containing two missing entries (positions 4 and 6);
# it is reused by the dropna / fillna examples below.
x = pd.Series([1, 2, 3, 4, np.nan, 2, np.nan, 4, 3])
x

0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    2.0
6    NaN
7    4.0
8    3.0
dtype: float64

In [27]:
# Drop the missing entries. The surviving values keep their original index
# labels, so the result's index has gaps at 4 and 6.
x.dropna()

0    1.0
1    2.0
2    3.0
3    4.0
5    2.0
7    4.0
8    3.0
dtype: float64

In [28]:
# Same result via boolean indexing: notnull() builds a True/False mask and
# only the entries where the mask is True are selected.
x[x.notnull()]

0    1.0
1    2.0
2    3.0
3    4.0
5    2.0
7    4.0
8    3.0
dtype: float64

In [29]:
# Sample frame with a different missing-value pattern in every row;
# row 3 and column 3 consist entirely of NaN.
y = pd.DataFrame(
    [
        [1, 2, 3, np.nan],
        [4, 5, np.nan, np.nan],
        [6, np.nan, 8, np.nan],
        [np.nan, np.nan, np.nan, np.nan],
        [1, np.nan, np.nan, np.nan],
    ]
)
y

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,8.0,
3,,,,
4,1.0,,,


In [30]:
# With no arguments, dropna() removes every row that contains at least one NaN.
# Column 3 is NaN in every row of y, so nothing survives and the result is empty.
y.dropna()

Unnamed: 0,0,1,2,3


In [41]:
# thresh sets the minimum number of non-NaN values a row must have to be kept.
# With thresh=1 only the all-NaN row (index 3) is dropped.
y.dropna(thresh=1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,8.0,
4,1.0,,,


In [42]:
# how='all' removes only the rows in which every column is NaN
# (here: row 3); rows with at least one value are kept.
y.dropna(how='all')

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,8.0,
4,1.0,,,


In [45]:
# axis=1 applies the same rule to columns instead of rows: only the
# columns whose values are all NaN (here: column 3) are removed.
y.dropna(how='all', axis=1)

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,
2,6.0,,8.0
3,,,
4,1.0,,


## Filling Missing Values

In [33]:
# Replace every missing entry with a constant (0).
x.fillna(0)

0    1.0
1    2.0
2    3.0
3    4.0
4    0.0
5    2.0
6    0.0
7    4.0
8    3.0
dtype: float64

In [34]:
# Redisplay y: the dropna() calls above returned new frames, so y still
# contains all of its NaN values.
y

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,8.0,
3,,,,
4,1.0,,,


In [35]:
# A scalar fill value is applied to every NaN in the frame, in all columns.
y.fillna(99)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,99.0
1,4.0,5.0,99.0,99.0
2,6.0,99.0,8.0,99.0
3,99.0,99.0,99.0,99.0
4,1.0,99.0,99.0,99.0


In [36]:
# A dict maps column label -> fill value, so each column gets its own
# replacement (column 0 -> 77, 1 -> 88, 2 -> 99, 3 -> 100).
y.fillna({0:77,1:88,2:99,3:100})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,100.0
1,4.0,5.0,99.0,100.0
2,6.0,88.0,8.0,100.0
3,77.0,88.0,99.0,100.0
4,1.0,88.0,99.0,100.0


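- Missing values can also be filled using values from the data set itself:
- pad (also called ffill): propagate the last valid value forward into the gaps that follow it
- bfill: propagate the next valid value backward into the gaps that precede it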

In [46]:
# Forward-fill down each column: every NaN takes the last valid value above it.
# Column 3 has no valid value at all, so it stays NaN.
# ffill() is used because the `method=` argument of fillna() is deprecated
# in recent pandas releases; the result is the same as fillna(method='pad').
y.ffill()

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,3.0,
2,6.0,5.0,8.0,
3,6.0,5.0,8.0,
4,1.0,5.0,8.0,


In [20]:
# Forward-fill along each row (axis=1): every NaN takes the last valid value
# to its left. Row 3 has no valid value at all, so it stays NaN.
# ffill() is used because the `method=` argument of fillna() is deprecated
# in recent pandas releases; the result is the same as fillna(method='pad', axis=1).
y.ffill(axis=1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,4.0,5.0,5.0,5.0
2,6.0,6.0,8.0,8.0
3,,,,
4,1.0,1.0,1.0,1.0


In [38]:
# Redisplay y: the fill calls above returned new frames, so y still
# contains all of its NaN values.
y

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,8.0,
3,,,,
4,1.0,,,


In [39]:
# y.mean() yields one mean per column (computed from the non-missing values),
# and fillna() uses each column's mean for that column's gaps.
# Column 3 has no values, so its mean is NaN and the column stays unfilled.
y.fillna(y.mean())

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,4.0,5.0,5.5,
2,6.0,3.5,8.0,
3,3.0,3.5,5.5,
4,1.0,3.5,5.5,


In [40]:
# Fill the gaps with the median of the non-missing values (3.0 for this Series).
x.fillna(x.median())

0    1.0
1    2.0
2    3.0
3    4.0
4    3.0
5    2.0
6    3.0
7    4.0
8    3.0
dtype: float64