# Missing Data

Let's show a few convenient methods to deal with Missing Data in pandas:

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame({
    'A': [12, 1, 2, np.nan],
    'B': [12, 5, np.nan, np.nan],
    'C': [12, 1, 2, 3]
})

In [3]:
df

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [4]:
df.dropna(axis='rows')

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1


In [5]:
df.shape

(4, 3)

In [6]:
len(df)

4

In [7]:
df.shape[0]/2

2.0

In [8]:
df.dropna(axis='columns', thresh=df.shape[0]//2)

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [9]:
df.dropna(axis=1)

Unnamed: 0,C
0,12
1,1
2,2
3,3


In [10]:
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,,2


In [11]:
df

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [12]:
df.fillna(method='ffill')

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,5.0,2
3,2.0,5.0,3


In [13]:
df

Unnamed: 0,A,B,C
0,12.0,12.0,12
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [14]:
df['A'].fillna(value=df['A'].mode())

0    12.0
1     1.0
2     2.0
3     NaN
Name: A, dtype: float64