# Pandas - Dealing with Missing Data

In [1]:
import numpy as np
import pandas as pd

## Create a dataframe from a dictionary

In [2]:
# Sample data: column A has one missing value, B has two, C has none.
d = dict(
    A=[1, 2, np.nan],
    B=[5, np.nan, np.nan],
    C=[1, 2, 3],
)
df = pd.DataFrame(data=d)

In [3]:
# display the whole frame to see where the NaN values are
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


## Dropping missing values

In [4]:
# drop every row that contains at least one NaN (returns a new frame; df is unchanged)
df.dropna(axis=0)


Unnamed: 0,A,B,C
0,1.0,5.0,1


In [6]:
# drop every column that contains at least one NaN
df.dropna(axis='columns')

Unnamed: 0,C
0,1
1,2
2,3


In [7]:
# keep only the rows that have at least 2 non-NaN values
df.dropna(axis=0, thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


## Filling missing values

In [8]:
# replace every NaN in the frame with a placeholder string
df.fillna('FILL VALUE')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,FILL VALUE,2
2,FILL VALUE,FILL VALUE,3


In [9]:
# replace the NaN entries of column A with that column's mean
# (mean() skips NaN by default, so it is computed from the present values only)
df['A'].fillna(df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

In [10]:
# replace the NaN entries of column A with a constant 0
df['A'].fillna(0)

0    1.0
1    2.0
2    0.0
Name: A, dtype: float64