In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt

In [2]:
# Three toy score columns; each one has exactly one missing entry (np.nan).
data = dict(
    First=[100, 90, np.nan, 95],
    Second=[30, 45, 56, np.nan],
    Third=[np.nan, 40, 80, 98],
)

In [4]:
# Build the demo frame from the column dict and display it.
df = pd.DataFrame(data=data)
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [5]:
# Preview the top of the frame; the default of five rows covers all four rows here.
df.head(n=5)

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [6]:
# Dimensions of the frame as (number of rows, number of columns).
df.shape

(4, 3)

In [7]:
# Element-wise mask: True wherever a cell is missing.
df.isna()

Unnamed: 0,First,Second,Third
0,False,False,True
1,False,False,False
2,True,False,False
3,False,True,False


In [8]:
# Element-wise mask: True wherever a cell holds a value (the complement of isna()).
df.notna()

Unnamed: 0,First,Second,Third
0,True,True,False
1,True,True,True
2,False,True,True
3,True,False,True


In [10]:
# Same check restricted to one column; the result is a boolean Series.
df['First'].isna()

0    False
1    False
2     True
3    False
Name: First, dtype: bool

In [11]:
# Boolean Series marking the rows where 'First' has a value.
df['First'].notna()

0     True
1     True
2    False
3     True
Name: First, dtype: bool

In [12]:
# Per column: does it contain at least one missing value?
df.isna().any()

First     True
Second    True
Third     True
dtype: bool

In [14]:
# any() on a boolean Series with no True element gives False.
pd.Series([False, False]).any()

False

In [15]:
# A single True element is enough for any() to give True.
pd.Series([True, False]).any()

True

## Filtering Data with missing values

In [16]:
# Redisplay the frame as the starting point for the filtering examples.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [18]:
# Keep only the rows where 'First' is missing (boolean-mask selection).
df[df['First'].isna()]

Unnamed: 0,First,Second,Third
2,,56.0,80.0


In [19]:
# Invert the mask with ~ to keep the rows where 'First' is present.
df[~df['First'].isna()]

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
3,95.0,,98.0


In [20]:
# Same selection written with notna() instead of negating isna().
df[df['First'].notna()]

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
3,95.0,,98.0


## Drop rows and columns containing `NaN`

In [37]:
# Drop every row that has at least one NaN; returns a new frame, df is not modified.
df.dropna(axis='index', how='any')

Unnamed: 0,First,Second,Third
1,90.0,45.0,40.0


In [38]:
# Add a column that is NaN in every row, so there is something for a how='all' drop to remove.
df['missing'] = np.nan

In [39]:
# Confirm the all-NaN 'missing' column is now part of the frame.
df

Unnamed: 0,First,Second,Third,missing
0,100.0,30.0,,
1,90.0,45.0,40.0,
2,,56.0,80.0,
3,95.0,,98.0,


In [43]:
# Remove columns that are NaN in every row. Reassigning the result, rather than
# using inplace=True, keeps the step explicit and avoids mutating a frame that
# earlier cells have already displayed.
df = df.dropna(axis='columns', how='all')

In [44]:
# The all-NaN 'missing' column is gone; the partially filled columns remain.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [55]:
# Fraction of missing values per column: NaN count divided by the number of rows.
df.isna().sum() / len(df)

First     0.25
Second    0.25
Third     0.25
dtype: float64

In [56]:
# Drop a column by label; this returns a new frame and leaves df as it was.
df.drop(columns='First')

Unnamed: 0,Second,Third
0,30.0,
1,45.0,40.0
2,56.0,80.0
3,,98.0


In [57]:
# df still has its 'First' column: the drop() call returned a new frame.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [58]:
# Replace every NaN in the frame with the sentinel value -1.
df.fillna(value=-1)

Unnamed: 0,First,Second,Third
0,100.0,30.0,-1.0
1,90.0,45.0,40.0
2,-1.0,56.0,80.0
3,95.0,-1.0,98.0


In [59]:
# Fill a single column with a text marker; mixing strings with numbers
# makes the resulting Series object dtype.
df['First'].fillna(value='First Missing')

0            100.0
1             90.0
2    First Missing
3             95.0
Name: First, dtype: object

## Fill with the previous/next value

In [60]:
# Reference view of the gaps before the directional fill examples.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [61]:
# Forward fill: each NaN takes the last valid value above it in the same column.
# A NaN in the first row has nothing above it and therefore stays NaN.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated since pandas 2.1.
df.ffill()

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,90.0,56.0,80.0
3,95.0,56.0,98.0


In [62]:
# Backward fill: each NaN takes the next valid value below it in the same column.
# A NaN in the last row has nothing below it and therefore stays NaN.
# DataFrame.bfill() replaces fillna(method='bfill'), whose `method` keyword is
# deprecated since pandas 2.1.
df.bfill()

Unnamed: 0,First,Second,Third
0,100.0,30.0,40.0
1,90.0,45.0,40.0
2,95.0,56.0,80.0
3,95.0,,98.0


In [63]:
# The fill calls returned new frames, so df still has its NaNs.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [65]:
# Forward fill first, then backward fill whatever is still missing (leading NaNs
# that had no earlier value to copy). Uses ffill()/bfill() because the `method`
# keyword of fillna() is deprecated since pandas 2.1.
df.ffill().bfill()

Unnamed: 0,First,Second,Third
0,100.0,30.0,40.0
1,90.0,45.0,40.0
2,90.0,56.0,80.0
3,95.0,56.0,98.0


In [66]:
# Work on an independent (deep) copy so the per-column fills do not touch df.
df_copy = df.copy(deep=True)
df_copy

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [67]:
# Apply a different fill strategy to each column and assign the result back.
# Calling fillna(..., inplace=True) on a column selection is a chained in-place
# update: it emits a FutureWarning on recent pandas and does not modify the
# parent frame under Copy-on-Write, so the columns are reassigned instead.
df_copy['First'] = df_copy['First'].fillna(-1)
# Backward fill; bfill() replaces the deprecated fillna(method='bfill').
# A trailing NaN has no later value to copy and remains NaN.
df_copy['Second'] = df_copy['Second'].bfill()
# Filling a numeric column with a string yields an object-dtype column.
df_copy['Third'] = df_copy['Third'].fillna('Third')

In [68]:
# Inspect the result of the per-column fills on the copy.
df_copy

Unnamed: 0,First,Second,Third
0,100.0,30.0,Third
1,90.0,45.0,40.0
2,-1.0,56.0,80.0
3,95.0,,98.0


In [70]:
# The original frame still has its NaNs; only df_copy was filled.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [71]:
# Linear interpolation down each column: a gap between two known values gets the
# value on the straight line between them. Returns a new frame.
df.interpolate(method='linear', axis='index')

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,92.5,56.0,80.0
3,95.0,56.0,98.0


In [75]:
# df itself is unchanged after the interpolate() call above.
df

Unnamed: 0,First,Second,Third
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [82]:
# Fill the gap in 'First' with 100, then take the most frequent value of the result.
df['First'].fillna(value=100).mode()

0    100.0
dtype: float64

In [81]:
# Column-wise mode. When several values tie for most frequent, all of them are
# returned, one per row.
df.mode()

Unnamed: 0,First,Second,Third
0,90.0,30.0,40.0
1,95.0,45.0,80.0
2,100.0,56.0,98.0
