In [1]:
import pandas as pd
# Read the sample temperature readings into a DataFrame and display it.
csv_path = 'data/handling_missing_value.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,Date,City,Temp
0,1-Nov-20,Ahmedabad,34.0
1,3-Nov-20,Ahmedabad,
2,10-Nov-20,Anand,38.0
3,13-Nov-20,Anand,
4,15-Nov-20,Baroda,40.0
5,20-Nov-20,Baroda,
6,23-Nov-20,Delhi,
7,27-Nov-20,Delhi,40.0


# To handle missing values we can:
- Drop the missing values
- Fill the missing values with the median, mean, mode, etc.
- Interpolate the missing values

## Interpolation:

In [2]:
# Parse the day-month-year strings (e.g. '1-Nov-20') with an explicit format so the
# result does not depend on pandas' format inference, then index the frame by date.
# The resulting DatetimeIndex is what method='time' interpolation relies on.
# Reassigning (instead of inplace=True) keeps the step explicit and chain-friendly.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y')
df = df.set_index('Date')

In [3]:
# Linear interpolation: treats the rows as equally spaced and ignores the Date index.
# Only the numeric Temp column is interpolated; calling interpolate() on the whole
# frame would also touch the text column City, which newer pandas versions warn about
# (object-dtype interpolation is deprecated).
linear_interpolation = df.assign(Temp=df['Temp'].interpolate(method='linear'))
linear_interpolation

Unnamed: 0_level_0,City,Temp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01,Ahmedabad,34.0
2020-11-03,Ahmedabad,36.0
2020-11-10,Anand,38.0
2020-11-13,Anand,39.0
2020-11-15,Baroda,40.0
2020-11-20,Baroda,40.0
2020-11-23,Delhi,40.0
2020-11-27,Delhi,40.0


In [4]:
# Time interpolation: fills missing values by looking at the timestamp index, so a
# gap is weighted by how much time actually separates the neighbouring readings.
# Only the numeric Temp column is interpolated; the text column City is left as is
# (interpolating object-dtype columns is deprecated in newer pandas versions).
time_interpolation = df.assign(Temp=df['Temp'].interpolate(method='time'))
time_interpolation


Unnamed: 0_level_0,City,Temp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01,Ahmedabad,34.0
2020-11-03,Ahmedabad,34.888889
2020-11-10,Anand,38.0
2020-11-13,Anand,39.2
2020-11-15,Baroda,40.0
2020-11-20,Baroda,40.0
2020-11-23,Delhi,40.0
2020-11-27,Delhi,40.0


In [5]:
# Quadratic interpolation: fits a second-order curve through the known points using
# the numerical values of the index (pandas delegates this method to SciPy).
# Useful for time series whose trend is curved rather than a straight line.
# Only the numeric Temp column is interpolated; the text column City is left as is
# (interpolating object-dtype columns is deprecated in newer pandas versions).
quadratic_interpolation = df.assign(Temp=df['Temp'].interpolate(method='quadratic'))
quadratic_interpolation

Unnamed: 0_level_0,City,Temp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01,Ahmedabad,34.0
2020-11-03,Ahmedabad,34.902942
2020-11-10,Anand,38.0
2020-11-13,Anand,39.291119
2020-11-15,Baroda,40.0
2020-11-20,Baroda,40.886101
2020-11-23,Delhi,40.810149
2020-11-27,Delhi,40.0


In [6]:
# Polynomial (and spline) interpolation require the `order` (degree) parameter;
# order=3 fits a cubic through the known points.
# Only the numeric Temp column is interpolated; the text column City is left as is
# (interpolating object-dtype columns is deprecated in newer pandas versions).
poly_interpolation = df.assign(Temp=df['Temp'].interpolate(method='polynomial', order=3))
poly_interpolation

Unnamed: 0_level_0,City,Temp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01,Ahmedabad,34.0
2020-11-03,Ahmedabad,34.80181
2020-11-10,Anand,38.0
2020-11-13,Anand,39.275415
2020-11-15,Baroda,40.0
2020-11-20,Baroda,41.097536
2020-11-23,Delhi,41.078618
2020-11-27,Delhi,40.0


## Fillna method:

In [7]:
# Turn Date back into an ordinary column, then show the frame with every
# missing value replaced by 0.
df = df.reset_index()
df.fillna(value=0)

Unnamed: 0,Date,City,Temp
0,2020-11-01,Ahmedabad,34.0
1,2020-11-03,Ahmedabad,0.0
2,2020-11-10,Anand,38.0
3,2020-11-13,Anand,0.0
4,2020-11-15,Baroda,40.0
5,2020-11-20,Baroda,0.0
6,2020-11-23,Delhi,0.0
7,2020-11-27,Delhi,40.0


In [8]:
# Forward fill copies the last valid value down into the NaNs that follow it;
# backward fill pulls the next valid value up into the NaNs that precede it.
# DataFrame.ffill() / DataFrame.bfill() replace the deprecated fillna(method=...) form.
print(df.ffill())
print(df.bfill())

        Date        City   Temp
0 2020-11-01   Ahmedabad   34.0
1 2020-11-03   Ahmedabad   34.0
2 2020-11-10       Anand   38.0
3 2020-11-13       Anand   38.0
4 2020-11-15      Baroda   40.0
5 2020-11-20      Baroda   40.0
6 2020-11-23       Delhi   40.0
7 2020-11-27       Delhi   40.0
        Date        City   Temp
0 2020-11-01   Ahmedabad   34.0
1 2020-11-03   Ahmedabad   38.0
2 2020-11-10       Anand   38.0
3 2020-11-13       Anand   40.0
4 2020-11-15      Baroda   40.0
5 2020-11-20      Baroda   40.0
6 2020-11-23       Delhi   40.0
7 2020-11-27       Delhi   40.0


## Dropna method

In [9]:
# Remove every row that contains at least one NaN value
df.dropna(axis=0, how='any')


Unnamed: 0,Date,City,Temp
0,2020-11-01,Ahmedabad,34.0
2,2020-11-10,Anand,38.0
4,2020-11-15,Baroda,40.0
7,2020-11-27,Delhi,40.0


In [10]:
# Remove a row only when all of its values are NaN
df.dropna(axis=0, how='all')


Unnamed: 0,Date,City,Temp
0,2020-11-01,Ahmedabad,34.0
1,2020-11-03,Ahmedabad,
2,2020-11-10,Anand,38.0
3,2020-11-13,Anand,
4,2020-11-15,Baroda,40.0
5,2020-11-20,Baroda,
6,2020-11-23,Delhi,
7,2020-11-27,Delhi,40.0


In [11]:
# thresh=2 keeps only the rows that have at least 2 non-NaN values;
# with the three columns here (Date, City, Temp) a row is dropped once 2 or more values are missing
df.dropna(thresh=2)


Unnamed: 0,Date,City,Temp
0,2020-11-01,Ahmedabad,34.0
1,2020-11-03,Ahmedabad,
2,2020-11-10,Anand,38.0
3,2020-11-13,Anand,
4,2020-11-15,Baroda,40.0
5,2020-11-20,Baroda,
6,2020-11-23,Delhi,
7,2020-11-27,Delhi,40.0
