This notebook demonstrates several techniques for handling missing values in a dataset using pandas. It covers identifying missing data (isnull()), replacing missing values with a default (fillna()), propagating neighbouring values with forward fill (pad) and backward fill (bfill), estimating missing values with linear interpolation (interpolate()), and dropping rows that contain missing values (dropna()).

In [None]:
import numpy as np
import pandas as pd

# Sample data: a dictionary mapping each column name to its list of values.
# np.nan marks the missing entries (one per column).
# The dictionary is deliberately not named `dict`, which would shadow the
# Python built-in for the rest of the kernel session.
marks_data = {
    'Marks': [100, 90, np.nan, 95],
    'Rank': [1, 2, 4, np.nan],
    'Avg Marks': [np.nan, 40, 80, 98],
}
df = pd.DataFrame(marks_data)
df

Unnamed: 0,Marks,Rank,Avg Marks
0,100.0,1.0,
1,90.0,2.0,40.0
2,,4.0,80.0
3,95.0,,98.0


In [None]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()

Unnamed: 0,Marks,Rank,Avg Marks
0,False,False,True
1,False,False,False
2,True,False,False
3,False,True,False


In [None]:
# Replace every missing value with the constant 0 (returns a new frame; df is unchanged).
df.fillna(value=0)

Unnamed: 0,Marks,Rank,Avg Marks
0,100.0,1.0,0.0
1,90.0,2.0,40.0
2,0.0,4.0,80.0
3,95.0,0.0,98.0


In [None]:
# Backward fill: each missing value is replaced with the next valid value
# below it in the same column. A missing value with no valid value after it
# (the last 'Rank' entry) stays NaN.
# df.bfill() is used instead of fillna(method='bfill'), whose `method`
# argument is deprecated and emits a FutureWarning.
df.bfill()

  df.fillna(method='bfill')


Unnamed: 0,Marks,Rank,Avg Marks
0,100.0,1.0,40.0
1,90.0,2.0,40.0
2,95.0,4.0,80.0
3,95.0,,98.0


In [None]:
# Forward fill ("pad"): each missing value is replaced with the last valid
# value above it in the same column. A missing value with no valid value
# before it (the first 'Avg Marks' entry) stays NaN.
# df.ffill() is used instead of fillna(method='pad'), whose `method`
# argument is deprecated and emits a FutureWarning.
df.ffill()

  df.fillna(method='pad')


Unnamed: 0,Marks,Rank,Avg Marks
0,100.0,1.0,
1,90.0,2.0,40.0
2,90.0,4.0,80.0
3,95.0,4.0,98.0


In [None]:
# Linear interpolation, filling in the forward direction: a gap between two
# valid values is filled on the straight line joining them (Marks: 90 and 95
# give 92.5). A trailing gap takes the last valid value (Rank: 4.0), while a
# leading gap has nothing before it and stays NaN (first 'Avg Marks' entry).
df.interpolate(
    method="linear",
    limit_direction="forward",
)


Unnamed: 0,Marks,Rank,Avg Marks
0,100.0,1.0,
1,90.0,2.0,40.0
2,92.5,4.0,80.0
3,95.0,4.0,98.0


In [None]:
# Drop every row that contains at least one missing value; only fully
# populated rows remain (returns a new frame; df is unchanged).
df.dropna(axis=0, how='any')

Unnamed: 0,Marks,Rank,Avg Marks
1,90.0,2.0,40.0
