In [1]:
#What Are Missing Values?
#...Missing values are data entries that are blank, unavailable, or null.
#...In Python (especially in pandas), missing values are usually shown as NaN (Not a Number); None is also treated as missing.
#...Handling missing data is important because most machine learning models cannot work with NaN values directly.

In [2]:
#Import Library and Create DataFrame

# ->We’ll use the Pandas library for handling missing data.

In [14]:
import pandas as pd


# Sample records for the walkthrough: Age, Marks and City each contain at
# least one gap (None), while Name is complete.
data = dict(
    Name=['Harsh', 'Anuj', 'soham', 'kaustubh', 'Ayush'],
    Age=[None, 22, 27, None, 19],
    Marks=[80, 90, None, 70, 85],
    City=['Delhi', 'Mumbai', None, 'Pune', 'Mumbai'],
)

# pandas turns the None entries in numeric columns into NaN.
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

Original DataFrame:
        Name   Age  Marks    City
0     Harsh   NaN   80.0   Delhi
1      Anuj  22.0   90.0  Mumbai
2     soham  27.0    NaN    None
3  kaustubh   NaN   70.0    Pune
4     Ayush  19.0   85.0  Mumbai


In [None]:
# Check for Missing Values

# First, count how many missing values each column contains.

In [15]:
# Count the null entries column by column; isna() performs the same check
# as isnull().
missing_counts = df.isna().sum()
print(missing_counts)

Name     0
Age      2
Marks    1
City     1
dtype: int64


In [2]:
# Handling Missing Values (Filling Techniques)



In [16]:
import warnings
warnings.filterwarnings('ignore')   # hides warnings (optional)

In [17]:
# Mean imputation: replace the missing ages with the average of the known ages.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['Age']. The in-place form acts on an
# intermediate Series, raises a FutureWarning in pandas 2.2, and no longer
# updates df once Copy-on-Write is the default (pandas 3.0).
df['Age'] = df['Age'].fillna(df['Age'].mean())
print(df)

       Name        Age  Marks    City
0     Harsh  22.666667   80.0   Delhi
1      Anuj  22.000000   90.0  Mumbai
2     soham  27.000000    NaN    None
3  kaustubh  22.666667   70.0    Pune
4     Ayush  19.000000   85.0  Mumbai


In [18]:
# Median imputation: replace the missing marks with the middle value of the
# known marks.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['Marks']. The in-place form acts on an
# intermediate Series, raises a FutureWarning in pandas 2.2, and no longer
# updates df once Copy-on-Write is the default (pandas 3.0).
df['Marks'] = df['Marks'].fillna(df['Marks'].median())
print(df)

       Name        Age  Marks    City
0     Harsh  22.666667   80.0   Delhi
1      Anuj  22.000000   90.0  Mumbai
2     soham  27.000000   82.5    None
3  kaustubh  22.666667   70.0    Pune
4     Ayush  19.000000   85.0  Mumbai


In [19]:
# Mode imputation: City is text, so fill the gap with the most frequent city.
# mode() returns a Series (several values on a tie); [0] takes its first entry.
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['City']. The in-place form acts on an
# intermediate Series, raises a FutureWarning in pandas 2.2, and no longer
# updates df once Copy-on-Write is the default (pandas 3.0).
df['City'] = df['City'].fillna(df['City'].mode()[0])
print(df)

       Name        Age  Marks    City
0     Harsh  22.666667   80.0   Delhi
1      Anuj  22.000000   90.0  Mumbai
2     soham  27.000000   82.5  Mumbai
3  kaustubh  22.666667   70.0    Pune
4     Ayush  19.000000   85.0  Mumbai


In [20]:
# Second sample table for the forward/backward fill examples: Age and City
# each have two missing entries.
data = dict(
    Name=['Riya', 'Aman', 'Meena', 'Sohan', 'Neha'],
    Age=[20, None, 25, None, 22],
    City=['Delhi', 'Mumbai', None, None, 'Pune'],
)

ndf = pd.DataFrame(data)
print("Original DataFrame:\n", ndf)

Original DataFrame:
     Name   Age    City
0   Riya  20.0   Delhi
1   Aman   NaN  Mumbai
2  Meena  25.0    None
3  Sohan   NaN    None
4   Neha  22.0    Pune


In [21]:
# Forward fill: each missing age takes the last valid value above it
# (a gap in the very first row would stay NaN).
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated since pandas 2.1. The result is assigned back because an
# in-place call on ndf['Age'] acts on an intermediate Series and does not
# update ndf under Copy-on-Write (pandas 3.0).
ndf['Age'] = ndf['Age'].ffill()
print(ndf)

    Name   Age    City
0   Riya  20.0   Delhi
1   Aman  20.0  Mumbai
2  Meena  25.0    None
3  Sohan  25.0    None
4   Neha  22.0    Pune


In [22]:
# Backward fill: each missing city takes the next valid value below it
# (a gap in the very last row would stay missing).
# Series.bfill() replaces fillna(method='bfill'), whose `method` argument is
# deprecated since pandas 2.1. The result is assigned back because an
# in-place call on ndf['City'] acts on an intermediate Series and does not
# update ndf under Copy-on-Write (pandas 3.0).
ndf['City'] = ndf['City'].bfill()
print(ndf)

    Name   Age    City
0   Riya  20.0   Delhi
1   Aman  20.0  Mumbai
2  Meena  25.0    Pune
3  Sohan  25.0    Pune
4   Neha  22.0    Pune
