# Handling Missing Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.DataFrame({
    "Name": ["Amit", "Priya", "Ravi", "Neha"],
    "Age": [23, 21, 25, 22],
    "Marks": [85, 90, 88, 92]
})
df

Unnamed: 0,Name,Age,Marks
0,Amit,23,85
1,Priya,21,90
2,Ravi,25,88
3,Neha,22,92


In [3]:
df.isnull()

Unnamed: 0,Name,Age,Marks
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [4]:
df = pd.DataFrame({
    "Name": ["Amit", None, "Ravi", "Neha"],
    "Age": [23, 21, None, 22],
    "Marks": [85, None, 88, 92]
})
df

Unnamed: 0,Name,Age,Marks
0,Amit,23.0,85.0
1,,21.0,
2,Ravi,,88.0
3,Neha,22.0,92.0


In [5]:
df.isnull()

Unnamed: 0,Name,Age,Marks
0,False,False,False
1,True,False,True
2,False,True,False
3,False,False,False


In [6]:
df.isnull().sum()

Name     1
Age      1
Marks    1
dtype: int64

In [7]:
data = {"A": [1, np.nan, 3], "B": [4, 5, np.nan]}
df2 = pd.DataFrame(data)
df2

Unnamed: 0,A,B
0,1.0,4.0
1,,5.0
2,3.0,


In [8]:
print(df2.isnull())   # check NaN value missing 
print(df2.notnull())

       A      B
0  False  False
1   True  False
2  False   True
       A      B
0   True   True
1  False   True
2   True  False


In [9]:
# removing and adding values
df.dropna(inplace=True)

In [10]:
df

Unnamed: 0,Name,Age,Marks
0,Amit,23.0,85.0
3,Neha,22.0,92.0


In [11]:
df = pd.DataFrame({
    "Name": ["Amit", None, "Ravi", "Neha"],
    "Age": [23, 21, None, 22],
    "Marks": [85, None, 88, 92]
})
df

Unnamed: 0,Name,Age,Marks
0,Amit,23.0,85.0
1,,21.0,
2,Ravi,,88.0
3,Neha,22.0,92.0


In [13]:
df.fillna(0, inplace=True)
df

Unnamed: 0,Name,Age,Marks
0,Amit,23.0,85.0
1,0,21.0,0.0
2,Ravi,0.0,88.0
3,Neha,22.0,92.0


In [14]:
df["Age"].fillna(df["Age"].mean(), inplace=True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].mean(), inplace=True)


Unnamed: 0,Name,Age,Marks
0,Amit,23.0,85.0
1,0,21.0,0.0
2,Ravi,0.0,88.0
3,Neha,22.0,92.0
