In [96]:
import numpy as np
import pandas as pd

In [97]:
data = {
    "Name ": ["Ravi", " Priya", "Amit", "Neha", "Devang"],
    "Age": [23, np.nan, np.nan, 22, 30],
    "Score": ["88", "76", "91", np.nan, "91"]
}
df = pd.DataFrame(data)
df


Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88.0
1,Priya,,76.0
2,Amit,,91.0
3,Neha,22.0,
4,Devang,30.0,91.0


## Finding Missing Values

In [98]:
df.isna()  # Returns True for missing value.

Unnamed: 0,Name,Age,Score
0,False,False,False
1,False,True,False
2,False,True,False
3,False,False,True
4,False,False,False


In [99]:
df.isna().sum()  # Count missing values per column

Name     0
Age      2
Score    1
dtype: int64

In [100]:
df.isna().any()  # Return True if any column has missing values

Name     False
Age       True
Score     True
dtype: bool

In [101]:
df

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88.0
1,Priya,,76.0
2,Amit,,91.0
3,Neha,22.0,
4,Devang,30.0,91.0


## Removing Missing Data

In [102]:
df.dropna()  # Drop rows with any missing values

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
4,Devang,30.0,91


In [103]:
df.dropna(axis=1)  # Drop columns with missing values

Unnamed: 0,Name
0,Ravi
1,Priya
2,Amit
3,Neha
4,Devang


In [104]:
df.dropna(thresh=3)  # Drop rows with less than 3 non-NaN values

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
4,Devang,30.0,91


In [105]:
df

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88.0
1,Priya,,76.0
2,Amit,,91.0
3,Neha,22.0,
4,Devang,30.0,91.0


## Filling Missing Data

In [106]:
df.fillna(0)  # Fill all NaNs with 0

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,0.0,76
2,Amit,0.0,91
3,Neha,22.0,0
4,Devang,30.0,91


In [107]:
values = {'Age':12, 'Score':15}
df.fillna(value=values)

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,12.0,76
2,Amit,12.0,91
3,Neha,22.0,15
4,Devang,30.0,91


In [108]:
df['Age'].fillna(df['Age'].mean())  # Fill with column mean

0    23.0
1    25.0
2    25.0
3    22.0
4    30.0
Name: Age, dtype: float64

In [109]:
df.ffill() # Forward fill

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,23.0,76
2,Amit,23.0,91
3,Neha,22.0,91
4,Devang,30.0,91


In [110]:
df.bfill() # Backward fill

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,22.0,76
2,Amit,22.0,91
3,Neha,22.0,91
4,Devang,30.0,91


In [111]:
df

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88.0
1,Priya,,76.0
2,Amit,,91.0
3,Neha,22.0,
4,Devang,30.0,91.0


## Changing Data Types

In [119]:
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Score'].fillna(0, inplace=True)
df['Score'] = df['Score'].astype(int)
mean_score = df[df['Score'] != 0 ]['Score'].mean()
df['Score'] = df['Score'].replace(0, int(mean_score))
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Score'].fillna(0, inplace=True)


Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,25.0,76
2,Amit,25.0,91
3,Neha,22.0,86
4,Devang,30.0,91


In [118]:
df

Unnamed: 0,Name,Age,Score
0,Ravi,23.0,88
1,Priya,25.0,76
2,Amit,25.0,91
3,Neha,22.0,86
4,Devang,30.0,91
