In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', None, None],
    'Age': [25, None, 35, None],
    'Salary': [50000, 60000, None, None],
    'JoinDate': [pd.Timestamp('2024-01-10'), pd.NaT, pd.Timestamp('2024-03-15'), None]
})
df

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
1,Bob,,60000.0,NaT
2,,35.0,,2024-03-15
3,,,,NaT


In [3]:
df.isnull()

Unnamed: 0,Name,Age,Salary,JoinDate
0,False,False,False,False
1,False,True,False,True
2,True,False,True,False
3,True,True,True,True


In [4]:
# Count total missing values:
df.isnull().sum() # Column-wise count
df.isnull().sum().sum() # Total missing values

np.int64(8)

In [5]:
 # Fill with constant value:
df.fillna(0)

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10 00:00:00
1,Bob,0.0,60000.0,0
2,0,35.0,0.0,2024-03-15 00:00:00
3,0,0.0,0.0,0


In [6]:
# Fill with column-specific values:
df.fillna({"Age":df["Age"].mean(), "Salary":0})

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
1,Bob,30.0,60000.0,NaT
2,,35.0,0.0,2024-03-15
3,,30.0,0.0,NaT


In [7]:
# Forward fill (use previous row value):
df.fillna(method='ffill')

  df.fillna(method='ffill')


Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
1,Bob,25.0,60000.0,2024-01-10
2,Bob,35.0,60000.0,2024-03-15
3,Bob,35.0,60000.0,2024-03-15


In [8]:
# Backward fill (use next row value):
df.fillna(method='bfill')

  df.fillna(method='bfill')


Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
1,Bob,35.0,60000.0,2024-03-15
2,,35.0,,2024-03-15
3,,,,NaT


In [9]:
# Drop rows with any missing values:
df.dropna()

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10


In [10]:
# Drop rows only if all values are missing:
df.dropna(how='all')

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
1,Bob,,60000.0,NaT
2,,35.0,,2024-03-15


In [14]:
# Drop columns with missing values:
df.dropna(axis=1)

0
1
2
3


In [15]:
# Drop rows where specific column has missing value:
df.dropna(subset=['Age'])

Unnamed: 0,Name,Age,Salary,JoinDate
0,Alice,25.0,50000.0,2024-01-10
2,,35.0,,2024-03-15
