In [4]:
import numpy as np 
import pandas as pd 


## Finding Missing Data

In [18]:
# Small toy dataset with deliberately missing entries (np.nan):
# column A has one gap, B has none, C has two and D has three.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[1, 2, 3, 4, 5],
    C=[1, 2, 3, np.nan, np.nan],
    D=[1, np.nan, np.nan, np.nan, 5],
)

# Each dict key becomes a column; rows get the default 0..4 integer index.
df = pd.DataFrame(data=data)


In [19]:
df  # Display the DataFrame built above to see where the NaN entries sit

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [17]:
# Boolean mask with the same shape as df:
# True where a value is missing (NaN), False where a value is present.
df.isna()

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,False,False,False,True
2,True,False,False,True
3,False,False,True,True
4,False,False,True,False


In [20]:
df  # isna() returned a separate boolean frame; df itself still holds the original values

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [21]:
# Number of missing (NaN) values per column, returned as a Series:
# summing the boolean mask counts every True as 1.
df.isna().sum()

A    1
B    0
C    2
D    3
dtype: int64

In [22]:
# One boolean per column: True if the column contains at least one missing (NaN) value.
df.isna().any()

A     True
B    False
C     True
D     True
dtype: bool

## Removing Missing Data

In [23]:
df  # Starting point for the removal examples: the original frame, NaN values included

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [24]:
# Drop every row that contains at least one missing (NaN) value.
# Only row 0 is complete in all four columns, so it is the only row left.
df.dropna()

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0


In [31]:
# thresh=3 keeps only the rows that have at least 3 non-NaN values;
# rows with fewer (here rows 2 and 3, with 2 valid entries each) are dropped.
df.dropna(thresh=3)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
4,5.0,5,,5.0


## Filling Missing Data

In [32]:
df  # The dropna() results were never assigned back, so df still has all 5 rows and its NaN values

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [36]:
# Replace every missing (NaN) value, in every column, with the constant 3.
df.fillna(3)


Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,3.0,3.0
4,5.0,5,3.0,5.0


In [34]:
df  # fillna(3) returned a new frame; df is unchanged and still contains its NaN values

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [38]:
# Fill missing values with a different replacement per column, looked up by column name:
# NaN in column A → 100, B → 200, C → 300, D → 400.
# Column B has no NaN, so its 200 is never used.
val = dict(A=100, B=200, C=300, D=400)
df.fillna(val)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,400.0
2,100.0,3,3.0,400.0
3,4.0,4,300.0,400.0
4,5.0,5,300.0,5.0


In [39]:
df  # The per-column fillna above was not assigned back either, so df still shows its NaN values

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [40]:
# Replace each column's missing (NaN) values with that column's own mean.
# numeric_only=True restricts the mean to numeric columns: a plain df.mean() raises
# on a non-numeric column in pandas 2.x, whereas here such a column is simply skipped
# and left unfilled. For this all-numeric frame the result is the same either way.
df.fillna(df.mean(numeric_only=True))


Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,2.0,3.0
4,5.0,5,2.0,5.0


In [42]:
# Column means that served as the fill values in the previous cell.
# NaN entries are skipped when averaging, e.g. C = (1 + 2 + 3) / 3 = 2.0.
df.mean()

A    3.0
B    3.0
C    2.0
D    3.0
dtype: float64