## Handling Missing Data 

In [51]:
import pandas as pd
import numpy as np

# Base DataFrame for the missing-data exercises: seven labelled rows with one
# NaN each in 'Name' (row2), 'Salary' (row2) and 'City' (row5).
data = {
    # Names carry no surrounding whitespace, so equality lookups such as
    # df['Name'] == 'Priyanka' match as expected.
    'Name': ['Alice', np.nan, 'Charlie', 'David', 'Eva', 'Mayra', 'Priyanka'],
    'Age': [25, 30, 35, 40, 22, 18, 19],
    'City': ['New York', 'London', 'Paris', 'New York', np.nan, 'Moscow', 'Berlin'],
    # The NaN makes pandas store this column as float64.
    'Salary': [50000, np.nan, 55000, 70000, 65000, 72000, 80000]
}
df = pd.DataFrame(data, index=['row1', 'row2', 'row3', 'row4', 'row5', 'row6', 'row7'])
print(df)

           Name  Age      City   Salary
row1      Alice   25  New York  50000.0
row2        NaN   30    London      NaN
row3    Charlie   35     Paris  55000.0
row4      David   40  New York  70000.0
row5        Eva   22       NaN  65000.0
row6      Mayra   18    Moscow  72000.0
row7   Priyanka   19    Berlin  80000.0


In [52]:
# 61. Check for any missing values: report whether the DataFrame contains at
# least one missing value (evaluates to True/False).
df.isna().to_numpy().any()

True

In [53]:
# 62. Count missing values in each column: the number of NaN values per column.
df.isna().sum()

Name      1
Age       0
City      1
Salary    1
dtype: int64

In [54]:
# 63. Sum all missing values: the total number of NaN values in the whole
# DataFrame (per-column counts first, then their sum).
missing_per_column = df.isna().sum()
missing_per_column.sum()

3

In [55]:
# 64. Drop rows with any missing values: use dropna() to remove every row that
# has at least one missing value. Returns a new frame; df itself is unchanged.
df.dropna(axis=0, how='any')

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [56]:
# 65. Drop columns with any missing values: remove every column that has at
# least one missing value. Returns a new frame; df itself is unchanged.
df.dropna(axis='columns', how='any')

Unnamed: 0,Age
row1,25
row2,30
row3,35
row4,40
row5,22
row6,18
row7,19


In [58]:
# 66. Fill missing values with a specific value: replace every NaN with 0.
# Note that string columns ('Name', 'City') also receive the integer 0.
df.fillna(value=0)

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,0,30,London,0.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,0,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [59]:
# 67. Fill missing values with the mean: replace NaN values in the 'Age' column
# with that column's mean (average). Returns a new Series; df is not modified.
age_mean = df['Age'].mean()
df['Age'].fillna(age_mean)

row1    25
row2    30
row3    35
row4    40
row5    22
row6    18
row7    19
Name: Age, dtype: int64

In [60]:
# 68. Fill missing values with the median: replace NaN values in the 'Salary'
# column with that column's median. Returns a new Series; df is not modified.
salary_median = df['Salary'].median()
df['Salary'].fillna(salary_median)

row1    50000.0
row2    67500.0
row3    55000.0
row4    70000.0
row5    65000.0
row6    72000.0
row7    80000.0
Name: Salary, dtype: float64

In [61]:
# 69. Fill missing values with the mode: replace NaN values in the 'City'
# column with that column's mode (the most frequently occurring value).
# mode() returns a Series, so its first entry is taken as the fill value.
city_modes = df['City'].mode()
df['City'].fillna(city_modes.iloc[0])

row1    New York
row2      London
row3       Paris
row4    New York
row5    New York
row6      Moscow
row7      Berlin
Name: City, dtype: object

In [67]:
# 70. Forward-fill missing values: fill each NaN with the previous valid value
# in the same column. DataFrame.ffill() is used instead of
# fillna(method='ffill'), which is deprecated and emits a FutureWarning.
df.ffill()

  df.fillna(method='ffill')


Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,Alice,30,London,50000.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,New York,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [68]:
# 71. Backward-fill missing values: fill each NaN with the next valid value in
# the same column. DataFrame.bfill() is used instead of
# fillna(method='bfill'), which is deprecated and emits a FutureWarning.
df.bfill()

  df.fillna(method='bfill')


Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,Charlie,30,London,55000.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,Moscow,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [69]:
# 72. Drop rows with all missing values: remove only the rows in which every
# value is NaN. Rows with some valid data are kept.
df.dropna(axis=0, how='all')

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,,30,London,
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [70]:
# 73. Check if a value is NaN: for each element of the 'Age' column, report
# whether it is missing (boolean Series).
df['Age'].isna()

row1    False
row2    False
row3    False
row4    False
row5    False
row6    False
row7    False
Name: Age, dtype: bool

In [71]:
# 74. Check if a value is not NaN: for each element of the 'Age' column, report
# whether it is valid, i.e. not missing (boolean Series).
df['Age'].notna()

row1    True
row2    True
row3    True
row4    True
row5    True
row6    True
row7    True
Name: Age, dtype: bool

In [72]:
# 75. Drop rows that have fewer than N valid values: remove rows with fewer
# than 3 valid (non-NaN) data points; thresh is the minimum count to keep a row.
min_valid_values = 3
df.dropna(axis=0, thresh=min_valid_values)

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [74]:
# 76. Interpolate missing values: fill the missing values of a numeric column
# ('Salary') using linear interpolation between neighbouring valid values.
salary = df['Salary']
salary.interpolate(method='linear')

row1    50000.0
row2    52500.0
row3    55000.0
row4    70000.0
row5    65000.0
row6    72000.0
row7    80000.0
Name: Salary, dtype: float64

In [76]:
# 77. Fill NaN in a single column with a placeholder: replace missing values in
# the 'City' column with 'Unknown'.
# The result is assigned back, so df itself is modified from this cell onward.
city_with_placeholder = df['City'].fillna('Unknown')
df['City'] = city_with_placeholder
df

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,,30,London,
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,Unknown,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [77]:
# 78. Select rows that do not have missing values in a specific column: keep
# every row except those whose 'Salary' is NaN.
has_salary = df['Salary'].notna()
df.loc[has_salary]

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,Unknown,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [79]:
# 79. Replace a specific value with NaN: wherever the 'City' column contains
# 'Unknown', replace it with NaN.
# The result is assigned back, so df itself is modified from this cell onward.
city_without_placeholder = df['City'].replace(to_replace='Unknown', value=np.nan)
df['City'] = city_without_placeholder
df

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,,30,London,
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0


In [80]:
# 80. Fill missing values differently for each column: use a dictionary to fill
# NaN in 'Age' with 0 and NaN in 'City' with 'N/A'. Columns not listed in the
# dictionary ('Name', 'Salary') keep their NaN values.
fill_values = {'Age': 0, 'City': 'N/A'}
df.fillna(value=fill_values)

Unnamed: 0,Name,Age,City,Salary
row1,Alice,25,New York,50000.0
row2,,30,London,
row3,Charlie,35,Paris,55000.0
row4,David,40,New York,70000.0
row5,Eva,22,N/A,65000.0
row6,Mayra,18,Moscow,72000.0
row7,Priyanka,19,Berlin,80000.0
