In [None]:
import pandas as pd
import numpy as np

# Demonstration of common missing-value imputation techniques with pandas.
#
# Every technique works on its own copy of the original frame, so `df` keeps
# its NaNs and each result can be compared against the untouched input.
#
# Results are assigned back (`frame[col] = frame[col].fillna(...)`) rather
# than using `frame[col].fillna(..., inplace=True)`. The chained in-place form
# operates on an intermediate object: it warns on recent pandas releases and
# leaves the frame unchanged once Copy-on-Write is enabled.

# Step 1: Create a sample dataset with missing values
data = {
    'A': [1, 2, np.nan, 4, 5, np.nan, 7, 8],
    'B': [np.nan, 2, 2, np.nan, 5, 5, np.nan, 8],
    'C': ['cat', np.nan, 'cat', 'dog', 'dog', np.nan, 'dog', 'cat'],
    'D': [1, 2, 3, np.nan, np.nan, 6, 7, 8],
}

df = pd.DataFrame(data)
print("Original Dataset with Missing Values:")
print(df)

# Step 2: Identifying Missing Values
print("\nMissing Values per Column:")
print(df.isnull().sum())

print("\nRows with Missing Values:")
print(df[df.isnull().any(axis=1)])

# Step 3: Apply different imputation techniques

# A. Mean Imputation (for numerical columns)
df_mean_imputed = df.copy()
df_mean_imputed['A'] = df_mean_imputed['A'].fillna(df['A'].mean())
print("\nAfter Mean Imputation for Column A:")
print(df_mean_imputed)

# B. Median Imputation (for numerical columns)
df_median_imputed = df.copy()
df_median_imputed['B'] = df_median_imputed['B'].fillna(df['B'].median())
print("\nAfter Median Imputation for Column B:")
print(df_median_imputed)

# C. Mode Imputation (for categorical columns)
# mode() can return several values when there is a tie (here 'cat' and 'dog'
# both occur three times); [0] takes the first entry of that result.
df_mode_imputed = df.copy()
df_mode_imputed['C'] = df_mode_imputed['C'].fillna(df['C'].mode()[0])
print("\nAfter Mode Imputation for Column C:")
print(df_mode_imputed)

# D. Forward Fill
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
# A leading NaN (row 0 of column B) has no earlier value and stays NaN.
df_ffill = df.ffill()
print("\nAfter Forward Fill:")
print(df_ffill)

# E. Backward Fill
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_bfill = df.bfill()
print("\nAfter Backward Fill:")
print(df_bfill)

# F. Interpolation (for numerical columns)
df_interpolated = df.copy()
for column in ('A', 'D'):
    df_interpolated[column] = df_interpolated[column].interpolate(method='linear')
print("\nAfter Interpolation for Columns A and D:")
print(df_interpolated)


Original Dataset with Missing Values:
     A    B    C    D
0  1.0  NaN  cat  1.0
1  2.0  2.0  NaN  2.0
2  NaN  2.0  cat  3.0
3  4.0  NaN  dog  NaN
4  5.0  5.0  dog  NaN
5  NaN  5.0  NaN  6.0
6  7.0  NaN  dog  7.0
7  8.0  8.0  cat  8.0

Missing Values per Column:
A    2
B    3
C    2
D    2
dtype: int64

Rows with Missing Values:
     A    B    C    D
0  1.0  NaN  cat  1.0
1  2.0  2.0  NaN  2.0
2  NaN  2.0  cat  3.0
3  4.0  NaN  dog  NaN
4  5.0  5.0  dog  NaN
5  NaN  5.0  NaN  6.0
6  7.0  NaN  dog  7.0

After Mean Imputation for Column A:
     A    B    C    D
0  1.0  NaN  cat  1.0
1  2.0  2.0  NaN  2.0
2  4.5  2.0  cat  3.0
3  4.0  NaN  dog  NaN
4  5.0  5.0  dog  NaN
5  4.5  5.0  NaN  6.0
6  7.0  NaN  dog  7.0
7  8.0  8.0  cat  8.0

After Median Imputation for Column B:
     A    B    C    D
0  1.0  5.0  cat  1.0
1  2.0  2.0  NaN  2.0
2  NaN  2.0  cat  3.0
3  4.0  5.0  dog  NaN
4  5.0  5.0  dog  NaN
5  NaN  5.0  NaN  6.0
6  7.0  5.0  dog  7.0
7  8.0  8.0  cat  8.0

After Mode Imputati

  df_ffill.fillna(method='ffill', inplace=True)
  df_bfill.fillna(method='bfill', inplace=True)
