In [1]:
import pandas as pd

# Sample categorical data with missing values
data = {
    'Category': ['Red', None, 'Blue', 'Green', None, 'Blue', None]
}
df = pd.DataFrame(data)
print("Original Data:")
df

Original Data:


Unnamed: 0,Category
0,Red
1,
2,Blue
3,Green
4,
5,Blue
6,


In [2]:
# 1. Mode Imputation (replace missing with most frequent category)
df_mode = df.copy()
df_mode['Category'].fillna(df_mode['Category'].mode()[0], inplace=True)
print("\nAfter Mode Imputation:")
df_mode


After Mode Imputation:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_mode['Category'].fillna(df_mode['Category'].mode()[0], inplace=True)


Unnamed: 0,Category
0,Red
1,Blue
2,Blue
3,Green
4,Blue
5,Blue
6,Blue


In [3]:
# 2. Constant Value Imputation (replace missing with 'Unknown')
df_constant = df.copy()
df_constant['Category'].fillna('Unknown', inplace=True)
print("\nAfter Constant Value Imputation:")
df_constant


After Constant Value Imputation:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_constant['Category'].fillna('Unknown', inplace=True)


Unnamed: 0,Category
0,Red
1,Unknown
2,Blue
3,Green
4,Unknown
5,Blue
6,Unknown


In [4]:
# 3. Forward Fill / Backward Fill (propagate last known value)
df_ffill = df.copy()
df_ffill['Category'].fillna(method='ffill', inplace=True)
print("\nAfter Forward Fill:")
df_ffill


After Forward Fill:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_ffill['Category'].fillna(method='ffill', inplace=True)
  df_ffill['Category'].fillna(method='ffill', inplace=True)


Unnamed: 0,Category
0,Red
1,Red
2,Blue
3,Green
4,Green
5,Blue
6,Blue


In [5]:
# Backward Fill (propagate next known value)
df_bfill = df.copy()
df_bfill['Category'].fillna(method='bfill', inplace=True)
print("\nAfter Backward Fill:")
df_bfill



After Backward Fill:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_bfill['Category'].fillna(method='bfill', inplace=True)
  df_bfill['Category'].fillna(method='bfill', inplace=True)


Unnamed: 0,Category
0,Red
1,Blue
2,Blue
3,Green
4,Blue
5,Blue
6,
