In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer

In [2]:
# ---------------------------------------------------------
# LOAD DATA + MANUAL IMPUTATION (pandas .fillna())
# ---------------------------------------------------------
# Read the Titanic CSV and, in the same chain, append one new column per
# fill strategy. The original 'age' column is left as-is (NaNs included)
# so every strategy can be compared against it.
df = (
    pd.read_csv('titanic.csv')
    .assign(
        # NaN ages -> mean of the observed ages
        age_mean=lambda frame: frame['age'].fillna(frame['age'].mean()),
        # NaN ages -> median of the observed ages
        age_median=lambda frame: frame['age'].fillna(frame['age'].median()),
        # NaN ages -> mode; mode() returns a Series, so take its first value
        age_mode=lambda frame: frame['age'].fillna(frame['age'].mode()[0]),
        # NaN ages -> constant 0
        age_constant=lambda frame: frame['age'].fillna(0),
    )
)

In [3]:
# ---------------------------------------------------------
# AUTOMATED IMPUTATION USING SimpleImputer (sklearn)
# ---------------------------------------------------------
# SimpleImputer works on 2-D input, which is why the column is selected as
# df[['age']] (a one-column DataFrame) rather than df['age'] (a Series).
# With scikit-learn's default output settings fit_transform() hands back a
# 2-D array with a single column; .ravel() flattens it to 1-D so each
# assignment stores an ordinary 1-D column instead of depending on pandas
# to squeeze an (n, 1) array implicitly.

# Impute missing values using mean strategy
imp_mean = SimpleImputer(strategy='mean')
df['age_imputer_mean'] = imp_mean.fit_transform(df[['age']]).ravel()

# Impute missing values using median strategy
imp_median = SimpleImputer(strategy='median')
df['age_imputer_median'] = imp_median.fit_transform(df[['age']]).ravel()

# Impute missing values using mode (most frequent) strategy
imp_mode = SimpleImputer(strategy='most_frequent')
df['age_imputer_mode'] = imp_mode.fit_transform(df[['age']]).ravel()

# Impute missing values using a constant value (0)
imp_const = SimpleImputer(strategy='constant', fill_value=0)
df['age_imputer_constant'] = imp_const.fit_transform(df[['age']]).ravel()

In [4]:
# ---------------------------------------------------------
# SIDE-BY-SIDE COMPARISON OF ALL AGE COLUMNS
# ---------------------------------------------------------
# Original column first, then each .fillna() result placed directly next to
# the SimpleImputer column produced with the same strategy.
comparison_columns = [
    'age',
    'age_mean', 'age_imputer_mean',
    'age_median', 'age_imputer_median',
    'age_mode', 'age_imputer_mode',
    'age_constant', 'age_imputer_constant',
]

print(df[comparison_columns])

      age   age_mean  age_imputer_mean  age_median  age_imputer_median  \
0    22.0  22.000000         22.000000        22.0                22.0   
1    38.0  38.000000         38.000000        38.0                38.0   
2    26.0  26.000000         26.000000        26.0                26.0   
3    35.0  35.000000         35.000000        35.0                35.0   
4    35.0  35.000000         35.000000        35.0                35.0   
..    ...        ...               ...         ...                 ...   
886  27.0  27.000000         27.000000        27.0                27.0   
887  19.0  19.000000         19.000000        19.0                19.0   
888   NaN  29.699118         29.699118        28.0                28.0   
889  26.0  26.000000         26.000000        26.0                26.0   
890  32.0  32.000000         32.000000        32.0                32.0   

     age_mode  age_imputer_mode  age_constant  age_imputer_constant  
0        22.0              22.0          