In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

# IterativeImputer is still experimental, so we need to enable it explicitly
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [2]:
# 1. Load the Titanic dataset and keep only the numeric columns used below.
# 'age' is the column with gaps; the other four serve as predictors for it.
feature_columns = ['age', 'pclass', 'sibsp', 'parch', 'fare']
df = sns.load_dataset('titanic').loc[:, feature_columns]

In [3]:
# Summarise the gaps per column, then preview a few rows where 'age' is absent.
age_missing = df['age'].isna()

print("--- Original Data ---")
print("Original missing values:")
print(df.isna().sum())
print("\nFirst 5 rows with missing 'age':")
print(df.loc[age_missing].head())

--- Original Data ---
Original missing values:
age       177
pclass      0
sibsp       0
parch       0
fare        0
dtype: int64

First 5 rows with missing 'age':
    age  pclass  sibsp  parch     fare
5   NaN       3      0      0   8.4583
17  NaN       2      0      0  13.0000
19  NaN       3      0      0   7.2250
26  NaN       3      0      0   7.2250
28  NaN       3      0      0   7.8792


In [4]:
# 2. Configure the IterativeImputer
# Each feature with gaps is modelled as a function of the other features, and
# the imputer runs at most MAX_IMPUTE_ROUNDS rounds of this.
# Pinning the seed keeps the imputed values reproducible between runs.
MAX_IMPUTE_ROUNDS = 10
IMPUTER_SEED = 0
imputer = IterativeImputer(max_iter=MAX_IMPUTE_ROUNDS, random_state=IMPUTER_SEED)

In [5]:
# 3. Fit the imputer to the data and transform it
# fit_transform learns the relationships between columns and fills in the gaps
# in a single call. The result is a plain NumPy array (no column labels), hence
# the "_array" suffix. `df` itself keeps its NaNs, which the later display cell
# relies on to locate the rows that were imputed.
df_imputed_array = imputer.fit_transform(df)

In [6]:
# 4. Convert the result back to a DataFrame
# The imputer returns a bare NumPy array, so both axes of labels must be
# restored from the original frame. Passing `index=df.index` (not just the
# columns) keeps every imputed row aligned with its source row even if `df`
# has been filtered, sorted, or otherwise carries a non-default index.
df_imputed = pd.DataFrame(df_imputed_array, columns=df.columns, index=df.index)

In [7]:
# Confirm no gaps remain, then show the rows whose 'age' was originally missing.
# The rows are picked with a positional boolean mask built from the original
# frame, so the lookup is correct whatever index labels `df` carries. Feeding
# index labels to .iloc only works when the index is the default 0..n-1 range.
was_age_missing = df['age'].isnull().to_numpy()

print("\n--- Imputed Data ---")
print("Missing values after imputation:")
print(df_imputed.isnull().sum())
print("\nSame 5 rows, now with 'age' imputed:")
print(df_imputed.loc[was_age_missing].head())



--- Imputed Data ---
Missing values after imputation:
age       0
pclass    0
sibsp     0
parch     0
fare      0
dtype: int64

Same 5 rows, now with 'age' imputed:
          age  pclass  sibsp  parch     fare
5   27.617131     3.0    0.0    0.0   8.4583
17  34.111643     2.0    0.0    0.0  13.0000
19  27.639522     3.0    0.0    0.0   7.2250
26  27.639522     3.0    0.0    0.0   7.2250
28  27.627645     3.0    0.0    0.0   7.8792
