In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import pandas as pd
import numpy as np
import seaborn as sns

# Toy frame: one gap in feature1, one gap in feature2, feature3 fully observed
data = pd.DataFrame(
    {
        'feature1': [1, 2, np.nan, 4],
        'feature2': [10, np.nan, 30, 40],
        'feature3': [100, 200, 300, 400],
    }
)

# Build the Iterative Imputer (fixed random_state), fit it on the toy frame and
# fill the gaps in one step; the result is re-wrapped with the original column
# labels so it prints like the input frame
imputer = IterativeImputer(random_state=42)
imputed_data = pd.DataFrame(
    data=imputer.fit_transform(data),
    columns=data.columns,
)

print(imputed_data)


   feature1  feature2  feature3
0       1.0      10.0     100.0
1       2.0      20.0     200.0
2       3.0      30.0     300.0
3       4.0      40.0     400.0


In [None]:
# Load data
df = sns.load_dataset("mpg")

# Fixed seed so the shuffle below and the rows chosen for blanking are the same
# on every run. Outputs saved from earlier, unseeded runs will show different
# rows than a fresh execution.
SEED = 42
rng = np.random.default_rng(SEED)

# Create a new column where we will create some NAs. It holds a shuffled copy
# of mpg: sample(frac=1) permutes the values and reset_index(drop=True)
# re-labels them 0..n-1, so mpg_na no longer lines up row-by-row with mpg.
df['mpg_na'] = df['mpg'].sample(frac=1, random_state=SEED).reset_index(drop=True)

# Add 5 NAs in the new column, at 5 distinct randomly chosen row labels
df.loc[rng.choice(df.index, 5, replace=False), 'mpg_na'] = np.nan

In [None]:
# Show the rows where mpg_na is missing
df[df['mpg_na'].isna()]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name,mpg_na
78,21.0,4,120.0,87.0,2979,19.5,72,europe,peugeot 504 (sw),
131,32.0,4,71.0,65.0,1836,21.0,74,japan,toyota corolla 1200,
284,20.6,6,225.0,110.0,3360,16.6,79,usa,dodge aspen 6,
298,23.0,8,350.0,125.0,3900,17.4,79,usa,cadillac eldorado,
319,31.3,4,120.0,75.0,2542,17.5,80,japan,mazda 626,


In [None]:
# Drop the two text columns so only numeric columns go to the imputer
data = df.drop(['origin', 'name'], axis=1)

# Record which rows are missing mpg_na *before* imputing, so the filled rows
# can be located afterwards instead of hard-coding row positions that depend on
# a random draw made upstream
was_missing = data['mpg_na'].isna().to_numpy()

# Apply Iterative Imputer over DF (re-fits the `imputer` object already in scope)
imputed_data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Slice the imputed data down to the rows that were originally missing;
# a positional boolean mask is used because imputed_data gets a fresh index
imputed_data[was_missing]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,mpg_na
78,21.0,4.0,120.0,87.0,2979.0,19.5,72.0,23.559285
131,32.0,4.0,71.0,65.0,1836.0,21.0,74.0,23.49701
284,20.6,6.0,225.0,110.0,3360.0,16.6,79.0,23.578268
298,23.0,8.0,350.0,125.0,3900.0,17.4,79.0,23.606757
319,31.3,4.0,120.0,75.0,2542.0,17.5,80.0,23.53561


In [None]:
# Restrict the imputer's input to three predictors plus the column with gaps
data = df[['cylinders', 'horsepower', 'weight', 'mpg_na']]

# Record which rows are missing mpg_na *before* imputing, so the filled rows
# can be located afterwards instead of hard-coding row positions that depend on
# a random draw made upstream
was_missing = data['mpg_na'].isna().to_numpy()

# Apply Iterative Imputer over DF (re-fits the `imputer` object already in scope)
imputed_data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Slice the imputed data down to the rows that were originally missing;
# a positional boolean mask is used because imputed_data gets a fresh index
imputed_data[was_missing]

Unnamed: 0,cylinders,horsepower,weight,mpg_na
78,4.0,87.0,2979.0,23.558797
131,4.0,65.0,1836.0,23.496941
284,6.0,110.0,3360.0,23.57819
298,8.0,125.0,3900.0,23.607055
319,4.0,75.0,2542.0,23.53543


In [None]:
# Load data
df = sns.load_dataset("titanic")

# Flag rows with a missing age: 1 where age is NA, 0 otherwise
df['na'] = df['age'].isna().astype(int)

# Keep only the numeric columns (this includes the new flag column)
data = df.select_dtypes(include='number')

# Configure a new Iterative Imputer, then fit it and fill the numeric frame.
# NOTE(review): n_nearest_features=10 is larger than the number of numeric
# columns shown in the output, so it presumably has no effect - confirm.
imputer = IterativeImputer(
    n_nearest_features=10,
    sample_posterior=True,
    max_iter=100,
    min_value=0,
    random_state=42,
)
imputed_data = pd.DataFrame(
    data=imputer.fit_transform(data),
    columns=data.columns,
)

# Show the rows whose age was originally missing
imputed_data[imputed_data['na'] == 1]

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,na
5,0.0,3.0,32.451331,0.0,0.0,8.4583,1.0
17,1.0,2.0,6.882117,0.0,0.0,13.0000,1.0
19,1.0,3.0,25.907944,0.0,0.0,7.2250,1.0
26,0.0,3.0,42.640301,0.0,0.0,7.2250,1.0
28,1.0,3.0,31.247138,0.0,0.0,7.8792,1.0
...,...,...,...,...,...,...,...
859,0.0,3.0,30.586435,0.0,0.0,7.2292,1.0
863,0.0,3.0,0.000000,8.0,2.0,69.5500,1.0
868,0.0,3.0,34.911140,0.0,0.0,9.5000,1.0
878,0.0,3.0,36.525684,0.0,0.0,7.8958,1.0
