In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Toy dataset: two fully observed feature columns plus a 'target' column
# containing two NaN gaps that a regression model will fill in.
data = {
    'feature1': [1, 2, 3, 4, 5, 6, 7, 8],
    'feature2': [3, 4, 5, 6, 7, 8, 9, 10],
    'target': [10, 12, np.nan, 16, 18, np.nan, 22, 24]
}
df = pd.DataFrame(data)

# Predictor columns (every column except the one being imputed).
features = ['feature1', 'feature2']

# Boolean mask of rows whose target is NaN. Computed once, before df is
# modified, and reused for both the split and the final write-back.
target_is_missing = df['target'].isna()

# Rows to be imputed vs. rows with an observed target (used for training).
df_missing = df.loc[target_is_missing]
df_not_missing = df.loc[~target_is_missing]

# Training inputs/labels come only from rows with an observed target.
X_train, y_train = df_not_missing[features], df_not_missing['target']

# Feature rows for which a target value has to be predicted.
X_missing = df_missing[features]

# Random-forest regressor with a fixed seed so repeated runs give the same
# imputed values; fit() returns the estimator itself, so chaining is safe.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Model estimates for the missing targets, in the row order of X_missing.
predicted_values = model.predict(X_missing)

# Write the estimates back into the NaN slots of the original frame.
df.loc[target_is_missing, 'target'] = predicted_values

# Header line followed by the imputed frame, each on its own line.
print("DataFrame after predictive imputation:", df, sep="\n")


DataFrame after predictive imputation:
   feature1  feature2  target
0         1         3   10.00
1         2         4   12.00
2         3         5   12.92
3         4         6   16.00
4         5         7   18.00
5         6         8   19.26
6         7         9   22.00
7         8        10   24.00
