In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression



In [2]:
# create a sample dataframe with outliers
df = pd.DataFrame({
    'A': [1, 2, 3, 4, 5, 100, 7, 8, 9, 200],
    'B': [10, 20, 30, 40, 50, 600, 70, 80, 90, 1000]
})

# calculate IQR for each column
q1 = df.quantile(0.25)
q3 = df.quantile(0.75)
iqr = q3 - q1



In [3]:
# define a function to replace outliers with predicted values
def impute_outliers(col):
    is_outlier = (col < q1[col.name] - 1.5 * iqr[col.name]) | (col > q3[col.name] + 1.5 * iqr[col.name])
    x_train = df.loc[~is_outlier, [c for c in df.columns if c != col.name]]
    y_train = df.loc[~is_outlier, col.name]
    x_test = df.loc[is_outlier, [c for c in df.columns if c != col.name]]
    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    col[is_outlier] = y_pred
    return col



In [4]:
# apply the function to each column
df = df.apply(impute_outliers)

print(df)

     A       B
0    1    10.0
1    2    20.0
2    3    30.0
3    4    40.0
4    5    50.0
5   60   600.0
6    7    70.0
7    8    80.0
8    9    90.0
9  100  1000.0
