In [9]:
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer

# toy frame with two missing entries: (0, 2) and (2, 0)
rows = [
  [1, 2, np.nan],
  [3, 4, 30],
  [np.nan, 6, 50],
  [8, 8, 70],
]
data = pd.DataFrame(rows)
data

Unnamed: 0,0,1,2
0,1.0,2,
1,3.0,4,30.0
2,,6,50.0
3,8.0,8,70.0


In [10]:
# fill the missing entries of the raw data with a KNN imputer (2 neighbors)
imputer = KNNImputer(n_neighbors=2)
imputed_values = imputer.fit_transform(data)
X = pd.DataFrame(imputed_values)
X

Unnamed: 0,0,1,2
0,1.0,2.0,40.0
1,3.0,4.0,30.0
2,2.0,6.0,50.0
3,8.0,8.0,70.0


In [11]:
# z-score every column of the original (un-imputed) data: (x - mean) / std
# missing entries remain NaN and are imputed in a later step
X_z = pd.DataFrame(
  {col: (data[col] - data[col].mean()) / data[col].std() for col in X.columns.values}
)
X_z

Unnamed: 0,0,1,2
0,-0.83205,-1.161895,
1,-0.27735,-0.387298,-1.0
2,,0.387298,0.0
3,1.1094,1.161895,1.0


In [12]:
# run the same KNN imputer on the standardized data and write the
# filled-in values back into X_z
X_z_filled = imputer.fit_transform(X_z)
X_z[:] = pd.DataFrame(X_z_filled)
X_z

Unnamed: 0,0,1,2
0,-0.83205,-1.161895,-0.5
1,-0.27735,-0.387298,-1.0
2,0.416025,0.387298,0.0
3,1.1094,1.161895,1.0


In [13]:
# inverse transform the standardized data that has been imputed, using the
# mean and std of the original columns (the same statistics used to standardize)
# we see that the imputed value in (2,0) is now 5.5 (it was 2.0 when imputing
# the raw data) since more emphasis has been put on column 1 instead of
# column 2 when it was standardized
# X_normalized is built from X_z here rather than updated in place, so the cell
# works on a fresh kernel and gives the same result when re-run
X_normalized = X_z * data.std() + data.mean()  # Series align on column labels
X_normalized

Unnamed: 0,0,1,2
0,1.0,2.0,40.0
1,3.0,4.0,30.0
2,5.5,6.0,50.0
3,8.0,8.0,70.0
