In [1]:
# Load the necessary libraries
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the rent.csv dataset and uses the following feature:
# beds, baths, sqft, and price
rent = pd.read_csv('rent.csv')
rent = rent[['beds','baths','sqft','price']]
rent_features = rent[['beds','baths','sqft']]

In [3]:
# Initiliaze and fit a simple imputer to the dataset using 
# the median of each feature
imputer = SimpleImputer(strategy='median')
imputer.fit(rent)

In [4]:
# Display the median of each feature
imputer.statistics_

array([   2.,    2., 1000., 1800.])

In [5]:
# Fill the missing value for each column
pd.DataFrame(imputer.transform(rent),columns=['beds','baths','sqft','price'])

Unnamed: 0,beds,baths,sqft,price
0,2.0,2.0,1000.0,1250.0
1,2.0,2.0,1000.0,1295.0
2,2.0,2.0,1000.0,1100.0
3,1.0,2.0,735.0,1425.0
4,1.0,2.0,1000.0,890.0
...,...,...,...,...
200791,2.0,2.0,1000.0,1850.0
200792,2.0,2.0,1000.0,1950.0
200793,2.0,2.0,1300.0,2600.0
200794,2.0,2.0,1000.0,1900.0


In [6]:
# Selects a sample of the rent dataset and scales
# the values using MinMaxScaler()
scaler = MinMaxScaler()
rent_sample = rent.sample(n=2000)
scaler.fit(rent_sample)
rent_scaled = scaler.transform(rent_sample)

In [7]:
# Initializes a KNN imputer with 8 neighbors
knn_imputer = KNNImputer(n_neighbors=8)

In [8]:
# Fill the missing value for each feature
pd.DataFrame(knn_imputer.fit_transform(rent_scaled),
             columns=['scaled_beds','scaled_baths','scaled_sqft','scaled_price'])

Unnamed: 0,scaled_beds,scaled_baths,scaled_sqft,scaled_price
0,0.229167,0.000000,0.069717,0.026263
1,0.333333,0.156250,0.156761,0.091475
2,0.166667,0.000000,0.103502,0.060606
3,0.333333,0.156250,0.141356,0.063232
4,0.833333,0.609375,0.743482,0.191919
...,...,...,...,...
1995,0.166667,0.015625,0.091174,0.064646
1996,0.458333,0.000000,0.190283,0.090909
1997,0.333333,0.078125,0.136296,0.041414
1998,0.166667,0.000000,0.080810,0.052525


In [9]:
# Initialize and fit an interative imputer to the 
# rent dataset
iter_imputer = IterativeImputer()
iter_imputer.fit(rent_features)

In [10]:
# Display the number of iteration rounds that occurred
iter_imputer.n_iter_

3

In [11]:
# Fill in the missing values for each feature
pd.DataFrame(iter_imputer.transform(rent_features), columns=['beds','baths','sqft'])

Unnamed: 0,beds,baths,sqft
0,2.0,2.000000,582.607411
1,2.0,1.565187,1146.830456
2,2.0,1.565187,1146.830456
3,1.0,0.962895,735.000000
4,1.0,1.249608,130.085504
...,...,...,...
200791,2.0,1.565187,1146.830456
200792,2.0,1.565187,1146.830456
200793,2.0,2.000000,1300.000000
200794,2.0,1.634781,1000.000000
