# IMPORT

In [32]:
import pandas as pd
import numpy as np

from sklearn.impute import KNNImputer

# PREPARATION

In [8]:
# Load the train and test sets that carry the new features; both floor
# columns are read as strings.
train, test = (
    pd.read_csv(csv_path, dtype={'floors': str, 'total_floors': str})
    for csv_path in ('../data/train_new_features.csv',
                     '../data/test_new_features.csv')
)

In [10]:
# Print the column dtypes and non-null counts of the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76393 entries, 0 to 76392
Data columns (total 33 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   page                   76393 non-null  object 
 1   description            76393 non-null  object 
 2   flat_type              76393 non-null  object 
 3   object_type            76393 non-null  object 
 4   rooms                  76393 non-null  object 
 5   floors                 76393 non-null  object 
 6   square                 76393 non-null  float64
 7   kitchen_square         40537 non-null  float64
 8   live_square            44509 non-null  float64
 9   price                  76393 non-null  float64
 10  build_matireal         76393 non-null  object 
 11  district_rating        76393 non-null  object 
 12  district               76393 non-null  object 
 13  underground            76393 non-null  object 
 14  eco_rating             76393 non-null  object 
 15  cl

In [11]:
# Print the column dtypes and non-null counts of the test frame.
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32738 entries, 0 to 32737
Data columns (total 33 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   page                   32738 non-null  object 
 1   description            32738 non-null  object 
 2   flat_type              32738 non-null  object 
 3   object_type            32738 non-null  object 
 4   rooms                  32738 non-null  object 
 5   floors                 32738 non-null  object 
 6   square                 32738 non-null  float64
 7   kitchen_square         17338 non-null  float64
 8   live_square            18980 non-null  float64
 9   price                  32738 non-null  float64
 10  build_matireal         32738 non-null  object 
 11  district_rating        32738 non-null  object 
 12  district               32738 non-null  object 
 13  underground            32738 non-null  object 
 14  eco_rating             32738 non-null  object 
 15  cl

In [12]:
# Tag every row with its origin: 1 = training rows, 0 = test rows.
train['sample'] = 1
test['sample'] = 0

# DataFrame.append was removed in pandas 2.0. pd.concat with ignore_index=True
# stacks the two frames and yields the same fresh 0..n-1 index that
# append(...).reset_index(drop=True) produced.
df = pd.concat([train, test], sort=False, ignore_index=True)

# Fill NaN

**Методы заполнения пропусков:**

1. Mean
2. Median
3. KNN

## Mean

In [22]:
# Mean of each area column over the combined frame, rounded to one decimal.
kitchen_square_mean_value = np.round(df.loc[:, 'kitchen_square'].mean(), decimals=1)
live_square_mean_value = np.round(df.loc[:, 'live_square'].mean(), decimals=1)

In [23]:
# Independent copy of the combined frame for the mean-imputation variant.
df_mean = df.copy(deep=True)

In [24]:
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: with pandas Copy-on-Write the in-place call acts on a
# temporary object and leaves df_mean unchanged.
df_mean['kitchen_square'] = df_mean['kitchen_square'].fillna(kitchen_square_mean_value)

In [25]:
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: with pandas Copy-on-Write the in-place call acts on a
# temporary object and leaves df_mean unchanged.
df_mean['live_square'] = df_mean['live_square'].fillna(live_square_mean_value)

## Median

In [28]:
# Median of each area column over the combined frame, rounded to one decimal.
kitchen_square_median_value = np.round(df.loc[:, 'kitchen_square'].median(), decimals=1)
live_square_median_value = np.round(df.loc[:, 'live_square'].median(), decimals=1)

In [29]:
# Independent copy of the combined frame for the median-imputation variant.
df_median = df.copy(deep=True)

In [30]:
# Fill with the median computed for this column (not the mean), and assign the
# result back: fillna(inplace=True) on a selected column does not update
# df_median under pandas Copy-on-Write.
df_median['kitchen_square'] = df_median['kitchen_square'].fillna(kitchen_square_median_value)

In [31]:
# Fill with the median computed for this column (not the mean), and assign the
# result back: fillna(inplace=True) on a selected column does not update
# df_median under pandas Copy-on-Write.
df_median['live_square'] = df_median['live_square'].fillna(live_square_median_value)

## KNN

In [70]:
# Independent copy of the combined frame that will receive the KNN-imputed columns.
df_knn_x = df.copy(deep=True)

In [58]:
# Numeric columns used for the KNN imputation, taken from the combined frame.
x = df.loc[:, ['square', 'kitchen_square', 'live_square', 'price']]

In [59]:
# Show the selected columns; the missing area values appear as NaN.
x

Unnamed: 0,square,kitchen_square,live_square,price
0,40.3,15.8,11.5,6213000.0
1,19.9,,,3100000.0
2,47.5,,,8930000.0
3,58.1,17.6,26.4,9121700.0
4,121.0,24.2,61.5,20520000.0
...,...,...,...,...
109126,80.4,,,10671700.0
109127,29.2,,29.2,3704925.0
109128,72.2,10.9,47.2,9500000.0
109129,18.4,,,3399000.0


In [37]:
# KNN imputer over the 5 nearest rows; the remaining arguments spell out the
# library defaults explicitly.
imputer = KNNImputer(
    n_neighbors=5,
    weights='uniform',
    metric='nan_euclidean',
)

In [41]:
# Fit on the numeric columns of the combined frame `df`. `df_knn` is not
# defined before this cell on a fresh kernel, so it cannot be the fit input.
# NOTE(review): price is several orders of magnitude larger than the area
# columns, so neighbour distances are presumably dominated by price - consider
# scaling the features before imputing; confirm.
imputer.fit(df[['square', 'kitchen_square', 'live_square', 'price']])

KNNImputer()

In [43]:
# Impute from `df` rather than from `df_knn`: this cell rebinds `df_knn` to the
# imputed array, so reading the input from `df_knn` fails on a fresh kernel
# and again when the cell is re-run.
df_knn = imputer.transform(df[['square', 'kitchen_square', 'live_square', 'price']])

In [61]:
# Wrap the imputed values in a DataFrame; columns keep default integer labels.
y = pd.DataFrame(data=df_knn)

In [64]:
# Show the imputed values; columns are labelled 0-3.
y

Unnamed: 0,0,1,2,3
0,40.3,15.80,11.50,6213000.0
1,19.9,2.62,13.16,3100000.0
2,47.5,15.02,35.42,8930000.0
3,58.1,17.60,26.40,9121700.0
4,121.0,24.20,61.50,20520000.0
...,...,...,...,...
109126,80.4,12.48,49.10,10671700.0
109127,29.2,7.00,29.20,3704925.0
109128,72.2,10.90,47.20,9500000.0
109129,18.4,7.60,13.74,3399000.0


In [71]:
# Replace both area columns with their imputed counterparts from `y`
# (column 1 -> kitchen_square, column 2 -> live_square).
df_knn_x = df_knn_x.assign(
    kitchen_square=y[1],
    live_square=y[2],
)

In [72]:
# Show the combined frame after the area columns were replaced with the KNN-imputed values.
df_knn_x

Unnamed: 0,page,description,flat_type,object_type,rooms,floors,square,kitchen_square,live_square,price,...,total_floors,metro_station,num_of_metro_stations,num_of_kindg,num_of_schools,num_of_poly,num_of_hospitals,num_of_dentists,num_of_women_cons,sample
0,https://www.domofond.ru/1-komnatnaya-kvartira-...,Жилой квартал «Цивилизация» – проект комплексн...,Квартира,Новостройка,1,2,40.3,15.80,11.50,6213000.0,...,26,Улица Дыбенко,7,132,58,23,1,3,6,1
1,https://www.domofond.ru/kvartira-na-prodazhu-s...,🔑🔔 Продается студия в 10 минутах от метро Звен...,Студия,Вторичная,1,2,19.9,2.62,13.16,3100000.0,...,5,Звенигородская,9,73,46,12,6,5,4,1
2,https://www.domofond.ru/1-komnatnaya-kvartira-...,"ddrtrtrtrtrty,Организуем показы объектов в люб...",Квартира,Новостройка,1,11,47.5,15.02,35.42,8930000.0,...,23,Елизаровская,7,132,58,23,1,3,6,1
3,https://www.domofond.ru/2-komnatnaya-kvartira-...,"2 комнатная квартира (№ 253), общей площадью 5...",Квартира,Новостройка,2,12,58.1,17.60,26.40,9121700.0,...,26,Улица Дыбенко,7,132,58,23,1,3,6,1
4,https://www.domofond.ru/5-komnatnaya-kvartira-...,Клубный дом на 102 квартиры высотой 19 этажей....,Квартира,Новостройка,5,11,121.0,24.20,61.50,20520000.0,...,19,Пионерская,5,130,68,24,1,2,8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109126,https://www.domofond.ru/3-komnatnaya-kvartira-...,Организуем показы объектов в любом формате (он...,Квартира,Новостройка,3,12,80.4,12.48,49.10,10671700.0,...,22,Ладожская,2,88,54,16,2,2,4,0
109127,https://www.domofond.ru/kvartira-na-prodazhu-s...,Продаётся квартира-студия в жилом комплексе Pa...,Студия,Новостройка,1,13,29.2,7.00,29.20,3704925.0,...,16,Парнас,6,136,73,27,7,4,5,0
109128,https://www.domofond.ru/3-komnatnaya-kvartira-...,"В продаже уютная, светлая 3-х комнатная кварти...",Квартира,Вторичная,3,2,72.2,10.90,47.20,9500000.0,...,9,Академическая,5,113,61,24,3,3,4,0
109129,https://www.domofond.ru/kvartira-na-prodazhu-s...,"Продается студия площадью 18.40 кв. метров ,на...",Студия,Новостройка,1,13,18.4,7.60,13.74,3399000.0,...,13,Черная речка,5,130,68,24,1,2,8,0
