In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from matplotlib import pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
import seaborn as sns
from sklearn.model_selection import train_test_split
import sys
import math
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("new_dtrain.csv", delimiter=",")
zeros = df[df['latitude'] == 0]
df = df[df['latitude'] != 0]
subset = list(df.columns.values)
subset = subset.remove('price')
data = df.drop_duplicates(subset = subset)
data = data[data.price < 1000000000]
data = data[data.price < 800000000]
data = shuffle(data)
data = data.reset_index(drop=True)

In [3]:
#choosing data
X = data[['room_number', 'house_type',
       'built_time', 'appartments_floor', 'all_space', 'state', 'bathroom',
       'balcony', 'balcony_glassed', 'door', 'phone', 'ceiling', 'safety',
       'at_the_hotel', 'internet', 'furniture', 'parking', 'latitude',
       'longitude', 'building_floors', 'map_complex', 'floor',
          'trngl_first_point', 'trngl_second_point', 'trngl_third_point']]
y = data['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=7)

In [7]:
out = X_test[-1:]

In [9]:
data[data.index == 27515]

Unnamed: 0,Index,address,room_number,price,map_complex,house_type,built_time,appartments_floor,all_space,state,...,internet,furniture,floor,parking,latitude,longitude,building_floors,trngl_first_point,trngl_second_point,trngl_third_point
27515,27705,"мкр Таугуль, Берегового 50 — Пятницкого",2,14500000,1.0,4,1982,5.0,48.0,6,...,2,0.5,1,0,43.21197,76.870824,5.0,20.8151,-5.2577,9.3366


In [65]:
from math import sin, cos, sqrt, atan2, radians
lat = X_train['latitude'].values
lon = X_train['longitude'].values
out_lat = out['latitude'].values
out_lon = out['longitude'].values

indx = X_train.index

answer = np.zeros(len(lat))

for i in range(len(lat)):
    R = 6373.0
    lat1 = radians(lat[i])
    lon1 = radians(lon[i])
    lat2 = radians(out_lat)
    lon2 = radians(out_lon)
    
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    
    distance = R * c
    
    answer[i] = distance

In [66]:
newDf = pd.DataFrame()

In [67]:
newDf['distance'] = answer
newDf['indx'] = indx

In [68]:
newDf = newDf.sort_values(['distance'])
newDf = newDf[100:]

In [69]:
knn = KNeighborsRegressor(n_neighbors=15, n_jobs=-1)

In [83]:
mod_df = pd.DataFrame()
ind = newDf.indx

In [85]:
ind.values

array([ 3475, 24828,  2987, ...,  9397,  9256, 25370])

In [88]:
tofit = data.loc[ind.values]

In [90]:
X_fit = tofit[['room_number', 'house_type',
       'built_time', 'appartments_floor', 'all_space', 'state', 'bathroom',
       'balcony', 'balcony_glassed', 'door', 'phone', 'ceiling', 'safety',
       'at_the_hotel', 'internet', 'furniture', 'parking', 'latitude',
       'longitude', 'building_floors', 'map_complex', 'floor',
          'trngl_first_point', 'trngl_second_point', 'trngl_third_point']]
y_fit = tofit['price']

X_train_fit, X_test_fit, y_train_fit, y_test_fit = train_test_split(X_fit, y_fit, random_state=7)

In [91]:
knn.fit(X_train_fit, y_train_fit)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=-1, n_neighbors=15, p=2,
          weights='uniform')

In [92]:
knn.predict(out)

array([ 15600000.])

In [93]:
y_test[-1:]

27515    14500000
Name: price, dtype: int64

In [94]:
abs(knn.predict(out) - y_test[-1:])

27515    1100000.0
Name: price, dtype: float64

In [95]:
out

Unnamed: 0,room_number,house_type,built_time,appartments_floor,all_space,state,bathroom,balcony,balcony_glassed,door,...,furniture,parking,latitude,longitude,building_floors,map_complex,floor,trngl_first_point,trngl_second_point,trngl_third_point
27515,2,4,1982,5.0,48.0,6,2,1,1,0,...,0.5,0,43.21197,76.870824,5.0,1.0,1,20.8151,-5.2577,9.3366


In [96]:
tofit

Unnamed: 0,Index,address,room_number,price,map_complex,house_type,built_time,appartments_floor,all_space,state,...,internet,furniture,floor,parking,latitude,longitude,building_floors,trngl_first_point,trngl_second_point,trngl_third_point
3475,32176,"мкр Таугуль, Джандосова 10 — Сулейменова (Де...",2,15000000,1.0,2,1986,2.0,43.0,5,...,0,0.0,1,0,43.213291,76.874555,5.0,20.309904,-5.762896,8.831404
24828,7026,"мкр Таугуль, Жандосова 8 — Сулейменова",2,14700000,1.0,2,1986,2.0,43.0,5,...,0,0.0,1,0,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
2987,30377,"мкр Таугуль, Джандосова 8 — Сулейменова (Деж...",2,15000000,1.0,2,1986,2.0,43.0,5,...,0,0.0,1,0,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
32614,25577,"мкр Таугуль, Джандосова 8 — Сулейменова (Деж...",2,15000000,1.0,2,1986,2.0,43.0,5,...,2,0.5,1,1,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
26183,6979,"мкр Таугуль, Джандосова 8 — Сулейменова (Деж...",2,15200000,1.0,2,1986,2.0,43.0,5,...,0,0.0,1,0,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
29268,665,"мкр Таугуль, Джандосова 8",2,14700000,1.0,2,1986,2.0,43.0,5,...,2,0.5,1,1,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
24251,10408,"мкр Таугуль, Джандосова 8 — Сулейменова (Деж...",2,14700000,1.0,2,1986,2.0,43.0,5,...,0,0.0,1,0,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
30493,34776,"мкр Таугуль, Джандосова 8 — Сулейменова (Деж...",2,15000000,1.0,2,1986,2.0,43.0,5,...,2,0.5,1,1,43.213672,76.874337,5.0,20.293600,-5.779200,8.815100
23066,35887,Пятницкого 6/1 — Сулейменова,1,3150000,1.0,2,1973,1.0,10.4,4,...,2,1.0,1,1,43.212174,76.875118,4.0,20.365300,-5.707500,8.886800
32295,27810,"мкр Таугуль, Пятницкого 18 — Сулейменова",1,14400000,1.0,2,1989,5.0,40.0,5,...,0,0.0,1,0,43.212174,76.875118,9.0,20.365300,-5.707500,8.886800
