In [42]:
import pandas as pd
from geopy.distance import geodesic
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score, median_absolute_error
import math

center_coord = [55.7522, 37.6156]


def get_azimuth(latitude, longitude):
    #pi - число pi, rad - радиус сферы (Земли)
    rad = 6372795

    #координаты двух точек
    llat1 = 77.1539
    llong1 = -120.398

    llat2 = 77.1804
    llong2 = 129.55

    #в радианах
    lat1 = llat1*math.pi/180.
    lat2 = llat2*math.pi/180.
    long1 = llong1*math.pi/180.
    long2 = llong2*math.pi/180.

    #косинусы и синусы широт и разницы долгот
    cl1 = math.cos(lat1)
    cl2 = math.cos(lat2)
    sl1 = math.sin(lat1)
    sl2 = math.sin(lat2)
    delta = long2 - long1
    cdelta = math.cos(delta)
    sdelta = math.sin(delta)

    #вычисления длины большого круга
    y = math.sqrt(math.pow(cl2*sdelta,2)+math.pow(cl1*sl2-sl1*cl2*cdelta,2))
    x = sl1*sl2+cl1*cl2*cdelta
    ad = math.atan2(y,x)
    dist = ad*rad

    #вычисление начального азимута
    x = (cl1*sl2) - (sl1*cl2*cdelta)
    y = sdelta*cl2
    z = math.degrees(math.atan(-y/x))

    if (x < 0):
        z = z+180.

    z2 = (z+180.) % 360. - 180.
    z2 = - math.radians(z2)
    anglerad2 = z2 - ((2*math.pi)*math.floor((z2/(2*math.pi))) )
    angledeg = (anglerad2*180.)/math.pi

    return angledeg


url = 'https://raw.githubusercontent.com/maxbobkov/ml_moscow_flats/master/moscow_dataset_2020.csv'
df = pd.read_csv(url)
df['distance_from_center'] = list(map(lambda x, y: geodesic(center_coord, (x, y)).meters, df['latitude'], df['longitude']))
df['azimuth'] = list(map(lambda x, y: get_azimuth(x, y), df['latitude'], df['longitude']))

df = df.loc[(df['distance_from_center'] < 40000)]

In [43]:
categorical_columns = df.columns[df.dtypes == 'object']
labelencoder = LabelEncoder()
for column in categorical_columns:
    df[column] = labelencoder.fit_transform(df[column])
features =[
    'wallsMaterial',
    'floorNumber',
    'floorsTotal',
    'totalArea',
    'kitchenArea',
    'distance_from_center',
    'azimuth',
]

x = df[features]
y = df['price']

train_x, val_x, train_y, val_y = train_test_split(x, y, random_state=1)

In [44]:
rf_model = RandomForestRegressor()
rf_model.fit(train_x, train_y)
rf_predict = rf_model.predict(val_x)

In [45]:
flat = pd.DataFrame({
    'wallsMaterial': [0],
    'floorNumber': [4],
    'floorsTotal': [33],
    'totalArea': [25.7],
    'kitchenArea': [6.5],
    'latitude': [55.84173946524932],
    'longitude': [37.495861940133025],
})

flat['distance_from_center'] = list(map(lambda x, y: geodesic(center_coord, (x, y)).meters, flat['latitude'], flat['longitude']))
flat['azimuth'] = list(map(lambda x, y: get_azimuth(x, y), flat['latitude'], flat['longitude']))
flat = flat.drop('latitude', axis=1)
flat = flat.drop('longitude', axis=1)

In [46]:
# flat['wallsMaterial'] = labelencoder.fit_transform(flat['wallsMaterial'])

In [47]:
print(rf_model.predict(flat))

[8062630.]


In [50]:
print(r2_score(rf_predict, val_y))
print(mean_absolute_error(rf_predict, val_y))
print(median_absolute_error(rf_predict, val_y))

0.8120715281650346
2900965.138747892
864417.2533333357
