In [None]:
import pandas as pd
from geopy.distance import geodesic
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import math


# Reference point as [latitude, longitude] in degrees; distances and bearings
# in this notebook are measured relative to it.
center_coord = [55.7522, 37.6156]


def get_azimuth(latitude: float, longitude: float) -> float:
    """Return the initial bearing from a point towards ``center_coord``.

    The bearing is measured in degrees clockwise from north (0 = north,
    90 = east) using the spherical initial-bearing formula, and is
    normalised into the 0-360 range.

    Args:
        latitude: Latitude of the starting point, in degrees.
        longitude: Longitude of the starting point, in degrees.

    Returns:
        The bearing in degrees. When the point coincides with
        ``center_coord`` the bearing is undefined and 0.0 is returned.
    """
    # Both points in radians.
    lat1 = math.radians(latitude)
    long1 = math.radians(longitude)
    lat2 = math.radians(center_coord[0])
    long2 = math.radians(center_coord[1])

    # Longitude difference between the reference point and the given point.
    delta = long2 - long1

    # Components of the initial-bearing formula.
    x = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(delta)
    y = math.sin(delta) * math.cos(lat2)

    # Degenerate case: no direction exists from a point to itself.
    if x == 0 and y == 0:
        return 0.0

    # atan2 picks the correct quadrant and copes with x == 0, where the
    # plain atan(y / x) form would divide by zero.
    return math.degrees(math.atan2(y, x)) % 360.0

In [None]:
# Prepare the dataset: load it, derive location features, filter, encode, split.
url = 'https://raw.githubusercontent.com/maxbobkov/ml_moscow_flats/master/moscow_dataset_2020.csv'
df = pd.read_csv(url)

# Geodesic distance in metres between the reference point and each row's coordinates.
df['distance_from_center'] = [
    geodesic(center_coord, (lat, lon)).meters
    for lat, lon in zip(df['latitude'], df['longitude'])
]
# Bearing from each row's coordinates towards the reference point.
df['azimuth'] = [
    get_azimuth(lat, lon)
    for lat, lon in zip(df['latitude'], df['longitude'])
]

# Keep only rows closer than 40 000 m to the reference point. The explicit
# .copy() makes the result an independent frame, so the column assignments
# in the encoding loop below do not write to a slice of the unfiltered data.
df = df.loc[(df['distance_from_center'] < 40000)].copy()

# Replace every object-typed column with integer labels.
categorical_columns = df.columns[df.dtypes == 'object']
labelencoder = LabelEncoder()
for column in categorical_columns:
    # The encoder is refitted per column, so after the loop it only holds
    # the classes of the last column processed.
    df[column] = labelencoder.fit_transform(df[column])

# Columns used as model inputs.
features = [
    'wallsMaterial',
    'floorNumber',
    'floorsTotal',
    'totalArea',
    'kitchenArea',
    'distance_from_center',
    'azimuth',
]

x = df[features]
y = df['price']

# Fixed random_state so the train/validation split is the same on every run.
train_x, val_x, train_y, val_y = train_test_split(x, y, random_state=1)

In [None]:
# Seed the forest (same seed as the train/validation split) so that a fresh
# Restart & Run All reproduces the same fitted model.
rf_model = RandomForestRegressor(random_state=1)
rf_model.fit(train_x, train_y)

In [None]:
# Linear regression fitted on the training split; fit() returns the estimator
# itself, so the name is bound to the fitted model.
linear_model = LinearRegression().fit(train_x, train_y)
# Bare expression so the cell still displays the fitted estimator.
linear_model