In [None]:
!pip install geodist
!pip install catboost

from geodist import GeoDist
import pandas as pd
import numpy as np
import re
from catboost import CatBoostRegressor, Pool
import pickle

from google.colab import drive
drive.mount('/content/gdrive')

import warnings
warnings.filterwarnings('ignore')

# Years written into the `segment` column for rows of the matching wall
# material (that column is renamed to `year` further down in this script).
# NOTE(review): the names suggest dataset-wide mean construction years per
# material - confirm where these figures come from.
MEAN_YEAR_MONOLIT = 2011
MEAN_YEAR_PANEL = 1979
MEAN_YEAR_BRICK = 1961


# Load the raw records from the mounted Drive and drop the `id` column.
data = pd.read_json('/content/gdrive/MyDrive/dataframes/text (1).json')

data = data.drop('id', axis=1)

# Every recoding below is a single `data.loc[mask, column] = value` call.
# The chained form `data.column.loc[mask] = value` assigns into an
# intermediate Series and is not guaranteed to reach `data` (it is a silent
# no-op when pandas Copy-on-Write is active).

# `segment` has to be filled before `material` is recoded: the masks compare
# against the original material spellings.
for material_name, mean_year in (
    ('монолит', MEAN_YEAR_MONOLIT),
    ('панель', MEAN_YEAR_PANEL),
    ('кирпич', MEAN_YEAR_BRICK),
):
    data.loc[data['material'] == material_name, 'segment'] = mean_year

# Recode categorical values, in the same order as before.
# NOTE(review): presumably the target spellings are the ones the trained
# model saw - verify against the training data.
for column, old_value, new_value in (
    ('material', 'монолит', 'монолитный'),
    ('material', 'кирпич', 'кирпичный'),
    ('material', 'панель', 'панельный'),
    ('rooms', 'Студия', 'студия'),
    ('balcony', 'Да', 'балкон'),
    ('balcony', 'Нет', 'нет'),
):
    data.loc[data[column] == old_value, column] = new_value

# Inclusive upper bound (in minutes, per the 'мин.' labels) and label of each
# bucket, in ascending order; anything above the last bound falls into the
# open-ended 'от 31 мин.' bucket.
_METRO_BUCKETS = (
    (5, 'до 5 мин.'),
    (10, '6–10 мин.'),
    (15, '11–15 мин.'),
    (20, '16–20 мин.'),
    (30, '21–30 мин.'),
)


def _metro_bucket(minutes):
    """Return the bucket label for an integer ``minutes`` value.

    Values that are not integers (strings, floats, NaN, booleans) are
    returned unchanged, so entries that already hold a label stay as they are.
    NumPy integers are accepted as well as Python ``int``: elements read from
    an int64 column are ``numpy.int64``, which a ``type(x) == int`` check
    never matches.
    """
    if isinstance(minutes, bool) or not isinstance(minutes, (int, np.integer)):
        return minutes
    for upper_bound, label in _METRO_BUCKETS:
        if minutes <= upper_bound:
            return label
    return 'от 31 мин.'


# Replace the whole column in one assignment. This avoids both the chained
# per-cell assignment and the label-based `series[i]` lookups, which only
# work when the index happens to be 0..n-1.
data['metro_remoteness'] = data['metro_remoteness'].map(_metro_bucket)

def dist(x):
    """Return the distance from the point encoded in ``x`` to a fixed point.

    Every decimal number found in the string ``x`` is parsed as a float, the
    first two are swapped, and the resulting tuple is handed to ``GeoDist``.
    The value returned by ``GeoDist.distance`` for the reference point
    (37.617734, 55.751999) is divided by 1000 and rounded to three decimals.

    NOTE(review): the reference coordinates look like central Moscow and the
    division looks like a metres-to-kilometres conversion - confirm against
    the geodist documentation.

    Raises:
        IndexError: if fewer than two numbers are found in ``x``.
    """
    numbers = [float(token) for token in re.findall(r'-?\d+\.?\d*', x)]
    # GeoDist is given the first two values in the opposite order from the
    # one used in the input string; any further values keep their position.
    point = tuple([numbers[1], numbers[0]] + numbers[2:])
    raw_distance = GeoDist([point]).distance(37.617734, 55.751999)
    return float(f'{raw_distance / 1000:.3f}')

# Replace the raw location string with the value computed by `dist`.
data['location'] = data['location'].map(dist)

# Recode renovation values with a single `data.loc[mask, column] = value`
# call each. The chained form `data.renovation.loc[mask] = value` assigns
# into an intermediate Series and is not guaranteed to reach `data` (it is a
# silent no-op when pandas Copy-on-Write is active).
for old_value, new_value in (
    ('Муниципальный ремонт', 'косметический'),
    ('Без отделки', 'требует ремонта'),
    ('Современная отделка', 'евро'),
):
    data.loc[data['renovation'] == old_value, 'renovation'] = new_value

# Rename the columns in place.
# NOTE(review): presumably these are the feature names the pickled model
# expects - verify against the training notebook.
data.rename(
    columns={
        'location': 'coords',
        'rooms': 'rooms_count',
        'segment': 'year',
        'floors': 'house_floors',
        'area': 'flat_area',
        'kitchen': 'kitchen_area',
        'metro_remoteness': 'metro',
        'renovation': 'condition',
    },
    inplace=True,
)

# Load the trained model. The `with` block closes the file handle as soon as
# unpickling finishes; the previous `pickle.load(open(...))` left it open.
# SECURITY: unpickling executes arbitrary code embedded in the file - only
# load model files from a trusted source.
with open('/content/gdrive/MyDrive/dataframes/lct_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# NOTE(review): `test_data` is never used in this cell and is built from a
# one-element set, so it looks like a leftover placeholder. It is kept
# unchanged in case a later cell reads it.
test_data = pd.DataFrame({'coords'})

# Predict one value per row of the prepared frame.
pred = loaded_model.predict(data)
cost = pd.Series(pred)
# Floor each prediction to a multiple of 100 000 and add 90 000, so every
# resulting value ends in ...90 000.
cost = cost // 100000 * 100000 + 90000
cost = cost.astype(int)
# Serialise to a JSON string; from here on `cost` is a str.
cost = cost.to_json()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geodist
  Downloading geodist-0.4-py3-none-any.whl (4.4 kB)
Collecting pyproj
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 5.2 MB/s 
Installing collected packages: pyproj, geodist
Successfully installed geodist-0.4 pyproj-3.2.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1.1-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 136 kB/s 
Installing collected packages: catboost
Successfully installed catboost-1.1.1
Mounted at /content/gdrive
