In [1]:
import time as tm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopy as geopy
from geopy.geocoders import Nominatim
from folium.plugins import HeatMap
import folium # map rendering library
import branca.colormap as cm
import re

print(f"Pandas version: {pd.__version__}")
print(f"geopy version: {geopy.__version__}")

Pandas version: 1.2.4
geopy version: 2.2.0


In [2]:
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 50)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [3]:
mo_geo = r'../data/_mo.geojson'

In [4]:
geolocator = Nominatim(user_agent="ny_explorer")
address = 'Moscow'
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))

moscow_map = folium.Map(location=[latitude, longitude], zoom_start=8)


moscow_map.choropleth(
    geo_data=mo_geo,
    #data=df,
    #columns=['Name_rus', 'Cost per meter'],
    #key_on='feature.properties.NAME',
    fill_color='PuBuGn', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Moscow districts'
)
                                                                                                                

moscow_map

The geograpical coordinate of Moscow are 55.7504461, 37.6174943.




In [5]:
#df = pd.read_csv('Moscow_districts.csv')
df = pd.read_csv('../data/Moscow_districts(old_borders).csv')
df

Unnamed: 0,Name,Name_rus,Population
0,Aeroport (Аэропорт)74775,,
1,Begovoy (Беговой)44385,,
2,Beskudnikovsky (Бескудниковский)74790,,
3,Dmitrovsky (Дмитровский)88931,,
4,Golovinsky (Головинский)102160,,
...,...,...,...
113,Pokrovskoye-Streshnevo (Покровское-Стрешнево)4...,,
114,Severnoe Tushino (Северное Тушино)138533,,
115,Shchukino (Щукино)89454,,
116,Strogino (Строгино)124149,,


In [6]:
df['Population'] = df['Name'].str.extract(r'(\d+)', expand=False).astype(int)
df['Name_rus']   = df['Name'].str.extract(r'([А-Яа-яЁё\-\s]+[А-Яа-яЁё])\)', expand=False)
df['Name'] = df.apply(lambda x : re.search(r'([a-zA-Z\-\s]+[a-zA-Z])', x['Name']).group(0), axis=1)
df

Unnamed: 0,Name,Name_rus,Population
0,Aeroport,Аэропорт,74775
1,Begovoy,Беговой,44385
2,Beskudnikovsky,Бескудниковский,74790
3,Dmitrovsky,Дмитровский,88931
4,Golovinsky,Головинский,102160
...,...,...,...
113,Pokrovskoye-Streshnevo,Покровское-Стрешнево,46707
114,Severnoe Tushino,Северное Тушино,138533
115,Shchukino,Щукино,89454
116,Strogino,Строгино,124149


In [7]:
cost_of_districts = []
with open('../data/costs.txt', encoding="utf8") as fp:
    print("Name of the file: ", fp.name)
    line = fp.readline()
    while line:
        line_to_list = line.split(r' - ')
        districts = line_to_list[0].split(r', ')
        for d in districts:
            lst = []
            lst.append(d)
            lst.append(int(line_to_list[-1].split(r' ')[0])*1000 + int(line_to_list[-1].split(r' ')[1]))
            cost_of_districts.append(lst)
            print(d)
            print(int(line_to_list[-1].split(r' ')[0])*1000 + int(line_to_list[-1].split(r' ')[1]))
        line = fp.readline()

Name of the file:  ../data/costs.txt
Китай-город
380000
Остоженка
373000
Тверской
356700
Якиманка
331586
Арбат
331000
Хамовники
309700
Пресненский
291518
Донской
280000
Красносельский
278100
Замоскворечье
277000
Мещанский
270500
Таганский
270000
Дорогомилово
258100
Беговой
231700
Раменки
221700
Гагаринский
221700
Ломоносовский
221700
Проспект Вернадского
214800
Басманный
207500
Сокол
207200
Аэропорт
207200
Черёмушки
201277
Сокольники
199000
Хорошёвский
197400
Академический
197000
Савёловский
195000
Марьина Роща
194700
Алексеевский
194100
Хорошёво-Мнёвники
189900
Крылатское
189200
Обручевский
189100
Коньково
189000
Филёвский Парк
188900
Преображенское
187400
Нижегородский
185000
Тропарёво-Никулино
180400
Ростокино
179900
Останкинский
179900
Тимирязевский
172700
Бутырский
172500
Войковский
172200
Коптево
172200
Соколиная Гора
171400
Южнопортовый
170000
Щукино
169900
Покровское-Стрешнево
169900
Даниловский
169000
Котловка
169000
Фили-Давыдково
168000
Можайский
168000
Нагорный
167900
Зюзин

In [8]:
temp = pd.DataFrame(cost_of_districts, columns=['Name_rus', 'Cost per meter'])
#temp = temp.sort_values(by=['Name_rus']).reset_index().drop('index', axis=1)
df = df.merge(temp, on='Name_rus', how='left')
df

Unnamed: 0,Name,Name_rus,Population,Cost per meter
0,Aeroport,Аэропорт,74775,207200
1,Begovoy,Беговой,44385,231700
2,Beskudnikovsky,Бескудниковский,74790,132700
3,Dmitrovsky,Дмитровский,88931,132700
4,Golovinsky,Головинский,102160,154400
...,...,...,...,...
113,Pokrovskoye-Streshnevo,Покровское-Стрешнево,46707,169900
114,Severnoe Tushino,Северное Тушино,138533,155200
115,Shchukino,Щукино,89454,169900
116,Strogino,Строгино,124149,167400


In [9]:
latitude = []
longitude = []
geolocator = Nominatim(user_agent="ny_explorer")
for i in df['Name']:
#for i in df['Name_rus']:
    for j in range(3):
        try:
            address = 'Moscow, ' + i + ' district'
            #address = 'Москва, ' + i + ' район'
            print(address)
            location = geolocator.geocode(address)
            latitude.append(location.latitude)
            longitude.append(location.longitude)
            print(location.latitude, location.latitude)
        except Exception as e:
            print(e)
            if j == 2:
                latitude.append(np.nan)
                longitude.append(np.nan)
        else:
            break

df['Latitude'] = pd.Series(latitude, index=df.index)
df['Longitude'] = pd.Series(longitude, index=df.index)

Moscow, Aeroport district
55.80050395 55.80050395
Moscow, Begovoy district
55.783367999999996 55.783367999999996
Moscow, Beskudnikovsky district
55.8637386 55.8637386
Moscow, Dmitrovsky district
56.36614795 56.36614795
Moscow, Golovinsky district
55.851951850000006 55.851951850000006
Moscow, Khoroshyovsky district
55.7789309 55.7789309
Moscow, Khovrino district
55.8693575 55.8693575
Moscow, Koptevo district
55.8301455 55.8301455
Moscow, Levoberezhny district
55.8656206 55.8656206
Moscow, Savyolovsky district
55.7997689 55.7997689
Moscow, Sokol district
55.803462800000005 55.803462800000005
Moscow, Timiryazevsky district
55.8258166 55.8258166
Moscow, Vostochnoye Degunino district
55.87587205 55.87587205
Moscow, Voykovsky district
55.82807015 55.82807015
Moscow, Zapadnoye Degunino district
55.87054815 55.87054815
Moscow, Arbat district
55.751199 55.751199
Moscow, Basmanny district
55.7672809 55.7672809
Moscow, Khamovniki district
55.729229 55.729229
Moscow, Krasnoselsky district
55.77744

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 118 entries, 0 to 117
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            118 non-null    object 
 1   Name_rus        118 non-null    object 
 2   Population      118 non-null    int64  
 3   Cost per meter  118 non-null    int64  
 4   Latitude        117 non-null    float64
 5   Longitude       117 non-null    float64
dtypes: float64(2), int64(2), object(2)
memory usage: 6.5+ KB


In [12]:
df = df.fillna(0)

In [13]:
geolocator = Nominatim(user_agent="ny_explorer")
address = 'Moscow'
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))

moscow_map = folium.Map(location=[latitude, longitude], zoom_start=10)


for i in range(len(df)):
    folium.Circle(
        radius=50,
        location=[df.Latitude[i], df.Longitude[i]],
        popup=df.Name[i],
        color='crimson',
        fill=True,
    ).add_to(moscow_map)

HeatMap(df[['Latitude', 'Longitude']], radius=10, blur=10).add_to(moscow_map)
    
moscow_map

The geograpical coordinate of Moscow are 55.7504461, 37.6174943.


In [14]:
geolocator = Nominatim(user_agent="ny_explorer")
address = 'Moscow'
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))

moscow_map = folium.Map(location=[latitude, longitude], zoom_start=10)

choropleth = folium.Choropleth(
    geo_data=mo_geo,
    data=df,
    columns=['Name_rus', 'Cost per meter'],
    key_on='properties.NAME',
    fill_color='YlGnBu', 
    fill_opacity=0.7, 
    line_opacity=0.9,
    highlight=True,
    legend_name='Moscow districts'
)

choropleth.add_to(moscow_map)
    
style_function = "font-size: 15px; font-weight: bold"
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['NAME'], style=style_function, labels=False))
    
moscow_map

The geograpical coordinate of Moscow are 55.7504461, 37.6174943.
