<a href="https://colab.research.google.com/github/danil-zhembrovskii/study_projects/blob/main/1_Lab_2_%D0%93%D0%B5%D0%BE%D0%BA%D0%BE%D0%B4%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install geopandas
# !pip install osmnx
# !pip install shapely
# !pip install geopandas mapclassify  # mapclassify is needed for visualization
# !pip install keplergl

In [None]:
import shapely.geometry
import pandas as pd
import geopandas as gpd
from keplergl import KeplerGl
from geopandas.tools import geocode

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the address layer from its GeoJSON file and show the resulting table
# as the cell output.
ADDRESSES_PATH = '/content/addresses.geojson'
addresses = gpd.read_file(ADDRESSES_PATH)
addresses

In [None]:
# Address components that are combined into a single address string.
columns = ['addr:city', 'addr:street', 'addr:housenumber']


def _join_address_parts(row):
    """Join the non-missing address parts of one row with ', '.

    Returns None when every part is missing, so such a row is still reported
    as missing by a later isna() check on 'full_add'.
    """
    parts = [str(part) for part in row if pd.notna(part)]
    return ', '.join(parts) if parts else None


# A bare ', '.join raises TypeError as soon as one component is None/NaN or not
# a string, so missing parts are skipped and the rest is converted with str().
# The combined address is built before the spatial join so that it travels
# with the other address columns.
addresses['full_add'] = addresses[columns].apply(_join_address_parts, axis=1)
addresses.head()

In [None]:
# Read the building geometries from their GeoJSON file and show the resulting
# table as the cell output.
BUILDINGS_PATH = '/content/buildings_geometries.geojson'
buildings = gpd.read_file(BUILDINGS_PATH)
buildings

In [None]:
# Spatial join that attaches the address attributes to the buildings.
# how='left' keeps every building row; buildings with no intersecting address
# end up with missing values in the address columns and in 'index_right'.
buildings_with_add = gpd.sjoin(
    buildings,
    addresses,
    how='left',
    predicate='intersects',
)
buildings_with_add

In [None]:
# Interactive map of the buildings that received an address from the join,
# i.e. the rows whose 'index_right' is not missing.
has_joined_address = buildings_with_add['index_right'].notna()
buildings_with_add.loc[has_joined_address].explore()

In [None]:
# Smoke test of forward geocoding through Nominatim, the OSM geocoder.
# - provider: name of the geopy geocoder that geopandas instantiates.
# - user_agent: application identifier sent to Nominatim with each request.
#   It must be a custom value: geopy refuses its documentation samples such as
#   'my-application' because they violate the Nominatim usage policy.
# - timeout: seconds to wait for the service to respond. A finite value is used
#   because None disables the timeout, so a stalled request would block forever.
geocode(
    'Санкт-Петербург Садовая 45',
    provider='nominatim',
    user_agent='study-projects-geocoding-lab',
    timeout=10,
)

In [None]:
# Buildings for which the join produced no address ('full_add' is missing).
# .copy() makes the selection an independent frame, so adding a column below
# is not a chained assignment on a slice of buildings_with_add.
buildings_without_add = buildings_with_add[buildings_with_add['full_add'].isna()].copy()

# Centroid of every building, used later as the query point for reverse geocoding.
# NOTE(review): centroids are computed in the layer's own CRS - presumably
# lon/lat degrees; confirm if exact positions matter.
buildings_without_add['centr'] = buildings_without_add['geometry'].centroid

# Only the first 50 rows are geocoded to keep the run short (the original note
# reports an average speed of about 100 rows per minute). The batch is copied
# because later cells add columns to it.
geoc = buildings_without_add.iloc[:50].copy()

In [None]:
# Nominatim (OSM) client used for reverse geocoding in the cells below.
from geopy.geocoders import Nominatim

# user_agent must be a custom application identifier: geopy refuses its
# documentation samples such as 'my-application' for the public Nominatim server.
# timeout is the number of seconds to wait for a response; None would disable
# the timeout entirely and let a stalled request hang the notebook.
geocoder = Nominatim(user_agent='study-projects-geocoding-lab', timeout=10)

In [None]:
def _point_to_query(point):
    """Format a point as the string "<y>, <x>" expected by the reverse geocoder."""
    return ', '.join((str(point.y), str(point.x)))


# Text form of every centroid for reverse geocoding. y comes first, which is
# "latitude, longitude" when the coordinates are lon/lat degrees
# (for example "59.4878264, 30.42343").
geoc['coord'] = geoc['centr'].apply(_point_to_query)
geoc

In [None]:
from geopy.extra.rate_limiter import RateLimiter

# Throttle the calls to at most one request per second; the public Nominatim
# server does not allow faster bulk querying.
_rate_limited_reverse = RateLimiter(geocoder.reverse, min_delay_seconds=1)


def _reverse_address(coord):
    """Reverse-geocode one "lat, lon" string and return only the address text.

    The geocoder returns a location object that also carries the coordinates,
    which cannot be visualized later, so only its .address is kept. None is
    returned when the service finds nothing or the request fails, instead of
    raising AttributeError on a missing result.
    """
    location = _rate_limited_reverse(coord)
    return location.address if location is not None else None


# Reverse geocoding of every centroid in the batch.
geoc['geocode_address'] = geoc['coord'].apply(_reverse_address)

In [None]:
# Columns that only served the join and geocoding steps; they are removed
# before plotting (the centroids in particular are not meant to be visualized).
helper_columns = [
    'index_right',
    'addr:city',
    'addr:street',
    'addr:housenumber',
    'full_add',
    'centr',
    'coord',
]
geoc = geoc.drop(columns=helper_columns)

In [None]:
# Keep only the buildings that were matched to an address by the spatial join
# (rows whose 'index_right' is not missing).
matched_by_join = buildings_with_add['index_right'].notna()
buildings_with_add = buildings_with_add[matched_by_join]

In [None]:
# Show both result sets on one Kepler.gl map as separately named datasets, so
# addresses obtained by reverse geocoding ('geocode') and by the spatial join
# ('address') can be told apart by color.
# The widget is bound to `kepler_map` rather than `map`, so the built-in map()
# stays usable in the rest of the notebook.
kepler_map = KeplerGl(height=500)
kepler_map.add_data(data=geoc.copy(), name='geocode')
kepler_map.add_data(data=buildings_with_add.copy(), name='address')
kepler_map