In [1]:
# importing libraries

import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from time import time, sleep
from tqdm.notebook import tqdm

In [2]:
# configuring geolocator

geolocator = Nominatim(user_agent="echo_golf_oscar_romeo") # use your user_agent

In [3]:
# geocoder itself

def geocoder(row):
    try:
        point = geolocator.geocode(row).point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except:
        return pd.Series({'Latitude': None, 'Longitude': None})
    
    
# rewriting house number and block/letter

def house_number(x):
    a = x.replace(' ', '').replace(',', '').replace('.', '').replace('№', '')
    if (a.find('корпус') or a.find('стр')) != -1:
        a = a.replace('корпус', 'к').replace('стр', 'с') 
    if (a.find('к') or a.find('с')) != -1 and a[-1].isalpha():
        a = a.replace('к', '').replace('с', '')

    return a

    
# rewriting address

def correct_address(buildings, city='any'):
    if city != 'any':
        buildings = buildings.loc[buildings["formalname_city"] == city]
#     city
    buildings.loc[:, 'address_upd'] = buildings.loc[:, 'formalname_city'] + ', ' + buildings.loc[:, 'formalname_street']
    
    for row in buildings.iterrows():
        try:
#             street
            if row[1][11] == ('ул' or 'Ул') and row[1][12][0].isdigit():
                buildings.loc[row[0], 'address_upd'] += ' улица'
            elif row[1][11] == 'мкр' and row[1][12][0].isdigit():
                buildings.loc[row[0], 'address_upd'] += ' микрорайон'
            elif row[1][11] == 'кв-л' and row[1][12][0].isdigit():
                buildings.loc[row[0], 'address_upd'] += ' квартал'

#             house number and block/letter
            if row[1][17].find(' д. ') != -1:
                buildings.loc[row[0], 'address_upd'] += (', ' + house_number(row[1][17][(row[1][17].find(' д. ') + 4):]))
            else:
                buildings.loc[row[0], 'address_upd'] += (', ' + row[1][13])
        except: buildings.loc[row[0], 'address_upd'] = row[1][17]
            
    return buildings

In [4]:
# your data

buildings = pd.read_csv(r"tver.csv", sep=';') # specify the directory of data

In [5]:
# let's geocode it!
tqdm.pandas()
start_time = time()
print('There are {} addresses to geocode!'.format(
    buildings.shape[0]))
buildings = correct_address(buildings, city='Тверь')
coor = buildings.progress_apply(lambda x: geocoder(x['address_upd']), axis=1)
print("--- %s seconds ---" % round(time() - start_time, 2))
print("{}% of addresses were geocoded!".format(
    round((1 - sum(np.isnan(coor["Latitude"])) / len(coor)) * 100, 2)))
buildings["Latitude"] = coor.Latitude
buildings["Longitude"] = coor.Longitude

There are 2800 addresses to geocode!


  0%|          | 0/2800 [00:00<?, ?it/s]

--- 1402.77 seconds ---
93.0% of addresses were geocoded!


In [6]:
# saving your data

buildings.to_csv(r"tver_geocoded.csv", encoding='utf-8') # specify the directory to store the result