In [3]:
import csv
import os

import httpx
import pandas as pd
from tqdm import tqdm

# Module-level HTTP client; get_geo_data sends every geocoder request through it.
client = httpx.Client()

In [244]:
# API key for the Yandex Maps geocoder. It is read from the
# YANDEX_GEOCODER_API_KEY environment variable so the real key does not have to
# be stored in the notebook; the original placeholder remains the fallback.
TOKEN = os.environ.get('YANDEX_GEOCODER_API_KEY', 'token')
# Running total of geocoder lookups attempted; the batch cell adds the `added`
# count returned by add_to_csv to it after every run.
usage = 0

In [179]:
def get_geo_data(address):
    """Geocode one address with the Yandex Maps geocoder.

    Args:
        address: free-form address text to look up.

    Returns:
        The ``Point`` object of the first geocoder match, taken from
        ``response.GeoObjectCollection.featureMember[0].GeoObject.Point``.
        dict_to_csv reads its ``'pos'`` entry.

    Raises:
        httpx.HTTPStatusError: the service answered with a 4xx/5xx status.
        IndexError: the service returned no match for the address.
        KeyError: the response JSON does not have the expected structure.
    """
    # Pass the query as `params` so the address is percent-encoded; characters
    # such as '&', '#' or '+' in an address would otherwise corrupt the URL.
    res = client.get(
        "https://geocode-maps.yandex.ru/1.x/",
        params={"apikey": TOKEN, "format": "json", "geocode": address},
    )
    # Surface HTTP errors (bad key, quota exceeded, ...) directly instead of as
    # an opaque KeyError while digging through an error payload.
    res.raise_for_status()

    members = res.json()['response']['GeoObjectCollection']['featureMember']
    if not members:
        # Same exception type as indexing the empty list, with a useful message.
        raise IndexError(f"No geocoding results for address: {address!r}")
    return members[0]['GeoObject']['Point']

In [192]:
def dict_to_csv(d, filename='data/geo.csv'):
    """Write the geocoding cache to a ';'-separated CSV file.

    Args:
        d: mapping of ``(index, address)`` tuples to a geocoder result, i.e. a
            dict whose ``'pos'`` value is a string holding two
            whitespace-separated coordinates. Any other value (for example the
            ``"nan"`` marker add_to_csv stores for failed lookups) is written
            as ``nan;nan``.
        filename: destination path; an existing file is overwritten.

    The first coordinate of ``'pos'`` goes to the 'Широта' column and the
    second to 'Долгота'.
    NOTE(review): the sample output in this notebook shows ~37 under 'Широта'
    for Moscow addresses, so 'pos' appears to be "longitude latitude" and the
    two labels are effectively swapped. The layout is kept unchanged because
    csv_to_dct and previously written files depend on it.
    """
    # The context manager guarantees the file is flushed and closed. UTF-8
    # matches the default encoding pandas.read_csv uses to read the file back.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        # csv.writer quotes fields that contain ';', quotes or line breaks, so
        # unusual addresses cannot break the row structure.
        writer = csv.writer(f, delimiter=';', lineterminator='\n')
        writer.writerow(["Индекс", "Местоположение", "Широта", "Долгота"])
        for (ind, address), coords in d.items():
            try:
                x, y = coords['pos'].split()
            except (AttributeError, KeyError, TypeError, ValueError):
                # Missing, malformed, or failed-lookup entry.
                x, y = 'nan', 'nan'
            writer.writerow([str(c) for c in (ind, address, x, y)])


def csv_to_dct(filename='data/geo.csv'):
    """Load the geocoding cache from a ';'-separated CSV file.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A dict keyed by ``(Индекс, Местоположение)`` tuples. Each value is
        ``{'pos': "<Широта> <Долгота>"}``, the two coordinate columns joined by
        a single space.
    """
    table = pd.read_csv(filename, delimiter=';')
    records = table.set_index(['Индекс', 'Местоположение']).to_dict('index')

    # Collapse each row's two coordinate columns into one 'pos' string.
    return {
        key: {'pos' : f"{row['Широта']} {row['Долгота']}"}
        for key, row in records.items()
    }

In [213]:
def add_to_csv(df, index, filename='data/geo.csv'):
    """Geocode the not-yet-cached addresses of a slice of `df` and save the cache.

    The cache is loaded with csv_to_dct, every ``(index, address)`` pair of the
    selected rows that is missing from it is looked up with get_geo_data, and
    the merged cache is written back with dict_to_csv.

    Args:
        df: DataFrame with a 'Местоположение' (address) column.
        index: selector applied to that column to pick the rows to process.
        filename: cache file that is read first and rewritten afterwards.

    Returns:
        ``(added, failed)``: the number of lookups attempted (failed ones
        included) and the list of addresses whose lookup raised.

    Failed lookups are stored as the string "nan" and therefore written as
    ``nan;nan``; once written they are present in the cache and are not retried
    by later calls.
    """
    geo_data = csv_to_dct(filename)

    added = 0
    failed = []

    df_segment = df['Местоположение'][index].reset_index()
    pairs = zip(df_segment['index'], df_segment['Местоположение'])

    try:
        # zip() has no length, so pass the total explicitly to get a real
        # progress bar.
        for ind, address in tqdm(pairs, total=len(df_segment)):
            if geo_data.get((ind, address)) is None:
                # Counted before the request: a failed call is still an attempt.
                added += 1
                try:
                    geo_data[(ind, address)] = get_geo_data(address)
                except Exception as ex:
                    print(ex.args)
                    failed.append(address)
                    geo_data[(ind, address)] = "nan"
    finally:
        # Persist even if the loop is interrupted (e.g. KeyboardInterrupt), so
        # completed lookups are not lost.
        dict_to_csv(geo_data, filename)
    return added, failed

In [43]:
# Load the flats table; the file uses '^' as its field separator.
df = pd.read_csv('flats.csv', sep='^')

In [273]:
# Slice of `df` rows to geocode in this run.
segment = range(1700, 2534)

added, failed = add_to_csv(df, segment)
usage = usage + added

# One item per output line: the totals first, then every failed address.
report_lines = [f'Usage: {usage}', f'Added: {added}', 'Failed:', *failed]
print(*report_lines, sep='\n')

834it [00:03, 220.00it/s] 

Usage: 842
Added: 34
Failed:





In [109]:
# Load the ';'-separated geocoding file (the default path used by add_to_csv).
geo = pd.read_csv('data/geo.csv', delimiter=';')

# Preview the first rows.
geo.head()

Unnamed: 0,Индекс,Местоположение,Широта,Долгота
0,0,"Москва, Юровская ул., 19",37.372036,55.899219
1,1,"Москва, Онежская ул., 12",37.521426,55.84964
2,2,"Москва, Бобруйская ул., 28",37.39815,55.739032
3,3,"Москва, ул. Искры, 13к1",37.664051,55.86257
4,4,"Москва, Голиковский пер., 5",37.627499,55.740107


In [110]:
# Exchange the contents of the two coordinate columns. Both columns are read
# before either one is overwritten. Re-running this cell swaps them back.
new_lat_col, new_lon_col = geo['Долгота'], geo['Широта']
geo['Широта'] = new_lat_col
geo['Долгота'] = new_lon_col

In [112]:
# Export the address and the two coordinate columns as a ';'-separated file.
export_columns = ['Местоположение', 'Долгота', 'Широта']
geo[export_columns].to_csv('geo.csv', sep=';')