In [3]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point
import geopy.distance
import geocoder
from tqdm import tqdm


In [4]:
# Load the ATM table; later cells read its 'id', 'lat' and 'long' columns.
# NOTE(review): the preview below also shows an 'Unnamed: 0' column, presumably a
# row index that was written into the CSV by an earlier save - confirm whether it is needed.
concat_df = pd.read_csv('concat_df.csv')
concat_df.head()

Unnamed: 0.1,Unnamed: 0,id,atm_group,address,address_rus,lat,long,target
0,0,8526.0,32.0,"EMELYANOVA,34 Y-SAKHALINSK","улица А.О. Емельянова, 34, Южно-Сахалинск, Сах...",46.940995,142.738319,0.0115
1,1,8532.0,32.0,"KOMSOMOLSKAYA,259B Y.SAKHALINSK","Комсомольская улица, 259, Южно-Сахалинск, Саха...",46.937353,142.753348,0.02971
2,2,8533.0,32.0,"KOMMUN. PR., 32 YUZHNO SAKHAL","Коммунистический проспект, Южно-Сахалинск, Сах...",46.959413,142.741113,0.00954
3,3,8684.0,32.0,"LENINGRADSKIY PR.,76A MOSCOW","Ленинградский проспект, 76А, Москва, Россия, 1...",55.805827,37.515146,-0.094035
4,4,37.0,32.0,"GVARDEYSKAYA PL., 2 NORILSK","Гвардейская площадь, 2, Норильск, Красноярский...",69.343541,88.211228,0.079277


In [9]:
# One entry per feature group: (human-readable name, OSM tag key, accepted tag values).
# Keeping name and tag together guarantees tag_list and tag_names stay index-aligned.
_TAG_SPECS = [
    ('atms', 'amenity', ['atm']),
    ('banks', 'amenity', ['bank']),
    ('hospitals', 'amenity', ['hospital','clinic']),
    ('MFC', 'government', ['public_service']),
    ('apartments', 'building', ['apartments']),
    ('airport', 'aeroway', ['aerodrome']),
    ('railway', 'railway', ['station']),
]

# List of OSM tags: one single-key dict {tag key: accepted values} per feature group.
tag_list = [{osm_key: osm_values} for _, osm_key, osm_values in _TAG_SPECS]

# Human-readable tag names, in the same order as tag_list.
tag_names = [label for label, _, _ in _TAG_SPECS]



In [19]:
import overpy
import time
# Shared Overpass API client; it is passed explicitly to query() / process_query().
api = overpy.Overpass()

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

def query(api,radius,lat,lon,pause=1,retry_pause=30):
    """Run the Overpass query for all feature groups around one point.

    The query asks for ``node`` elements within ``radius`` of (``lat``, ``lon``)
    that carry one of the tags of interest (ATMs, banks, hospitals/clinics,
    public services, apartment buildings, aerodromes, railway stations).

    NOTE(review): only ``node`` elements are requested; objects mapped as ways or
    relations (presumably common for buildings and aerodromes) are not returned -
    confirm this is intended.

    Parameters
    ----------
    api : object exposing ``query(str)`` (an ``overpy.Overpass`` client in this notebook).
    radius : search radius substituted into the ``around:`` filter.
    lat, lon : coordinates of the centre point.
    pause : seconds to sleep before the first attempt (throttles consecutive calls).
    retry_pause : seconds to sleep before the single retry after a failed attempt.

    Returns
    -------
    Whatever ``api.query`` returns.

    Raises
    ------
    Any exception raised by the second attempt; the first failure is swallowed
    on purpose so that one transient error does not abort a long run.
    """
    # Build the query text once so that the first attempt and the retry can never diverge.
    overpass_ql = """
    (
        node["amenity"="atm"](around:{radius},{lat}, {lon});
        node["amenity"="bank"](around:{radius},{lat}, {lon});
        node["amenity"~"^(hospital|clinic)$"](around:{radius},{lat}, {lon});
        node["government"="public_service"](around:{radius},{lat}, {lon});
        node["building"="apartments"](around:{radius},{lat}, {lon});
        node["aeroway"="aerodrome"](around:{radius},{lat}, {lon});
        node["railway"="station"](around:{radius},{lat}, {lon});
    );
    out body;
            """.format(radius=radius,lat=lat,lon=lon)

    try:
        time.sleep(pause)
        return api.query(overpass_ql)
    except Exception:
        # Best-effort recovery: back off, then retry exactly once and let a
        # second failure propagate to the caller.
        time.sleep(retry_pause)
        return api.query(overpass_ql)

# res = query(api,10000,55.805827,37.515146)

def find_tag(node):
    """Return the human-readable group name for an OSM node.

    Walks ``tag_list`` in order; an entry matches when its (single) tag key is
    present in ``node.tags`` and the node's value is one of the accepted values.
    The name at the same position in ``tag_names`` is returned for the first match.

    Raises
    ------
    ValueError
        If no entry of ``tag_list`` matches the node's tags.
    """
    for position, spec in enumerate(tag_list):
        # Each spec is a one-key dict; only its first key is considered.
        osm_key = list(spec)[0]
        if osm_key not in node.tags:
            continue
        if node.tags[osm_key] in spec[osm_key]:
            return tag_names[position]
    raise ValueError('tag not found ',node.tags)

def process_query(idx,api,lat,lon,radius=10000):
    """Build the proximity feature row for one location.

    Queries the nodes around (``lat``, ``lon``) and, for every name in
    ``tag_names``, computes:

    * ``<name>_min_dist`` - geodesic distance (``.m`` of the geopy distance) to the
      closest matching node, or ``radius`` when no node of that group was returned;
    * ``<name>_250m`` / ``<name>_500m`` - number of matching nodes within 250 / 500.

    Parameters
    ----------
    idx : identifier stored under the ``'id'`` key of the result.
    api : client forwarded to ``query``.
    lat, lon : coordinates of the location.
    radius : search radius forwarded to ``query``; also used as the placeholder
        for ``*_min_dist`` (default 10000, the value previously hard-coded twice).

    Returns
    -------
    dict with three keys per tag name followed by ``'id'``.
    """
    res = query(api,radius,lat,lon)

    # Same key order as before: min_dist, 250m, 500m for each group, then the id.
    row = {}
    for name in tag_names:
        row[name+'_min_dist'] = radius
        row[name+'_250m'] = 0
        row[name+'_500m'] = 0
    row['id'] = idx

    origin = (lat,lon)  # loop-invariant, so built once
    for node in res.nodes:
        distance = geopy.distance.geodesic(origin, (node.lat,node.lon)).m
        try:
            tag = find_tag(node)
        except ValueError:
            # Node matches none of the configured groups: report it and move on.
            print(node.tags)
            continue

        if distance <= 250:
            row[tag+'_250m'] += 1
        if distance <= 500:
            row[tag+'_500m'] += 1
        if distance < row[tag+'_min_dist']:
            row[tag+'_min_dist'] = distance

    return row

In [11]:
# Smoke test on a single point: the coordinates are those of the id 8684 row shown in
# the concat_df preview. The id is passed as a string here, whereas the preview shows ids as floats.
process_query('8684',api,55.805827,37.515146)

{'atms_min_dist': 25.74102828675033,
 'atms_250m': 5,
 'atms_500m': 15,
 'banks_min_dist': 23.148759328112124,
 'banks_250m': 7,
 'banks_500m': 16,
 'hospitals_min_dist': 635.6151703439045,
 'hospitals_250m': 0,
 'hospitals_500m': 0,
 'MFC_min_dist': 987.5944070936766,
 'MFC_250m': 0,
 'MFC_500m': 0,
 'apartments_min_dist': 10000,
 'apartments_250m': 0,
 'apartments_500m': 0,
 'airport_min_dist': 10000,
 'airport_250m': 0,
 'airport_500m': 0,
 'railway_min_dist': 62.61923742783726,
 'railway_250m': 1,
 'railway_500m': 1,
 'id': '8684'}

In [22]:
# Build one feature row per ATM. The cell is resumable: results already collected in
# `rows` by an interrupted run are kept, while a fresh kernel starts from an empty list
# (previously `rows` was never initialised, so Restart & Run All raised NameError).
if 'rows' not in globals():
    rows = []

# Track processed ids in a set instead of rebuilding a list on every iteration.
seen_ids = {done['id'] for done in rows}

for _, atm in tqdm(concat_df.iterrows(), total=concat_df.shape[0]):
    atm_id = atm['id']
    if atm_id in seen_ids:
        continue
    rows.append(process_query(atm_id,api,atm['lat'],atm['long']))
    seen_ids.add(atm_id)

100%|█████████████████████████████████████| 8461/8461 [5:43:05<00:00,  2.43s/it]


In [23]:
# Turn the collected feature rows into a frame and persist it for later use.
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# extra unnamed first column - confirm that readers of this file expect it.
adj_df = pd.DataFrame(rows)
adj_df.to_csv('adj_df_additional.csv')
adj_df.head()

Unnamed: 0,atms_min_dist,atms_250m,atms_500m,banks_min_dist,banks_250m,banks_500m,hospitals_min_dist,hospitals_250m,hospitals_500m,MFC_min_dist,...,apartments_min_dist,apartments_250m,apartments_500m,airport_min_dist,airport_250m,airport_500m,railway_min_dist,railway_250m,railway_500m,id
0,551.616998,0,0,610.460164,0,0,745.449695,0,0,10000.0,...,10000.0,0,0,10000.0,0,0,1969.853605,0,0,8526.0
1,349.931398,0,1,55.020161,2,2,657.157907,0,0,10000.0,...,10000.0,0,0,10000.0,0,0,2146.778178,0,0,8532.0
2,165.2685,1,2,40.854588,2,5,224.625626,1,3,10000.0,...,10000.0,0,0,10000.0,0,0,1186.015602,0,0,8533.0
3,25.741028,5,15,23.148759,7,16,635.61517,0,0,987.594407,...,10000.0,0,0,10000.0,0,0,62.619237,1,1,8684.0
4,463.12883,0,2,422.656199,0,1,152.98347,1,1,10000.0,...,10000.0,0,0,10000.0,0,0,3299.372688,0,0,37.0
