In [56]:
import osmium 
import pandas as pd
import geopandas as gpd
import glob
import os

[GeoFabric Russian Federal Districts](https://download.geofabrik.de/russia.html)

In [60]:
df = pd.read_csv('found_cut.csv')

groups = [
    'amenity',
    'place',
    'building',
    'craft',
    'healthcare',
    'historic',
    'leisure',
    'office',
    'shop',
    'tourism',    
]

df2 = df[df['key'].isin(groups)].drop_duplicates()

def full_tag_name(row):
    key, value = row['key'], row['value']
    return f'{key}::{value}'

tag_names = df2.apply(full_tag_name, axis=1).tolist()
tag_list = [{i:[j]} for i, j in list(zip(df2['key'].tolist(), df2['value'].tolist()))]
tags = dict(zip(tag_names, tag_list))

sorted([(i,j) for i,j in zip(df2['key'].tolist(), df2['value'].tolist())], key=lambda x:x[0])

[('amenity', 'bar'),
 ('amenity', 'pub'),
 ('amenity', 'restaurant'),
 ('amenity', 'biergarten'),
 ('amenity', 'cafe'),
 ('amenity', 'fast_food'),
 ('amenity', 'food_court'),
 ('amenity', 'ice_cream'),
 ('amenity', 'college'),
 ('amenity', 'driving_school'),
 ('amenity', 'kindergarten'),
 ('amenity', 'language_school'),
 ('amenity', 'library'),
 ('amenity', 'toy_library'),
 ('amenity', 'training'),
 ('amenity', 'music_school'),
 ('amenity', 'school'),
 ('amenity', 'university'),
 ('amenity', 'bicycle_parking'),
 ('amenity', 'bicycle_repair_station'),
 ('amenity', 'bicycle_rental'),
 ('amenity', 'boat_rental'),
 ('amenity', 'boat_sharing'),
 ('amenity', 'bus_station'),
 ('amenity', 'car_rental'),
 ('amenity', 'car_sharing'),
 ('amenity', 'car_wash'),
 ('amenity', 'compressed_air'),
 ('amenity', 'vehicle_inspection'),
 ('amenity', 'charging_station'),
 ('amenity', 'ferry_terminal'),
 ('amenity', 'fuel'),
 ('amenity', 'grit_bin'),
 ('amenity', 'motorcycle_parking'),
 ('amenity', 'parking'

In [None]:
def get_osmium_script(initial_file, output_file, tags):
    
    '''Функция возвращает скрипт osmium-tool обрезающую pbf по листу тегов
    
    initial_file -- путь к целому pbf файлу
    output_file --  название/путь файла обрезанного pbf файла 
    tags -- словарь с osm тегами
    '''
    
    osmium_string = f'osmium tags-filter {initial_file} '
    for name, tag in tags.items():
        for key, value_list in tag.items():
            if len(value_list) == 1:
                osmium_string += r'n/{}={} '.format(key, value_list[0])
            else:
                for value in value_list:
                    osmium_string += r'n/{}={} '.format(key, value)
    osmium_string += f'-o {output_file}'
    
    return osmium_string

[GeoFabric Russian Federal Districts](https://download.geofabrik.de/russia.html)

In [None]:
# Собираем лист с путями к цельным pbf файлам (скачаны с GeoFabric по ссылке выше)
input_files = glob.glob('/home/Irina/osm/*.osm.pbf')
# Собираем скрипт для каждого файла из списка
# Исполняем их и сохраняем обрезанные файлы в отдельную дерикторию fed_districts
for file in input_files:
    file_name = file[file.rfind('/') + 1:]
    os.system(get_osmium_script(file, 'fed_districts/cutted-{}'.format(file_name), tags))

In [61]:
# Создаем SimpleHandler для парсинга нужной информации из обрезанных pbf
class OSMHandler(osmium.SimpleHandler):
    def __init__(self):
        super(OSMHandler, self).__init__()
        self.infrastructure = []

    def node(self, o):
        for name, tag in tags.items():
            for key, value_list in tag.items():
                if len(value_list) == 1:
                    if o.tags.get(key) == value_list[0]:
                        self.infrastructure.append(
                            [name, o.location.lon, o.location.lat]
                        )
                else:
                    for value in value_list:
                        if o.tags.get(key) == value:
                            self.infrastructure.append(
                                [name, o.location.lon, o.location.lat]
                            )

In [62]:
# Названия обрезанных pbf файлов
osm_files = glob.glob('fed_districts/*.osm.pbf')

# Инициализируем словарик геофреймов
districts_gdfs = {}

# Итерируем над обрезанными pbf файлами
# Парсим из них инфо по тегам
# Записываем в геофрейм и крепим к словарю
for district in osm_files:
    handler = OSMHandler()
    handler.apply_file(district, locations=True)
    df = pd.DataFrame(handler.infrastructure, columns=['type', 'lon', 'lat'])
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']))
    gdf.crs = 'EPSG:4326'
    districts_gdfs[district.replace('fed_districts/', '').replace('-latest.osm.pbf', '')] = gdf

In [63]:
# Сохраняем все в csv
for name, gdf in districts_gdfs.items():
    gdf.to_csv('fed_districts/' + name + '.csv')

In [64]:
objs = gpd.read_file('fed_districts/cutted-central-fed-district.csv')

In [65]:
objs['type'].drop_duplicates().tolist()

['shop::supermarket',
 'place::city',
 'place::town',
 'place::village',
 'place::hamlet',
 'amenity::cinema',
 'place::suburb',
 'tourism::attraction',
 'amenity::place_of_worship',
 'building::church',
 'tourism::museum',
 'tourism::hotel',
 'amenity::pharmacy',
 'amenity::theatre',
 'amenity::cafe',
 'amenity::clock',
 'historic::memorial',
 'amenity::bank',
 'historic::monument',
 'amenity::restaurant',
 'shop::car_repair',
 'shop::car_parts',
 'amenity::post_office',
 'amenity::police',
 'amenity::telephone',
 'historic::city_gate',
 'place::locality',
 'place::neighbourhood',
 'historic::boundary_stone',
 'amenity::atm',
 'amenity::ferry_terminal',
 'amenity::fast_food',
 'amenity::bar',
 'amenity::post_box',
 'shop::convenience',
 'historic::locomotive',
 'shop::beauty',
 'shop::clothes',
 'shop::books',
 'amenity::doctors',
 'office::telecommunication',
 'shop::mobile_phone',
 'shop::kiosk',
 'amenity::clinic',
 'shop::electronics',
 'shop::doityourself',
 'shop::cosmetics',
 '

In [66]:
objs

Unnamed: 0,field_1,type,lon,lat,geometry
0,0,shop::supermarket,37.2075319,55.9951147,POINT (37.2075319 55.9951147)
1,1,place::city,35.9208284,56.858675,POINT (35.9208284 56.858675)
2,2,place::town,34.5720732,57.5883648,POINT (34.5720732 57.5883648)
3,3,place::city,32.0461261,54.7814057,POINT (32.0461261 54.7814057)
4,4,place::city,34.3668288,53.2423778,POINT (34.3668288 53.2423778)
...,...,...,...,...,...
416257,416257,amenity::drinking_water,36.8955369,54.9067184,POINT (36.8955369 54.9067184)
416258,416258,amenity::telephone,36.9077863,54.9033208,POINT (36.9077863 54.9033208)
416259,416259,tourism::information,36.8678973,54.911728,POINT (36.8678973 54.911728)
416260,416260,amenity::telephone,36.8678119,54.9117556,POINT (36.8678119 54.9117556)
