In [113]:
import json
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [47]:
# Bounding box around Moscow, interpolated into the Overpass query below.
# The values read as (south, west, north, east): latitudes ~55-56, longitudes ~37-38.
msk_bbox = (55.3085, 36.8811, 56.1941, 38.3024)

# Public Overpass API endpoint; HTTPS so the query and response are not sent in clear text.
overpass_url = "https://overpass-api.de/api/interpreter"

# Overpass QL: every node tagged station=subway inside the bounding box, as JSON.
overpass_query = '''
[out:json];
(node["station"="subway"]({},{},{},{});
);
out center;
'''.format(*msk_bbox)

# Percent-encoded URL of the Russian Wikipedia article listing Moscow Metro
# stations. Adjacent literals are concatenated by the parser, so the URL can be
# wrapped across lines without stripping newlines at runtime.
wiki_page = (
    'https://ru.wikipedia.org/wiki/'
    '%D0%A1%D0%BF%D0%B8%D1%81%D0%BE%D0%BA'
    '_%D1%81%D1%82%D0%B0%D0%BD%D1%86%D0%B8%D0%B9'
    '_%D0%9C%D0%BE%D1%81%D0%BA%D0%BE%D0%B2%D1%81%D0%BA%D0%BE%D0%B3%D0%BE'
    '_%D0%BC%D0%B5%D1%82%D1%80%D0%BE%D0%BF%D0%BE%D0%BB%D0%B8%D1%82%D0%B5%D0%BD%D0%B0'
)

## OSM stations

In [71]:
# Run the Overpass query. Without a timeout `requests` waits forever on a
# stalled connection; the limit is generous because Overpass queries can be slow.
response = requests.get(overpass_url,
                        params={'data': overpass_query},
                        timeout=180)
# Fail here on a 4xx/5xx reply instead of trying to decode an error page as JSON.
response.raise_for_status()
data = response.json()

In [107]:
# Flatten the Overpass elements into one table: the top-level element fields
# joined with each element's ``tags`` dict expanded into columns.
osm_fields = ['name', 'id', 'lon', 'lat', 'colour', 'network', 'operator']
elements = data['elements']
tag_columns = pd.DataFrame([element['tags'] for element in elements])
stations_osm = pd.DataFrame(elements).join(tag_columns).drop('tags', axis=1)

# Keep only the fields of interest and prefix them with ``osm_`` to mark their source.
stations_osm = stations_osm[osm_fields]
stations_osm.columns = ['osm_' + field for field in osm_fields]

In [109]:
# Preview the first rows of the OSM station table.
stations_osm.head()

Unnamed: 0,osm_name,osm_id,osm_lon,osm_lat,osm_colour,osm_network,osm_operator
0,Медведково,60660466,37.66155,55.887177,orange,Московский метрополитен,ГУП «Московский метрополитен»
1,Бабушкинская,60660469,37.664184,55.869625,orange,Московский метрополитен,ГУП «Московский метрополитен»
2,Партизанская,68916801,37.751001,55.788533,blue,Московский метрополитен,ГУП «Московский метрополитен»
3,Семёновская,68937012,37.721361,55.783307,blue,Московский метрополитен,ГУП «Московский метрополитен»
4,Филёвский парк,241158259,37.483423,55.739457,lightblue,Московский метрополитен,ГУП «Московский метрополитен»


In [110]:
# 235 stations, 12 lines

## Wiki stations

In [407]:
# Download the Wikipedia article and collect every <table> element in it.
wiki_response = requests.get(wiki_page, timeout=30)
# Stop on a 4xx/5xx reply; otherwise an error page would be parsed as if it were the article.
wiki_response.raise_for_status()
html = wiki_response.text
bs = BeautifulSoup(html, "lxml")
# `find_all` with a tag name replaces the deprecated `findAll` + lambda form.
table = bs.find_all('table')

In [408]:
# Rows of the fourth <table> on the page (index 3), which the parsing cell
# below treats as the station list.
# NOTE(review): the index is tied to the article's current layout; re-check it
# if the parsing step starts failing.
rows = table[3].find_all('tr')

In [409]:
# Column order of the resulting Wikipedia stations table.
wiki_columns = ['line_id', 'line_name', 'station_order',
                'station_name', 'change_ids', 'change_descs',
                'depth', 'lon', 'lat']


def _parse_station_row(row):
    """Extract one station record from a ``<tr>`` of the stations table.

    Cells are read by position: cell 0 holds the line spans, cell 1 the
    station name, cell 3 the interchanges, cell 4 the depth and cell 6 the
    coordinates link.

    Text nodes are converted with ``str()`` so the record holds plain strings
    rather than Beautiful Soup ``NavigableString`` objects, which keep a
    reference to the whole parse tree.

    Parameters
    ----------
    row : bs4.element.Tag
        A table row (``<tr>``) of the stations table.

    Returns
    -------
    dict
        One value per name in ``wiki_columns``.

    Raises
    ------
    IndexError
        If the row has fewer cells or spans than the positions read here.
    KeyError
        If an expected ``title`` / ``data-lat`` / ``data-lon`` attribute is absent.
    """
    tds = row.find_all('td')

    # Metro line id, line name (stored in the span's ``title``) and station order.
    line_info = tds[0].find_all('span')
    line_id = str(line_info[0].contents[0])
    line_name = line_info[1]['title']
    station_order = str(line_info[2].contents[0])

    # Station name: nested inside a <span> when the cell has one, otherwise
    # inside the cell's first child.
    name_span = tds[1].find('span')
    name_holder = name_span if name_span is not None else tds[1]
    station_name = str(name_holder.contents[0].contents[0])

    # Interchanges to other lines: spans come in (id, description) pairs.
    changes = tds[3].find_all('span')
    if not changes:
        change_ids = None
        change_descs = 'нет пересадки'
    else:
        pair_starts = range(0, len(changes), 2)
        change_ids = [str(changes[k].contents[0]) for k in pair_starts]
        change_descs = [changes[k + 1]['title'] for k in pair_starts]

    # Station depth: first child of the cell (presumably a text node).
    depth = str(tds[4].contents[0])

    # Station coordinates live on the first link of the cell, when there is one.
    coords = tds[6].find('a')
    if coords is not None:
        lat = coords['data-lat']
        lon = coords['data-lon']
    else:
        lat = None
        lon = None

    return {
        'line_id': line_id,
        'line_name': line_name,
        'station_order': station_order,
        'station_name': station_name,
        'change_ids': change_ids,
        'change_descs': change_descs,
        'depth': depth,
        'lon': lon,
        'lat': lat,
    }


# The first row is skipped (presumably the table header); every other row is a station.
records = [_parse_station_row(row) for row in rows[1:]]
df = pd.DataFrame(records, columns=wiki_columns)

In [411]:
# Preview the first rows of the Wikipedia station table.
df.head()

Unnamed: 0,line_id,line_name,station_order,station_name,change_ids,change_descs,depth,lon,lat
0,1,Сокольническая линия,1,Бульвар Рокоссовского,[14],[Переход на станцию Бульвар Рокоссовского Моск...,−8,37.7342,55.8148
1,1,Сокольническая линия,2,Черкизовская,[14],[Переход на станцию Локомотив Московского цент...,−9,37.7448,55.8038
2,1,Сокольническая линия,3,Преображенская площадь,,нет пересадки,−8,37.7151,55.7963
3,1,Сокольническая линия,4,Сокольники,,нет пересадки,−9,37.6802,55.7888
4,1,Сокольническая линия,5,Красносельская,,нет пересадки,−8,37.6673,55.7801


In [413]:
# Shape of the array of distinct line names, i.e. how many lines the table lists.
df['line_name'].unique().shape

(15,)

In [414]:
# (rows, columns) of the Wikipedia station table.
df.shape

(238, 9)

In [416]:
# Write both station tables to CSV without the index column.
# `to_csv` does not create missing directories, so make sure `data/` exists first.
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)
stations_osm.to_csv(output_dir / 'stations_osm.csv', index=False)
df.to_csv(output_dir / 'stations_wiki.csv', index=False)