In [411]:
import pandas as pd
import yaml
import geopy.distance

In [448]:
# Name of the output directory; every YAML file below is written to "../<FOLDER>/".
# The trailing comment records the other value used for this setting ("config").
FOLDER = "config_dev" # "config"

# Stations Cleansing

In [449]:
# Load the raw station export; the file uses semicolons as field separators.
stations = pd.read_csv("../data/stations_utf8.csv", sep=";")

In [450]:
stations.head()

Unnamed: 0,HstNummer,Name mit Ort,Name ohne Ort,Ort,GKZ,Globale ID,MVTT X,MVTT Y,WGS84 X,WGS84 Y
0,1,Karlsplatz (Stachus),Karlsplatz (Stachus),München,9162000,de:09162:1,4467762.0,826427.0,481393653703985,115653864723266
1,2,Marienplatz,Marienplatz,München,9162000,de:09162:2,4468673.0,826758.0,481364342160178,115776511503947
2,3,Isartor,Isartor,München,9162000,de:09162:3,4469056.0,827043.0,481338899412007,115828176253145
3,4,Rosenheimer Platz,Rosenheimer Platz,München,9162000,de:09162:4,4469877.0,827637.0,481285874606085,115938895812341
4,5,Ostbahnhof,Ostbahnhof,München,9162000,de:09162:5,4470634.0,827691.0,481281372572089,11604062351127


In [451]:
# Most columns are not needed further down; keep only name, city and the WGS84 pair.
stations = stations.drop(
    columns=["HstNummer", "Name mit Ort", "GKZ", "Globale ID", "MVTT X", "MVTT Y"]
)

In [452]:
# Keep only rows that have both WGS84 coordinates and whose 'Ort' is one of
# München, Garching (b München) or Grünwald (Kr München).
stations = (
    stations
    .dropna(subset=['WGS84 X', 'WGS84 Y'])
    .loc[lambda frame: frame['Ort'].isin(["München", "Garching (b München)", "Grünwald (Kr München)"])]
)


In [453]:
stations.head()

Unnamed: 0,Name ohne Ort,Ort,WGS84 X,WGS84 Y
0,Karlsplatz (Stachus),München,481393653703985,115653864723266
1,Marienplatz,München,481364342160178,115776511503947
2,Isartor,München,481338899412007,115828176253145
3,Rosenheimer Platz,München,481285874606085,115938895812341
4,Ostbahnhof,München,481281372572089,11604062351127


In [454]:
def _parse_coordinate(raw):
    """Convert a decimal-comma coordinate string to a float rounded to 4 decimal places."""
    return round(float(raw.replace(",", ".")), 4)


# Both coordinate columns arrive as strings with a comma as decimal separator.
stations['WGS84 X'] = stations['WGS84 X'].apply(_parse_coordinate)
stations['WGS84 Y'] = stations['WGS84 Y'].apply(_parse_coordinate)

In [455]:
stations.head()

Unnamed: 0,Name ohne Ort,Ort,WGS84 X,WGS84 Y
0,Karlsplatz (Stachus),München,48.1394,11.5654
1,Marienplatz,München,48.1364,11.5777
2,Isartor,München,48.1339,11.5828
3,Rosenheimer Platz,München,48.1286,11.5939
4,Ostbahnhof,München,48.1281,11.6041


In [456]:
# stations['Name ohne Ort'] = stations['Name ohne Ort'].apply(lambda x: str(x).replace(" ", "_"))
# stations['Ort'] = stations['Ort'].apply(lambda x: str(x).replace(" ", "_"))


In [457]:
# Switch to short English column names and renumber the rows from 0 so that the
# positional index can double as a stable station id.
# NOTE(review): the displayed frame shows values around 48.1 under "lon" and
# around 11.5 under "lat", which looks like latitude/longitude swapped for Munich.
# calc_distance() relies on the current naming, so confirm and change both together.
stations = (
    stations
    .rename(columns={"Name ohne Ort": "name", "Ort": "city", "WGS84 X": "lon", "WGS84 Y": "lat"})
    .reset_index(drop=True)
)
stations['id'] = stations.index

In [458]:
stations

Unnamed: 0,name,city,lon,lat,id
0,Karlsplatz (Stachus),München,48.1394,11.5654,0
1,Marienplatz,München,48.1364,11.5777,1
2,Isartor,München,48.1339,11.5828,2
3,Rosenheimer Platz,München,48.1286,11.5939,3
4,Ostbahnhof,München,48.1281,11.6041,4
...,...,...,...,...,...
1182,Parkring 19,Garching (b München),48.2515,11.6373,1182
1183,BMW M GmbH (Daimlerstr. 19),Garching (b München),48.2479,11.6223,1183
1184,Taunusstraße,München,48.1889,11.5784,1184
1185,BTZ Lemgostraße,München,48.1921,11.5545,1185


In [459]:
# stations.to_csv("../data/stations_utf8_cleansed.csv", index=False)

In [460]:
# Serialize one mapping per station to UTF-8 encoded YAML and write the bytes out.
stations_yaml_path = "../{}/stations.yaml".format(FOLDER)
yaml_bytes = yaml.dump(
    stations.to_dict(orient='records'),
    encoding="utf-8",
    allow_unicode=True,
)
with open(stations_yaml_path, "wb") as out:
    out.write(yaml_bytes)

# Lines Cleansing

In [461]:
# Load the subway line descriptions; the file is semicolon-separated.
subway_lines = pd.read_csv("../data/subway_lines_from_wiki.csv", sep=";")

In [462]:
# A description alternates "station – distance – station – ...": split on the dash,
# then take the even positions as station names and the odd positions as distances.
subway_lines["split"] = subway_lines["description"].apply(lambda description: description.split("–"))
subway_lines["stations"] = subway_lines["split"].apply(lambda tokens: tokens[::2])
subway_lines["distances"] = subway_lines["split"].apply(lambda tokens: tokens[1::2])

In [463]:
# Trim surrounding spaces from station names; strip spaces, parentheses and the
# unit letter "m" from distance tokens before converting them to integers.
subway_lines["stations"] = subway_lines["stations"].apply(
    lambda names: [name.strip(" ") for name in names]
)
subway_lines["distances"] = subway_lines["distances"].apply(
    lambda tokens: [int(token.strip(" ()m")) for token in tokens]
)

In [464]:
# The raw description and its split form are no longer needed once parsed.
subway_lines = subway_lines.drop(columns=["description", "split"])

In [465]:
def name_list_to_id_list(name_list):
    """Translate station names into their numeric station ids.

    Each name is looked up in the notebook-level ``stations`` frame (columns
    ``name`` and ``id``); the ids are returned in the same order as the names.

    Args:
        name_list: iterable of station names, spelled exactly as in
            ``stations['name']``.

    Returns:
        list of plain Python ``int`` ids, one per input name.

    Raises:
        ValueError: if a name matches no station or more than one station.
    """
    id_list = []
    for name in name_list:
        matches = stations.loc[stations['name'] == name, "id"]
        if len(matches) != 1:
            # Fail with the offending name instead of an opaque conversion error.
            raise ValueError(
                "expected exactly one station named {!r}, found {}".format(name, len(matches))
            )
        # Extract the scalar first: calling int() on a whole Series is deprecated in pandas.
        id_list.append(int(matches.iloc[0]))
    return id_list

In [466]:
# Replace each line's list of station names with the matching list of station ids.
subway_lines["stations"] = subway_lines["stations"].apply(name_list_to_id_list)

In [467]:
# Every subway line gets circular=False; the flag is written to the YAML output below.
subway_lines["circular"] = False

In [468]:
# Serialize one mapping per subway line to UTF-8 encoded YAML and write the bytes out.
subway_yaml_path = "../{}/subway_lines.yaml".format(FOLDER)
yaml_bytes = yaml.dump(
    subway_lines.to_dict(orient='records'),
    encoding="utf-8",
    allow_unicode=True,
    default_flow_style=None,
)
with open(subway_yaml_path, "wb") as out:
    out.write(yaml_bytes)

In [469]:
subway_lines

Unnamed: 0,name,stations,distances,circular
0,u1,"[268, 225, 158, 593, 156, 154, 153, 5, 44, 138...","[625, 788, 830, 1007, 1102, 878, 1071, 905, 74...",False
1,u2,"[231, 483, 481, 479, 474, 465, 463, 287, 131, ...","[1065, 631, 1112, 962, 1010, 657, 1094, 1103, ...",False
2,u3,"[212, 274, 268, 277, 258, 249, 287, 297, 354, ...","[797, 880, 1416, 1061, 944, 832, 793, 1042, 57...",False
3,u4,"[200, 704, 198, 197, 5, 0, 54, 386, 384, 383, ...","[806, 671, 927, 711, 521, 811, 933, 928, 791, ...",False
4,u5,"[202, 201, 200, 704, 198, 197, 5, 0, 54, 386, ...","[670, 791, 806, 671, 927, 711, 521, 811, 933, ...",False
5,u6,"[347, 353, 352, 351, 318, 305, 365, 364, 363, ...","[2560, 1827, 4208, 830, 1431, 1087, 660, 740, ...",False
6,u7,"[268, 225, 158, 593, 156, 154, 153, 5, 44, 138...","[625, 788, 830, 1007, 1102, 878, 1071, 905, 74...",False
7,u8,"[258, 249, 287, 131, 121, 111, 101, 5, 44, 138...","[944, 832, 1103, 756, 513, 730, 583, 905, 746,...",False


In [470]:
# Load the tram line definitions; the file is semicolon-separated.
tram_lines = pd.read_csv("../data/tram_lines_from_wiki.csv", sep=";")

In [471]:
# Tram rows only list station names separated by dashes: split and trim in one pass.
tram_lines["stations"] = tram_lines["stations"].apply(
    lambda text: [word.strip() for word in text.split("–")]
)


In [472]:
tram_lines

Unnamed: 0,name,stations
0,t12,"[Scheidplatz, Hohenzollernplatz, Leonrodplatz,..."
1,t16,"[Romanplatz, Donnersbergerstraße, Hackerbrücke..."
2,t17,"[Amalienburgstraße, Romanplatz, Donnersbergers..."
3,t18,"[Gondrellplatz, Westendstraße, Lautensackstraß..."
4,t19,"[Pasing, Willibaldplatz, Fürstenrieder Straße,..."
5,t20,"[Moosach, Westfriedhof, Leonrodplatz, Stiglmai..."
6,t21,"[Westfriedhof, Leonrodplatz, Stiglmaierplatz, ..."
7,t23,"[Münchner Freiheit, Potsdamer Straße, Parzival..."
8,t25,"[Max-Weber-Platz, Rosenheimer Platz, Ostfriedh..."
9,t27,"[Petuelring, Hohenzollernplatz, Kurfürstenplat..."


In [473]:
def calc_distance(start, finish, detour_factor=1.3):
    """Estimate the track distance in meters between two stations.

    The geodesic (straight-line) distance between the two stations is scaled by
    ``detour_factor`` because tram tracks rarely follow a straight line, and the
    result is truncated to a whole number of meters.

    Args:
        start: id of the first station (value of ``stations['id']``).
        finish: id of the second station.
        detour_factor: multiplier applied to the straight-line distance;
            defaults to the 1.3 used throughout this notebook.

    Returns:
        int: the scaled distance in meters.

    Raises:
        ValueError: if an id matches no station or more than one station.
    """
    def _point(station_id):
        # One lookup per station; the scalars are extracted explicitly because
        # calling float() on a whole Series is deprecated in pandas.
        match = stations.loc[stations['id'] == station_id, ["lon", "lat"]]
        if len(match) != 1:
            raise ValueError(
                "expected exactly one station with id {!r}, found {}".format(station_id, len(match))
            )
        row = match.iloc[0]
        # The ("lon", "lat") order is intentional: in this notebook the "lon" column
        # holds values around 48 and "lat" values around 11.5, so this tuple is what
        # geodesic() receives as its (latitude, longitude) point.
        # NOTE(review): the column names look swapped; confirm before renaming.
        return (float(row["lon"]), float(row["lat"]))

    straight_line_m = geopy.distance.geodesic(_point(start), _point(finish)).m
    return int(straight_line_m * detour_factor)

def calc_distances(ls):
    """Return the calc_distance() result for every pair of neighbouring station ids.

    For a list of n ids the result has n - 1 entries; an empty or
    single-element list yields an empty list.
    """
    return [calc_distance(here, there) for here, there in zip(ls, ls[1:])]

In [474]:
# Sanity check: Karlsplatz (Stachus) (id 0) -> Marienplatz (id 1), estimated distance in meters
calc_distance(0, 1)

1266

In [475]:
# Resolve station names to ids, estimate the length of each hop from the station
# coordinates, and mark every tram line as non-circular. The "distances" callable
# sees the already-converted "stations" column because assign() evaluates its
# keyword arguments in order.
tram_lines = tram_lines.assign(
    stations=lambda frame: frame["stations"].apply(name_list_to_id_list),
    distances=lambda frame: frame["stations"].apply(calc_distances),
    circular=False,
)

In [476]:
tram_lines

Unnamed: 0,name,stations,distances,circular
0,t12,"[287, 131, 10, 156, 172]","[1556, 2007, 1640, 2151]",False
1,t16,"[172, 48, 6, 5, 0, 44, 2, 17, 386, 434, 439, 388]","[2707, 1311, 1276, 435, 887, 1589, 740, 293, 3...",False
2,t17,"[167, 172, 48, 6, 5, 0, 44, 2, 65, 384, 434, 4...","[2292, 2707, 1311, 1276, 435, 887, 1589, 866, ...",False
3,t18,"[903, 200, 744, 59, 1166, 0, 44, 36, 668, 673,...","[3466, 923, 1548, 2401, 600, 887, 1917, 803, 1...",False
4,t19,"[9, 911, 908, 744, 59, 5, 0, 19, 17, 20, 384, ...","[2957, 1394, 1481, 1548, 2583, 435, 957, 1268,...",False
5,t20,"[212, 158, 10, 153, 5, 0]","[2587, 2320, 2104, 1007, 435]",False
6,t21,"[158, 10, 153, 1167, 0, 19, 17, 20, 384, 4, 76...","[2320, 2104, 874, 515, 957, 1268, 794, 192, 12...",False
7,t23,"[354, 322, 321, 346, 355, 591, 356, 357]","[780, 550, 236, 524, 541, 646, 483]",False
8,t25,"[384, 3, 668, 722, 740, 685, 1153]","[1096, 1696, 689, 983, 5262, 5690]",False
9,t27,"[249, 131, 37, 46, 0, 44]","[2071, 689, 2216, 957, 887]",False


In [477]:
# Serialize one mapping per tram line to UTF-8 encoded YAML and write the bytes out.
tram_yaml_path = "../{}/tram_lines.yaml".format(FOLDER)
yaml_bytes = yaml.dump(
    tram_lines.to_dict(orient='records'),
    encoding="utf-8",
    allow_unicode=True,
    default_flow_style=None,
)
with open(tram_yaml_path, "wb") as out:
    out.write(yaml_bytes)

# Meta Info and Relevant Stations

In [478]:
# Flatten the station ids of every subway line, then every tram line, into one list
# (duplicates are kept; the list is only used for a membership test).
all_stations = [
    station_id
    for lines in (subway_lines, tram_lines)
    for station_ids in lines["stations"]
    for station_id in station_ids
]

In [479]:
stations

Unnamed: 0,name,city,lon,lat,id
0,Karlsplatz (Stachus),München,48.1394,11.5654,0
1,Marienplatz,München,48.1364,11.5777,1
2,Isartor,München,48.1339,11.5828,2
3,Rosenheimer Platz,München,48.1286,11.5939,3
4,Ostbahnhof,München,48.1281,11.6041,4
...,...,...,...,...,...
1182,Parkring 19,Garching (b München),48.2515,11.6373,1182
1183,BMW M GmbH (Daimlerstr. 19),Garching (b München),48.2479,11.6223,1183
1184,Taunusstraße,München,48.1889,11.5784,1184
1185,BTZ Lemgostraße,München,48.1921,11.5545,1185


In [480]:
# Restrict the station table to the stations served by at least one subway or tram line.
stations_in_lines = stations.loc[stations['id'].isin(all_stations)]

In [481]:
# Serialize only the stations that appear on a line to UTF-8 encoded YAML.
stations_in_lines_yaml_path = "../{}/stations_in_lines.yaml".format(FOLDER)
yaml_bytes = yaml.dump(
    stations_in_lines.to_dict(orient='records'),
    encoding="utf-8",
    allow_unicode=True,
)
with open(stations_in_lines_yaml_path, "wb") as out:
    out.write(yaml_bytes)

In [482]:
# Print the bounding box of the served stations: min and max of "lat", then of "lon".
for axis in ("lat", "lon"):
    print(stations_in_lines[axis].min())
    print(stations_in_lines[axis].max())

11.4606
11.7036
48.0416
48.2649


In [483]:
stations

Unnamed: 0,name,city,lon,lat,id
0,Karlsplatz (Stachus),München,48.1394,11.5654,0
1,Marienplatz,München,48.1364,11.5777,1
2,Isartor,München,48.1339,11.5828,2
3,Rosenheimer Platz,München,48.1286,11.5939,3
4,Ostbahnhof,München,48.1281,11.6041,4
...,...,...,...,...,...
1182,Parkring 19,Garching (b München),48.2515,11.6373,1182
1183,BMW M GmbH (Daimlerstr. 19),Garching (b München),48.2479,11.6223,1183
1184,Taunusstraße,München,48.1889,11.5784,1184
1185,BTZ Lemgostraße,München,48.1921,11.5545,1185
