In [10]:
import pandas as pd
import yaml

In [168]:
FOLDER = "config_dev" # "config"

# Stations Cleansing

In [149]:
stations = pd.read_csv("../data/stations_utf8.csv", delimiter=";")

In [150]:
stations.head()

Unnamed: 0,HstNummer,Name mit Ort,Name ohne Ort,Ort,GKZ,Globale ID,MVTT X,MVTT Y,WGS84 X,WGS84 Y
0,1,Karlsplatz (Stachus),Karlsplatz (Stachus),München,9162000,de:09162:1,4467762.0,826427.0,481393653703985,115653864723266
1,2,Marienplatz,Marienplatz,München,9162000,de:09162:2,4468673.0,826758.0,481364342160178,115776511503947
2,3,Isartor,Isartor,München,9162000,de:09162:3,4469056.0,827043.0,481338899412007,115828176253145
3,4,Rosenheimer Platz,Rosenheimer Platz,München,9162000,de:09162:4,4469877.0,827637.0,481285874606085,115938895812341
4,5,Ostbahnhof,Ostbahnhof,München,9162000,de:09162:5,4470634.0,827691.0,481281372572089,11604062351127


In [151]:
# we don't need most columns
stations = stations.drop(["HstNummer", "Name mit Ort", "GKZ", "Globale ID", "MVTT X", "MVTT Y"], axis=1)

In [152]:
# drop all where we have either no x or y coordinate (or both)
stations = stations[stations['WGS84 X'].notna()]
stations = stations[stations['WGS84 Y'].notna()]
stations = stations[stations['Ort'].isin(["München", "Garching (b München)"])]


In [153]:
stations.head()

Unnamed: 0,Name ohne Ort,Ort,WGS84 X,WGS84 Y
0,Karlsplatz (Stachus),München,481393653703985,115653864723266
1,Marienplatz,München,481364342160178,115776511503947
2,Isartor,München,481338899412007,115828176253145
3,Rosenheimer Platz,München,481285874606085,115938895812341
4,Ostbahnhof,München,481281372572089,11604062351127


In [154]:
stations['WGS84 X'] = stations['WGS84 X'].apply(lambda x: round(float(x.replace(",", ".")), 4))
stations['WGS84 Y'] = stations['WGS84 Y'].apply(lambda x: round(float(x.replace(",", ".")), 4))

In [155]:
stations.head()

Unnamed: 0,Name ohne Ort,Ort,WGS84 X,WGS84 Y
0,Karlsplatz (Stachus),München,48.1394,11.5654
1,Marienplatz,München,48.1364,11.5777
2,Isartor,München,48.1339,11.5828
3,Rosenheimer Platz,München,48.1286,11.5939
4,Ostbahnhof,München,48.1281,11.6041


In [156]:
# stations['Name ohne Ort'] = stations['Name ohne Ort'].apply(lambda x: str(x).replace(" ", "_"))
# stations['Ort'] = stations['Ort'].apply(lambda x: str(x).replace(" ", "_"))


In [157]:
stations = stations.rename(columns={"Name ohne Ort": "name", "Ort": "city", "WGS84 X": "lon", "WGS84 Y": "lat"})
stations = stations.reset_index(drop=True)
stations['id'] = stations.index

In [158]:
stations

Unnamed: 0,name,city,lon,lat,id
0,Karlsplatz (Stachus),München,48.1394,11.5654,0
1,Marienplatz,München,48.1364,11.5777,1
2,Isartor,München,48.1339,11.5828,2
3,Rosenheimer Platz,München,48.1286,11.5939,3
4,Ostbahnhof,München,48.1281,11.6041,4
...,...,...,...,...,...
1165,Parkring 19,Garching (b München),48.2515,11.6373,1165
1166,BMW M GmbH (Daimlerstr. 19),Garching (b München),48.2479,11.6223,1166
1167,Taunusstraße,München,48.1889,11.5784,1167
1168,BTZ Lemgostraße,München,48.1921,11.5545,1168


In [159]:
# stations.to_csv("../data/stations_utf8_cleansed.csv", index=False)

In [160]:
text = yaml.dump(stations.to_dict(orient='records'), encoding="utf-8", allow_unicode=True)
with open("../{}/stations.yaml".format(FOLDER), "wb") as file:
    file.write(text)

# Lines Cleansing

In [96]:
lines = pd.read_csv("../data/lines_from_wiki.csv", delimiter=";")

In [97]:
lines["split"] = lines["description"].apply(lambda text: text.split("–"))
lines["stations"] = lines["split"].apply(lambda ls: ls[0::2])
lines["distances"] = lines["split"].apply(lambda ls: ls[1::2])

In [98]:
lines["stations"] = lines["stations"].apply(lambda ls: list(map(lambda e: e.strip(" "), ls)))
lines["distances"] = lines["distances"].apply(lambda ls: list(map(lambda e: int(e.strip(" ()m")), ls)))

In [99]:
lines = lines.drop(["description", "split"], axis=1)

In [112]:
def name_list_to_id_list(name_list):
    id_list = []
    for name in name_list:
        id = stations.loc[stations['name'] == name, "id"]
        id_list.append(int(id))
    return id_list

In [116]:
lines["stations"] = lines["stations"].apply(name_list_to_id_list)

In [None]:
lines["circular"] = False

In [122]:
text = yaml.dump(lines.to_dict(orient='records'), encoding="utf-8", allow_unicode=True, default_flow_style=None)
with open("../{}/lines.yaml".format(FOLDER), "wb") as file:
    file.write(text)

In [121]:
lines

Unnamed: 0,name,stations,distances,circular
0,u1,"[268, 225, 158, 593, 156, 154, 153, 5, 44, 138...","[625, 788, 830, 1007, 1102, 878, 1071, 905, 74...",False
1,u2,"[231, 483, 481, 479, 474, 465, 463, 287, 131, ...","[1065, 631, 1112, 962, 1010, 657, 1094, 1103, ...",False
2,u3,"[212, 274, 268, 277, 258, 249, 287, 297, 354, ...","[797, 880, 1416, 1061, 944, 832, 793, 1042, 57...",False
3,u4,"[200, 704, 198, 197, 5, 0, 54, 386, 384, 383, ...","[806, 671, 927, 711, 521, 811, 933, 928, 791, ...",False
4,u5,"[202, 201, 200, 704, 198, 197, 5, 0, 54, 386, ...","[670, 791, 806, 671, 927, 711, 521, 811, 933, ...",False
5,u6,"[347, 353, 352, 351, 318, 305, 365, 364, 363, ...","[2560, 1827, 4208, 830, 1431, 1087, 660, 740, ...",False
6,u7,"[268, 225, 158, 593, 156, 154, 153, 5, 44, 138...","[625, 788, 830, 1007, 1102, 878, 1071, 905, 74...",False
7,u8,"[258, 249, 287, 131, 121, 111, 101, 5, 44, 138...","[944, 832, 1103, 756, 513, 730, 583, 905, 746,...",False


In [147]:
all_stations = []
for stations in lines["stations"]:
    all_stations.extend(stations)
    

In [173]:
stations_in_lines = stations[stations['id'].isin(all_stations)]

In [172]:
text = yaml.dump(stations_in_lines.to_dict(orient='records'), encoding="utf-8", allow_unicode=True)
with open("../{}/stations_in_lines.yaml".format(FOLDER), "wb") as file:
    file.write(text)

In [175]:
print(stations_in_lines["lat"].min())
print(stations_in_lines["lat"].max())
print(stations_in_lines["lon"].min())
print(stations_in_lines["lon"].max())

11.4738
11.7036
48.0886
48.2649


In [174]:
stations

Unnamed: 0,name,city,lon,lat,id
0,Karlsplatz (Stachus),München,48.1394,11.5654,0
1,Marienplatz,München,48.1364,11.5777,1
2,Isartor,München,48.1339,11.5828,2
3,Rosenheimer Platz,München,48.1286,11.5939,3
4,Ostbahnhof,München,48.1281,11.6041,4
...,...,...,...,...,...
1165,Parkring 19,Garching (b München),48.2515,11.6373,1165
1166,BMW M GmbH (Daimlerstr. 19),Garching (b München),48.2479,11.6223,1166
1167,Taunusstraße,München,48.1889,11.5784,1167
1168,BTZ Lemgostraße,München,48.1921,11.5545,1168
