In [1]:
import pandas as pd
import numpy as np

In [2]:
data_df = pd.read_csv("data/2017-09/2017-09-13istdaten.csv", sep=";")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
tr_col_name = {
    'BETRIEBSTAG': 'TRIP_DATE',
    'FAHRT_BEZEICHNER': 'TRIP_ID',
    'BETREIBER_ID': 'OPERATOR_ID',
    'BETREIBER_ABK': 'OPERATOR_SHORT_NAME',
    'BETREIBER_NAME': 'OPERATOR_NAME',
    'PRODUKT_ID': 'TRANSPORT_TYPE',
    'LINIEN_ID': 'LINE_ID', # TRAIN_NB
    'LINIEN_TEXT': 'LINE_TYPE', #RE, IC, IR
    'UMLAUF_ID': 'CYCLE_ID',
    'VERKEHRSMITTEL_TEXT': 'TRANSPORT_TYPE',
    'ZUSATZFAHRT_TF': 'IS_ADDITIONAL_TRIP',
    'FAELLT_AUS_TF': 'HAS_TRIP_FAILED',
    'BPUIC': 'BPUIC', #UNKOWN
    'HALTESTELLEN_NAME': 'STOP_STATION_NAME',
    'ANKUNFTSZEIT': 'ARRIVAL_TIME',
    'AN_PROGNOSE': 'ACTUAL_ARRIVAL_TIME', #When AN_PROGNOSE_STATUS == GESCHAETZT
    'AN_PROGNOSE_STATUS': 'ACTUAL_ARRIVAL_TIME_STATUS',
    'ABFAHRTSZEIT': 'DEPARTURE_TIME',
    'AB_PROGNOSE': 'ACTUAL_DEPARTURE_TIME',
    'AB_PROGNOSE_STATUS':'ACTUAL_DEPARTURE_TIME_STATUS',
    'DURCHFAHRT_TF': 'PASS_THROUGH' #true if the transport does not stop there
}

In [4]:
data_df_tr = data_df.rename(columns=tr_col_name)

In [5]:
# Extract list of stop from metadata file.
with open("data/metadata/BFKOORD_GEO") as file:
    metadata = file.readlines()
    
metadata_cleaned = [line.split("%") for line in metadata]
metadata_cleaned = [[line[0].split(), line[1][1:-1]] for line in  metadata_cleaned]
metadata_cleaned = [[line[0][0], line[0][1], line[0][2], line[0][3], line[1]] for line in metadata_cleaned]

metadata_df = pd.DataFrame(metadata_cleaned, columns=["StationID", "Longitude", "Latitude", "Height", "Remark"])

metadata_df["Longitude"] = pd.to_numeric(metadata_df["Longitude"])
metadata_df["Latitude"] = pd.to_numeric(metadata_df["Latitude"])

In [6]:
def distance_from_gps_coord(s_lat, s_lon, e_lat, e_lon):
    """
        Compute distance from two points in gps coordinates.
        Output distance in kilometers.
    """
    s_lat = np.radians(s_lat)
    s_lon = np.radians(s_lon)
    e_lat = np.radians(e_lat)
    e_lon = np.radians(e_lon)
    
    return 6371.01 * np.arccos(np.sin(s_lat)* np.sin(e_lat) + np.cos(s_lat)* np.cos(e_lat)* np.cos(s_lon - e_lon))

In [7]:
# Retrieve stops near Zurich main station
zurichHB = metadata_df[metadata_df.Remark == "Zürich HB"]

metadata_df["dist"] = metadata_df.apply(lambda x: distance_from_gps_coord(zurichHB.Latitude, zurichHB.Longitude, x.Latitude, x.Longitude), axis=1)
zurich_stops_df = metadata_df[metadata_df["dist"] < 10]
zurich_stops_df.head()

Unnamed: 0,StationID,Longitude,Latitude,Height,Remark,dist
74,176,8.521961,47.351679,0,Zimmerberg-Basistunnel,3.250675
2084,8502220,8.434713,47.390882,442,Urdorf,8.065921
2085,8502221,8.437543,47.357432,488,Birmensdorf ZH,8.067443
2086,8502222,8.468175,47.325896,528,Bonstetten-Wettswil,7.9517
2092,8502229,8.43033,47.380971,456,Urdorf Weihermatt,8.277832


In [8]:
stops = list(zurich_stops_df['Remark'])

In [9]:
col_of_interest = ['TRIP_DATE', 'LINE_ID', 'HAS_TRIP_FAILED', 'STOP_STATION_NAME',
                   'ARRIVAL_TIME', 'ACTUAL_ARRIVAL_TIME', 'ACTUAL_ARRIVAL_TIME_STATUS',
                   'DEPARTURE_TIME', 'ACTUAL_DEPARTURE_TIME', 'ACTUAL_DEPARTURE_TIME_STATUS',
                   'PASS_THROUGH']

In [10]:
#filter out uninteresting cities
data_df_tr = data_df_tr[data_df_tr['STOP_STATION_NAME'].isin(stops)]
#data_df_tr = data_df_tr[data_df_tr['ACTUAL_ARRIVAL_TIME_STATUS'] == 'GESCHAETZT']
#data_df_tr = data_df_tr[data_df_tr['ACTUAL_DEPARTURE_TIME_STATUS'] == 'GESCHAETZT']
data_df_tr[col_of_interest]#[data_df_tr["LINE_ID"] == 11]#.dtypes

Unnamed: 0,TRIP_DATE,LINE_ID,HAS_TRIP_FAILED,STOP_STATION_NAME,ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME_STATUS,DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME_STATUS,PASS_THROUGH
443,13.09.2017,10,False,Zürich HB,13.09.2017 21:51,13.09.2017 21:53:57,GESCHAETZT,,,PROGNOSE,False
685,13.09.2017,11,False,Zürich HB,,,PROGNOSE,13.09.2017 06:09,13.09.2017 06:10:33,GESCHAETZT,False
1835,13.09.2017,12,False,Zürich HB,13.09.2017 10:51,13.09.2017 10:50:51,GESCHAETZT,,,PROGNOSE,False
4582,13.09.2017,1251,True,Zürich HB,13.09.2017 07:00,,PROGNOSE,,,PROGNOSE,False
4814,13.09.2017,1255,False,Zürich HB,13.09.2017 08:26,13.09.2017 08:30:04,GESCHAETZT,13.09.2017 08:37,13.09.2017 08:39:22,GESCHAETZT,False
4873,13.09.2017,1256,False,Zürich HB,13.09.2017 17:53,13.09.2017 17:55:07,GESCHAETZT,,,PROGNOSE,False
4975,13.09.2017,1258,True,Zürich HB,13.09.2017 21:23,,PROGNOSE,13.09.2017 21:34,,PROGNOSE,False
5039,13.09.2017,1260,True,Zürich HB,,,PROGNOSE,13.09.2017 21:00,,PROGNOSE,False
5618,13.09.2017,13,False,Zürich HB,,,PROGNOSE,13.09.2017 07:09,13.09.2017 07:10:39,GESCHAETZT,False
5630,13.09.2017,14,False,Zürich HB,13.09.2017 12:51,13.09.2017 12:54:07,GESCHAETZT,,,PROGNOSE,False


In [11]:
#deal with date
data_df_tr['DEPARTURE_TIME'] = pd.to_datetime(data_df_tr['DEPARTURE_TIME'])
data_df_tr['ACTUAL_DEPARTURE_TIME'] = pd.to_datetime(data_df_tr['ACTUAL_DEPARTURE_TIME'])
data_df_tr['ARRIVAL_TIME'] = pd.to_datetime(data_df_tr['ARRIVAL_TIME'])
data_df_tr['ACTUAL_ARRIVAL_TIME'] = pd.to_datetime(data_df_tr['ACTUAL_ARRIVAL_TIME'])

In [164]:
from geopy.distance import distance as geo_dist
#geo_dist((lat1, long1), (lat2, long2))geo_dist

def getLineID(df, station, time):
    df = df.copy()
    df = df[df['STOP_STATION_NAME'] == station][df['DEPARTURE_TIME'] > time]\
        [df['DEPARTURE_TIME'] < time + pd.Timedelta(minutes=60)]
    df = df.dropna(subset=['DEPARTURE_TIME'])
    #df = df[np.isfinite(df['DEPARTURE_TIME'])]
    return list(df['TRIP_ID'])#list(df['LINE_ID'])
def viewLine(df, line_id):
    return df[df["TRIP_ID"] == line_id]#df[df["LINE_ID"] == line_id]
def dist_between_station(df, station1, station2):
    df=df.copy()
    df = df[df['Remark'].isin([station1, station2])]
    df.index = df['Remark']
    df = df[['Longitude', 'Latitude']]
    s1 = df.loc[station1]
    s2 = df.loc[station2]
    return geo_dist((s1.Latitude,s1.Longitude), (s2.Latitude,s2.Longitude))
def getNextStations(df, line_id, station):
    df = df.copy()
    df = viewLine(df, line_id)
    #assumption it is already sorted
    df.index = df['STOP_STATION_NAME']
    df = df.loc[station:]
    return df
def getNextStation(df, line_id, station):
    #print(line_id)
    #print(station)
    df = df.copy()
    df = getNextStations(df, line_id, station)
    if df.shape[0] > 1:
        return df.iloc[1]
    else:
        None
def getNextStations2(df, line_id, station):
    df = df.copy()
    df = viewLine(df, line_id)
    df = df.sort_values('ARRIVAL_TIME')
    select_next = False
    nexts = []
    for index, row in df.iterrows():
        if select_next:
            nexts.append(row)
            select_next = False
        if(row['STOP_STATION_NAME'] == station):
            select_next = True
    return nexts
def getLongLat(df, station):
    df=df.copy()
    df = df[df['Remark'] == station]
    df.index = df['Remark']
    df = df[['Longitude', 'Latitude']]
    s1 = df.loc[station]
    return (s1.Latitude,s1.Longitude)

#To be run once for all station and result save in a map (or table) 
def getNearByStation(df, station):
    df = df.copy()
    loc = getLongLat(df, station)
    df = df.dropna(subset=['Remark'])
    df['newDist'] = df.apply(lambda x: geo_dist((x.Latitude, x.Longitude), loc), axis=1)
    df = df[df['newDist'] < 0.1]
    return df
getNearByStation(metadata_df, 'Zürich, Bürkliplatz')
#geo_dist((lat1, long1), (lat2, long2))
#(last_station='Zurich', medium=(walk, line, trainNb), ...)

Unnamed: 0,StationID,Longitude,Latitude,Height,Remark,dist,newDist
2800,8503651,8.540788,47.36601,406,Zürich Bürkliplatz (See),1.353655,0.09659217848548045 km
21684,8591105,8.540341,47.366824,408,"Zürich, Bürkliplatz",1.262448,0.0 km


In [113]:
getNextStations2(data_df_tr, '85:3849:80257-02002-1','Zürich, Bürkliplatz')#[col_of_interest]

TRIP_DATE                                          13.09.2017
TRIP_ID                                 85:3849:80257-02002-1
OPERATOR_ID                                           85:3849
OPERATOR_SHORT_NAME                                   VBZ Fpl
OPERATOR_NAME                   Verkehrsbetriebe Zürich INFO+
TRANSPORT_TYPE                                           Tram
LINE_ID                                           85:3849:002
LINE_TYPE                                                   2
CYCLE_ID                                                10001
TRANSPORT_TYPE                                            NaN
IS_ADDITIONAL_TRIP                                      False
HAS_TRIP_FAILED                                         False
BPUIC                                                 8576200
STOP_STATION_NAME                     Zürich, Wildbachstrasse
ARRIVAL_TIME                              2017-09-13 10:31:00
ACTUAL_ARRIVAL_TIME                       2017-09-13 10:34:04
ACTUAL_A

In [14]:
#getNextStation(data_df_tr, lines[0], 'Zürich HB').STOP_STATION_NAME

In [15]:
dist_between_station(metadata_df, 'Zürich HB', 'Bassersdorf, Bahnhof')

Distance(9.347745240044421)

In [74]:
viewLine(data_df_tr, 18390)[col_of_interest]

Unnamed: 0,TRIP_DATE,LINE_ID,HAS_TRIP_FAILED,STOP_STATION_NAME,ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME_STATUS,DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME_STATUS,PASS_THROUGH


In [85]:
getLineID(data_df_tr, start_station, departure_time)[0]
viewLine(data_df_tr, '85:3849:80257-02002-1')[col_of_interest]

  


Unnamed: 0,TRIP_DATE,LINE_ID,HAS_TRIP_FAILED,STOP_STATION_NAME,ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME,ACTUAL_ARRIVAL_TIME_STATUS,DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME,ACTUAL_DEPARTURE_TIME_STATUS,PASS_THROUGH
121089,13.09.2017,85:3849:002,False,"Zürich, Farbhof",2017-09-13 09:58:00,2017-09-13 10:00:13,PROGNOSE,2017-09-13 09:58:00,2017-09-13 10:00:13,PROGNOSE,False
121090,13.09.2017,85:3849:002,False,"Zürich, Bachmattstrasse",2017-09-13 09:58:00,2017-09-13 10:01:05,PROGNOSE,2017-09-13 09:58:00,2017-09-13 10:01:10,PROGNOSE,False
121091,13.09.2017,85:3849:002,False,"Zürich, Lindenplatz",2017-09-13 09:59:00,2017-09-13 10:02:05,PROGNOSE,2017-09-13 09:59:00,2017-09-13 10:02:43,PROGNOSE,False
121092,13.09.2017,85:3849:002,False,"Zürich, Grimselstrasse",2017-09-13 10:00:00,2017-09-13 10:03:29,PROGNOSE,2017-09-13 10:01:00,2017-09-13 10:03:47,PROGNOSE,False
121093,13.09.2017,85:3849:002,False,"Zürich, Kappeli",2017-09-13 10:02:00,2017-09-13 10:02:12,PROGNOSE,2017-09-13 10:02:00,2017-09-13 10:05:11,PROGNOSE,False
121094,13.09.2017,85:3849:002,False,"Zürich, Freihofstrasse",2017-09-13 10:03:00,2017-09-13 10:06:11,PROGNOSE,2017-09-13 10:03:00,2017-09-13 10:06:29,PROGNOSE,False
121095,13.09.2017,85:3849:002,False,"Zürich, Letzigrund",2017-09-13 10:04:00,2017-09-13 10:07:23,PROGNOSE,2017-09-13 10:05:00,2017-09-13 10:07:41,PROGNOSE,False
121096,13.09.2017,85:3849:002,False,"Zürich, Albisriederplatz",2017-09-13 10:06:00,2017-09-13 10:08:53,PROGNOSE,2017-09-13 10:06:00,2017-09-13 10:09:17,PROGNOSE,False
121097,13.09.2017,85:3849:002,False,"Zürich, Zypressenstrasse",2017-09-13 10:07:00,2017-09-13 10:10:11,PROGNOSE,2017-09-13 10:07:00,2017-09-13 10:10:29,PROGNOSE,False
121098,13.09.2017,85:3849:002,False,"Zürich, Lochergut",2017-09-13 10:08:00,2017-09-13 10:11:17,PROGNOSE,2017-09-13 10:08:00,2017-09-13 10:11:35,PROGNOSE,False


In [61]:
departure_time + pd.Timedelta(minutes=60)

Timestamp('2017-09-13 09:15:00')

In [62]:
total_dist*1.1

Distance(11.371315194582124)

In [63]:
ds = ['Zürich HB', 'Zürich HB', 'Zürich Wiedikon']
'Zürich HB' not in ds


False

In [64]:
# def reduce_queue(q):
#     q2 = queue.Queue()
#     all_elem = map()
#     while(not q.empty()):
#         elem = q.get()
#         q2.put(elem)
#         if elem in all_elem:
            
        

In [65]:
data_df_tr['STOP_STATION_NAME'].unique()

array(['Zürich HB', 'Zürich Flughafen', 'Bassersdorf', 'Zürich Wipkingen',
       'Zürich Oerlikon', 'Glattbrugg', 'Zürich Hardbrücke',
       'Zürich Stadelhofen', 'Thalwil', 'Zürich Enge', 'Zürich Wiedikon',
       'Dietlikon', 'Stettbach', 'Zürich Altstetten', 'Schlieren',
       'Glanzenberg', 'Urdorf', 'Urdorf Weihermatt', 'Birmensdorf ZH',
       'Bonstetten-Wettswil', 'Regensdorf-Watt', 'Zürich Affoltern',
       'Zürich Seebach', 'Zürich Tiefenbrunnen', 'Zollikon',
       'Küsnacht Goldbach', 'Küsnacht ZH', 'Erlenbach ZH', 'Opfikon',
       'Kloten Balsberg', 'Kloten', 'Wallisellen', 'Zürich Wollishofen',
       'Kilchberg', 'Rüschlikon', 'Rümlang', 'Dübendorf',
       'Schwerzenbach ZH', 'Zürich,Kalkbreite/Bhf.Wiedikon',
       'Zürich, Lochergut', 'Zürich, Zypressenstrasse',
       'Zürich, Albisriederplatz', 'Zürich, Letzigrund',
       'Zürich, Freihofstrasse', 'Zürich, Kappeli',
       'Zürich, Grimselstrasse', 'Zürich, Lindenplatz',
       'Zürich, Bachmattstrasse', 'Züri

In [165]:
start_station = 'Zürich, Feldeggstrasse'#'Zürich HB'
end_station = 'Zürich, Guggachstrasse'#'Dübendorf, Meiershofstrasse'#'Bassersdorf, Bahnhof'
total_dist = dist_between_station(metadata_df, start_station, end_station)
departure_time = pd.to_datetime('13.09.2017 8:15:00')#'13.09.2017 22:15:00'
#data_df_tr[col_of_interest][data_df_tr['STOP_STATION_NAME'] == start_station]
#lines = getLineID(data_df_tr, start_station, departure_time)

#data_df_tr['STOP_STATION_NAME'].unique()

In [166]:
import queue
#the queue contains a tuple (station, time at that station)
already_visited = []
q = queue.Queue()
q.put((start_station, departure_time))
closest_dist = total_dist
while(not q.empty()):
    s, time = q.get()
    already_visited.append(s)
    lines = getLineID(data_df_tr, s, time)
    print(s)
    #print(already_visited)
    for l in lines:
        #print(l)
        next_s_list = getNextStations2(data_df_tr, l, s)
        if len(next_s_list) == 0:
            next_s == None
        else:
            next_s = next_s_list[0]
        if next_s is not None:
            if next_s.STOP_STATION_NAME == end_station:
                print('End station founded')
            dist = dist_between_station(metadata_df,next_s.STOP_STATION_NAME, end_station)
            if next_s.STOP_STATION_NAME not in already_visited and\
                dist < total_dist:
                already_visited.append(next_s.STOP_STATION_NAME)
                q.put((next_s.STOP_STATION_NAME, next_s.ARRIVAL_TIME))

  


Zürich, Feldeggstrasse
Zürich, Kreuzstrasse
Zürich, Opernhaus
Zürich, Bellevue
Zürich, Bürkliplatz
Zürich, Börsenstrasse
Zürich, Helmhaus
Zürich, Kunsthaus
Zürich, Signaustrasse
Zürich, Englischviertelstrasse
Zürich, Rentenanstalt
Zürich, Paradeplatz
Zürich, Rathaus
Zürich, Neumarkt
Zürich, Hottingerplatz
Zürich, Kantonsschule
Zürich, Sprecherstrasse
Zürich, Römerhof
Zürich, Sihlstrasse
Zürich, Waffenplatzstrasse
Zürich, Schweizer Rück
Zürich, Stockerstrasse
Zürich, Rennweg
Zürich, Rudolf-Brun-Brücke
Zürich, Central
Zürich, Platte
Zürich, ETH/Universitätsspital
Zürich, Hölderlinstrasse
Zürich, Stauffacher
Zürich,Kalkbreite/Bhf.Wiedikon
Zürich, Tunnelstrasse
Zürich, Bahnhofstrasse/HB
Zürich, Löwenplatz
Zürich, Bahnhofplatz/HB
Zürich, Bahnhofquai/HB
Zürich, Haldenegg
Zürich, Voltastrasse
Zürich, Haldenbach
Zürich, Bezirksgebäude
Zürich, Werd
Zürich, Lochergut
Zürich, Zwinglihaus
Zürich, Kernstrasse
Zürich, Zoo
Zürich, Museum Rietberg
Zürich, Seebacherplatz
Zürich, Kanonengasse
Zürich, St

In [None]:
#TODO visualize on map all the stops that the algo has seen
#can be good to check that the heuristic is not going to a wrong direction
#DONE

#TODO Tag a station as having conection and with wich line it has connection 
# or for each station get to list of line_id connection precomputed
# including nearby station by walk

In [167]:
import folium
my_map = folium.Map(location=loc)
folium.Marker(getLongLat(metadata_df, start_station)).add_to(my_map)
folium.Marker(getLongLat(metadata_df, end_station)).add_to(my_map)
for place in already_visited:
    l = getLongLat(metadata_df, place)
    #folium.Marker(location=l).add_to(my_map)
    folium.RegularPolygonMarker(
    l,
    fill_color='#132b5e',
    number_of_sides=3,
    radius=3
    ).add_to(my_map)
my_map

In [None]:
#Todo vizualize all the lines to see if they are connected