# A Jupyter notebook to play around with your implementation of the connection scan algorithm and to analyse timetable data

## Init

In [None]:
# Change to the parent directory (presumably the repository root, so that the
# `scripts` package can be imported -- confirm) and set up logging.
# NOTE(review): `%cd ..` is relative, so re-running this cell moves up one more level.
%cd ..
from scripts.helpers.my_logging import init_logging
init_logging("output", "log.log")

## Play with your implementation of the connection scan algorithm

In [None]:
# external import
from datetime import date
import io
import requests
import pandas as pd

In [None]:
# internal imports
from scripts.gtfs_parser import parse_gtfs
from scripts.connectionscan_router import ConnectionScanCore
from scripts.helpers.funs import hhmmss_to_sec, seconds_to_hhmmss

In [None]:
def cs_core_from_gtfs_weblink(url_to_gtfs_feed, desired_date):
    """Download a GTFS feed and build a ConnectionScanCore from it.

    Args:
        url_to_gtfs_feed: URL from which the GTFS feed is downloaded.
        desired_date: date that is passed on to parse_gtfs.

    Returns:
        A ConnectionScanCore built from the output of parse_gtfs.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if connecting takes longer than 10 seconds or the
            server stops sending data for more than 300 seconds.
    """
    # (connect timeout, read timeout): the read timeout limits the silence between
    # received chunks, not the total download time, so large feeds still work.
    response = requests.get(url_to_gtfs_feed, timeout=(10, 300))
    # Fail loudly here instead of handing an HTML error page to the GTFS parser.
    response.raise_for_status()
    feed_buffer = io.BytesIO(response.content)
    return ConnectionScanCore(parse_gtfs(feed_buffer, desired_date))

In [None]:
# Download the Swiss GTFS feed and build the router for 2020-01-24.
path_to_gtfs_ch = "https://opentransportdata.swiss/dataset/6f55f96d-7644-4901-b927-e9cf05a8c7f0/resource/a81c59c2-6fd7-47c8-b7b6-90a045a90aae/download/gtfsfp20202020-01-22.zip"
cs_core_ch = cs_core_from_gtfs_weblink(path_to_gtfs_ch, date(2020, 1, 24))

In [None]:
# Download the Paris (RATP) GTFS feed and build the router for 2020-01-24.
path_to_gtfs_paris = "https://transitfeeds.com/p/regie-autonome-des-transports-parisiens/413/20200118/download"
cs_core_paris = cs_core_from_gtfs_weblink(path_to_gtfs_paris, date(2020, 1, 24))

In [None]:
# Download the New York subway (MTA) GTFS feed and build the router for 2020-01-24.
path_to_gtfs_new_york_subway = "https://transitfeeds.com/p/mta/79/20200109/download"
cs_core_ny_subway = cs_core_from_gtfs_weblink(path_to_gtfs_new_york_subway, date(2020, 1, 24))

In [None]:
# Download the Madrid (EMT) GTFS feed and build the router for 2020-01-24.
path_to_gtfs_madrid = "https://transitfeeds.com/p/emt-madrid/212/20200123/download"
cs_core_madrid = cs_core_from_gtfs_weblink(path_to_gtfs_madrid, date(2020, 1, 24))

### Earliest arrival routing

#### Switzerland

In [None]:
# Earliest-arrival query by stop name: "Bern" -> "Samedan", time argument "07:20:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_ch.route_earliest_arrival_by_name("Bern", "Samedan", "07:20:00")
print(seconds_to_hhmmss(res))

In [None]:
# Earliest-arrival query by stop name: "Bern" -> "Pontresina", time argument "07:20:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_ch.route_earliest_arrival_by_name("Bern", "Pontresina", "07:20:00")
print(seconds_to_hhmmss(res))

In [None]:
# Earliest-arrival query by stop name: "Bern, Dübystrasse" -> "Ostermundigen, Bahnhof", time argument "07:20:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_ch.route_earliest_arrival_by_name("Bern, Dübystrasse", "Ostermundigen, Bahnhof", "07:20:00")
print(seconds_to_hhmmss(res))

In [None]:
# Earliest-arrival query by stop name: "Bern, Dübystrasse" -> "Maloja, Posta", time argument "07:20:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_ch.route_earliest_arrival_by_name("Bern, Dübystrasse", "Maloja, Posta", "07:20:00")
print(seconds_to_hhmmss(res))

In [None]:
# Earliest-arrival query by stop name: "Bern, Dübystrasse" -> "Heiterswil, Krummbach", time argument "08:10:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_ch.route_earliest_arrival_by_name("Bern, Dübystrasse", "Heiterswil, Krummbach", "08:10:00")
print(seconds_to_hhmmss(res))

#### Paris

In [None]:
# Earliest-arrival query by stop name: "Saint-Lazare" -> "Champs-Elysées-Clémenceau", time argument "07:20:00".
# The result is formatted with seconds_to_hhmmss before printing.
res = cs_core_paris.route_earliest_arrival_by_name("Saint-Lazare", "Champs-Elysées-Clémenceau", "07:20:00")
print(seconds_to_hhmmss(res))

### Earliest arrival routing with reconstruction

In [None]:
def journey_to_df(cs_core, journey):
    """Turn a reconstructed journey into a pandas DataFrame with one row per leg part.

    For every journey leg, a row of type "öV" is written when the leg has an
    in_connection (spanning in_connection.from_stop_id to out_connection.to_stop_id),
    followed by a row of type "Fuss" when the leg has a footpath.

    Args:
        cs_core: router object exposing connection_scan_data.stops_per_id.
        journey: object with a journey_legs iterable, or None.

    Returns:
        The string "Nichts gefunden" if journey is None, otherwise a DataFrame with
        the columns from_stop_id, from_stop_name, dep_time, to_stop_id,
        to_stop_name, arr_time, duration and leg_type.
    """
    timetable = cs_core.connection_scan_data
    if journey is None:
        return "Nichts gefunden"

    def name_of(stop_id):
        # Resolve a stop id to its display name.
        return timetable.stops_per_id[stop_id].name

    rows = []
    for leg in journey.journey_legs:
        if leg.in_connection is not None:
            # Ride part: board at the first connection, alight at the last one.
            board_id = leg.in_connection.from_stop_id
            alight_id = leg.out_connection.to_stop_id
            departure = leg.in_connection.dep_time
            arrival = leg.out_connection.arr_time
            rows.append([
                board_id,
                name_of(board_id),
                seconds_to_hhmmss(departure),
                alight_id,
                name_of(alight_id),
                seconds_to_hhmmss(arrival),
                seconds_to_hhmmss(arrival - departure),
                "öV",
            ])
        if leg.footpath is not None:
            # Walking part: no clock times, only the walking duration.
            walk_from_id = leg.footpath.from_stop_id
            walk_to_id = leg.footpath.to_stop_id
            rows.append([
                walk_from_id,
                name_of(walk_from_id),
                "",
                walk_to_id,
                name_of(walk_to_id),
                "",
                seconds_to_hhmmss(leg.footpath.walking_time),
                "Fuss",
            ])

    header = [
        "from_stop_id", "from_stop_name", "dep_time",
        "to_stop_id", "to_stop_name", "arr_time",
        "duration", "leg_type",
    ]
    return pd.DataFrame.from_records(rows, columns=header)

#### Switzerland

In [None]:
# Journey reconstruction (non-optimized variant) by stop name: "Bern" -> "Samedan", time argument "16:20:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_earliest_arrival_with_reconstruction_by_name("Bern", "Samedan", "16:20:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern" -> "Samedan", time argument "16:20:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern", "Samedan", "16:20:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern" -> "Pontresina", time argument "07:20:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern", "Pontresina", "07:20:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern, Dübystrasse" -> "Ostermundigen, Bahnhof", time argument "07:30:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern, Dübystrasse", "Ostermundigen, Bahnhof", "07:30:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern, Dübystrasse" -> "Maloja, Posta", time argument "07:30:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern, Dübystrasse", "Maloja, Posta", "07:30:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern, Dübystrasse" -> "Heiterswil, Krummbach", time argument "08:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern, Dübystrasse", "Heiterswil, Krummbach", "08:10:00")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Ebikon, Falken" -> "Dietwil, Wannenmatt", time argument "12:35:23".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Ebikon, Falken", "Dietwil, Wannenmatt", "12:35:23")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern" -> "Zürich HB", time argument "12:35:23".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern", "Zürich HB", "12:35:23")
journey_to_df(cs_core_ch, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Bern, Wyleregg" -> "La Ferrière", time argument "17:30:00".
# The journey is displayed as a DataFrame.
res = cs_core_ch.route_optimized_earliest_arrival_with_reconstruction_by_name("Bern, Wyleregg", "La Ferrière", "17:30:00")
journey_to_df(cs_core_ch, res)

#### Paris

In [None]:
# Journey reconstruction (non-optimized variant) by stop name: "Saint-Lazare" -> "Champs-Elysées-Clémenceau", time argument "07:20:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_earliest_arrival_with_reconstruction_by_name("Saint-Lazare", "Champs-Elysées-Clémenceau", "07:20:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Saint-Lazare" -> "Champs-Elysées-Clémenceau", time argument "07:20:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_optimized_earliest_arrival_with_reconstruction_by_name("Saint-Lazare", "Champs-Elysées-Clémenceau", "07:20:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (non-optimized variant) by stop name: "Jourdain" -> "Billancourt", time argument "12:30:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_earliest_arrival_with_reconstruction_by_name("Jourdain", "Billancourt", "12:30:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Jourdain" -> "Billancourt", time argument "12:30:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_optimized_earliest_arrival_with_reconstruction_by_name("Jourdain", "Billancourt", "12:30:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (non-optimized variant) by stop name: "La Défense (Grande Arche)" -> "Rambuteau", time argument "21:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_earliest_arrival_with_reconstruction_by_name("La Défense (Grande Arche)", "Rambuteau", "21:10:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "La Défense (Grande Arche)" -> "Rambuteau", time argument "21:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_optimized_earliest_arrival_with_reconstruction_by_name("La Défense (Grande Arche)", "Rambuteau", "21:10:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (non-optimized variant) by stop name: "JEAN BONAL" -> "RESIDENCE VERDUN", time argument "05:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_earliest_arrival_with_reconstruction_by_name("JEAN BONAL", "RESIDENCE VERDUN", "05:10:00")
journey_to_df(cs_core_paris, res)

In [None]:
# Journey reconstruction (optimized variant) by stop name: "JEAN BONAL" -> "RESIDENCE VERDUN", time argument "05:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_paris.route_optimized_earliest_arrival_with_reconstruction_by_name("JEAN BONAL", "RESIDENCE VERDUN", "05:10:00")
journey_to_df(cs_core_paris, res)

#### New York subway

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Aqueduct - N Conduit Av" -> "Forest Av", time argument "05:10:00".
# The journey is displayed as a DataFrame.
res = cs_core_ny_subway.route_optimized_earliest_arrival_with_reconstruction_by_name("Aqueduct - N Conduit Av", "Forest Av", "05:10:00")
journey_to_df(cs_core_ny_subway, res)

#### Madrid

In [None]:
# Journey reconstruction (optimized variant) by stop name: "Madre Antonia París" -> "Velázquez - Villanueva", time argument "09:35:00".
# The journey is displayed as a DataFrame.
res = cs_core_madrid.route_optimized_earliest_arrival_with_reconstruction_by_name("Madre Antonia París", "Velázquez - Villanueva", "09:35:00")
journey_to_df(cs_core_madrid, res)

## Analyse timetable data (don't expect the data in a GTFS file to always be complete or correct)

In [None]:
# imports
from collections import defaultdict, namedtuple
import pandas as pd
import math
import folium
from folium.plugins import MarkerCluster
from collections import defaultdict

In [None]:
def get_nb_deps_arrs_per_stop(cs_core):
    """Count the departures and arrivals per stop id.

    Every connection in cs_core.connection_scan_data.sorted_connections counts as
    one departure at its from_stop_id and one arrival at its to_stop_id.

    Args:
        cs_core: router object exposing connection_scan_data.sorted_connections.

    Returns:
        A tuple (nb_deps_per_stop, nb_arrs_per_stop) of plain dicts mapping a
        stop id to its count. Stops without any departure (or arrival) are
        absent from the respective dict.
    """
    cs_data = cs_core.connection_scan_data
    nb_deps_per_stop = defaultdict(int)
    nb_arrs_per_stop = defaultdict(int)
    for con in cs_data.sorted_connections:
        nb_deps_per_stop[con.from_stop_id] += 1
        nb_arrs_per_stop[con.to_stop_id] += 1
    # Convert to plain dicts so that lookups by callers do not insert new keys.
    return dict(nb_deps_per_stop), dict(nb_arrs_per_stop)

In [None]:
# Departure and arrival counts per stop id for the Paris feed.
nb_deps_per_stop, nb_arrs_per_stop = get_nb_deps_arrs_per_stop(cs_core_paris)

In [None]:
def get_arrivals_per_stop_id(cs_core, stop_id):
    """Collect every arrival at stop_id.

    Scans the connections of all trips in cs_core.connection_scan_data.trips_per_id
    and returns one TripAndIndex(trip, trip_index) namedtuple per connection whose
    to_stop_id equals stop_id; trip_index is the position of that connection in
    trip.connections. A trip arriving at the stop several times is listed once
    per arrival.
    """
    timetable = cs_core.connection_scan_data
    TripAndIndex = namedtuple("TripAndIndex", ["trip", "trip_index"])
    return [
        TripAndIndex(trip, position)
        for trip in timetable.trips_per_id.values()
        for position, connection in enumerate(trip.connections)
        if connection.to_stop_id == stop_id
    ]

def get_arrivals_per_stop_name(cs_core, stop_name):
    """Look up the stop called stop_name and return the arrivals at its stop id.

    Raises KeyError if stop_name is not a key of stops_per_name (assuming that
    is a plain dict).
    """
    named_stop = cs_core.connection_scan_data.stops_per_name[stop_name]
    return get_arrivals_per_stop_id(cs_core, named_stop.id)

In [None]:
def get_arrivals_df_per_stop_id(cs_core, stop_id):
    """Return a pandas DataFrame listing all arrivals at stop_id.

    Each row describes one arriving trip: its first stop and departure time
    there, the arrival time at stop_id, and its last stop and arrival time there.
    Rows are sorted by the formatted "arr" column (i.e. as strings) and the
    index is renumbered from 0.
    """
    timetable = cs_core.connection_scan_data
    header = (
        "first_stop_id", "first_stop_name", "dep_first_stop",
        "stop_id", "stop_name", "arr",
        "last_stop_id", "last_stop_name", "arr_last_stop",
    )

    def as_record(arrival):
        # One output row for a single (trip, connection index) arrival.
        first_con = arrival.trip.connections[0]
        last_con = arrival.trip.connections[-1]
        arriving_con = arrival.trip.connections[arrival.trip_index]
        return (
            first_con.from_stop_id,
            timetable.stops_per_id[first_con.from_stop_id].name,
            seconds_to_hhmmss(first_con.dep_time),
            stop_id,
            timetable.stops_per_id[stop_id].name,
            seconds_to_hhmmss(arriving_con.arr_time),
            last_con.to_stop_id,
            timetable.stops_per_id[last_con.to_stop_id].name,
            seconds_to_hhmmss(last_con.arr_time),
        )

    records = [as_record(arrival) for arrival in get_arrivals_per_stop_id(cs_core, stop_id)]
    arrivals_df = pd.DataFrame.from_records(records, columns=header)
    # drop=True discards the old index instead of adding it as a column first.
    return arrivals_df.sort_values("arr").reset_index(drop=True)

def get_arrivals_df_per_stop_name(cs_core, stop_name):
    """Look up the stop called stop_name and return the arrivals DataFrame for its stop id.

    Raises KeyError if stop_name is not a key of stops_per_name (assuming that
    is a plain dict).
    """
    named_stop = cs_core.connection_scan_data.stops_per_name[stop_name]
    return get_arrivals_df_per_stop_id(cs_core, named_stop.id)

In [None]:
# All arrivals at "Euclid Av" in the New York subway feed, sorted by the "arr" column.
get_arrivals_df_per_stop_name(cs_core_ny_subway, "Euclid Av")

In [None]:
def haversine(coord1, coord2):
    """Return the great-circle distance in meters between two points on earth.

    Args:
        coord1: (latitude, longitude) of the first point, in degrees.
        coord2: (latitude, longitude) of the second point, in degrees.

    Returns:
        The distance in meters as a float, using a spherical earth model with
        a radius of 6372800 m.
    """
    R = 6372800  # Earth radius in meters
    lat1, lon1 = coord1
    lat2, lon2 = coord2

    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)

    a = math.sin(dphi / 2) ** 2 + \
        math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2) ** 2
    # Mathematically 0 <= a <= 1, but rounding can push it slightly outside that
    # range for (near-)antipodal points, which would make sqrt(1 - a) raise.
    a = min(1.0, max(0.0, a))

    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a))

def get_distance_between_stops(stop_1, stop_2):
    """Return the haversine distance in meters between two stops.

    Each stop's northing is passed as latitude and its easting as longitude.
    """
    origin = [stop_1.northing, stop_1.easting]
    target = [stop_2.northing, stop_2.easting]
    return haversine(origin, target)

In [None]:
# use this map to analyse the timetable data if your router is not doing what you expect (maybe the timetable data is not complete).
def plot_stops_around_stop_id(cs_core, stop_name, nb_stops=500, circle_min_radius=7, draw_footpaths=True):
    """
    Build a folium map showing the stops closest to the stop called stop_name.

    Despite the function name, the reference stop is looked up by *name*
    (cs_data.stops_per_name), not by id.

    Args:
        cs_core: router object exposing connection_scan_data.
        stop_name: name of the stop the map is centered on.
        nb_stops: number of nearest stops (by haversine distance) to draw.
        circle_min_radius: smallest marker radius; busier stops get a radius of
            7.5 * log10(#departures + #arrivals) when that is larger.
        draw_footpaths: if True, draw a thin black line for every footpath
            leaving one of the drawn stops.

    Returns:
        The folium.Map. Stops with at least one departure or arrival are
        filled red; the popup shows id, name and both counts.
    """
    timetable = cs_core.connection_scan_data
    center = timetable.stops_per_name[stop_name]

    # Footpath target ids grouped by the id of the stop they start from.
    footpath_targets = defaultdict(list)
    for path in timetable.footpaths_per_from_to_stop_id.values():
        footpath_targets[path.from_stop_id].append(path.to_stop_id)

    stop_map = folium.Map(location=[center.northing, center.easting], zoom_start=13, max_zoom=30, tiles="cartodbpositron")
    departures_per_stop, arrivals_per_stop = get_nb_deps_arrs_per_stop(cs_core)

    def distance_to_center(candidate):
        return get_distance_between_stops(center, candidate)

    nearest_stops = sorted(timetable.stops_per_id.values(), key=distance_to_center)[:nb_stops]
    for near_stop in nearest_stops:
        departures = departures_per_stop.get(near_stop.id, 0)
        arrivals = arrivals_per_stop.get(near_stop.id, 0)
        activity = departures + arrivals

        # Marker size grows logarithmically with activity, but never below the minimum.
        if activity == 0:
            marker_radius = circle_min_radius
        else:
            scaled_radius = 7.5*math.log(activity, 10)
            marker_radius = circle_min_radius if scaled_radius < circle_min_radius else scaled_radius

        if activity > 0:
            marker_fill = "red"
        else:
            marker_fill = None

        folium.CircleMarker(
            [near_stop.northing, near_stop.easting],
            popup="{}, {}, #arrs: {}, #deps: {}".format(near_stop.id, near_stop.name, arrivals, departures),
            fill_color=marker_fill,
            radius=marker_radius
        ).add_to(stop_map)

        if not draw_footpaths:
            continue
        for target_id in footpath_targets[near_stop.id]:
            line_start = [timetable.stops_per_id[near_stop.id].northing, timetable.stops_per_id[near_stop.id].easting]
            line_end = [timetable.stops_per_id[target_id].northing, timetable.stops_per_id[target_id].easting]
            folium.PolyLine(
                [line_start, line_end],
                color="black",
                weight=1.0
            ).add_to(stop_map)
    return stop_map

In [None]:
# Map of the 1000 stops nearest to "Torrelaguna - Avenida De Badajoz" (Madrid feed), without footpaths.
map_stops = plot_stops_around_stop_id(cs_core_madrid, "Torrelaguna - Avenida De Badajoz", nb_stops=1000, draw_footpaths=False)
map_stops