# A Jupyter notebook to play around with your implementation of the connection scan algorithm and to analyse timetable data

## Init

In [None]:
# change into the parent directory (presumably so that the `scripts` package can be imported — confirm against the repository layout)
%cd ..
from scripts.helpers.my_logging import init_logging
# NOTE(review): the arguments look like a log folder and a log file name — confirm in scripts.helpers.my_logging
init_logging("output", "log.log")

## Play with your implementation of the connection scan algorithm

In [None]:
# external import
from datetime import date
import io
import requests

In [None]:
# internal imports
from scripts.gtfs_parser import parse_gtfs
from scripts.connectionscan_router import ConnectionScanCore
from scripts.helpers.funs import hhmmss_to_sec, seconds_to_hhmmss

In [None]:
# path or link to your gtfs-file (a zip archive)
# the commented-out line shows a local path, the active line a download link
# path_to_gtfs = r"D:\data\20_gtfs_hrdf\gtfs\gtfsfp20202020-01-22.zip"
path_to_gtfs = "https://opentransportdata.swiss/dataset/6f55f96d-7644-4901-b927-e9cf05a8c7f0/resource/a81c59c2-6fd7-47c8-b7b6-90a045a90aae/download/gtfsfp20202020-01-22.zip"

In [None]:
# get the gtfs-file as an in-memory binary stream `f`:
# a link (http/https) is downloaded, anything else is read as a local file path
if path_to_gtfs.lower().startswith(("http://", "https://")):
    # the timeout avoids hanging forever on a stalled connection
    response = requests.get(path_to_gtfs, timeout=60)
    # fail here on an HTTP error instead of handing an error page to the gtfs parser
    response.raise_for_status()
    f = io.BytesIO(response.content)
else:
    with open(path_to_gtfs, "rb") as gtfs_file:
        f = io.BytesIO(gtfs_file.read())

In [None]:
# load timetable data from the gtfs-file (specify the date)
# NOTE(review): the name of the gtfs-file contains "2020-01-22" while the requested date is 2019-08-01 — confirm that this date is covered by the feed
cs_data = parse_gtfs(f, date(2019, 8, 1))
# router object built from the parsed timetable data; used by the routing cells
cs_core = ConnectionScanCore(cs_data)

In [None]:
# route from stop 8507000:0:1 to stop 8509251:0:1 for the time 07:20:00
# (argument order is presumably from-stop, to-stop, departure time — confirm in ConnectionScanCore)
res = cs_core.route_earliest_arrival("8507000:0:1", "8509251:0:1", hhmmss_to_sec("07:20:00"))
# print a message instead of trying to format an empty result
print(seconds_to_hhmmss(res) if res else "nichts gefunden")

In [None]:
# route from stop 8507000:0:1 to stop 8509255:0:1 for the time 07:20:00
# (argument order is presumably from-stop, to-stop, departure time — confirm in ConnectionScanCore)
res = cs_core.route_earliest_arrival("8507000:0:1", "8509255:0:1", hhmmss_to_sec("07:20:00"))
# print a message instead of trying to format an empty result
print(seconds_to_hhmmss(res) if res else "nichts gefunden")

In [None]:
# route from stop 8590054 to stop 8577003 for the time 07:30:00
res = cs_core.route_earliest_arrival("8590054", "8577003", hhmmss_to_sec("07:30:00"))
if res:
    print(seconds_to_hhmmss(res))
else:
    # an empty result is reported with a message
    print("nichts gefunden")

In [None]:
# route from stop 8590054 to stop 8509794 for the time 07:30:00
res = cs_core.route_earliest_arrival("8590054", "8509794", hhmmss_to_sec("07:30:00"))
if not res:
    # an empty result is reported with a message
    print("nichts gefunden")
else:
    print(seconds_to_hhmmss(res))

In [None]:
# route from stop 8590054 to stop 8574014 for the time 08:10:00
# NOTE(review): this cell calls `route`, not `route_earliest_arrival` — confirm that ConnectionScanCore provides `route` and that its result can be passed to seconds_to_hhmmss
res = cs_core.route("8590054", "8574014", hhmmss_to_sec("08:10:00"))
print(seconds_to_hhmmss(res) if res else "nichts gefunden")

## Analyse timetable data (do not expect the data in a gtfs-file to always be correct)

In [None]:
# imports
from collections import defaultdict, namedtuple
import pandas as pd
import math
import folium
from folium.plugins import MarkerCluster
from collections import defaultdict

In [None]:
def get_nb_deps_arrs_per_stop(cs_data):
    """counts the departures and the arrivals per stop_id.

    Walks once over cs_data.sorted_connections and returns two dicts:
    the number of departures per from_stop_id and the number of arrivals per to_stop_id.
    Both dicts are defaultdicts which yield 0 for a stop_id without any connection.
    """
    departures = defaultdict(int)
    arrivals = defaultdict(int)
    for connection in cs_data.sorted_connections:
        departures[connection.from_stop_id] += 1
        arrivals[connection.to_stop_id] += 1
    return departures, arrivals

In [None]:
# number of departures and arrivals per stop_id for the loaded timetable data
nb_deps_per_stop, nb_arrs_per_stop = get_nb_deps_arrs_per_stop(cs_data)

In [None]:
# number of departures counted for stop_id 8594277
nb_deps_per_stop["8594277"]

In [None]:
def get_arrivals_per_stop_id(cs_data, stop_id):
    """helper function which collects every arrival at stop_id.

    Returns a list of (trip, trip_index) named tuples. trip is a trip from cs_data.trips_per_id
    and trip_index is the position in trip.connections of the connection arriving at stop_id.
    A trip arriving several times at stop_id appears once per arrival.
    """
    TripAndIndex = namedtuple("TripAndIndex", ["trip", "trip_index"])
    return [
        TripAndIndex(trip, position)
        for trip in cs_data.trips_per_id.values()
        for position, connection in enumerate(trip.connections)
        if connection.to_stop_id == stop_id
    ]

In [None]:
def get_arrivals_df_per_stop_id(cs_data, stop_id):
    """builds a pandas dataframe with one row per arrival in stop_id.

    Each row holds the id, name and departure time of the first stop of the arriving trip,
    the id, name and arrival time at stop_id, and the id, name and arrival time of the last
    stop of the trip. Times are formatted with seconds_to_hhmmss. The rows are sorted by the
    column "arr" and the index is renumbered starting at 0.
    """
    columns = ("first_stop_id", "first_stop_name", "dep_first_stop", "stop_id", "stop_name", "arr", "last_stop_id", "last_stop_name", "arr_last_stop")
    rows = []
    for trip, trip_index in get_arrivals_per_stop_id(cs_data, stop_id):
        first_connection = trip.connections[0]
        last_connection = trip.connections[-1]
        rows.append((
            first_connection.from_stop_id,
            cs_data.stops_per_id[first_connection.from_stop_id].name,
            seconds_to_hhmmss(first_connection.dep_time),
            stop_id,
            cs_data.stops_per_id[stop_id].name,
            seconds_to_hhmmss(trip.connections[trip_index].arr_time),
            last_connection.to_stop_id,
            cs_data.stops_per_id[last_connection.to_stop_id].name,
            seconds_to_hhmmss(last_connection.arr_time),
        ))
    arrivals = pd.DataFrame.from_records(rows, columns=columns)
    # drop=True discards the old index instead of adding it as a column
    return arrivals.sort_values("arr").reset_index(drop=True)

In [None]:
# table with all arrivals in stop_id 8574008
get_arrivals_df_per_stop_id(cs_data, "8574008")

In [None]:
def haversine(coord1, coord2):
    """returns the great-circle distance in meters between two points on earth.

    coord1 and coord2 are (latitude, longitude) pairs in degrees.
    """
    earth_radius = 6372800  # meters
    (lat_a, lon_a), (lat_b, lon_b) = coord1, coord2

    lat_a_rad = math.radians(lat_a)
    lat_b_rad = math.radians(lat_b)
    delta_lat_rad = math.radians(lat_b - lat_a)
    delta_lon_rad = math.radians(lon_b - lon_a)

    # haversine term of the central angle between the two points
    a = math.sin(delta_lat_rad/2)**2 + math.cos(lat_a_rad)*math.cos(lat_b_rad)*math.sin(delta_lon_rad/2)**2
    central_angle_half = math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return 2*earth_radius*central_angle_half

def get_distance_between_stops(stop_1, stop_2):
    """calculates the distance in meters between two stops with haversine().

    The northing of a stop is passed as latitude and its easting as longitude.
    """
    position_1 = [stop_1.northing, stop_1.easting]
    position_2 = [stop_2.northing, stop_2.easting]
    return haversine(position_1, position_2)

In [None]:
# use this map to analyse the timetable data if your router is not doing what you expect (maybe the timetable data is not complete).
def plot_stops_around_stop_id(cs_data, stop_id, radius=5000, circle_min_radius=7, draw_footpaths=True):
    """
    plots a folium map centered on stop_id with all stops within radius meters.

    Every stop is drawn as a circle marker whose popup shows the id, the name, the number of
    arrivals and the number of departures. A stop with at least one arrival or departure is
    filled red and its circle radius is 7.5 * log10(arrivals + departures), but never smaller
    than circle_min_radius. If draw_footpaths is set, a thin black line is drawn from each
    plotted stop to every stop reachable over one of its outgoing footpaths.
    Returns the folium map.
    """
    center_stop = cs_data.stops_per_id[stop_id]

    # group the footpath targets by the stop the footpath starts at
    footpath_targets_per_stop_id = defaultdict(list)
    for footpath in cs_data.footpaths_per_from_to_stop_id.values():
        footpath_targets_per_stop_id[footpath.from_stop_id].append(footpath.to_stop_id)

    map_stops = folium.Map(location=[center_stop.northing, center_stop.easting], zoom_start=13, max_zoom=30, tiles="cartodbpositron")
    # marker_cluster = MarkerCluster().add_to(map_stops)
    nb_deps_per_stop, nb_arrs_per_stop = get_nb_deps_arrs_per_stop(cs_data)

    # iterate over a snapshot of the stops
    for a_stop in list(cs_data.stops_per_id.values()):
        if get_distance_between_stops(center_stop, a_stop) > radius:
            continue

        nb_deps = nb_deps_per_stop.get(a_stop.id, 0)
        nb_arrs = nb_arrs_per_stop.get(a_stop.id, 0)
        nb_deps_arrs = nb_deps + nb_arrs
        if nb_deps_arrs == 0:
            # a stop without any service gets the minimal circle and no fill color
            marker_fill_color = None
            marker_radius = circle_min_radius
        else:
            marker_fill_color = "red"
            marker_radius = max(7.5*math.log(nb_deps_arrs, 10), circle_min_radius)

        folium.CircleMarker(
            [a_stop.northing, a_stop.easting],
            popup="{}, {}, #arrs: {}, #deps: {}".format(a_stop.id, a_stop.name,nb_arrs, nb_deps),
            fill_color=marker_fill_color,
            radius=marker_radius,
        ).add_to(map_stops)

        if not draw_footpaths:
            continue
        for connected_stop_id in footpath_targets_per_stop_id[a_stop.id]:
            line_start = cs_data.stops_per_id[a_stop.id]
            line_end = cs_data.stops_per_id[connected_stop_id]
            folium.PolyLine(
                [
                    [line_start.northing, line_start.easting],
                    [line_end.northing, line_end.easting],
                ],
                color="black",
                weight=1.0,
            ).add_to(map_stops)
    return map_stops

In [None]:
# plot all stops within 200 meters around stop_id 8507000P together with their footpaths
map_stops = plot_stops_around_stop_id(cs_data, "8507000P", radius=200, draw_footpaths=True)
# the last expression of the cell is shown by the notebook
map_stops