In [None]:
# Reload imported modules automatically before each cell is executed
# (autoreload mode 2 covers all imported modules), so edits to project code
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pyspark
from pyspark.sql import SparkSession

from pyspark.sql import functions as F
from pyspark.sql.functions import udf
from pyspark.sql.types import IntegerType

from src.journey_finder import JourneyFinder
from src.delay_prediction import DelayPredictor

from pyspark.ml import PipelineModel
import getpass
import os

In [None]:
# Create (or reuse) the Spark session for this notebook; the application name
# embeds the current OS user name.
spark = SparkSession.builder.appName(f'final-project-{getpass.getuser()}').getOrCreate()

# Keep handles to the underlying Spark context and its configuration.
sc = spark.sparkContext
conf = sc.getConf()

# Report what we are connected to, using the public context handle instead of
# the private `spark._sc` attribute.
print(f'Start Spark name:{sc.appName}, version:{spark.version}')

In [None]:
# HDFS user directory that holds the trained model and the feature table.
# Defaults to the current $USER (falling back to 'anonym'); to test against a
# teammate's artifacts, override `username` here (e.g. with 'kli').
username = os.environ.get('USER', 'anonym')

# Locations of the persisted pipeline model and of the features parquet file.
model_path = f"/user/{username}/models"
hdfs_path = f"/user/{username}/features_with_edge_stats.parquet"

# Load the pipeline model first, then the features it is used with.
loadedPipelineModel = PipelineModel.load(model_path)
features_with_stats = spark.read.parquet(hdfs_path)

In [None]:
# Build the delay predictor from the loaded features, the loaded pipeline
# model and the active Spark session.
delayPredictor = DelayPredictor(
    features_with_stats=features_with_stats,
    loadedPipelineModel=loadedPipelineModel,
    spark=spark,
)

In [None]:
import datetime
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import json

# NOTE: run the data preparation first; otherwise the files below may not be
# relevant to the area you are interested in.
timetable = pd.read_csv('data/timetable.csv')
footpaths = pd.read_csv('data/footpaths.csv')
stops_info = pd.read_csv('data/stops.csv')

# Label of the form "<stop name> (<stop id>)", used as the dropdown entries.
# NOTE(review): this concatenation assumes both columns are read as strings;
# a purely numeric stop_id column would presumably fail here -- confirm.
stops_info['stop_name_id'] = stops_info['stop_name'] + " (" + stops_info['stop_id'] + ")"

# Dictionary mapping each journey_stop_id to its isdaten_stop_id.
stop_matching_table = pd.read_csv('data/stop_matching.csv')
stops_matching = stop_matching_table.set_index('journey_stop_id')['isdaten_stop_id'].to_dict()

# The journey finder object contains all the routing logic; give it every
# table loaded above plus the delay predictor.
journey_finder = JourneyFinder(
    timetable=timetable,
    footpaths=footpaths,
    stops_info=stops_info,
    delay_predictor=delayPredictor,
    stops_matching=stops_matching,
)

# Only offer stops in the UI that also appear in the timetable or the footpaths.
possible_stops = set().union(
    timetable['dep_stop'],
    timetable['arr_stop'],
    footpaths['stop_id_a'],
    footpaths['stop_id_b'],
)
is_known_stop = stops_info['stop_id'].isin(possible_stops)
stops_info_subset = stops_info[is_known_stop]

# Both dropdowns offer the same sorted, de-duplicated list of stop labels.
stop_options = sorted(set(stops_info_subset['stop_name_id']))

source_widget = widgets.Dropdown(options=stop_options, description='Origin Stop:')

destination_widget = widgets.Dropdown(options=stop_options, description='Dest. Stop:')

# Day of the week for which the journey is planned.
day_widget = widgets.Dropdown(
    options=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    value='Monday',
    description='Select a Day:'
)

# Free-text arrival time. The description is longer than the default label
# width, so let the label grow to fit instead of being truncated.
time_widget = widgets.Text(
    value='20:00:00',
    description='Arr. Time (HH:MM:SS)',
    style={'description_width': 'initial'},
)

# Confidence, selectable as an integer from 1 to 100 (default 90).
# NOTE(review): this slider is displayed, but the click handler in this
# notebook never reads its value -- confirm how it is meant to be used.
confidence_widget = widgets.IntSlider(
    value=90,
    min=1,
    max=100,
    step=1,
    description='Confidence:',
    orientation='horizontal',
)

# Button that triggers the journey search.
button = widgets.Button(
    description='Find Journeys'
)

# Area in which the search results are rendered, and a small status label.
output = widgets.Output()
running = widgets.Label(value='')

# Search for the journeys and plot them.
def update_profile_return(b):
    """Click handler for the 'Find Journeys' button.

    Clears the previous results, then asks the journey finder to search for
    and plot journeys using the current values of the origin, destination,
    arrival-time and day widgets. Everything the search prints or displays is
    captured by the ``output`` widget.

    Parameters
    ----------
    b
        The object passed in by the click callback; it is not used.
    """
    output.clear_output()
    with output:
        running.value = 'running...'
        try:
            # NOTE(review): confidence_widget is shown in the UI but its value
            # is not forwarded here -- confirm which parameter of
            # find_and_plot_journeys (if any) should receive it.
            journey_finder.find_and_plot_journeys(
                start_station_name_id=source_widget.value,
                end_station_name_id=destination_widget.value,
                arrival_datetime=time_widget.value,
                day=day_widget.value
            )
        finally:
            # Always reset the status label: if the search raises, the
            # statement after the call would be skipped and the label would
            # stay stuck on 'running...'.
            running.value = ''
        

# Register the click handler, start from an empty output area and render the form.
button.on_click(update_profile_return)
output.clear_output()
display(
    source_widget,
    destination_widget,
    time_widget,
    day_widget,
    confidence_widget,
    button,
    running,
    output,
)