# Plot stop-to-stop segments with speed and delay

In [None]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import pandas as pd

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date

# Location of the config file handed to helpers.get_parameters().
CONFIG_PATH = "./scripts/config.yml"

In [None]:
# Look up the "stop_segments" parameters from the config file; the
# 'stop_delay_diagnostics' entry is used to build the parquet path.
STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")

In [None]:
# Read only the columns needed for the stop-level delay / speed summary.
delay_parquet_path = (
    f"{SEGMENT_GCS}{STOP_SEG_DICT['stop_delay_diagnostics']}_{analysis_date}.parquet"
)

delay_columns = [
    # identifiers
    "gtfs_dataset_key", "_gtfs_dataset_name", "trip_id",
    "shape_array_key", "feed_key", "name",
    "stop_id", "stop_sequence",
    # timestamp used to derive the trip departure hour
    "max_time",
    # metrics
    "actual_minus_scheduled_sec", "speed_mph",
]

df = dd.read_parquet(delay_parquet_path, columns=delay_columns)

In [None]:
# Show which columns were loaded.
df.columns

In [None]:
# Single key kept for spot checks; no other visible cell references it.
# NOTE(review): unclear which key column this value belongs to - confirm.
test_key = "48b51d6d96bb01c42c87e40824248fe4"


# Disabled: would load the stop segment geometries for this analysis date.
#scheduled_feed_key = df.feed_key.iloc[0]
#stop_segments = gpd.read_parquet(
#    f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet"
#)

In [None]:
def get_trip_departure_hour(df: dd.DataFrame) -> pd.DataFrame:
    """
    Approximate each trip's departure hour from its RT timestamps.

    For every (gtfs_dataset_key, trip_id) pair, take the earliest
    `max_time` value and keep only its hour.

    In the future, we would want to get trip departure hour based
    off of scheduled trips, not RT.

    Parameters
    ----------
    df : dask DataFrame containing `gtfs_dataset_key`, `trip_id`
        and a datetime `max_time` column.

    Returns
    -------
    pandas DataFrame (the dask result is computed here) with columns
    `gtfs_dataset_key`, `trip_id` and `departure_hour`.
    """
    trip_cols = ["gtfs_dataset_key", "trip_id"]
    timestamp_col = "max_time"

    # Earliest timestamp seen for each trip, reduced to its hour.
    earliest_hour = df.groupby(trip_cols)[timestamp_col].min().dt.hour

    # Materialize: there is one row per trip after the groupby.
    trip_df = earliest_hour.reset_index().compute()

    return trip_df.rename(columns={timestamp_col: "departure_hour"})

In [None]:
# Attach each trip's departure hour to every row of that trip.
trip_departure = get_trip_departure_hour(df)

df2 = df.merge(
    trip_departure,
    how="inner",
    on=["gtfs_dataset_key", "trip_id"],
)

# The raw timestamp is not needed once departure_hour is attached.
df2 = df2.drop(columns=["max_time"])

In [None]:
# Grouping keys: one row per operator / stop / stop_sequence / departure hour.
stop_cols = [
    "gtfs_dataset_key",
    "_gtfs_dataset_name",
    #"shape_array_key",
    "feed_key",
    "name",
    "stop_id",
    "stop_sequence",
    "departure_hour",
]

# Both metrics are averaged within each group.
mean_of_metrics = {
    metric: "mean"
    for metric in ["actual_minus_scheduled_sec", "speed_mph"]
}

stop_metrics = df2.groupby(stop_cols).agg(mean_of_metrics).reset_index()

# Express the average delay in minutes, rounded to one decimal place.
stop_metrics = stop_metrics.assign(
    actual_minus_scheduled_min=(
        stop_metrics["actual_minus_scheduled_sec"].divide(60).round(1)
    ),
)

In [None]:
# Scheduled stop geometries, limited to the columns used here.
stops = helpers.import_scheduled_stops(
    analysis_date,
    columns=["feed_key", "stop_id", "geometry"],
)

# Attach geometry to the stop metrics, materialize the result, and wrap it
# as a GeoDataFrame so it can be mapped.
stop_metrics_with_geom = gpd.GeoDataFrame(
    gtfs_schedule_wrangling.attach_stop_geometry(stop_metrics, stops).compute()
)

In [None]:
# Value of `_gtfs_dataset_name` used to filter the map and charts to one operator.
one_operator = "Big Blue Bus VehiclePositions"

In [None]:
# Keep only the rows belonging to the selected operator.
gdf = stop_metrics_with_geom.loc[
    stop_metrics_with_geom["_gtfs_dataset_name"] == one_operator
]

In [None]:
# Interactive map of the operator's stops, colored by average delay in minutes.
gdf.explore(column="actual_minus_scheduled_min", tiles="CartoDB Positron")

In [None]:
import altair as alt

In [None]:
def make_chart(df, stop):
    """
    Bar chart of minutes delayed by departure hour for a single stop.

    df: table with `geometry`, `stop_id`, `departure_hour` and
        `actual_minus_scheduled_min` columns.
    stop: the `stop_id` value to plot.

    Returns an Altair bar chart.

    NOTE(review): if a stop has several rows for the same hour
    (e.g. more than one stop_sequence), Altair stacks those bars by
    default - confirm that is the intended display.
    """
    # Drop the geometry column before handing the table to Altair.
    one_stop = df.drop(columns="geometry")
    one_stop = one_stop[one_stop.stop_id == stop]

    hour_axis = alt.X("departure_hour:Q", title="Hour")
    delay_axis = alt.Y("actual_minus_scheduled_min:Q", title="Minutes Delayed")

    return alt.Chart(one_stop).mark_bar().encode(x=hour_axis, y=delay_axis)

In [None]:
# Show a delay-by-hour chart for the first 10 distinct stops.
for one_stop_id in gdf.stop_id.unique()[:10]:
    display(make_chart(gdf, one_stop_id))

In [None]:
# Summary statistics of the raw delay (seconds) across all rows read in `df`.
df.actual_minus_scheduled_sec.describe().compute()