# Plot stop-to-stop segments with speed and delay

* To explore: would altair-catplot (https://github.com/justinbois/altair-catplot) be useful for these plots?

In [None]:
import altair as alt
import dask.dataframe as dd
import geopandas as gpd
import pandas as pd

from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date
from shared_utils import calitp_color_palette as cp

In [None]:
# Disabled exploratory code: everything below sits inside a bare string
# literal, so none of it is executed.
# The quoted code would summarize `actual_minus_scheduled_sec` per
# (gtfs_dataset_key, trip_id) as min / max / mean delay using three groupbys
# merged back together, then print how many trips have a max delay of at
# least i hours for i = 1..11 (the inner comment only mentions 1, 2, 3 hr).
# NOTE(review): the quoted code reads `df`, which is not defined by any
# earlier cell visible here -- confirm its source before re-enabling.
'''
def get_distribution(df):
    group_cols = ["gtfs_dataset_key", "trip_id"]
    col = "actual_minus_scheduled_sec"
    
    minimum = (df.groupby(group_cols)[col]
               .min()
               .reset_index()
               .rename(columns = {col: "min_delay"})
              )
    
    maximum = (df.groupby(group_cols)[col]
               .max()
               .reset_index()
               .rename(columns = {col: "max_delay"})
              )
    
    mean = (df.groupby(group_cols)[col]
               .mean()
               .reset_index()
               .rename(columns = {col: "mean_delay"})
              )
    
    stats = dd.merge(
        minimum,
        maximum,
        on = group_cols
    ).merge(
        mean, 
        on = group_cols
    )
    
    return stats

delay = get_distribution(df)

# over 1 hr delayed
for i in range(1, 12):
    # find how many trips have over 1, 2, 3 hr delay
    subset = delay[delay.max_delay >= 60*60*i]
    print(f"max_delay is over {i} hr: {len(subset)}")
'''

In [None]:
# Operator whose rows are kept for the rest of the notebook.
one_operator = "Big Blue Bus VehiclePositions"

# Load the hourly stop metrics for the analysis date, then narrow the
# GeoDataFrame down to the single operator chosen above.
gdf = gpd.read_parquet(
    f"./scripts/data/stop_metrics_by_hour_{analysis_date}.parquet"
)
gdf = gdf.loc[gdf["_gtfs_dataset_name"] == one_operator]

In [None]:
# Disabled map preview: the call below sits inside a bare string literal, so
# it is not executed. The quoted code would call `gdf.explore` on the
# "actual_minus_scheduled_min" column with "CartoDB Positron" tiles.
'''
gdf.explore(
    "actual_minus_scheduled_min",
    tiles = "CartoDB Positron"
)
'''

In [None]:
# Keep only the first two distinct route_ids as a small test subset.
test_routes = gdf["route_id"].unique()[:2].tolist()
gdf2 = gdf.loc[gdf["route_id"].isin(test_routes)]

# Lift Altair's row limit on the default data transformer so the full
# subset can be passed to the charts below.
alt.data_transformers.enable("default", max_rows=None)

In [None]:
# Dropdown widget listing every route_id present in the test subset.
input_dropdown = alt.binding_select(
    name="Route",
    options=gdf2["route_id"].unique().tolist(),
)

# Single-value selection on `route_id`, driven by the dropdown above.
# An initial value (init={'Route ID': "3567"}) was tried and left disabled.
select_route = alt.selection_single(
    name="Route",
    fields=["route_id"],
    bind=input_dropdown,
)

In [None]:
# Plain tabular copy for charting: drop the geometry and the seconds-based
# delay column before handing the data to Altair.
df = gdf2.drop(["actual_minus_scheduled_sec", "geometry"], axis="columns")

# Jittered strip plot of speeds for direction_id 0, one facet row per
# stop_sequence, colored by time of day.
# Alternatives left disabled by the author: mark_tick() instead of
# mark_point(); stack='zero' on the y channel; and an x encoding on
# "stop_sequence:Q" (title "Hour", domain [1, 23]).
speed_chart = (
    alt.Chart(df.loc[df["direction_id"] == 0])
    .mark_point(size=20, opacity=0.9, strokeWidth=1.1)
    .encode(
        x=alt.X(
            "speed_mph:Q",
            title="Speed (mph)",
            scale=alt.Scale(domain=[0, 40]),
        ),
        # y carries only the random jitter, so its axis is stripped of
        # labels and grid lines.
        y=alt.Y(
            "jitter:Q",
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        color=alt.Color(
            "time_of_day:N",
            title="Time of Day",
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
        ),
        row=alt.Row("stop_sequence:Q"),
        tooltip=["speed_mph"],
    )
    .transform_calculate(
        # Gaussian jitter via a Box-Muller transform; the -100 factor
        # (the textbook form uses -2) scales up the spread.
        jitter="sqrt(-100*log(random()))*cos(2*PI*random())"
    )
    .configure_facet(spacing=1)
    .configure_view(stroke=None)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .add_selection(select_route)
    .interactive()
    .properties(
        title="Speed Variation for Route: ",
        width=250,
        height=60,
    )
)

# Show only the rows matching the route picked in the dropdown.
speed_chart.transform_filter(select_route)

In [None]:
# Label every row "peak" when its time_of_day is "AM Peak" or "PM Peak",
# otherwise "off peak" (missing time_of_day values also become "off peak").
# A vectorized membership test replaces the row-wise `apply(..., axis=1)`:
# it produces the same labels without calling a Python lambda per row, and
# it still returns a Series when the frame is empty.
df = df.assign(
    peak=df.time_of_day.isin(["AM Peak", "PM Peak"]).map(
        {True: "peak", False: "off peak"}
    )
)

# Mean speed per operator / route / direction / stop_sequence, split by the
# peak vs. off-peak label.
time_of_day_agg = (
    df.groupby(
        [
            "gtfs_dataset_key",
            "_gtfs_dataset_name",
            "route_id",
            "direction_id",
            "stop_sequence",
            "peak",
        ]
    )["speed_mph"]
    .mean()
    .reset_index()
)

# Point chart of average speed against stop_sequence, faceted into two
# columns by direction_id and colored by peak / off peak.
avg_chart = (
    alt.Chart(time_of_day_agg)
    .mark_point()
    .encode(
        x=alt.X("speed_mph:Q"),
        y=alt.Y("stop_sequence:O"),
        color=alt.Color(
            "peak:N",
            # Skip the first two palette entries.
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BOLD_COLORS[2:]),
        ),
        facet=alt.Facet("direction_id:O", columns=2),
        tooltip=["speed_mph", "route_id", "peak", "direction_id"],
    )
    .add_selection(select_route)
    .interactive()
    .properties(
        title="Peak vs Offpeak Avg Speed",
        width=150,
        height=700,
    )
)

# Show only the rows matching the route picked in the dropdown.
avg_chart.transform_filter(select_route)