# Plot stop-to-stop segments with speed and delay

* TODO: explore whether [altair-catplot](https://github.com/justinbois/altair-catplot) could simplify the strip plots below.

In [1]:
import altair as alt
import dask.dataframe as dd
import geopandas as gpd
import pandas as pd

from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date
from shared_utils import calitp_color_palette as cp

# Use the default data transformer with max_rows=None, i.e. without a
# row cap, so the full dataframes can be passed straight to alt.Chart.
alt.data_transformers.enable('default', max_rows=None)



DataTransformerRegistry.enable('default')

In [None]:
# Disabled exploratory cell: everything below is wrapped in a string
# literal, so none of it runs. It sketches per-trip min / max / mean of
# `actual_minus_scheduled_sec` and counts trips whose max delay is at
# least 1..11 hours.
# NOTE(review): it calls get_distribution(df), but `df` is not assigned
# until a later cell -- confirm the intended input before re-enabling.
'''
def get_distribution(df):
    group_cols = ["gtfs_dataset_key", "trip_id"]
    col = "actual_minus_scheduled_sec"
    
    minimum = (df.groupby(group_cols)[col]
               .min()
               .reset_index()
               .rename(columns = {col: "min_delay"})
              )
    
    maximum = (df.groupby(group_cols)[col]
               .max()
               .reset_index()
               .rename(columns = {col: "max_delay"})
              )
    
    mean = (df.groupby(group_cols)[col]
               .mean()
               .reset_index()
               .rename(columns = {col: "mean_delay"})
              )
    
    stats = dd.merge(
        minimum,
        maximum,
        on = group_cols
    ).merge(
        mean, 
        on = group_cols
    )
    
    return stats

delay = get_distribution(df)

# over 1 hr delayed
for i in range(1, 12):
    # find how many trips have over 1, 2, 3 hr delay
    subset = delay[delay.max_delay >= 60*60*i]
    print(f"max_delay is over {i} hr: {len(subset)}")
'''

In [2]:
# Operator used as the worked example in this notebook.
one_operator = "Big Blue Bus VehiclePositions"

# Stop metrics by hour for the analysis date, read from the local
# scripts/data folder.
gdf = gpd.read_parquet(
    f"./scripts/data/stop_metrics_by_hour_{analysis_date}.parquet"
)

# Alphabetical list of every operator name present in the data.
OPERATORS = sorted(gdf["_gtfs_dataset_name"].unique().tolist())

In [3]:
def stop_avg_by_peak_off_peak(gdf):
    """
    Aggregate to peak/offpeak.

    Rows whose `time_of_day` is "AM Peak" or "PM Peak" are labeled
    "peak"; every other value (including missing) is labeled "off peak".
    Speeds are then averaged per operator / route / direction /
    stop_sequence / peak group, weighted by `n_trips`.

    Parameters
    ----------
    gdf : DataFrame (or GeoDataFrame) with the columns
        gtfs_dataset_key, _gtfs_dataset_name, route_id, direction_id,
        stop_sequence, time_of_day, speed_mph and n_trips.

    Returns
    -------
    DataFrame with one row per group, holding the grouping columns,
    the summed `n_trips` and the trip-weighted `avg_speed_mph`.
    An empty input yields an empty result with those same columns.
    """
    peak_periods = ["AM Peak", "PM Peak"]
    group_cols = [
        "gtfs_dataset_key", "_gtfs_dataset_name",
        "route_id", "direction_id", "stop_sequence", "peak",
    ]

    # Label rows with a vectorized membership test instead of a row-wise
    # apply: it is much faster and, unlike apply(axis=1), still returns
    # a Series when the frame is empty.
    peak_label = (
        gdf.time_of_day
        .isin(peak_periods)
        .map({True: "peak", False: "off peak"})
    )

    gdf = gdf.assign(
        peak = peak_label,
        # Numerator of the weighted average: speed * number of trips
        speed_multiplied_trips = gdf.speed_mph * gdf.n_trips,
    )

    agg_df = (
        gdf.groupby(group_cols)
        .agg({"speed_multiplied_trips": "sum",
              "n_trips": "sum",
             })
        .reset_index()
    )

    # Weighted average = sum(speed * trips) / sum(trips); the helper
    # numerator column is dropped once it has been used.
    agg_df = agg_df.assign(
        avg_speed_mph = agg_df.speed_multiplied_trips.divide(agg_df.n_trips)
    ).drop(columns = "speed_multiplied_trips")

    return agg_df

In [None]:
# Disabled cell: the gdf.explore(...) call below sits inside a string
# literal and does not run.
# NOTE(review): it colors by "actual_minus_scheduled_min"; confirm that
# column exists in gdf before re-enabling.
'''
gdf.explore(
    "actual_minus_scheduled_min",
    tiles = "CartoDB Positron"
)
'''

In [4]:
operator_name = "Big Blue Bus VehiclePositions"

# An Altair chart can't take a geometry column, so drop it (together
# with the delay-in-seconds column) before charting.
df = gdf.drop(columns=["actual_minus_scheduled_sec", "geometry"])

# Keep only the rows of the chosen operator, with a fresh 0..n-1 index.
operator_df = (
    df.loc[df["_gtfs_dataset_name"] == operator_name]
    .reset_index(drop=True)
)

def get_operator_route_dropdown(df):
    """
    Build a single-value Altair selection named "Route" on the
    `route_id` field, bound to a dropdown.

    The dropdown options are the unique `route_id` values of `df`,
    in their order of first appearance.
    """
    route_options = df.route_id.unique().tolist()

    dropdown = alt.binding_select(options=route_options, name='Route ')

    return alt.selection_single(
        name="Route",
        fields=['route_id'],
        bind=dropdown,
    )

# Peak / off-peak trip-weighted average speeds for the chosen operator.
peak_operator_df = stop_avg_by_peak_off_peak(gdf=operator_df)

# Route dropdown selection shared by the charts below.
select_operator_route = get_operator_route_dropdown(df=operator_df)

Unnamed: 0,gtfs_dataset_key,_gtfs_dataset_name,route_id,direction_id,stop_sequence,peak,n_trips,avg_speed_mph
0,8ee6ecf3c45ac2669ee96ae4c8550950,Big Blue Bus VehiclePositions,3554,0.0,1,off peak,33,0.515616
1,8ee6ecf3c45ac2669ee96ae4c8550950,Big Blue Bus VehiclePositions,3554,0.0,1,peak,26,0.650146
2,8ee6ecf3c45ac2669ee96ae4c8550950,Big Blue Bus VehiclePositions,3554,0.0,2,off peak,25,5.668917
3,8ee6ecf3c45ac2669ee96ae4c8550950,Big Blue Bus VehiclePositions,3554,0.0,2,peak,27,8.373779
4,8ee6ecf3c45ac2669ee96ae4c8550950,Big Blue Bus VehiclePositions,3554,0.0,3,off peak,11,6.495059


In [52]:
def stripplot_base(df: pd.DataFrame) -> alt.Chart:
    """
    Return the shared base for the strip plots: an interactive chart
    over `df` whose y channel is a computed random `jitter` field, with
    facet / view / axis / padding configuration applied.

    No mark and no x encoding are set here; callers add those.
    """
    # Generate Gaussian jitter with a Box-Muller transform
    jitter_expr = 'sqrt(-0.5*log(random()))*cos(2*PI*random())'

    # Jitter goes on the y channel; show a single tick at 0 and hide
    # the grid and labels.
    jitter_axis = alt.Axis(values=[0], ticks=True, grid=False, labels=False)
    jitter_y = alt.Y(
        'jitter:Q',
        title=None,
        axis=jitter_axis,
        scale=alt.Scale(),
    )

    jittered = (
        alt.Chart(df)
        .encode(y=jitter_y)
        .transform_calculate(jitter=jitter_expr)
    )

    configured = (
        jittered
        .configure_facet(spacing=0)
        .configure_view(stroke=None)
        .configure_axis(labelFontSize=12, titleFontSize=12)
        # https://github.com/altair-viz/altair/issues/1993
        .configure(padding={'top': 10})
    )

    return configured.interactive()


def stripplot_by_time_of_day(
    df: pd.DataFrame,
    x_col: str,
    grouping_col: str
) -> alt.Chart:
    """
    Strip plot of `x_col` (quantitative) built on `stripplot_base`,
    with points colored by `time_of_day` and one facet row per value
    of `grouping_col` (ordinal).
    """
    x_encoding = alt.X(f"{x_col}:Q")

    color_encoding = alt.Color(
        "time_of_day:N",
        title="Time of Day",
        scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
    )

    # Row header labels are drawn unrotated (labelAngle=0).
    row_encoding = alt.Row(
        f"{grouping_col}:O",
        header=alt.Header(labelAngle=0),
    )

    points = stripplot_base(df).mark_point(
        size=10, opacity=0.9, strokeWidth=1.1
    )

    return points.encode(
        x=x_encoding,
        color=color_encoding,
        row=row_encoding,
    )

In [53]:
from IPython.display import HTML

# Strip plot of speed_mph for the operator's direction_id == 0 rows,
# one facet row per stop_sequence, with the route dropdown attached.
# The title deliberately ends in "for " -- presumably so that the
# dropdown, moved by the CSS below, reads as the end of the title
# (TODO confirm).
speed_chart = (stripplot_by_time_of_day(
    operator_df[operator_df.direction_id==0],
    x_col = "speed_mph",
    grouping_col = "stop_sequence" 
).add_selection(select_operator_route)
.properties(
    title=f"Speed Variation for ", width=200, height=30)
)

# Inject CSS that absolutely positions `form.vega-bindings` (presumably
# the form holding the dropdown) near the top-left of the output.
display(
    HTML("""
        <style>
        form.vega-bindings {
          position: absolute;
          left: 125px;
          top: 4px;
        }
        </style>
        """
        )
)

# Show the chart filtered by the route selection.
display(speed_chart.transform_filter(select_operator_route))


In [69]:
# Point chart of trip-weighted average speed (x) by stop sequence (y),
# colored by peak / off peak and faceted by direction_id in 2 columns.
# The x domain is fixed to [-1, 40]. The title ends in "for  " --
# presumably so the dropdown positioned by the CSS below completes it
# (TODO confirm).
avg_chart = (
    alt.Chart(peak_operator_df)
    .mark_point(size=10, opacity=0.9, strokeWidth=1.1)
    .encode(
        x=alt.X('avg_speed_mph:Q', 
                scale=alt.Scale(domain=[-1,40])
               ),
        y=alt.Y('stop_sequence:O', title="Stop Sequence"),
        # Color range starts at the third entry of the bold palette
        color=alt.Color('peak:N', 
                        scale=alt.Scale(range=cp.CALITP_CATEGORY_BOLD_COLORS[2:])
                       ),
        facet=alt.Facet('direction_id:O', columns=2),
        tooltip=["avg_speed_mph", "route_id", "peak", "direction_id"],
    ).add_selection(select_operator_route)
    .interactive()
    .configure(padding={'top': 10}) #https://github.com/altair-viz/altair/issues/1993
    .properties(title="Peak vs Offpeak Avg Speed for  ",
                width=100,height=450)
)


# Inject CSS that absolutely positions `form.vega-bindings` (presumably
# the form holding the dropdown); this cell uses left: 195px.
display(HTML("""
<style>
form.vega-bindings {
  position: absolute;
  left: 195px;
  top: 4px;
}
</style>
"""))

# Show the chart filtered by the route selection.
display(avg_chart.transform_filter(select_operator_route))