# Segment averaging by various grains

Let's see how well we can do across time, across space.

Next: across operators, space, and time. Take segments from multiple operators that travel along the same major boulevard and see how well we can combine them.

Scripts:
* [quarterly and annual averages](https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/quarter_year_averages.py)

Past notebooks:
* [segment stability notebook](https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/39_segment_stability.ipynb)
* [weekly average speeds](https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/26_weekly_avg_speeds.ipynb)
* [stop combinations notebook](https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/16_stop_combos_for_segments.ipynb)

In [None]:
import folium
import geopandas as gpd
import pandas as pd

from shared_utils import rt_dates, rt_utils
from segment_speed_utils.project_vars import GTFS_DATA_DICT, SEGMENT_GCS, SCHED_GCS
from segment_speed_utils import time_series_utils

In [None]:
# Quarterly counterpart of YEAR_FILE, currently unused
#QUARTER_FILE = GTFS_DATA_DICT.stop_segments.route_dir_quarter_segment
YEAR_FILE = GTFS_DATA_DICT.stop_segments.route_dir_year_segment

# In case route_ids don't match, use crosswalk to find unharmonized version
CLEANED_ROUTE_NAMING = GTFS_DATA_DICT.schedule_tables.route_identification
route_naming_path = f"{SCHED_GCS}{CLEANED_ROUTE_NAMING}.parquet"
route_df = pd.read_parquet(route_naming_path)

In [None]:
def basic_formatting(
    df: pd.DataFrame
) -> pd.DataFrame:
    """
    Round every speed column to one decimal place for display.

    A speed column is any column whose label is a string containing "_mph".
    Only DataFrame methods are used, so a GeoDataFrame can be passed in too
    and comes back as the same type.

    Returns a copy: the frame passed in is left untouched, so re-running a
    cell that calls this never changes what an earlier cell displayed.
    """
    # Non-string labels cannot be speed columns; checking the type first also
    # avoids a TypeError from the substring test.
    speed_cols = [
        c for c in df.columns
        if isinstance(c, str) and "_mph" in c
    ]

    formatted = df.copy()

    if speed_cols:
        formatted[speed_cols] = formatted[speed_cols].round(1)

    return formatted

## Annual Averages

In [None]:
# Operators to pull annual segment speeds for
subset_operators = ["Culver City Schedule", "Big Blue Bus Schedule"]

# Inner list is one AND-group: a row must satisfy both conditions
annual_filters = [[
    ("name", "in", subset_operators),
    ("year", "==", 2024),
]]

gdf = basic_formatting(
    gpd.read_parquet(
        f"{SEGMENT_GCS}{YEAR_FILE}.parquet",
        filters = annual_filters,
    )
)

Culver City Route 6

Big Blue Bus Routes 3740 and 3744

In [None]:
# route_ids to compare: one Culver City route against two Big Blue Bus routes
culver_route = "6"
bbb_route1, bbb_route2 = "3740", "3744"
bbb_route = [bbb_route1, bbb_route2]

In [None]:
# Look up the selected Big Blue Bus routes in the route naming table
route_is_bbb = route_df.name == "Big Blue Bus Schedule"
route_is_selected = route_df.route_id.isin(bbb_route)

route_df[route_is_bbb & route_is_selected]

In [None]:
# One frame per operator, restricted to the routes being compared
culver_mask = (gdf.name == "Culver City Schedule") & (gdf.route_id == culver_route)
bbb_mask = (gdf.name == "Big Blue Bus Schedule") & gdf.route_id.isin(bbb_route)

culver_gdf = gdf.loc[culver_mask]
bbb_gdf = gdf.loc[bbb_mask]

In [None]:
def _layer_label(operator_gdf: gpd.GeoDataFrame) -> str:
    # Layer label = first operator name in the frame, without the " Schedule" suffix.
    if operator_gdf.empty:
        raise ValueError(
            "Cannot plot an empty GeoDataFrame: no operator name to label "
            "the layer with. Check the operator / route filters upstream."
        )

    return operator_gdf.name.unique()[0].replace(" Schedule", "")


def plot_two_operators(
    operator1_gdf: gpd.GeoDataFrame,
    operator2_gdf: gpd.GeoDataFrame,
    plot_col: str = "p50_mph"
) -> folium.Map:
    """
    Draw two operators' segments on one interactive map, one named layer
    per operator, both colored by `plot_col` with the same colorscale.

    Args:
        operator1_gdf: segments for the first operator; creates the base map.
        operator2_gdf: segments for the second operator; added onto that map.
        plot_col: column used to color the segments.

    Returns:
        The map with both layers and a layer control for toggling them.

    Raises:
        ValueError: if either frame has no rows (previously surfaced as a
            bare IndexError when looking up the operator name).
    """
    # Resolve both labels up front so an empty frame fails before any plotting.
    layer1_name = _layer_label(operator1_gdf)
    layer2_name = _layer_label(operator2_gdf)

    m = operator1_gdf.explore(
        plot_col,
        tiles = "CartoDB Positron",
        cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE,
        name = layer1_name
    )

    # Passing m=m draws the second operator onto the existing map.
    m = operator2_gdf.explore(
        plot_col,
        m=m,
        cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE,
        name = layer2_name
    )

    folium.LayerControl().add_to(m)

    return m

In [None]:
# p50 speeds for both operators' selected routes
plot_two_operators(culver_gdf, bbb_gdf, plot_col = "p50_mph")

In [None]:
def find_intersecting_segments(
    operator1_route: gpd.GeoDataFrame,
    operator2_route: gpd.GeoDataFrame,
    segment_cols: list
) -> pd.DataFrame:
    """
    Build a crosswalk of segments in one frame that spatially intersect
    segments in another frame.

    Args:
        operator1_route: segments on the left side of the spatial join.
        operator2_route: segments on the right side of the spatial join.
        segment_cols: columns identifying a segment; must exist in both
            frames. Any iterable of column names is accepted.

    Returns:
        One row per distinct intersecting pair, with geometry dropped.
        Because both sides carry the same `segment_cols`, the spatial join
        returns them suffixed `_left` and `_right`.
    """
    keep_cols = [*segment_cols, "geometry"]

    intersecting = gpd.sjoin(
        operator1_route[keep_cols],
        operator2_route[keep_cols],
        how = "inner",
        predicate = "intersects"
    )

    # Only the identifier pairs are needed; drop the join bookkeeping and
    # collapse repeated pairs.
    crosswalk = (
        intersecting
        .drop(columns = ["geometry", "index_right"])
        .drop_duplicates()
    )

    return crosswalk

In [None]:
# Use the operator-filtered frames: route_id is not guaranteed to be unique
# across operators, so filtering `gdf` on route_id alone could pull another
# operator's segments into the crosswalk.
segment_crosswalk = find_intersecting_segments(
    culver_gdf,
    bbb_gdf,
    ["stop_pair_name"]
)

In [None]:
# Peek at the crosswalk's columns
segment_crosswalk.head(n = 2)

In [None]:
# Keep only the segments that appear in the crosswalk, per operator
culver_overlap = culver_gdf[
    culver_gdf.stop_pair_name.isin(segment_crosswalk.stop_pair_name_left)
]
bbb_overlap = bbb_gdf[
    bbb_gdf.stop_pair_name.isin(segment_crosswalk.stop_pair_name_right)
]

plot_two_operators(culver_overlap, bbb_overlap, "p50_mph")

In [None]:
# plot_col is omitted here, so plot_two_operators uses its default, "p50_mph"
culver_in_crosswalk = culver_gdf.stop_pair_name.isin(
    segment_crosswalk.stop_pair_name_left
)
bbb_in_crosswalk = bbb_gdf.stop_pair_name.isin(
    segment_crosswalk.stop_pair_name_right
)

plot_two_operators(
    culver_gdf[culver_in_crosswalk],
    bbb_gdf[bbb_in_crosswalk],
)