# Transit Service Density

* Use GTFS `stops` and count trips per hour
* Consider whether `stops` (point geometry) would be the best for pairing this analysis with accessibility. 
   * Is accessibility also measured as the number of jobs reachable from a given stop, or from a census tract?
   * The transit service increase analysis aggregated stops per hour to the census tract. Census tracts gave us CalEnviroScreen designations and let us categorize transit routes as urban/suburban/rural.

In [None]:
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import rt_dates, rt_utils
from segment_speed_utils import helpers
from segment_speed_utils.project_vars import RT_SCHED_GCS, SEGMENT_GCS
                                             

# Every dataset below is keyed on this date (the `sep2023` entry of rt_dates.DATES).
analysis_date = rt_dates.DATES["sep2023"]

# Shared data catalog; used further down to read the CalEnviroScreen/LEHD
# tract layer. The path is relative to the current working directory.
catalog = intake.open_catalog(
    "../_shared_utils/shared_utils/shared_data_catalog.yml"
)

In [None]:
# Load the stop_times-with-direction table for the analysis date.
# It is read through geopandas because its `geometry` column is used later
# when building the per-stop arrivals layer.
stop_times_direction_path = (
    f"{RT_SCHED_GCS}stop_times_direction_{analysis_date}.parquet"
)
stop_times_with_dir = gpd.read_parquet(stop_times_direction_path)

In [None]:
# trip_instance_key is one of the keys for the later merge onto
# stop_times_with_dir, so pull it from trips together with the identifiers
# needed to attach it to stop_times.
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns=["trip_instance_key", "trip_id", "feed_key"],
)

# arrival_hour comes from stop_times; it is bucketed into peak/offpeak below.
scheduled_stop_times = helpers.import_scheduled_stop_times(
    analysis_date,
    columns=[
        "feed_key", "trip_id", "stop_id", "stop_sequence", "arrival_hour",
    ],
)

stop_times = (
    scheduled_stop_times
    .merge(trips, on=["feed_key", "trip_id"], how="inner")
    [["trip_instance_key", "stop_id", "stop_sequence", "arrival_hour"]]
    # Drop rows without an arrival hour so the column can be cast to int later.
    .query('arrival_hour.notnull()')
    # NOTE(review): .compute() implies the helper returns a lazy (presumably
    # dask) dataframe -- confirm against segment_speed_utils.helpers.
    .compute()
)

In [None]:
# Attach arrival_hour to every stop_time-with-direction row. The GeoDataFrame
# is on the left so the result keeps its geometry.
stop_time_keys = ["trip_instance_key", "stop_id", "stop_sequence"]

gdf = stop_times_with_dir.merge(stop_times, on=stop_time_keys, how="inner")

# Null arrival hours were filtered out of stop_times and the merge is inner,
# so the integer cast cannot hit missing values.
gdf = gdf.astype({"arrival_hour": "int64"})

In [None]:
# Label every stop_time with a time-of-day bucket derived from arrival_hour.
#
# Map over the single column rather than using DataFrame.apply(axis=1):
# row-wise apply materialises a Series for each row (geometry included) only
# to read one field, which is very slow on a full day of stop_times and
# misbehaves on an empty frame. Series.map passes the same hour values to
# the same function and returns an index-aligned Series.
gdf = gdf.assign(
    time_of_day=gdf["arrival_hour"].map(rt_utils.categorize_time_of_day)
)

In [None]:
# Sanity check: number of stop_times falling in each time-of-day bucket.
gdf["time_of_day"].value_counts()

In [None]:
# Collapse time_of_day into two categories: "peak" (AM Peak / PM Peak) and
# "offpeak" (everything else, including missing values).
#
# Vectorised with isin + map instead of DataFrame.apply(axis=1), which would
# build a Series per row just to test one string. isin returns False for
# NaN, so missing labels still land in "offpeak" as before.
gdf = gdf.assign(
    peak_category=(
        gdf["time_of_day"]
        .isin(["AM Peak", "PM Peak"])
        .map({True: "peak", False: "offpeak"})
    )
)

In [None]:
# Sanity check: number of stop_times in peak vs. offpeak.
gdf["peak_category"].value_counts()

In [None]:
# Columns that identify a stop in the grouping and merges below.
stop_cols = ["schedule_gtfs_dataset_key", "stop_id"]

# Only the stop_times that fall in the AM/PM peak.
peak_st = gdf.loc[gdf["peak_category"] == "peak"]

# Arrivals per stop = number of stop_time rows with a non-null arrival_hour.
arrivals_by_stop = (
    gdf
    .groupby(stop_cols, observed=True, group_keys=False)
    .agg(all_arrivals=("arrival_hour", "count"))
    .reset_index()
)

# Same count, restricted to peak stop_times. Stops with no peak service are
# absent from this table.
peak_arrivals_by_stop = (
    peak_st
    .groupby(stop_cols, observed=True, group_keys=False)
    .agg(peak_arrivals=("arrival_hour", "count"))
    .reset_index()
)

In [None]:
# Build one row per stop with its geometry, total arrivals and peak arrivals.
#
# - Geometry comes from the de-duplicated stop rows of stop_times_with_dir;
#   keeping the GeoDataFrame on the left preserves the geometry column.
# - all_arrivals is inner-joined: only stops that appear in arrivals_by_stop
#   are kept.
# - peak_arrivals is left-joined: a stop with no peak-period service has no
#   row in peak_arrivals_by_stop and comes back as missing. Its true count is
#   zero, so fill with 0 rather than leaving <NA>, which would be excluded
#   from describe() and any later aggregation. The nullable "Int64" dtype is
#   kept so the column type is unchanged for downstream code.
stop_arrivals_gdf = (
    stop_times_with_dir[stop_cols + ["geometry"]]
    .drop_duplicates()
    .merge(arrivals_by_stop, on=stop_cols, how="inner")
    .merge(peak_arrivals_by_stop, on=stop_cols, how="left")
    .assign(
        all_arrivals=lambda df: df["all_arrivals"].astype("int64"),
        peak_arrivals=lambda df: (
            df["peak_arrivals"].fillna(0).astype("Int64")
        ),
    )
)

In [None]:
# Summary statistics of the numeric columns (all_arrivals, peak_arrivals).
# Disneyland shuttle in Toy Story lot has 6_000 arrivals a day
stop_arrivals_gdf.describe()

## Spatially join to tracts?

In [None]:
# Read the `calenviroscreen_lehd_by_tract` entry from the shared catalog and
# preview the first two rows.
tracts = catalog["calenviroscreen_lehd_by_tract"].read()
tracts.head(n=2)

## Spatially join to road segments?

In [None]:
# Load the road segments cut for the analysis date.
# NOTE(review): the path has no `.parquet` suffix -- presumably a partitioned
# parquet directory; confirm against the job that writes it.
road_segments_path = f"{SEGMENT_GCS}road_segments_{analysis_date}"
road_segments = gpd.read_parquet(road_segments_path)

In [None]:
# Preview the first two road segments.
road_segments.head(n=2)

In [None]:
# (rows, columns) of the road segments table.
road_segments.shape