In [None]:
import os
# 10**12 bytes, i.e. 1 TB in decimal units; environment values must be strings.
# NOTE(review): presumably read by the calitp BigQuery tooling as a query-size cap — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = f"{10**12}"

In [None]:
import geopandas as gpd
import pandas as pd
from siuba import *

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2
import folium

In [None]:
from update_vars import analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS

In [None]:
import sjoin_stops_to_segments

In [None]:
# Display the analysis date imported from update_vars.
analysis_date

In [None]:
# (1) Aggregate stop times: for each stop_id, find the max trips in the AM and PM peaks.
# Takes about 1 min.
max_arrivals_by_stop = (
    helpers.import_scheduled_stop_times(analysis_date, get_pandas=True)
    .pipe(sjoin_stops_to_segments.prep_stop_times)
    .pipe(
        sjoin_stops_to_segments.stop_times_aggregation_max_by_stop,
        analysis_date,
    )
)

In [None]:
# (1b) Same stop-time aggregation as for max_arrivals_by_stop, but passing
# single_route_dir=True; the result feeds the "single-route" candidate sets.
# Re-imports the scheduled stop times, so presumably also takes about 1 min.
max_arrivals_by_stop_single = (
    helpers.import_scheduled_stop_times(analysis_date, get_pandas=True)
    .pipe(sjoin_stops_to_segments.prep_stop_times)
    .pipe(
        sjoin_stops_to_segments.stop_times_aggregation_max_by_stop,
        analysis_date,
        single_route_dir=True,
    )
)

In [None]:
# Candidate stop sets by peak frequency. `filter` is siuba's verb (it shadows the
# builtin via `from siuba import *`) and keeps rows meeting both conditions.
# NOTE(review): the comparisons are strict, so a stop with exactly 4 (or 3)
# trips/hr in a peak is excluded — confirm that is the intended threshold.

# new HQ corridor (still multi-route)
new_hq_multi = (
    max_arrivals_by_stop
    >> filter(_.am_max_trips_hr > 4, _.pm_max_trips_hr > 4)
)
# new major stop precursor (still multi-route)
new_ms_multi = (
    max_arrivals_by_stop
    >> filter(_.am_max_trips_hr > 3, _.pm_max_trips_hr > 3)
)
# new HQ corridor (single-route)
new_hq_single = (
    max_arrivals_by_stop_single
    >> filter(_.am_max_trips_hr > 4, _.pm_max_trips_hr > 4)
)
# new major stop precursor (single-route)
new_ms_single = (
    max_arrivals_by_stop_single
    >> filter(_.am_max_trips_hr > 3, _.pm_max_trips_hr > 3)
)

## preliminary mapping

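* Add the major transit stops (MTS) from the current export that are not plain-bus categories (every `hqta_type` except `hq_corridor_bus` and `major_stop_bus`, e.g. rail and ferry) in orange to help decide
* Map colors: blue = old export stops, orange = non-bus stops, green = multi-route candidates, red = single-route candidates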

In [None]:
# Export folder for the rt_dates 'aug2024' entry; used as the "old" baseline in this notebook.
OLD_EXPORT_PATH = f"{GCS_FILE_PATH}export/{rt_dates.DATES['aug2024']}/"

In [None]:
# Stops export from the old (baseline) folder.
old_stops = gpd.read_parquet(f"{OLD_EXPORT_PATH}ca_hq_transit_stops.parquet")

In [None]:
# Export folder for the rt_dates 'oct2024g' entry; source of the non-bus reference stops.
CURRENT_EXPORT_PATH = f"{GCS_FILE_PATH}export/{rt_dates.DATES['oct2024g']}/"

In [None]:
# Stops export from the current folder.
current_stops = gpd.read_parquet(f"{CURRENT_EXPORT_PATH}ca_hq_transit_stops.parquet")

In [None]:
# Keep every stop in the current export whose hqta_type is NOT one of the two
# plain-bus categories. `~` is the boolean-NOT operator for a mask; unary `-`
# only works on boolean Series because pandas special-cases it.
non_bus = current_stops >> filter(~_.hqta_type.isin(['hq_corridor_bus', 'major_stop_bus']))

In [None]:
# Feed lookup for the analysis date, limited to the schedule datasets present in
# max_arrivals_by_stop. Inside siuba's select, `_.new == _.old` renames a column.
feeds = (
    gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(
        selected_date=analysis_date
    )
    >> select(
        _.feed_key,
        _.schedule_gtfs_dataset_key == _.gtfs_dataset_key,
        _.name,
    )
    >> filter(
        _.schedule_gtfs_dataset_key.isin(
            max_arrivals_by_stop.schedule_gtfs_dataset_key
        )
    )
)

In [None]:
# Stops for the selected feeds on the analysis date, requesting only the key columns.
# NOTE(review): presumably the result also carries geometry, since it is mapped
# with .explore() later — confirm against gtfs_utils_v2.get_stops.
stops = gtfs_utils_v2.get_stops(
    selected_date=analysis_date,
    operator_feeds=feeds.feed_key,
    stop_cols=['feed_key', 'stop_id'],
)

In [None]:
# Attach the feed-level columns (schedule_gtfs_dataset_key, name) to each stop by feed_key.
# NOTE(review): this rebinds `stops` to a join of itself, so re-running this cell
# without re-running the get_stops cell joins `feeds` a second time — consider
# giving the joined frame its own name.
stops = stops >> inner_join(_, feeds, on='feed_key')

In [None]:
def add_to_map(test_df, m=None, **kwargs):
    """Plot the stops that match ``test_df`` on an interactive map.

    Inner-joins the notebook-level ``stops`` table to ``test_df`` on
    ``schedule_gtfs_dataset_key`` and ``stop_id``, then calls ``.explore``
    on the joined result.

    Args:
        test_df: table with ``schedule_gtfs_dataset_key`` and ``stop_id`` columns.
        m: existing map to draw onto, forwarded to ``explore`` as ``m``
            (``None`` by default).
        **kwargs: forwarded unchanged to ``explore`` (e.g. ``color``).

    Returns:
        The object returned by ``explore``.
    """
    join_keys = ['schedule_gtfs_dataset_key', 'stop_id']
    matched_stops = stops >> inner_join(_, test_df, on=join_keys)
    return matched_stops.explore(m=m, **kwargs)

In [None]:
# Base map for the HQ corridor comparison: old export stops in blue, current
# non-bus stops in orange. `name` labels each layer in the folium LayerControl;
# without it the toggles show auto-generated ids.
m1 = old_stops.explore(color='blue', name='old_stops')
m1 = non_bus.explore(m=m1, color='orange', name='non_bus')

In [None]:
# Multi-route HQ corridor candidates in green; `name` is forwarded to explore()
# so the layer is labelled in the LayerControl.
m1 = add_to_map(new_hq_multi, m=m1, color='green', name='new_hq_multi')

In [None]:
# Single-route HQ corridor candidates in red; `name` is forwarded to explore()
# so the layer is labelled in the LayerControl.
m1 = add_to_map(new_hq_single, m=m1, color='red', name='new_hq_single')

In [None]:
# Add a layer toggle to m1; the trailing `;` suppresses the cell's output repr.
folium.LayerControl().add_to(m1);

In [None]:
# Render the HQ corridor comparison map.
m1

## major stops

In [None]:
# Base map for the major stop comparison: old export stops in blue, current
# non-bus stops in orange. `name` labels each layer in the folium LayerControl;
# without it the toggles show auto-generated ids.
m = old_stops.explore(color='blue', name='old_stops')
m = non_bus.explore(m=m, color='orange', name='non_bus')

In [None]:
# Multi-route major stop precursors in green; `name` is forwarded to explore()
# so the layer is labelled in the LayerControl.
m = add_to_map(new_ms_multi, m=m, color='green', name='new_ms_multi')

In [None]:
# Single-route major stop precursors in red; `name` is forwarded to explore()
# so the layer is labelled in the LayerControl.
m = add_to_map(new_ms_single, m=m, color='red', name='new_ms_single')

In [None]:
# Add a layer toggle to m; the trailing `;` suppresses the cell's output repr.
folium.LayerControl().add_to(m);

In [None]:
# Render the major stop comparison map.
m

# full pipeline check

In [None]:
# Stops export for the current run. Later cells (info, hqta_type counts, the
# major_stop_bus map, ferry counts) use `new_stops`, so this load must run on a
# fresh kernel.
new_stops = gpd.read_parquet(f"{EXPORT_PATH}ca_hq_transit_stops.parquet")

In [None]:
# Areas export for the current run. The dissolve/area comparison later in the
# notebook uses `new_areas`, so this load must run on a fresh kernel.
new_areas = gpd.read_parquet(f"{EXPORT_PATH}ca_hq_transit_areas.parquet")

In [None]:
# Display the path of the areas export for the current run.
f"{EXPORT_PATH}ca_hq_transit_areas.parquet"

In [None]:
# Areas export from the old (baseline) folder.
old_areas = gpd.read_parquet(f"{OLD_EXPORT_PATH}ca_hq_transit_areas.parquet")

In [None]:
# Column summary for the new stops export.
new_stops.info()

In [None]:
# Row counts per hqta_type in the new stops export.
new_stops >> count(_.hqta_type)

In [None]:
# Row counts per hqta_type in the old stops export, for comparison with the new one.
old_stops >> count(_.hqta_type)

In [None]:
# Column summary for the old stops export.
old_stops.info()

In [None]:
# Compare major_stop_bus stops: old export in blue, new export in orange.
# `name` labels each layer in the LayerControl; without it the toggles show
# auto-generated ids.
m2 = (old_stops >> filter(_.hqta_type == 'major_stop_bus')).explore(
    color='blue', name='old major_stop_bus'
)
m2 = (new_stops >> filter(_.hqta_type == 'major_stop_bus')).explore(
    m=m2, color='orange', name='new major_stop_bus'
)
folium.LayerControl().add_to(m2);

In [None]:
# m2

In [None]:
from calitp_data_analysis.geography_utils import CA_NAD83Albers

In [None]:
# Merge the new areas into one geometry per hqta_type in the CA_NAD83Albers CRS,
# then report each type's total area in squared CRS units
# (presumably square meters — confirm against CA_NAD83Albers).
new_dissolved = new_areas.to_crs(CA_NAD83Albers).dissolve(by='hqta_type').reset_index()
# GeoSeries.area is vectorized, so no per-row apply is needed.
new_dissolved['area'] = new_dissolved.geometry.area
new_dissolved[['hqta_type', 'area']]

In [None]:
# Merge the old areas into one geometry per hqta_type in the CA_NAD83Albers CRS,
# then report each type's total area in squared CRS units
# (presumably square meters — confirm against CA_NAD83Albers).
old_dissolved = old_areas.to_crs(CA_NAD83Albers).dissolve(by='hqta_type').reset_index()
# GeoSeries.area is vectorized, so no per-row apply is needed.
old_dissolved['area'] = old_dissolved.geometry.area
old_dissolved[['hqta_type', 'area']]

In [None]:
# Dissolved major_stop_bus area from the old export.
# NOTE(review): the name says "hq" but the filter selects 'major_stop_bus' — confirm which is intended.
old_hq = old_dissolved >> filter(_.hqta_type == 'major_stop_bus')

In [None]:
# Dissolved major_stop_bus area from the new export.
# NOTE(review): the name says "hq" but the filter selects 'major_stop_bus' — confirm which is intended.
new_hq = new_dissolved >> filter(_.hqta_type == "major_stop_bus")

In [None]:
# Area covered by the old geometry but not by the new one.
only_old = old_hq.overlay(new_hq, how='difference')

In [None]:
# Area covered by the new geometry but not by the old one.
only_new = new_hq.overlay(old_hq, how='difference')

In [None]:
# only_new.explore()

In [None]:
# only_old.explore()

In [None]:
import intake

In [None]:
# Open the intake catalog(s) matching "*.yml"; the pattern is relative, so this
# depends on the notebook's working directory.
catalog = intake.open_catalog("*.yml")

In [None]:
# Read the catalog's hqta_points entry and reproject it to PROJECT_CRS.
hqta_points = catalog.hqta_points.read().to_crs(PROJECT_CRS)

In [None]:
# Spot check: rows for one hard-coded stop_id.
hqta_points >> filter(_.stop_id=='62965')

## Counting rail/ferry stops

* Prior refactors keep one row per stop_id x route_id; we will keep that behavior
* Scanned the ferry stops and removed a few that have no bus/rail connection (Havasu, the other Angel Island operator)

In [None]:
# (new_stops >> filter(_.hqta_type == 'major_stop_ferry')).explore()

In [None]:
# (old_stops >> filter(_.hqta_type == 'major_stop_ferry')).explore()

In [None]:
# Ferry major stops in the old export, counted per agency_primary.
(old_stops >> filter(_.hqta_type == 'major_stop_ferry')) >> count(_.agency_primary)

In [None]:
# Ferry major stops in the new export, counted per agency_primary.
(new_stops >> filter(_.hqta_type == 'major_stop_ferry')) >> count(_.agency_primary)

In [None]:
# old_stops >> filter(_.hqta_type == 'major_stop_ferry', _.agency_primary.str.contains('Golden'))

# new_stops >> filter(_.hqta_type == 'major_stop_ferry', _.agency_primary.str.contains('Golden'))

# new_stops >> filter(_.hqta_type == 'major_stop_rail', _.agency_primary.str.contains('Bay Area'))

# old_stops >> filter(_.hqta_type == 'major_stop_rail', _.agency_primary.str.contains('Bay Area'))

## Finding Amtrak

* Amtrak is present for analysis dates on or after Oct 21, 2024!

In [None]:
# (new_stops >> filter(_.hqta_type == 'major_stop_rail')).explore()