In [None]:
import os
# Set CALITP_BQ_MAX_BYTES to 10**12 (1 TB in decimal units); stored as text because
# environment variables are strings.
# NOTE(review): presumably read by the calitp BigQuery tooling as a per-query byte cap — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(10 ** 12)

In [None]:
import geopandas as gpd
import pandas as pd
from siuba import *
import numpy as np

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2
import folium
import itertools

In [None]:
from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, SHARED_STOP_THRESHOLD)

In [None]:
import create_aggregate_stop_frequencies as casf

In [None]:
import importlib
importlib.reload(casf)

In [None]:
# Clock hours covered by each peak window: from the window's start hour up to, but not
# including, its end hour (AM_PEAK / PM_PEAK are indexed [0] and [1] and expose .hour).
am_peak_hrs = [*range(AM_PEAK[0].hour, AM_PEAK[1].hour)]
pm_peak_hrs = [*range(PM_PEAK[0].hour, PM_PEAK[1].hour)]
both_peaks_hrs = [*am_peak_hrs, *pm_peak_hrs]

In [None]:
analysis_date

In [None]:
# Load scheduled stop_times for the analysis date; get_pandas=True requests a pandas result.
stop_times = helpers.import_scheduled_stop_times(analysis_date, get_pandas=True)

In [None]:
# Pass stop_times through casf.add_route_dir.
# NOTE: this overwrites stop_times, so re-running only this cell applies add_route_dir to an
# already-processed frame; re-run the load cell first.
stop_times = stop_times.pipe(casf.add_route_dir, analysis_date)

In [None]:
# Prepared stop_times used by every aggregation below.
st_prepped = casf.prep_stop_times(stop_times)

## HCD Y-Branching

* combine single route frequencies with collinearity screen...
* correction: a shared-stop screen alone is not enough. These routes need to pass _both_ a collinearity screen and a non-collinearity screen: 8+ stops shared and also 8+ stops not shared (i.e. meaningful service on all 3 legs of the Y)

In [None]:
# Same aggregation run twice, differing only in the single_route_dir flag.
# presumably True aggregates per individual route_dir and False across all route_dirs at a stop — confirm in casf.
max_arrivals_by_stop_single = casf.stop_times_aggregation_max_by_stop(
    st_prepped, analysis_date, single_route_dir=True
)
max_arrivals_by_stop_multi = casf.stop_times_aggregation_max_by_stop(
    st_prepped, analysis_date, single_route_dir=False
)

In [None]:
# Thresholds are passed as a (HQ, MS) tuple alongside the single- and multi-route aggregations.
multi_only_explode = casf.get_explode_multiroute_only(
    max_arrivals_by_stop_single,
    max_arrivals_by_stop_multi,
    (HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD),
)

In [None]:
multi_only_explode.head(3)

In [None]:
def get_explode_singles(
    single_route_aggregation: pd.DataFrame,
    frequency_thresholds: tuple
) -> pd.DataFrame:
    """
    List the (feed, stop, route_dir) combinations that meet the lowest frequency threshold.

    A row of ``single_route_aggregation`` qualifies when both ``am_max_trips_hr`` and
    ``pm_max_trips_hr`` are at least ``min(frequency_thresholds)``. Qualifying rows have
    their ``route_dir`` column exploded to one value per row and are sorted by
    ``schedule_gtfs_dataset_key``, ``stop_id``, ``route_dir``.

    Parameters
    ----------
    single_route_aggregation : pd.DataFrame
        Must contain ``schedule_gtfs_dataset_key``, ``stop_id``, ``route_dir``,
        ``am_max_trips_hr`` and ``pm_max_trips_hr``.
    frequency_thresholds : tuple
        Candidate thresholds; only the smallest one is applied.

    Returns
    -------
    pd.DataFrame
        The three key columns only, keeping the index labels of the source rows.
    """
    key_cols = ['schedule_gtfs_dataset_key', 'stop_id', 'route_dir']
    # Referenced by name inside the query string below.
    ms_precursor_threshold = min(frequency_thresholds)
    frequent_enough = single_route_aggregation.query(
        'am_max_trips_hr >= @ms_precursor_threshold & pm_max_trips_hr >= @ms_precursor_threshold'
    )
    one_route_per_row = frequent_enough.explode('route_dir')
    ordered = one_route_per_row.sort_values(key_cols)
    return ordered[key_cols]

In [None]:
# get_explode_singles already explodes route_dir to one value per row, so the extra
# .explode('route_dir') that used to follow this call was a no-op and has been dropped.
singles_explode = get_explode_singles(
    max_arrivals_by_stop_single,
    (HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD),
)

In [None]:
from tqdm import tqdm
tqdm.pandas()

In [None]:
# share_counts is filled as a side effect: the dict is handed to casf.accumulate_share_count,
# which is applied to each (feed, stop) group. progress_apply needs tqdm.pandas() to have run.
# presumably keys look like '<feed>__<route_dir>__<route_dir>' (later cells split on '__') — confirm in casf.
share_counts = {}
(
    singles_explode
    .groupby(['schedule_gtfs_dataset_key', 'stop_id'])
    .progress_apply(casf.accumulate_share_count, share_counts=share_counts)
)

In [None]:
# Keep only entries whose count reaches SHARED_STOP_THRESHOLD.
qualify_dict = {
    pair_key: n_shared
    for pair_key, n_shared in share_counts.items()
    if n_shared >= SHARED_STOP_THRESHOLD
}

In [None]:
# Unique feed keys: the text before the first '__' of every qualifying key.
feeds_to_filter = np.unique([pair_key.split('__')[0] for pair_key in qualify_dict])

In [None]:
qualify_dict

In [None]:
# Spot check for one hard-coded pair of route_dirs: count how often each stop_id appears,
# then tally stops seen once versus more than once.
(
    singles_explode
    .query('route_dir.isin(["20-13191_1", "720-13191_1"])')
    ['stop_id']
    .value_counts()
    .eq(1)
    .value_counts()
)
#  True: unshared stop count (stop served by only one of the two); False: shared stop count

* local/rapid overlap: there are many unshared stops, but that does not offer riders a meaningful choice (one route is still a spatial subset of the other)
* need a spatial methodology here...

In [None]:
# Recomputes feeds_to_filter exactly as the earlier cell does: the feed key is the text
# before the first '__' of each qualifying key.
feeds_to_filter = np.unique([pair_key.partition('__')[0] for pair_key in qualify_dict])

In [None]:
feeds_to_filter

In [None]:
helpers.import_scheduled_shapes?

In [None]:
## TODO integrate into script

def get_trips_with_route_dir(analysis_date):
    """
    Load scheduled trips for ``analysis_date`` and label each with a ``route_dir`` key.

    Only rows whose ``route_type`` is '3' or '11' are kept (the original note on this
    filter reads "bus only"). Missing ``direction_id`` values are treated as 0, and
    ``route_dir`` is ``route_id`` and ``direction_id`` joined by an underscore.

    Parameters
    ----------
    analysis_date
        Passed straight through to ``helpers.import_scheduled_trips``.

    Returns
    -------
    The filtered trips table (requested with ``get_pandas=True``) with ``direction_id``
    converted to a string and a new ``route_dir`` column.

    NOTE(review): the request asks for "gtfs_dataset_key", while other cells read
    ``schedule_gtfs_dataset_key`` from the result — presumably the helper renames it; confirm.
    """
    trips = helpers.import_scheduled_trips(
        analysis_date,
        columns=["feed_key", "gtfs_dataset_key", "trip_id",
                 "route_id", "direction_id", "route_type",
                 "shape_array_key", "route_short_name", "name"],
        get_pandas=True,
    )
    # .copy() gives an independent frame, so the column assignments below do not write
    # into a slice of the imported table (avoids SettingWithCopyWarning).
    bus_trips = trips[trips['route_type'].isin(['3', '11'])].copy()  #  bus only

    bus_trips['direction_id'] = bus_trips['direction_id'].fillna(0).astype(int).astype(str)
    bus_trips['route_dir'] = bus_trips[['route_id', 'direction_id']].agg('_'.join, axis=1)

    return bus_trips

In [None]:
# Shape geometries for the analysis date, identified by shape_array_key.
shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns=['shape_array_key', 'geometry'],
)

# Trips limited to feeds that have a qualifying pair, reduced to one row per
# (feed, shape, route_dir) combination.
trips = get_trips_with_route_dir(analysis_date)
trips = trips.query("schedule_gtfs_dataset_key.isin(@feeds_to_filter)")
trips = trips.drop_duplicates(subset=['schedule_gtfs_dataset_key', 'shape_array_key', 'route_dir'])

In [None]:
trips.head(3)

In [None]:
trips.drop_duplicates(subset=['name'])

* dissolve shapes by route+dir, pick longest
* also try: dissolve shapes by route+dir and keep the dissolved geometry (instead of picking the longest)
* find non-overlap by previously qualifying pair

In [None]:
# Attach trip attributes to each shape, then record the line length in CRS units
# (measured here, before any buffering is applied to the geometry).
shapes = (
    shapes
    .merge(trips, on='shape_array_key')
    .assign(length=lambda merged: merged.geometry.length)
)

In [None]:
# A line buffered by OVERLAY_BUFFER on both sides is about 2 * OVERLAY_BUFFER wide, so a
# length L covers an area of roughly L * AREA_MULTIPLIER.
# NOTE(review): assumes shape geometries are in a metre-based CRS — confirm.
OVERLAY_BUFFER = 20
AREA_MULTIPLIER = 2 * OVERLAY_BUFFER

# Required unshared length per route (5 km), and the same requirement expressed as buffered area.
TARGET_METERS_DIFFERENCE = 5_000
TARGET_AREA_DIFFERENCE = AREA_MULTIPLIER * TARGET_METERS_DIFFERENCE

In [None]:
# Replace each route line with its OVERLAY_BUFFER buffer, then record the polygon area.
# NOTE: the geometry is overwritten in place, so re-running only this cell buffers the
# already-buffered shapes again; re-run from the shapes import cell instead.
shapes.geometry = shapes.buffer(OVERLAY_BUFFER)
# Vectorized .area replaces the per-geometry Python lambda; same values, computed in one call.
shapes = shapes.assign(area = shapes.geometry.area)

In [None]:
# Longest shape length for every (feed, route_dir).
max_by_route_dir = (
    shapes
    .groupby(['schedule_gtfs_dataset_key', 'route_dir'])['length']
    .max()
    .reset_index()
)

In [None]:
# Keep only the rows whose length equals the per-(feed, route_dir) maximum, then drop
# ties so each (feed, route_dir) is left with a single shape.
shapes = shapes.merge(max_by_route_dir, on=['schedule_gtfs_dataset_key', 'route_dir', 'length'])
shapes = shapes.drop_duplicates(subset=['schedule_gtfs_dataset_key', 'route_dir', 'length'])

In [None]:
# shapes.explore(column='length')

In [None]:
feeds_to_filter

In [None]:
# Only the first entry of feeds_to_filter is examined in the cells below; the pair
# analysis is not repeated for the remaining feeds.
gtfs_dataset_key = feeds_to_filter[0]
gtfs_dataset_key

In [None]:
# Qualifying keys start with the feed key followed by '__'. partition('__') splits at that
# first separator only, so the remainder is recovered without a magic [2:] slice and is
# unaffected if the feed key's text happens to reappear later in the key.
this_feed_qual = {
    key.partition('__')[2]: count
    for key, count in qualify_dict.items()
    if key.partition('__')[0] == gtfs_dataset_key
}
# The remainder is the '__'-joined route_dirs; split it into a tuple per entry.
qualify_pairs = [tuple(pair_key.split('__')) for pair_key in this_feed_qual]

In [None]:
# Order-insensitive de-duplication: (a, b) and (b, a) collapse to the same frozenset.
qualify_sets = {frozenset(pair) for pair in qualify_pairs}

In [None]:
# Back to plain lists for use in .query(); being built from sets, neither the order of
# pairs nor the order within a pair is guaranteed.
unique_qualify_pairs = list(map(list, qualify_sets))

In [None]:
# For every unique qualifying route_dir pair in this feed, overlay the two buffered shapes
# and measure the area each one covers that the other does not (symmetric difference).
# A pair passes only when BOTH routes have more unshared area than TARGET_AREA_DIFFERENCE.
for pair in unique_qualify_pairs:
    print(f'{pair}...', end='')
    these_shapes = shapes.query('route_dir.isin(@pair) & schedule_gtfs_dataset_key == @gtfs_dataset_key')
    if len(these_shapes) < 2:
        # Nothing to compare: a route_dir has no shape here, or the pair holds one route_dir.
        print(f'skipped, only {len(these_shapes)} shape(s) found')
        continue

    first_row = these_shapes.iloc[0:1][['schedule_gtfs_dataset_key', 'route_dir', 'shape_array_key', 'geometry']]
    second_row = these_shapes.iloc[1:2][['route_dir', 'geometry']]
    sym_diff = first_row.overlay(second_row, how='symmetric_difference')
    sym_diff = sym_diff.assign(
        area=sym_diff.geometry.area,
        # Each output row comes from one input only, so one of the suffixed columns is null.
        route_dir=sym_diff.route_dir_1.fillna(sym_diff.route_dir_2),
    )

    area_ratios = (sym_diff['area'] / TARGET_AREA_DIFFERENCE).tolist()
    if area_ratios:
        detail = ' and '.join(f'{ratio:.2f}' for ratio in area_ratios) + ' times area target'
    else:
        detail = 'no unshared area'

    # Fewer than two rows means at least one route has no unshared area at all, so the pair
    # cannot pass; indexing ratios by position would also fail in that case.
    passed = len(sym_diff) == 2 and bool((sym_diff['area'] > TARGET_AREA_DIFFERENCE).all())
    if passed:
        print(f'passed, {detail}')
        m = these_shapes.explore(color='gray', tiles='CartoDB Positron')
        display(sym_diff.explore(column='route_dir', m=m, tiles='CartoDB Positron'))
    else:
        print(f'failed, {detail}')
        display(these_shapes.explore(column='route_dir', tiles='CartoDB Positron'))

## LBT check

That location is at the intersection of 2nd and PCH, Stop name: PCH & 2nd NE, stop ID #1465.

The Long Beach Transit 121 has 20-minute frequencies almost all day (source: ridelbt, route-121).
The Long Beach Transit 171 has 20-minute frequencies at peak hours, morning and evening (source: ridelbt, routes-171-175).

Under AB2553, this should qualify that stop as being a major transit stop.

-> Finding: the frequency is actually insufficient once averaged.

In [None]:
# Stop locations for the analysis date, with the keys needed to join to feeds and arrivals.
stops = helpers.import_scheduled_stops(
    analysis_date,
    columns=['feed_key', 'stop_id', 'geometry'],
)

In [None]:
# Reload trips for all feeds. NOTE: this reuses the name `trips`, replacing the
# feed-filtered, de-duplicated frame built earlier in the notebook.
trips = get_trips_with_route_dir(analysis_date)

In [None]:
# One row per distinct (feed_key, schedule_gtfs_dataset_key, name) combination.
feed_cols = ['feed_key', 'schedule_gtfs_dataset_key', 'name']
feeds = trips.drop_duplicates(subset=feed_cols)[feed_cols]

In [None]:
# Keep only feeds whose name contains "Long Beach". Overwrites `feeds`, so the full
# feed list must be rebuilt before filtering for a different operator.
feeds = feeds.query('name.str.contains("Long Beach")')

In [None]:
# Stops of the selected feeds joined to their single-route max-arrival rows.
max_stops = (
    stops
    .merge(feeds, on='feed_key')
    .merge(max_arrivals_by_stop_single, on=['schedule_gtfs_dataset_key', 'stop_id'])
)

In [None]:
# max_stops.explode('route_dir').query('~route_dir.str.contains("131")').explore()