In [None]:
import pandas as pd
import geopandas as gpd
from siuba import *

In [None]:
import zipfile

In [None]:
# ! pip install pygris

In [None]:
import _utils
import importlib
importlib.reload(_utils)

# "What if VMT decreased by 25% per the CARB target, and all those trips were on (existing) transit instead?"

## VMT is a spatial phenomenon, our analysis should be spatial too

* Start with "big data" weekday residential VMT per Census tract via Replica
* Per the CARB target, future VMT should be 25% lower
* Assume tripmaking remains constant, and that transit entirely replaces that VMT
    * optional: find tracts with no transit service, hold their VMT constant and redistribute missed target among remaining tracts (30% reduction instead of 25% perhaps?)
    
## From reduced VMT to transit trips

* Replica gives transit trip lengths but it may not be reliable ("good for auto, less so for transit")
    * It's generally showing the median transit trip as longer than the median auto trip, which seems questionable
    * We have plenty of good spatial data on transit service _provision_, but not ridership (generally agency-level only)
    * May need to refer to research/default to a fixed "median transit trip" length based on population density
* Regardless, get a rough estimate of new transit trips by dividing the VMT reduction in each tract by the median transit trip distance, or by the median auto trip distance if transit data is not available

In [None]:
# Load the results written upstream (presumably one row per Census tract — confirm).
gdf = gpd.read_parquet('outputs/new_trips_with_uza.parquet')

In [None]:
#  filtered test
#  Keeps only rows whose NAME10 contains 'Seaside'. NOTE: this overwrites `gdf`,
#  so the maps and the sum below cover this subset only, until `gdf` is re-read
#  from parquet under "Sjoin and calculate".
gdf = gdf >> filter(_.NAME10.str.contains('Seaside'))

## Mapping...

In [None]:
# gdf.explore(column = 'total_mi_transit', scheme = 'NaturalBreaks')

In [None]:
# Interactive map of total_mi_auto, classed with the NaturalBreaks scheme.
gdf.explore(column = 'total_mi_auto', scheme = 'NaturalBreaks')

## New transit trips

In [None]:
def map_per_capita(gdf):
    """Map ``new_trips_per_capita`` for rows with a non-zero ``total_pop``.

    Rows where ``total_pop`` equals 0 (tracts where nobody lives) are dropped
    first; the remaining rows are drawn with ``explore`` using the
    NaturalBreaks classification scheme. The caller's frame is not reassigned.
    """
    populated = gdf >> filter(_.total_pop != 0)
    return populated.explore(
        column='new_trips_per_capita',
        scheme='NaturalBreaks',
    )

In [None]:
# Per-capita map for the current `gdf` (rows with total_pop == 0 are dropped inside the helper).
map_per_capita(gdf)

In [None]:
# Interactive map of projected_new_transit_trips, classed with the NaturalBreaks scheme.
gdf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Total projected new transit trips over the rows currently in `gdf`.
gdf.projected_new_transit_trips.sum()

## Next Steps

* caveat: other strategies (land use, active modes...)
* caveat: induced travel
* stratify into "good transit, not riding", "bad transit"
* LODES O/D data? Replica? -> compare against Conveyal transit O/D to find the "good transit, but not riding it" cases
   * find what doesn't show up in aggregate accessibility...
* https://walker-data.com/pygris/

In [None]:
import shared_utils

In [None]:
# Service date passed to every shared_utils.gtfs_utils_v2 query below.
analysis_date = '2023-04-15'

In [None]:
# Feed lookup for analysis_date; filtered on `name` and read for `feed_key` in the cells below.
feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)

In [None]:
# Feed rows whose name contains 'Monterey'; mst.feed_key is passed to the queries below.
mst = feeds >> filter(_.name.str.contains('Monterey'))
mst

In [None]:
# Stops for the Monterey feed key(s). trips_to_stops reads this notebook-level `stops`;
# it is rebound to LA Metro Bus stops in the Wilshire section.
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)

In [None]:
# Inspect which columns the stops table provides.
stops.columns

In [None]:
# Trips for the Monterey feed key(s) on analysis_date.
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)

In [None]:
# Inspect which columns the trips table provides.
trips.columns

In [None]:
# List the route short names available, to pick the routes filtered below.
trips.route_short_name.unique()

In [None]:
# Trips on routes 'A' and 'B', direction_id 0 only.
ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)

In [None]:
# Trips on route '20', direction_id 0 only.
trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)

In [None]:
# Manual step 1 of 3 (the same steps are wrapped in trips_to_stops below):
# stop_times for the route 20 trips.
st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)

In [None]:
# Manual step 2 of 3: keep distinct (stop_id, stop_sequence) pairs and collect the result.
st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()

In [None]:
# Manual step 3 of 3: attach stop geometry by inner-joining on stop_id.
# `st_20` is reassigned from trips_to_stops further down.
st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')

In [None]:
def trips_to_stops(trip_df, feed_list, stops_gdf=None):
    """Return the distinct stops served by ``trip_df`` with stop geometry attached.

    Parameters
    ----------
    trip_df
        Trips whose stop_times are fetched; forwarded as ``trip_df=`` to
        ``shared_utils.gtfs_utils_v2.get_stop_times``.
    feed_list
        Feed key(s) to query; forwarded unchanged to ``get_stop_times``.
    stops_gdf : optional
        Table supplying ``stop_id`` and ``geometry``. When omitted, the
        notebook-level ``stops`` variable is used, looked up at call time.
        Pass it explicitly to avoid joining against the wrong operator's
        stops after ``stops`` has been rebound by a later cell.

    Returns
    -------
    The distinct (stop_id, stop_sequence) pairs from the stop_times,
    inner-joined to the stop geometry on ``stop_id``.

    Notes
    -----
    Also reads the notebook-level ``analysis_date``.
    """
    if stops_gdf is None:
        # Backward-compatible default: the original always used the global `stops`.
        stops_gdf = stops
    st = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)
    st = st >> distinct(_.stop_id, _.stop_sequence) >> collect()
    st = stops_gdf >> select(_.stop_id, _.geometry) >> inner_join(_, st, on='stop_id')
    return st

In [None]:
# trips_to_stops requires the feed key(s) as its second argument; route 20 belongs to the MST feed.
st_20 = trips_to_stops(trips_20, mst.feed_key)

In [None]:
#  SURF BRT area for joins...
#  Keeps stops with stop_sequence <= 27 (presumably the extent of the SURF corridor on route 20 — confirm).
st_20 = st_20 >> filter(_.stop_sequence <= 27)

In [None]:
# st_20.explore()

In [None]:
# trips_to_stops requires the feed key(s) as its second argument; routes A and B belong to the MST feed.
st_ab = trips_to_stops(ab_trips, mst.feed_key)

In [None]:
# st_ab.explore()

In [None]:
# Stack the route 20 stops (after the stop_sequence cut) and the route A/B stops into one corridor stop set.
surf_corridor = pd.concat([st_20, st_ab])

In [None]:
# Visual check of the combined corridor stops.
surf_corridor.explore()

## Wilshire

In [None]:
# Repeats the feed lookup made earlier in the notebook, with the same analysis_date.
feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)

In [None]:
# Feed rows whose name contains 'LA Metro Bus'; metro.feed_key is passed to the queries below.
metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))
metro

In [None]:
# NOTE: rebinds the notebook-level `stops` (previously the Monterey stops) to LA Metro Bus stops.
# trips_to_stops reads `stops` when called, so calls after this cell join against Metro stops.
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)

In [None]:
# NOTE: rebinds `trips` (previously the Monterey trips) to LA Metro Bus trips.
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)

In [None]:
# List the route short names available in the LA Metro Bus trips.
trips.route_short_name.unique()

In [None]:
# Trips on route '720', direction_id 0 only.
trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)

In [None]:
# Distinct route 720 stops with geometry; relies on `stops` having been rebound to LA Metro Bus above.
st_720 = trips_to_stops(trips_720, metro.feed_key)

In [None]:
#  only keep W of Wil/Wstn
#  (implemented as stop_sequence <= 11; presumably the Wilshire/Western stop is the cutoff — confirm)
st_720 = st_720 >> filter(_.stop_sequence <= 11)

In [None]:
# Visual check of the retained route 720 stops.
st_720.explore()

## Sjoin and calculate

In [None]:
# Re-read the full dataset, replacing the 'Seaside'-filtered `gdf` from earlier in the notebook.
gdf = gpd.read_parquet('outputs/new_trips_with_uza.parquet')

In [None]:
# Reproject both stop sets to the shared California Albers CRS before buffering
# (the buffer distance below is presumably in metres in this CRS — confirm).
ca_albers = shared_utils.geography_utils.CA_NAD83Albers
surf_corridor = surf_corridor.to_crs(ca_albers)
st_720 = st_720.to_crs(ca_albers)

In [None]:
#  half-mile buffer stop areas, corridors should be broader...
#  804 is about half a mile expressed in metres (assumes the CRS units are metres — confirm).
#  NOTE: the geometry is replaced in place, so re-running this cell buffers the
#  already-buffered shapes a second time.
half_mile = 804
surf_corridor.geometry = surf_corridor.buffer(half_mile)
st_720.geometry = st_720.buffer(half_mile)

In [None]:
# Drop the leftover index_left / index_right columns before the spatial joins below
# (presumably they would clash with the index_right column that sjoin adds — confirm).
# errors='ignore' keeps this cell safe to re-run once the columns are already gone.
gdf = gdf.drop(columns=['index_left', 'index_right'], errors='ignore')

In [None]:
# Rows of `gdf` matched to any buffered SURF corridor stop (sjoin defaults).
# A row can match several stop buffers, so distinct on GEOID keeps one row per GEOID.
surf = gpd.sjoin(gdf, surf_corridor) >> distinct(_.GEOID, _keep_all=True)

In [None]:
# Map projected_new_transit_trips for the SURF corridor selection.
surf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Rows of `gdf` matched to any buffered route 720 stop (sjoin defaults).
# A row can match several stop buffers, so distinct on GEOID keeps one row per GEOID.
wilshire = gpd.sjoin(gdf, st_720) >> distinct(_.GEOID, _keep_all=True)

In [None]:
# Map projected_new_transit_trips for the Wilshire selection.
wilshire.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Summary statistics for the SURF corridor selection.
surf.describe()

In [None]:
# Summary statistics for the Wilshire selection, for comparison with SURF.
wilshire.describe()

In [None]:
# Column totals for the SURF corridor selection. numeric_only=True restricts the sum to
# numeric columns; the frame also carries geometry and text columns (e.g. NAME10) that
# cannot be meaningfully summed.
surf.sum(numeric_only=True)

In [None]:
# Column totals for the Wilshire selection. numeric_only=True restricts the sum to
# numeric columns; the frame also carries geometry and text columns (e.g. NAME10) that
# cannot be meaningfully summed.
wilshire.sum(numeric_only=True)