In [None]:
import pandas as pd
import geopandas as gpd
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
gcsgp = GCSGeoPandas()

In [None]:
import zipfile

In [None]:
! pip install pygris

In [None]:
from _utils import GCS_PATH

In [None]:
import _utils
import importlib
importlib.reload(_utils)

In [None]:
import shared_utils
from calitp_data_analysis import geography_utils, utils

# Selecting Corridors

* Also map each corridor and compute per-corridor VMT and ridership results

In [None]:
# Service date passed to every shared_utils.gtfs_utils_v2 call in this notebook.
analysis_date = '2023-04-15'

In [None]:
# Feed table for the analysis date; each corridor section filters it by `name`
# and uses the resulting `feed_key` values.
feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)

In [None]:
# Tract geometries from the project helper module (used as the left side of
# the spatial join in sjoin_tracts).
tracts = _utils.get_tract_geoms()

In [None]:
# Cast GEOID to int64 -- presumably so it matches the GEOID dtype in trips_all,
# which is filtered with GEOID.isin(...) in each corridor section; TODO confirm.
tracts.GEOID = tracts.GEOID.astype('int64')

In [None]:
# Previously computed results table; filtered by GEOID per corridor below.
trips_all = gcsgp.read_parquet(f'{GCS_PATH}outputs/new_trips_with_uza.parquet')

In [None]:
def trips_to_stops(trip_df, feed_list, stops_gdf=None, date=None):
    """Return the stops served by the given trips, with stop geometry attached.

    Parameters
    ----------
    trip_df
        Trips table, passed through to ``get_stop_times`` as ``trip_df``.
    feed_list
        Feed key(s), passed through to ``get_stop_times``.
    stops_gdf : optional
        Table with at least ``stop_id`` and ``geometry`` columns. When omitted,
        the notebook-level ``stops`` variable is used (the original behaviour).
        Passing it explicitly avoids picking up a stale ``stops`` left over
        from another corridor section.
    date : optional
        Service date for the stop_times query. Defaults to the notebook-level
        ``analysis_date``.

    Returns
    -------
    The ``stop_id``/``geometry`` columns of ``stops_gdf`` inner-merged on
    ``stop_id`` with the unique (``stop_id``, ``stop_sequence``) pairs found in
    the stop_times of the given trips.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    if stops_gdf is None:
        stops_gdf = stops
    if date is None:
        date = analysis_date

    st = shared_utils.gtfs_utils_v2.get_stop_times(date, feed_list, trip_df=trip_df,
                                                  get_df=True)
    # Many trips share the same stop pattern; keep one row per stop/sequence pair.
    st = st[['stop_id', 'stop_sequence']].drop_duplicates()
    st = stops_gdf[['stop_id', 'geometry']].merge(st, on='stop_id')
    return st

In [None]:
def sjoin_tracts(stops_gdf, tracts_gdf, buffer_m):
    """Select the tracts that intersect a buffer around the given stops.

    The stops are reprojected to ``geography_utils.CA_NAD83Albers_m`` and
    ``tracts_gdf`` must already be in that CRS (checked with an assert).
    Each stop geometry is buffered by ``buffer_m`` (in CRS units), the tracts
    are spatially joined against the buffers, and one row per ``GEOID`` is kept.
    """
    projected = stops_gdf.to_crs(geography_utils.CA_NAD83Albers_m)
    assert projected.crs == tracts_gdf.crs

    # Replace the stop geometries with their buffers on the reprojected copy.
    projected.geometry = projected.buffer(buffer_m)

    # A tract touching several buffers appears once per match; keep the first.
    joined = gpd.sjoin(tracts_gdf, projected)
    return joined.drop_duplicates(subset=['GEOID'])

## Wilshire

In [None]:
# Feed rows whose name contains 'LA Metro Bus'; displayed to check the match.
metro = feeds.query("name.str.contains('LA Metro Bus')")
metro

In [None]:
# NOTE: trips_to_stops reads this notebook-level `stops`, so it must be
# refreshed for the current feed before st_720 is built.
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)

In [None]:
# Inspect the available route names to pick the corridor route.
trips.route_short_name.unique()

In [None]:
# Route 720, one direction only.
trips_720 = trips.query('route_short_name.isin(["720"]) & direction_id == 0')

In [None]:
st_720 = trips_to_stops(trips_720, metro.feed_key)

In [None]:
# Keep only the stops west of Wil/Wstn (presumably Wilshire/Western -- confirm),
# i.e. the first 11 stop sequences in this direction.
st_720 = st_720.query('stop_sequence <= 11')

In [None]:
# st_720.explore()

In [None]:
# Tracts within 804 m (~half a mile) of the selected stops.
wilshire = sjoin_tracts(st_720, tracts, 804)

In [None]:
# wilshire.explore()

In [None]:
# Save the corridor tracts as GeoJSON under the intermediate/ prefix in GCS.
utils.geojson_gcs_export(wilshire, f'{GCS_PATH}intermediate/', 'wilshire')

In [None]:
# Rows of trips_all for the corridor tracts.
# includes non-corridor vmt...
wilshire_results = trips_all.query('GEOID.isin(@wilshire.GEOID)')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
# Interactive map coloured by projected new transit trips.
wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Column totals for the corridor. Restrict to numeric columns: wilshire_results
# carries a geometry column (it is mapped with .explore()), which cannot be summed.
wilshire_results.sum(numeric_only=True)

In [None]:
wilshire_results.describe()

## Fresno Route 1

In [None]:
fresno = feeds.query("name.str.contains('Fresno Sch')")
fresno

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips_1 = trips.query("route_short_name.isin(['01']) & direction_id == 0 & trip_instance_key == 'db65a5adda0fc0a2744580354516ac68'")

In [None]:
st_1 = trips_to_stops(trips_1, fresno.feed_key)

In [None]:
# Keep only the first part of the route (stop_sequence < 20).
st_1 = st_1.query('stop_sequence < 20') #  vertical portion only

In [None]:
# st_1.explore()

In [None]:
# Tracts within 804 m (~half a mile) of the selected stops.
fresno = sjoin_tracts(st_1, tracts, 804) #  half-mile

In [None]:
# fresno.explore()

In [None]:
# Save the corridor tracts as GeoJSON under the intermediate/ prefix in GCS.
utils.geojson_gcs_export(fresno, f'{GCS_PATH}intermediate/', 'fresno')

In [None]:
# Rows of trips_all for the corridor tracts.
# includes non-corridor vmt...
fresno_results = trips_all.query('GEOID.isin(@fresno.GEOID)')

In [None]:
# Interactive map coloured by projected new transit trips.
fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Column totals for the corridor. Restrict to numeric columns: fresno_results
# carries a geometry column (it is mapped with .explore()), which cannot be summed.
fresno_results.sum(numeric_only=True)

In [None]:
fresno_results.describe()

## San Pablo Ave

In [None]:
# Feed rows whose name contains 'AC Transit'; displayed to check the match.
ac = feeds.query("name.str.contains('AC Transit')")
ac

In [None]:
# NOTE: trips_to_stops reads this notebook-level `stops`, so it must be
# refreshed for the current feed before st_72r is built.
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)

In [None]:
# Inspect the available route names to pick the corridor route.
trips.route_short_name.unique()

In [None]:
# Route 72R, one direction only.
trips_72r = trips.query("route_short_name.isin(['72R']) & direction_id == 0")

In [None]:
# All stops of the route are kept (no stop_sequence filter for this corridor).
st_72r = trips_to_stops(trips_72r, ac.feed_key)

In [None]:
# st_72r.explore()

In [None]:
# Tracts within 804 m (~half a mile) of the selected stops.
san_pablo = sjoin_tracts(st_72r, tracts, 804) #  half-mile

In [None]:
# san_pablo.explore()

In [None]:
# Save the corridor tracts as GeoJSON under the intermediate/ prefix in GCS.
utils.geojson_gcs_export(san_pablo, f'{GCS_PATH}intermediate/', 'san_pablo')

In [None]:
# Rows of trips_all for the corridor tracts.
# includes non-corridor vmt...
san_pablo_results = trips_all.query('GEOID.isin(@san_pablo.GEOID)')

In [None]:
# Interactive map coloured by projected new transit trips.
san_pablo_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Column totals for the corridor. Restrict to numeric columns: san_pablo_results
# carries a geometry column (it is mapped with .explore()), which cannot be summed.
san_pablo_results.sum(numeric_only=True)

In [None]:
san_pablo_results.describe()

## Eureka H Street/Purple Route

In [None]:
eureka = feeds.query("name.str.contains('Humboldt Schedule')")
eureka

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips.route_long_name.unique()

In [None]:
trips_rainbow = trips.query("route_long_name.isin(['Rainbow Route']) & direction_id == 0")

In [None]:
st_rainbow = trips_to_stops(trips_rainbow, eureka.feed_key)

In [None]:
# Keep only the later part of the route (stop_sequence >= 35).
st_rainbow = st_rainbow.query('stop_sequence >= 35')

In [None]:
# st_rainbow.explore()

In [None]:
# Tracts within 804 m (~half a mile) of the selected stops.
eureka = sjoin_tracts(st_rainbow, tracts, 804) #  half-mile

In [None]:
# Save the corridor tracts as GeoJSON under the intermediate/ prefix in GCS.
utils.geojson_gcs_export(eureka, f'{GCS_PATH}intermediate/', 'eureka')

In [None]:
# Rows of trips_all for the corridor tracts.
# includes non-corridor vmt...
eureka_results = trips_all.query('GEOID.isin(@eureka.GEOID)')

In [None]:
# Interactive map coloured by projected new transit trips.
eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
# Column totals for the corridor. Restrict to numeric columns: eureka_results
# carries a geometry column (it is mapped with .explore()), which cannot be summed.
eureka_results.sum(numeric_only=True)

In [None]:
eureka_results.describe()

# All Corridors Summary

In [None]:
# Tag each corridor's rows before they are stacked. The *_results frames come
# from trips_all.query(...), so build new frames with assign() rather than
# setting a column in place on a filtered frame (SettingWithCopyWarning).
# Re-running this cell is safe: assign simply overwrites the same label.
fresno_results = fresno_results.assign(corridor='Fresno')
san_pablo_results = san_pablo_results.assign(corridor='San Pablo Ave')
wilshire_results = wilshire_results.assign(corridor='Wilshire')
eureka_results = eureka_results.assign(corridor='Eureka')

In [None]:
# Stack the four corridor tables; rows are told apart by the `corridor` column.
all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])

In [None]:
# Per-corridor summary. Each entry maps an output column to
# (source column, reducer). Note that p50_auto_trip_mi is the median of the
# per-row p50 values, not a median over individual trips.
corridor_aggs = {
    'total_new_transit_trips': ('projected_new_transit_trips', 'sum'),
    'total_population': ('total_pop', 'sum'),
    'total_vmt': ('total_mi_auto', 'sum'),
    'p50_auto_trip_mi': ('p50_mi_auto', 'median'),
    'total_auto_trips': ('total_trips_auto', 'sum'),
}
by_corridor = all_results.groupby('corridor')
grouped = by_corridor.agg(**corridor_aggs).reset_index()
grouped

In [None]:
# grouped.to_csv(f'vmt_transit_corridors.csv')