In [None]:
import pandas as pd
import geopandas as gpd
from siuba import *

In [None]:
import zipfile

In [None]:
# ! pip install pygris

In [None]:
import _utils
import importlib
importlib.reload(_utils)

In [None]:
import shared_utils
from calitp_data_analysis import geography_utils, utils

# Selecting Corridors

In [None]:
# Service date passed to every gtfs_utils_v2 query in this notebook.
analysis_date = '2023-04-15'

In [None]:
feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)

In [None]:
tracts = _utils.get_tract_geoms()

In [None]:
def trips_to_stops(trip_df, feed_list, stops_gdf=None):
    """
    Return the stop locations served by the given trips.

    Stop times for `trip_df` are fetched for the notebook-level
    `analysis_date`, reduced to distinct (stop_id, stop_sequence) pairs,
    and inner-joined on `stop_id` to the stop geometries.

    Parameters
    ----------
    trip_df
        Trips table, forwarded to `get_stop_times` as `trip_df`.
    feed_list
        Feed key(s), forwarded to `get_stop_times`.
    stops_gdf : optional
        Table with `stop_id` and `geometry` columns. When omitted, the
        notebook-level `stops` variable is used, which must then hold the
        stops of the same operator as `trip_df`.

    Returns
    -------
    The `stop_id`/`geometry` columns of the stops table joined to the
    distinct stop_id/stop_sequence pairs.
    """
    if stops_gdf is None:
        # Backward-compatible fallback. `stops` is rebound in every corridor
        # section, so pass stops_gdf explicitly to avoid picking up a stale one.
        stops_gdf = stops
    st = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)
    st = st >> distinct(_.stop_id, _.stop_sequence) >> collect()
    return stops_gdf >> select(_.stop_id, _.geometry) >> inner_join(_, st, on='stop_id')

In [None]:
def sjoin_tracts(stops_gdf, tracts_gdf, buffer_m):
    """
    Select the tracts that intersect a buffer drawn around each stop.

    `stops_gdf` is reprojected to `geography_utils.CA_NAD83Albers` and must
    then share a CRS with `tracts_gdf` (AssertionError otherwise). Each stop
    geometry is replaced by its buffer of `buffer_m` (in CRS units; callers
    pass 804 with a "half-mile" note, so presumably metres), tracts are
    spatially joined to those buffers, and the result keeps one row per GEOID.
    """
    buffered_stops = stops_gdf.to_crs(geography_utils.CA_NAD83Albers)
    assert buffered_stops.crs == tracts_gdf.crs

    # Reprojection returned a new frame, so the caller's geometries are untouched.
    buffered_stops.geometry = buffered_stops.buffer(buffer_m)
    joined = gpd.sjoin(tracts_gdf, buffered_stops)

    # A tract can intersect several stop buffers; keep a single row for each.
    return joined >> distinct(_.GEOID, _keep_all=True)

## Wilshire

In [None]:
metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))
metro

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)

In [None]:
st_720 = trips_to_stops(trips_720, metro.feed_key)

In [None]:
#  only keep W of Wil/Wstn
st_720 = st_720 >> filter(_.stop_sequence <= 11)

In [None]:
# st_720.explore()

In [None]:
wilshire = sjoin_tracts(st_720, tracts, 804) #  half-mile

In [None]:
# wilshire.explore()

In [None]:
# wilshire.to_file('wilshire.geojson')

In [None]:
#  includes non-corridor vmt...

# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')

# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))

In [None]:
wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')

In [None]:
# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
wilshire_results.sum()

In [None]:
wilshire_results.describe()

## Fresno Route 1

In [None]:
fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))
fresno

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)

In [None]:
# Narrow to one hard-coded trip instance.
# NOTE(review): this key presumably only exists for the current analysis_date -- confirm if the date changes.
trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')

In [None]:
st_1 = trips_to_stops(trips_1, fresno.feed_key)

In [None]:
st_1 = st_1 >> filter(_.stop_sequence < 20) #  vertical portion only

In [None]:
# st_1.explore()

In [None]:
# NOTE(review): rebinds `fresno` from the feed table (read above via fresno.feed_key)
# to the tract join result; re-running the feed_key cells after this one will likely
# fail unless the `fresno = feeds >> ...` cell is run again first.
fresno = sjoin_tracts(st_1, tracts, 804) #  half-mile

In [None]:
# fresno.explore()

In [None]:
fresno.to_file('fresno.geojson')

In [None]:
fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')

In [None]:
# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
fresno_results.sum()

In [None]:
fresno_results.describe()

## San Pablo Ave

In [None]:
ac = feeds >> filter(_.name.str.contains('AC Transit'))
ac

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips_72r = trips >> filter(_.route_short_name.isin(['72R']), _.direction_id == 0)

In [None]:
st_72r = trips_to_stops(trips_72r, ac.feed_key)

In [None]:
# st_72r.explore()

In [None]:
san_pablo = sjoin_tracts(st_72r, tracts, 804) #  half-mile

In [None]:
# san_pablo.explore()

In [None]:
san_pablo.to_file('san_pablo.geojson')

In [None]:
san_pablo_results = gpd.read_parquet('outputs/sanpablo_trips_with_uza.parquet')

In [None]:
# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
san_pablo_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
san_pablo_results.sum()

In [None]:
san_pablo_results.describe()

## Eureka H Street/Purple Route

In [None]:
eureka = feeds >> filter(_.name.str.contains('Humboldt Schedule'))
eureka

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips.route_long_name.unique()

In [None]:
trips_rainbow = trips >> filter(_.route_long_name.isin(['Rainbow Route']), _.direction_id == 0)

In [None]:
# Preview the filtered trips (the original cell held a truncated name that raised NameError).
trips_rainbow.head()

In [None]:
# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')

In [None]:
# Use the trips filtered above; `trips_purple` is never defined in this notebook.
st_rainbow = trips_to_stops(trips_rainbow, eureka.feed_key)

In [None]:
st_rainbow = st_rainbow >> filter(_.stop_sequence >= 35)

In [None]:
# st_rainbow.explore()

In [None]:
# NOTE(review): rebinds `eureka` from the feed table (read above via eureka.feed_key)
# to the tract join result; re-running the feed_key cells after this one will likely
# fail unless the `eureka = feeds >> ...` cell is run again first.
eureka = sjoin_tracts(st_rainbow, tracts, 804) #  half-mile

In [None]:
eureka.explore()

In [None]:
eureka.to_file('eureka.geojson')

In [None]:
eureka_results = gpd.read_parquet('outputs/eureka_trips_with_uza.parquet')

In [None]:
# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
eureka_results.sum()

In [None]:
eureka_results.describe()

# All Corridors Summary

In [None]:
fresno_results['corridor'] = 'Fresno'
san_pablo_results['corridor'] = 'San Pablo Ave'
wilshire_results['corridor'] = 'Wilshire'
eureka_results['corridor'] = 'Eureka'

In [None]:
all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])

In [None]:
# Per-corridor totals, written out as a CSV.
corridor_totals = (
    all_results
    >> group_by(_.corridor)
    >> summarize(
        total_new_transit_trips=_.projected_new_transit_trips.sum(),
        total_population=_.total_pop.sum(),
        total_vmt=_.total_mi_auto.sum(),
        # median (0.5 quantile) of the per-row p50_mi_auto values
        p50_auto_trip_mi=_.p50_mi_auto.quantile(.5),
        total_auto_trips=_.total_trips_auto.sum(),
    )
)
corridor_totals.to_csv('vmt_transit_corridors.csv')

## Redding Route 4

In [None]:
redding = feeds >> filter(_.name.str.contains('Redding'))
redding

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, redding.feed_key)

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, redding.feed_key)

In [None]:
trips.route_short_name.unique()

In [None]:
trips_4 = trips >> filter(_.route_short_name.isin(['4']), _.direction_id == 0)

In [None]:
# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')

In [None]:
st_4 = trips_to_stops(trips_4, redding.feed_key)

In [None]:
# st_4.explore()

In [None]:
# NOTE(review): rebinds `redding` from the feed table (read above via redding.feed_key)
# to the tract join result; re-running the feed_key cells after this one will likely
# fail unless the `redding = feeds >> ...` cell is run again first.
redding = sjoin_tracts(st_4, tracts, 804) #  half-mile

In [None]:
# redding.explore()

In [None]:
redding.to_file('redding.geojson')

In [None]:
redding_results = gpd.read_parquet('outputs/redding_trips_with_uza.parquet')

In [None]:
# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')

In [None]:
# utils.make_zipped_shapefile(wilshire_results, 'wilsh')

In [None]:
redding_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')

In [None]:
redding_results.sum()

In [None]:
redding_results.describe()

## MST (table)

In [None]:
mst = feeds >> filter(_.name.str.contains('Monterey'))
mst

In [None]:
stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)

In [None]:
stops.columns

In [None]:
trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)

In [None]:
trips.columns

In [None]:
trips.route_short_name.unique()

In [None]:
ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)

In [None]:
trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)

In [None]:
# This cell and the next two repeat, step by step, what trips_to_stops() does for trips_20.
st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)

In [None]:
st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()

In [None]:
st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')

In [None]:
# trips_to_stops requires the feed key(s) as its second argument.
st_20 = trips_to_stops(trips_20, mst.feed_key)

In [None]:
#  SURF BRT area for joins...
st_20 = st_20 >> filter(_.stop_sequence <= 27)

In [None]:
# st_20.explore()

In [None]:
# trips_to_stops requires the feed key(s) as its second argument.
st_ab = trips_to_stops(ab_trips, mst.feed_key)

In [None]:
# st_ab.explore()

In [None]:
surf_corridor = pd.concat([st_20, st_ab])

In [None]:
surf_corridor.explore()