In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

import pandas as pd
import numpy as np
import geopandas as gpd
from siuba import *
import json

import shared_utils
import warnings
from path_example_vars import GCS_PATH

import conveyal_path_utils

In [2]:
from shapely.ops import split, substring, LineString
from calitp_data_analysis import geography_utils
from IPython.display import Markdown

In [3]:
import importlib

In [4]:
importlib.reload(conveyal_path_utils)

<module 'conveyal_path_utils' from '/home/jovyan/data-analyses/sb125_analyses/path_examples_tttf4/conveyal_path_utils.py'>

# Conveyal Transit Paths

* GH Issue: https://github.com/cal-itp/data-analyses/issues/1098

## Conveyal SOP

* prepare a csv with lat, lon, and od column with 0 for origin and 1 for destination
    * allow freeform, use od col as id in upload
* run Conveyal Analysis: 8-10am, standard transit parameters, add JSON feed_id param
* run Regional Analysis: 120 min max time; 5th, 50th, and 95th percentiles; get paths and travel times

## Metrics

* modal trip by n_iterations (most available trip) (how fast is it, how many xfers, fares?)
* fastest trip (how available?, fares?, xfer count?)
* fewest xfer trip (how fast, how available, fares?)

## Visuals??

In [5]:
#  TODO make parameter cell

region = 'sac'
region

'sac'

In [6]:
# Supported region keys mapped to their display names; `regions` is derived
# from the dict so the two cannot drift apart.
region_human = {
    'la': 'Los Angeles',
    'sac': 'Sacramento',
    'clovis': 'Clovis',
    'napa': 'Napa',
    'sonoma': 'Sonoma',
}
regions = list(region_human)
# Fail fast with a readable message when `region` holds an unknown key.
assert region in regions, f'unknown region {region!r}; expected one of {regions}'

In [7]:
display(Markdown(f'## {region_human[region]}'))

## Sacramento

In [8]:
auto_df = pd.read_parquet(f'{GCS_PATH}streetlight_results.parquet')

In [9]:
# Relabel 'Solano' as 'Sonoma' in the city column, then index the frame by city.
# Guarded so the cell can be re-run: once 'city' has become the index it is no
# longer a column, and repeating the step would raise.
if 'city' in auto_df.columns:
    auto_df = (
        auto_df
        .assign(city=auto_df.city.str.replace('Solano', 'Sonoma', regex=False))
        .set_index('city', drop=True)
    )

In [10]:
car_p50_time = auto_df.loc[region_human[region], '50_ttp_minutes']

In [11]:
car_p50_time

17.916666666666668

In [12]:
df = conveyal_path_utils.read_conveyal_path_df(f'{GCS_PATH}{region}_PATHS.csv')
df >> head(3)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations,total_iterations
3,3,0,1,"[062, 086, 019]","[2252, 528, 706]","[528, 706, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[7.0, 21.0, 8.0]",4.6,6.9,0.0,"[1.4, 2.0, 11.0]",61.9,22,240
4,4,0,1,"[507, 086, 019]","[7081, 528, 706]","[7036, 706, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[8.0, 21.0, 8.0]",5.5,6.9,4.1,"[1.5, 1.9, 11.0]",67.9,10,240
5,5,0,1,"[533, 019]","[7032, 9807]","[7042, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[22.0, 16.0]",19.2,6.9,3.8,"[1.8, 3.2]",72.9,192,240


In [13]:
df = conveyal_path_utils.add_warehouse_identifiers(df)

### Get warehouse data

In [14]:
warehouse_data = conveyal_path_utils.get_warehouse_data(df)

  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(


### Map

In [15]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    spatial_routes = conveyal_path_utils.compile_all_spatial_routes(df, warehouse_data)

In [16]:
spatial_routes >> head(3)

Unnamed: 0,feed_key,trip_id,stop_id,stop_sequence,stop_geom,name,shape_id,route_short_name,route_long_name,segment_geom,stop_pair,trip_group_id,availability_pct,total_time,xfer_count,route_name
0,294252b3b4b42fbb31a31ce184fcb3f5,1108250,2252,54,POINT (-128904.927 62545.360),Sacramento Schedule,46214,62,FREEPORT,"LINESTRING (-128914.852 62548.698, -128904.389...","(2252, 528)",3,0.091667,61.9,2,62
0,294252b3b4b42fbb31a31ce184fcb3f5,1109493,528,3,POINT (-130200.280 63537.013),Sacramento Schedule,46244,86,GRAND,"LINESTRING (-130203.028 63529.839, -130229.649...","(528, 706)",3,0.091667,61.9,2,86
0,294252b3b4b42fbb31a31ce184fcb3f5,1106689,706,11,POINT (-126677.916 69308.522),Sacramento Schedule,46180,19,RIO LINDA,"LINESTRING (-126685.915 69308.645, -126685.909...","(706, 717)",3,0.091667,61.9,2,19


In [17]:
warehouse_data['trips'].columns

Index(['feed_key', 'name', 'trip_id', 'route_id', 'route_short_name',
       'route_long_name', 'shape_id', 'trip_first_departure_ts'],
      dtype='object')

In [18]:
def map_trip_groups(spatial_routes: pd.DataFrame, which='trip_group_id', car_p50_minutes=None):
    """
    Build an interactive map of trip segments, colored by the `which` column.

    Parameters
    ----------
    spatial_routes
        Segment-level frame with a `segment_geom` geometry column plus the
        `availability_pct`, `total_time`, `trip_group_id`, `xfer_count` and
        `route_name` columns read below.
    which
        Column to color by: 'trip_group_id', 'xfer_count', 'car_p50_ratio'
        or 'route_name'.
    car_p50_minutes
        Denominator for `car_p50_ratio` (total_time / car time). When None,
        the notebook-level `car_p50_time` is used.

    Returns
    -------
    The map object returned by `.explore()`.

    Raises
    ------
    AssertionError
        If `which` is not one of the supported columns.
    """
    col_list = ['trip_group_id', 'xfer_count', 'car_p50_ratio', 'route_name']
    cmaps = dict(zip(col_list, ['tab20', 'coolwarm', 'coolwarm', 'Accent'])) #  TODO fixed scale for car ratio
    assert which in col_list, f'which must be one of {col_list}, got {which!r}'
    display_list = ['availability_pct', 'total_time']
    human_names = {col: col.replace('_', ' ').title() for col in col_list + display_list}
    human_names['name'] = 'GTFS Feed Name'

    if which == 'route_name':
        # Collapse to one row per (route, segment), summing availability across
        # every trip group that uses that segment.
        route_grouped = (spatial_routes
                         >> group_by(_.route_name, _.segment_geom)
                         >> summarize(route_avail_pct = _.availability_pct.sum()))
        # Buffer width scales with the summed availability.
        route_grouped.segment_geom = route_grouped.apply(
            lambda x: x.segment_geom.buffer(x.route_avail_pct * 800), axis=1)
        route_grouped = route_grouped >> arrange(-_.route_avail_pct)
        map_routes = gpd.GeoDataFrame(route_grouped, geometry='segment_geom',
                                      crs=geography_utils.CA_NAD83Albers)
    else:
        if car_p50_minutes is None:
            # Fall back to the notebook-level value for the selected region.
            car_p50_minutes = car_p50_time
        # Round numeric columns and drop per-stop identifier columns before mapping.
        map_routes = spatial_routes.copy().round(2) >> select(-_.trip_id, -_.route_short_name,
                                                              -_.route_long_name, -_.stop_id,
                                                              -_.stop_sequence)
        map_routes['car_p50_ratio'] = map_routes.total_time / car_p50_minutes
        # Buffer width scales with the share of iterations that used the trip group.
        map_routes.segment_geom = map_routes.apply(
            lambda x: x.segment_geom.buffer(x.availability_pct * 800), axis=1)
        map_routes = map_routes >> arrange(-_.availability_pct)

    # Single rename for both branches; keys absent from the frame are ignored.
    map_routes = map_routes.rename(columns=human_names)
    return map_routes.explore(column = human_names[which], cmap=cmaps[which], tiles="CartoDB positron")

In [None]:
map_trip_groups(spatial_routes, 'trip_group_id')

In [None]:
map_trip_groups(spatial_routes, 'xfer_count')

In [None]:
#  TODO don't drop info cols
map_trip_groups(spatial_routes, 'route_name')

In [None]:
map_trip_groups(spatial_routes, 'car_p50_ratio')

### LA Storytelling - infrequent regional rail TODO parameterize

* only two usable trips for 8-10am departures from origin, at 10:41 (Metrolink) and 11:01 (Amtrak)

In [43]:
(warehouse_data['trips'] >> filter(_.route_id == 'Antelope Valley Line') >> arrange(_.trip_first_departure_ts))[:3]

Unnamed: 0,feed_key,trip_id,route_id,route_short_name,shape_id,trip_first_departure_ts
16321,1ac8ee536d38b537e2cf55383222d379,294300161,Antelope Valley Line,,AVin,2023-10-18 10:41:00+00:00
16264,1ac8ee536d38b537e2cf55383222d379,294300162,Antelope Valley Line,,AVin,2023-10-18 11:41:00+00:00
16393,1ac8ee536d38b537e2cf55383222d379,294300163,Antelope Valley Line,,AVin,2023-10-18 12:11:00+00:00


In [44]:
(warehouse_data['trips'] >> filter(_.route_id == '78') >> arrange(_.trip_first_departure_ts))[:3]

Unnamed: 0,feed_key,trip_id,route_id,route_short_name,shape_id,trip_first_departure_ts
350,b4970d6cc7e206d9e667796130394790,26944,78,,102,2023-10-18 04:01:00+00:00
361,b4970d6cc7e206d9e667796130394790,17148,78,,148,2023-10-18 11:01:00+00:00
8574,b4970d6cc7e206d9e667796130394790,19891,78,,149,2023-10-18 13:01:00+00:00


## Sac

In [25]:
df = conveyal_path_utils.read_conveyal_path_df(f'{GCS_PATH}sac_PATHS.csv')
df >> head(3)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations,total_iterations
3,3,0,1,"[062, 086, 019]","[2252, 528, 706]","[528, 706, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[7.0, 21.0, 8.0]",4.6,6.9,0.0,"[1.4, 2.0, 11.0]",61.9,22,240
4,4,0,1,"[507, 086, 019]","[7081, 528, 706]","[7036, 706, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[8.0, 21.0, 8.0]",5.5,6.9,4.1,"[1.5, 1.9, 11.0]",67.9,10,240
5,5,0,1,"[533, 019]","[7032, 9807]","[7042, 717]","[6543c259dad1a760f30e33b0, 6543c259dad1a760f30...","[22.0, 16.0]",19.2,6.9,3.8,"[1.8, 3.2]",72.9,192,240


In [26]:
df = conveyal_path_utils.add_warehouse_identifiers(df)

### Get warehouse data

In [27]:
warehouse_data = conveyal_path_utils.get_warehouse_data(df)

  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(


### Map

In [29]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    spatial_routes = conveyal_path_utils.compile_all_spatial_routes(df, warehouse_data)

In [30]:
spatial_routes >> head(3)

Unnamed: 0,feed_key,trip_id,stop_id,stop_sequence,stop_geom,shape_id,segment_geom,stop_pair,trip_group_id,nIterations,totalTime,xfer_count
0,294252b3b4b42fbb31a31ce184fcb3f5,1108250,2252,54,POINT (-128904.927 62545.360),46214,"LINESTRING (-128914.852 62548.698, -128904.389...","(2252, 528)",3,22,61.9,2
0,294252b3b4b42fbb31a31ce184fcb3f5,1109493,528,3,POINT (-130200.280 63537.013),46244,"LINESTRING (-130203.028 63529.839, -130229.649...","(528, 706)",3,22,61.9,2
0,294252b3b4b42fbb31a31ce184fcb3f5,1106689,706,11,POINT (-126677.916 69308.522),46180,"LINESTRING (-126685.915 69308.645, -126685.909...","(706, 717)",3,22,61.9,2


In [31]:
map_routes = spatial_routes.copy()

In [32]:
# Buffer each segment by twice its nIterations value, then sort rows by
# nIterations in descending order.
# NOTE(review): the `spatial_routes >> head(3)` output under In [16] shows
# `availability_pct`/`total_time` columns rather than `nIterations`/`totalTime`;
# confirm this cell still runs against the current conveyal_path_utils.
map_routes.segment_geom = map_routes.apply(lambda x: x.segment_geom.buffer(x.nIterations * 2), axis=1)
map_routes = map_routes >> arrange(-_.nIterations)

In [33]:
map_routes.explore(column = 'trip_group_id', cmap='tab20', tiles="CartoDB positron")

In [34]:
# map_routes.explore(column = 'shape_id', cmap='Accent', tiles="CartoDB positron")

In [35]:
map_routes.explore(column = 'xfer_count', cmap='coolwarm', tiles="CartoDB positron")

In [42]:
# Ratio of transit total time to car travel time.
# NOTE(review): 18 looks like the rounded `car_p50_time` shown above for
# region 'sac' (17.92 minutes) — consider using that variable instead of the literal.
map_routes['car_p50_ratio'] = map_routes['totalTime'] / 18
#  car is 18min...

In [45]:
map_routes.explore(column = 'totalTime', cmap='coolwarm', tiles="CartoDB positron")
#  TODO fixed scale @1.5, 2, 2.5, 3
# map_routes.explore(column = 'car_p50_ratio', cmap='coolwarm', tiles="CartoDB positron")

### full shape map

In [38]:
shape_grouped = spatial_routes >> group_by(_.shape_id, _.segment_geom) >> summarize(total_iterations = _.nIterations.sum())

In [39]:
shape_grouped.segment_geom = shape_grouped.apply(lambda x: x.segment_geom.buffer(x.total_iterations * 2), axis=1)
shape_grouped = shape_grouped >> arrange(-_.total_iterations)

In [40]:
shape_grouped = gpd.GeoDataFrame(shape_grouped, geometry='segment_geom', crs=geography_utils.CA_NAD83Albers)

In [41]:
shape_grouped.explore(column = 'shape_id', cmap='Accent', tiles="CartoDB positron")

## Solano

In [None]:
# NOTE(review): this section is headed "Solano" but loads la_PATHS.csv — confirm which is intended.
# The reader lives in conveyal_path_utils; the bare name is not imported in this notebook.
df = conveyal_path_utils.read_conveyal_path_df(f'{GCS_PATH}la_PATHS.csv')
df >> head(3)

In [None]:
# Qualified with the module name; the bare function is not imported in this notebook.
df = conveyal_path_utils.add_warehouse_identifiers(df)

### Get warehouse data

In [None]:
# Qualified with the module name; the bare function is not imported in this notebook.
warehouse_data = conveyal_path_utils.get_warehouse_data(df)

### Map

In [None]:
with warnings.catch_warnings():
    # Suppress warnings emitted while the spatial routes are compiled.
    warnings.simplefilter("ignore")
    # Qualified with the module name and passed warehouse_data, matching the
    # other calls to this function in the notebook.
    spatial_routes = conveyal_path_utils.compile_all_spatial_routes(df, warehouse_data)

In [None]:
spatial_routes >> head(3)

In [None]:
map_routes = spatial_routes.copy()

In [None]:
map_routes.segment_geom = map_routes.apply(lambda x: x.segment_geom.buffer(x.nIterations * 2), axis=1)
map_routes = map_routes >> arrange(-_.nIterations)

In [None]:
map_routes.explore(column = 'trip_group_id', cmap='tab20', tiles="CartoDB positron")

In [None]:
# map_routes.explore(column = 'shape_id', cmap='Accent', tiles="CartoDB positron")

In [None]:
map_routes.explore(column = 'xfer_count', cmap='coolwarm', tiles="CartoDB positron")

In [None]:
map_routes['car_p50_ratio'] = map_routes['totalTime'] / 46

In [None]:
# map_routes.explore(column = 'totalTime', cmap='coolwarm', tiles="CartoDB positron")
#  TODO fixed scale @1.5, 2, 2.5, 3
map_routes.explore(column = 'car_p50_ratio', cmap='coolwarm', tiles="CartoDB positron")

### full shape map

In [None]:
shape_grouped = spatial_routes >> group_by(_.shape_id, _.segment_geom) >> summarize(total_iterations = _.nIterations.sum())

In [None]:
shape_grouped.segment_geom = shape_grouped.apply(lambda x: x.segment_geom.buffer(x.total_iterations * 2), axis=1)
shape_grouped = shape_grouped >> arrange(-_.total_iterations)

In [None]:
shape_grouped = gpd.GeoDataFrame(shape_grouped, geometry='segment_geom', crs=geography_utils.CA_NAD83Albers)

In [None]:
shape_grouped.explore(column = 'shape_id', cmap='Accent', tiles="CartoDB positron")