In [None]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

import pandas as pd
import numpy as np
import geopandas as gpd
from siuba import *
import json

import shared_utils
import warnings
from path_example_vars import GCS_PATH

import conveyal_path_utils
import branca

In [None]:
from shapely.ops import split, substring, LineString
from calitp_data_analysis import geography_utils
from IPython.display import Markdown

In [None]:
import importlib

In [None]:
# Re-import the local conveyal_path_utils module so edits made to it on disk
# are picked up without restarting the kernel (development convenience).
importlib.reload(conveyal_path_utils)

In [None]:
# Colormap for the 'car_p50_ratio' map layer: Spectral (5 steps), reversed,
# stretched over ratios 1-3.
# branca.colormap.step.Spectral_05 is a shared library-level object, so its
# color list must not be reversed in place: doing so made this cell flip the
# palette back on every re-run. .scale() returns a new colormap, and the
# reversed list is assigned to that new object only.
ratio_cmap = branca.colormap.step.Spectral_05.scale(vmin=1, vmax=3)
ratio_cmap.colors = ratio_cmap.colors[::-1]

In [None]:
# Colormap for the 'xfer_count' map layer: Spectral (4 steps), reversed,
# stretched over 0-4 transfers.
# The reversal is applied to the scaled copy rather than to branca's shared
# step.Spectral_04 object, so re-running this cell is idempotent and the
# library-level palette is left untouched.
xfer_cmap = branca.colormap.step.Spectral_04.scale(vmin=0, vmax=4)
xfer_cmap.colors = xfer_cmap.colors[::-1]

# Conveyal Transit Paths

* GH Issue: https://github.com/cal-itp/data-analyses/issues/1098

## Conveyal SOP

* prepare a csv with lat, lon, and od column with 0 for origin and 1 for destination
    * allow freeform, use od col as id in upload
* run Conveyal Analysis: 8-10am, standard transit parameters, add JSON feed_id param
* run Regional Analysis: 120 min max time; 5th, 50th, and 95th percentiles; get paths and travel times

## Metrics

* modal trip by n_iterations (most available trip) (how fast is it, how many xfers, fares?)
* fastest trip (how available?, fares?, xfer count?)
* fewest xfer trip (how fast, how available, fares?)

## Visuals??

In [None]:
#  TODO make parameter cell

# Region key for this run. It must be one of the keys checked in the next cell;
# it selects the '{region}_PATHS.csv' input and the streetlight row used below.
region = 'napa'
region

In [None]:
# Supported region keys, checked against the `region` chosen above.
regions = ['la', 'sac', 'clovis', 'napa', 'solano']
assert region in regions
# Region key -> display name (used for the section heading and as the
# city index when looking up streetlight results).
region_human = {
    key: label
    for key, label in zip(
        regions,
        ['Los Angeles', 'Sacramento', 'Clovis', 'Napa', 'Solano'],
    )
}

In [None]:
# Render the selected region's display name as a level-2 markdown heading.
display(Markdown(f'## {region_human[region]}'))

In [None]:
# Load the streetlight results table from under the GCS_PATH prefix. Later cells
# index it by 'city' and read the '50_ttp_minutes' column for the chosen region.
auto_df = pd.read_parquet(f'{GCS_PATH}streetlight_results.parquet')

In [None]:
# auto_df.city = auto_df.city.str.replace('Solano', 'Sonoma')
# Index by city so the per-region lookup can use .loc[<display name>].
# Guarded and non-inplace: the previous in-place set_index raised
# KeyError: 'city' when this cell was re-run, because the column had already
# been moved into the index.
if auto_df.index.name != 'city':
    auto_df = auto_df.set_index('city', drop=True)

In [None]:
# Car travel time ('50_ttp_minutes') for the selected region's city row.
car_p50_time = auto_df.loc[region_human[region], '50_ttp_minutes']
#  scale since transit route is shorter than planned (Clovis only)
car_p50_time = car_p50_time * 0.7 if region == 'clovis' else car_p50_time

In [None]:
# Show the car time that map_trip_groups uses as the car_p50_ratio denominator.
car_p50_time

In [None]:
# Read this region's Conveyal paths export ('{region}_PATHS.csv' under GCS_PATH)
# via the project helper, then preview the first three rows.
df = conveyal_path_utils.read_conveyal_path_df(f'{GCS_PATH}{region}_PATHS.csv')
df >> head(3)

In [None]:
# Presumably adds the identifiers needed to look the paths up in the warehouse;
# confirm in conveyal_path_utils.
# NOTE(review): df is overwritten with a function of itself, so re-running this
# cell feeds already-processed rows back in. Confirm the helper tolerates that.
df = conveyal_path_utils.add_warehouse_identifiers(df)

### Get warehouse data

In [None]:
# Fetch warehouse tables for the paths in df. Later cells read the 'trips' and
# 'st' entries of the result.
warehouse_data = conveyal_path_utils.get_warehouse_data(df)

### Map

In [None]:
# Second development-time reload of conveyal_path_utils, so edits to the
# module are picked up before the spatial routes are compiled.
importlib.reload(conveyal_path_utils)

In [None]:
# Compile the per-segment spatial routes consumed by map_trip_groups below.
# Every warning raised inside the call is silenced, but only within this block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    spatial_routes = conveyal_path_utils.compile_all_spatial_routes(df, warehouse_data, verbose=True)

In [None]:
# Display the paths table after the warehouse identifiers have been added.
df

In [None]:
def map_trip_groups(spatial_routes: pd.DataFrame, which='trip_group_id', car_time=None):
    """Render an interactive map of transit path segments, colored by `which`.

    Segment geometries are buffered in proportion to how often the trip (or
    route) is optimal, so more-available options draw wider, and rows are
    sorted by descending share before being handed to `.explore`.

    Parameters
    ----------
    spatial_routes
        Segment-level frame as produced in this notebook by
        conveyal_path_utils.compile_all_spatial_routes. It must support
        `.explore` (i.e. be a GeoDataFrame) and carry the columns read below
        (segment_geom, optimal_pct, total_time, route_name, name, and the
        trip/stop columns that are dropped).
    which
        Column to color by: 'trip_group_id', 'xfer_count', 'car_p50_ratio'
        or 'route_name'. 'route_name' aggregates optimal_pct per
        route/segment/feed instead of mapping individual trips.
    car_time
        Car travel time used as the denominator of car_p50_ratio. Defaults to
        the notebook-level `car_p50_time`, which was previously read implicitly.

    Returns
    -------
    The object returned by `GeoDataFrame.explore` (an interactive map).

    Raises
    ------
    AssertionError
        If `which` is not one of the supported columns.
    """
    col_list = ['trip_group_id', 'xfer_count', 'car_p50_ratio',
                     'route_name']
    cmaps = ['tab20', xfer_cmap, ratio_cmap, 'Accent'] #  TODO fixed scale for car ratio
    cmaps = dict(zip(col_list, cmaps))
    assert which in col_list, f'which must be one of {col_list}, got {which!r}'
    display_list = ['optimal_pct', 'total_time', 'route_optimal_pct']
    human_names = {col: col.replace('_', ' ').title() for col in col_list + display_list}
    human_names['name'] = 'GTFS Feed Name'

    def buffer_by_share(geom, share):
        # Width grows with the optimal share and is capped at 500
        # (CRS units; presumably meters for CA_NAD83Albers -- confirm).
        return geom.buffer(min(share * 800, 500))

    if which != 'route_name':
        if car_time is None:
            car_time = car_p50_time
        # The trip-level frame is only built here; the route view never uses it.
        map_routes = spatial_routes.copy() >> select(-_.trip_id, -_.route_short_name, -_.route_long_name,
                                                             -_.stop_id, -_.stop_sequence, -_.stop_geom)
        map_routes['car_p50_ratio'] = map_routes.total_time / car_time
        map_routes['segment_geom'] = map_routes.apply(
            lambda row: buffer_by_share(row.segment_geom, row.optimal_pct), axis=1)
        map_routes = map_routes >> arrange(-_.optimal_pct)
    else:
        route_grouped = (spatial_routes >> group_by(_.route_name, _.segment_geom, _.name)
                            >> summarize(route_optimal_pct = _.optimal_pct.sum())
                        )
        route_grouped['segment_geom'] = route_grouped.apply(
            lambda row: buffer_by_share(row.segment_geom, row.route_optimal_pct), axis=1)
        route_grouped = route_grouped >> arrange(-_.route_optimal_pct)
        # Rounding is applied once, below, for both branches.
        map_routes = gpd.GeoDataFrame(route_grouped, geometry='segment_geom', crs=geography_utils.CA_NAD83Albers)

    map_routes = map_routes.rename(columns=human_names).round(2).fillna('none') # needed for explore to work?
    return map_routes.explore(column = human_names[which], cmap=cmaps[which], tiles="CartoDB positron")

In [None]:
# Map colored by trip group; segment width reflects each trip's optimal share.
map_trip_groups(spatial_routes, 'trip_group_id')

In [None]:
# Map colored by transit total_time divided by the car time (car_p50_ratio).
map_trip_groups(spatial_routes, 'car_p50_ratio')

In [None]:
# Map colored by route name, with optimal_pct summed per route/segment/feed.
map_trip_groups(spatial_routes, 'route_name')

In [None]:
# Map colored by the xfer_count column.
map_trip_groups(spatial_routes, 'xfer_count')

### LA Storytelling - infrequent regional rail (TODO: parameterize)

* only two usable trips for 8-10am departures from origin, at 10:41 (Metrolink) and 11:01 (Amtrak)

In [None]:
# First three 'Antelope Valley Line' trips, ordered by first departure timestamp.
# NOTE(review): the route id is hard-coded for the LA run; presumably this
# returns no rows when `region` is anything else -- confirm.
(warehouse_data['trips'] >> filter(_.route_id == 'Antelope Valley Line') >> arrange(_.trip_first_departure_ts))[:3]

In [None]:
# First three trips with route_id '78', ordered by first departure timestamp
# (the route id is likewise hard-coded for the LA run).
(warehouse_data['trips'] >> filter(_.route_id == '78') >> arrange(_.trip_first_departure_ts))[:3]

### Napa Storytelling - Amtrak Thruway is best trip, but actually only departs once

* only one trip from Napa to Vallejo at 9:30!

In [None]:
# Rows of warehouse_data['st'] at stop ids 'NAP' and 'VAL' (presumably the
# stop_times table and the Napa / Vallejo stops -- confirm).
thruway_ids = warehouse_data['st'] >> filter(_.stop_id.isin(('NAP', 'VAL')))

In [None]:
# Departure times at stop 'NAP', one row per trip serving it.
thruway_ids >> filter(_.stop_id == 'NAP') >> select(_.trip_id, _.stop_id, _.departure_time)