In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

import pandas as pd
import geopandas as gpd
from siuba import *
import json

# Conveyal Transit Paths

* GH Issue: https://github.com/cal-itp/data-analyses/issues/1098

## Conveyal SOP

* prepare a csv with lat, lon, and od column with 0 for origin and 1 for destination
* run Conveyal Analysis: 8-10am, standard transit parameters, add JSON feed_id param
* run Regional Analysis: 120min max time, 5, 50, 95 %ile, get paths and travel times

## Metrics

* modal trip by n_iterations (most available trip) (how fast is it, how many xfers, fares?)
* fastest trip (how available?, fares?, xfer count?)
* fewest xfer trip (how fast, how available, fares?)

## Visuals??

# Basic Paths Parse

In [2]:
array_cols = ['routes', 'boardStops', 'alightStops',
       'rideTimes', 'waitTimes', 'feedIds']

def unpack_conveyal_path_df(df, array_cols = array_cols):
    
    for col in array_cols:
        df.loc[:,col] = df[col].map(lambda x: x.split('|'))
    return df

In [3]:
df = pd.read_csv('./la/6643b1ec46ade8368e2cb698_PATHS.csv')
df.index.rename('trip_group_id', inplace=True)
df.reset_index(inplace=True)
df = (df >> filter(_.origin == 0, _.destination == 1)
         >> select(-_.group)
     )
df = unpack_conveyal_path_df(df)
# df = df.dropna() #  remove same o/d
df >> head(3)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations
11,11,0,1,"[209-13168, 804, 805, 90-13168, 96-13168]","[141078, 80128, 80211, 12102, 3611]","[2489, 80122, 80213, 3611, 1574]","[654404a5dad1a760f30e340d, 654404b9dad1a760f30...","[14.0, 18.0, 2.0, 3.0, 46.0]",8.6,7.2,2.3,"[1.4, 4.3, 1.5, 1.9, 2.0]",112.2,2
12,12,0,1,"[108-13168, 910-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 65300042, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30...","[15.0, 12.0, 26.0, 20.0]",1.4,3.8,9.8,"[1.6, 5.3, 2.4, 5.4]",102.8,56
13,13,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7963, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30...","[16.0, 12.0, 26.0, 12.0, 8.0]",1.3,3.8,10.5,"[1.7, 5.3, 4.4, 4.6, 2.1]",107.8,18


# Conveyal bundle-feed matching

In [10]:
socal_conveyal_joined = pd.read_parquet('../../conveyal_update/socal_conveyal_joined.parquet')

In [17]:
unique_feeds = df.feedIds.explode().unique()

In [18]:
[feed for feed in unique_feeds if feed in socal_conveyal_joined.feedId]

[]

In [19]:
unique_feeds

array(['654404a5dad1a760f30e340d', '654404b9dad1a760f30e340e',
       '6544048cdad1a760f30e33f1', '654404c3dad1a760f30e3418',
       '654404e2dad1a760f30e3431', '65440468dad1a760f30e33e3'],
      dtype=object)

In [23]:
socal_conveyal_joined >> filter(_.feedId.isin(unique_feeds))

Unnamed: 0,feedId,name,bundleScopedFeedId,serviceStart,serviceEnd,checksum,span,feed_key,region,gtfs_dataset_name,base64_url,date,span_x,span_y
0,65440468dad1a760f30e33e3,"Martz Trailways, Executive Transportation, Rou...",65440468dad1a760f30e33e3_65440468dad1a760f30e33e1,2023-10-15,2024-10-14,3610834750,2023-10-15_2024-10-14,b4970d6cc7e206d9e667796130394790,socal,Amtrak Schedule,aHR0cHM6Ly9jb250ZW50LmFtdHJhay5jb20vY29udGVudC...,2023-10-18,,
9,6544048cdad1a760f30e33f1,Burbank Bus: 2023-07-30 to 2099-12-31,6544048cdad1a760f30e33f1_65440468dad1a760f30e33e1,2023-07-31,2099-12-31,1241248906,2023-07-31_2099-12-31,375afdc8035694073c435027b09b4642,socal,Burbank Schedule,aHR0cHM6Ly9yaWRlYnVyYmFua2J1cy5jb20vZ3Rmcw==,2023-10-18,,
28,654404a5dad1a760f30e340d,us_ca_lacmta_bus: 2023-06-25 to 2025-04-01,654404a5dad1a760f30e340d_65440468dad1a760f30e33e1,2023-06-25,2025-04-01,3472770058,2023-06-25_2025-04-01,edb0fd72b98a7f739407ceca3fae9034,socal,LA Metro Bus Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy...,2023-10-18,,
29,654404b9dad1a760f30e340e,us_ca_lacmta_rail: 2023-10-17 to 2023-10-31,654404b9dad1a760f30e340e_65440468dad1a760f30e33e1,2023-10-17,2023-10-31,359833427,2023-10-17_2023-10-31,beaaee89e671fea3d48124dc23335bed,socal,LA Metro Rail Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW...,2023-10-18,,
37,654404c3dad1a760f30e3418,Metrolink Trains: 2023-06-07 to 2024-12-31,654404c3dad1a760f30e3418_65440468dad1a760f30e33e1,2023-06-07,2024-12-31,1194532972,2023-06-07_2024-12-31,1ac8ee536d38b537e2cf55383222d379,socal,Metrolink Schedule,aHR0cHM6Ly93d3cubWV0cm9saW5rdHJhaW5zLmNvbS9nbG...,2023-10-18,,
59,654404e2dad1a760f30e3431,TORRANCE TRANSIT SYSTEM: 2023-10-08 to 2024-01-13,654404e2dad1a760f30e3431_65440468dad1a760f30e33e1,2023-10-08,2024-01-13,1146343135,2023-10-08_2024-01-13,24487472b6cd8d19984eb6496780bcad,socal,Torrance Schedule,aHR0cHM6Ly90cmFuc2l0LnRvcnJhbmNlY2EuZ292L2hvbW...,2023-10-18,,


In [22]:
'654404a5dad1a760f30e340d' in socal_conveyal_joined.feedId

False

In [20]:
socal_conveyal_joined >> filter(_.name.str.contains('lac'))

Unnamed: 0,feedId,name,bundleScopedFeedId,serviceStart,serviceEnd,checksum,span,feed_key,region,gtfs_dataset_name,base64_url,date,span_x,span_y
28,654404a5dad1a760f30e340d,us_ca_lacmta_bus: 2023-06-25 to 2025-04-01,654404a5dad1a760f30e340d_65440468dad1a760f30e33e1,2023-06-25,2025-04-01,3472770058,2023-06-25_2025-04-01,edb0fd72b98a7f739407ceca3fae9034,socal,LA Metro Bus Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy...,2023-10-18,,
29,654404b9dad1a760f30e340e,us_ca_lacmta_rail: 2023-10-17 to 2023-10-31,654404b9dad1a760f30e340e_65440468dad1a760f30e33e1,2023-10-17,2023-10-31,359833427,2023-10-17_2023-10-31,beaaee89e671fea3d48124dc23335bed,socal,LA Metro Rail Schedule,aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW...,2023-10-18,,
30,654404bbdad1a760f30e3410,dpwlacounty-ca-us: 2023-07-18 to 2024-06-30,654404bbdad1a760f30e3410_65440468dad1a760f30e33e1,2019-04-01,2024-06-30,3134830938,2019-04-01_2024-06-30,dceccbf372ba48fe8e399210e815a266,socal,LADPW Schedule,aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3...,2023-10-18,,
10,654404a3dad1a760f30e340b,lacampana-ca-us: 2023-01-01 to 2024-12-31,654404a3dad1a760f30e340b_65440468dad1a760f30e33e1,2023-01-01,2024-12-31,1292430411,,bf1af24c3bdcaa8f2dbb49b04bb749bc,socal,La Campana Schedule,aHR0cHM6Ly9naXRodWIuY29tL0xBQ01UQS9sb3MtYW5nZW...,2023-10-18,2023-01-01_2024-12-31,2023-01-01_2024-12-31


In [16]:
with pd.option_context('display.max_colwidth', 100):
    display(df)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations,feedNames
11,11,0,1,"[209-13168, 804, 805, 90-13168, 96-13168]","[141078, 80128, 80211, 12102, 3611]","[2489, 80122, 80213, 3611, 1574]","[654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[14.0, 18.0, 2.0, 3.0, 46.0]",8.6,7.2,2.3,"[1.4, 4.3, 1.5, 1.9, 2.0]",112.2,2,"[us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_bu, us_ca_lacmta_bu]"
12,12,0,1,"[108-13168, 910-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 65300042, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 12.0, 26.0, 20.0]",1.4,3.8,9.8,"[1.6, 5.3, 2.4, 5.4]",102.8,56,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_bu]"
13,13,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7963, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[16.0, 12.0, 26.0, 12.0, 8.0]",1.3,3.8,10.5,"[1.7, 5.3, 4.4, 4.6, 2.1]",107.8,18,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
14,14,0,1,"[807, 804, 802, 94-13168]","[80706, 80128, 80211, 30001]","[80709, 80122, 80201, 11812]","[654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[9.0, 18.0, 26.0, 20.0]",14.5,3.8,9.2,"[1.5, 1.9, 8.4, 1.5]",113.8,2,"[us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_bu]"
15,15,0,1,"[108-13168, 910-13168, 802, Antelope Valley Line, 154-13168]","[7962, 2322, 80211, 107, 30003]","[1539, 65300042, 80214, 102, 1326]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404c3dad1a760f...","[15.0, 12.0, 8.0, 19.0, 4.0]",1.4,3.5,10.7,"[1.6, 5.3, 3.4, 4.7, 1.9]",90.5,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Metrolink Train, us_ca_lacmta_bu]"
16,16,0,1,"[108-13168, 4X, 802, 94-13168]","[7962, 373, 80211, 30001]","[1539, 380, 80201, 11812]","[654404a5dad1a760f30e340d, 654404e2dad1a760f30e3431, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 12.4, 26.0, 20.0]",1.4,3.8,17.5,"[1.6, 4.1, 3.5, 2.5]",107.8,20,"[us_ca_lacmta_bu, TORRANCE TRANSI, us_ca_lacmta_ra, us_ca_lacmta_bu]"
17,17,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7962, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[15.0, 12.0, 26.0, 12.0, 8.0]",1.4,3.8,10.5,"[1.6, 5.3, 4.4, 4.6, 1.1]",105.8,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
18,18,0,1,"[40-13168, Antelope Valley Line, 154-13168]","[141013, 107, 30003]","[652, 102, 1326]","[654404a5dad1a760f30e340d, 654404c3dad1a760f30e3418, 654404a5dad1a760f30e340d]","[53.0, 19.0, 4.0]",13.3,3.5,6.4,"[1.7, 1.7, 1.9]",104.5,12,"[us_ca_lacmta_bu, Metrolink Train, us_ca_lacmta_bu]"
19,19,0,1,"[108-13168, 4X, 802, 3163, 164-13168]","[7962, 373, 80211, 3068903, 16428]","[1539, 380, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404e2dad1a760f30e3431, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[15.0, 12.4, 26.0, 12.0, 8.0]",1.4,3.8,9.4,"[1.6, 2.1, 8.4, 4.6, 1.1]",105.8,2,"[us_ca_lacmta_bu, TORRANCE TRANSI, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
20,20,0,1,"[108-13168, 460-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 5019, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 13.0, 26.0, 20.0]",1.4,3.8,9.6,"[1.6, 1.4, 3.5, 2.5]",97.8,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_bu]"


In [5]:
# # works, but is it useful?
# df_exploded = df.explode(array_cols + ['feedNames']).reset_index(drop=True)