In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) # 10**12 bytes, i.e. 1 TB (decimal); stored as a string because environment values must be str

import pandas as pd
import geopandas as gpd
from siuba import *
import json

# Conveyal Transit Paths

* GH Issue: https://github.com/cal-itp/data-analyses/issues/1098

## Conveyal SOP

* Prepare a CSV with `lat`, `lon`, and `od` columns, where `od` is 0 for an origin and 1 for a destination
* Run a Conveyal Analysis: 8-10am, standard transit parameters, add the JSON feed_id param
* Run a Regional Analysis: 120 min max time; 5th, 50th, and 95th percentiles; get paths and travel times

## Metrics

* Modal trip by `nIterations` (most available trip): how fast is it, how many transfers, what fares?
* Fastest trip: how available is it, what fares, how many transfers?
* Fewest-transfer trip: how fast is it, how available is it, what fares?

## Visuals??

# Basic Paths Parse

In [2]:
# Columns of the Conveyal PATHS csv that hold one '|'-delimited value per trip leg.
array_cols = ['routes', 'boardStops', 'alightStops',
       'rideTimes', 'waitTimes', 'feedIds']

def unpack_conveyal_path_df(df, array_cols = array_cols):
    """Split the '|'-delimited path columns of a Conveyal PATHS frame into lists.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``array_cols``.
    array_cols : list of str
        Columns whose string cells are split on '|'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which each string cell of ``array_cols`` is
        replaced by a list of strings. The input frame is not modified.

    Notes
    -----
    Cells that are not strings (for example NaN) are passed through
    unchanged instead of raising ``AttributeError``. List elements stay
    strings; no numeric conversion is attempted.
    """
    # Work on a copy so the caller's frame (possibly a filtered view) is untouched.
    unpacked = df.copy()
    for col in array_cols:
        unpacked[col] = unpacked[col].map(
            lambda cell: cell.split('|') if isinstance(cell, str) else cell
        )
    return unpacked

In [3]:
# Load the Conveyal PATHS export, turn the row position into an explicit
# trip_group_id column, keep only origin 0 -> destination 1 rows and
# discard the `group` column.
df = (
    pd.read_csv('./la/6643b1ec46ade8368e2cb698_PATHS.csv')
    .rename_axis('trip_group_id')
    .reset_index()
    >> filter(_.origin == 0, _.destination == 1)
    >> select(-_.group)
)
# Convert the '|'-delimited per-leg columns into Python lists.
df = unpack_conveyal_path_df(df)
# df = df.dropna() #  remove same o/d
df >> head(3)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations
11,11,0,1,"[209-13168, 804, 805, 90-13168, 96-13168]","[141078, 80128, 80211, 12102, 3611]","[2489, 80122, 80213, 3611, 1574]","[654404a5dad1a760f30e340d, 654404b9dad1a760f30...","[14.0, 18.0, 2.0, 3.0, 46.0]",8.6,7.2,2.3,"[1.4, 4.3, 1.5, 1.9, 2.0]",112.2,2
12,12,0,1,"[108-13168, 910-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 65300042, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30...","[15.0, 12.0, 26.0, 20.0]",1.4,3.8,9.8,"[1.6, 5.3, 2.4, 5.4]",102.8,56
13,13,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7963, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30...","[16.0, 12.0, 26.0, 12.0, 8.0]",1.3,3.8,10.5,"[1.7, 5.3, 4.4, 4.6, 2.1]",107.8,18


# Conveyal bundle-feed matching

In [4]:
# Conveyal Analysis bundle URL; its last path segment is the bundle id compared against `_id` in bundles.json
bundle_url = 'https://analysis.conveyal.com/regions/635602532d6ff920d83ff32a/bundles/6544044adad1a760f30e33de'

In [5]:
def bundle_url_uuid(x):
    """Return the final path segment of the URL string ``x`` (the bundle id).

    Trailing slashes are ignored, so ``'.../bundles/<id>/'`` and
    ``'.../bundles/<id>'`` both yield ``'<id>'``.
    """
    return x.rstrip('/').split('/')[-1]

In [6]:
# Display the id extracted from bundle_url as a quick visual check
bundle_url_uuid(bundle_url)

'6544044adad1a760f30e33de'

In [7]:
# Read bundles.json as UTF-8 explicitly so parsing does not depend on the
# platform's default locale encoding.
with open('bundles.json', encoding='utf-8') as f:
    bundle_json = json.load(f)

In [8]:
# Find the bundle whose `_id` equals the id at the end of bundle_url.
# The id is parsed once rather than once per bundle.
target_bundle_id = bundle_url_uuid(bundle_url)
matching_bundles = [bundle for bundle in bundle_json if bundle['_id'] == target_bundle_id]
if not matching_bundles:
    # Same exception type as the bare `[0]` lookup, but with an actionable message.
    raise IndexError(f'no bundle with _id {target_bundle_id!r} found in bundles.json')
matched_bundle = matching_bundles[0]

In [9]:
# matched_bundle layout: {'feeds': [{'feedId': ..., 'name': ..., ...}, ...], ...}
# matched_bundle

In [10]:
# Distinct feed ids referenced by any leg of any path
path_unique_feed_ids = df['feedIds'].explode().unique()

In [11]:
# Keep only the bundle's feed entries whose id appears in the parsed paths
path_feeds = [
    bundle_feed
    for bundle_feed in matched_bundle['feeds']
    if bundle_feed['feedId'] in path_unique_feed_ids
]

In [12]:
# Overwrite the display name of the first matched feed.
# NOTE(review): this assumes path_feeds[0] is the Amtrak feed, which depends on the
# feed order in bundles.json and on which feeds the paths use — TODO confirm, and
# consider selecting the entry by its feedId instead of by position.
path_feeds[0]['name'] = 'Amtrak'

In [13]:
# path_feeds

In [14]:
def name_from_feedId(x, feeds=None, max_chars=15):
    """Return the name of the feed whose 'feedId' equals ``x``, truncated.

    Parameters
    ----------
    x : str
        Feed id to look up.
    feeds : iterable of dict, optional
        Feed entries with 'feedId' and 'name' keys. Defaults to the
        notebook-level ``path_feeds``.
    max_chars : int, optional
        Maximum number of characters kept from the name (15 by default).

    Raises
    ------
    IndexError
        If no entry in ``feeds`` has the requested feed id.
    """
    if feeds is None:
        feeds = path_feeds
    for candidate in feeds:
        if candidate['feedId'] == x:
            return candidate['name'][:max_chars]
    raise IndexError(f'no feed with feedId {x!r} among the available feeds')

In [15]:
# Translate each path's list of feed ids into the parallel list of feed names
df['feedNames'] = df['feedIds'].apply(
    lambda feed_ids: [name_from_feedId(feed_id) for feed_id in feed_ids]
)

In [16]:
# Temporarily raise the column width limit to 100 characters so more of each
# list-valued column is visible; the option only applies inside the `with` block.
with pd.option_context('display.max_colwidth', 100):
    display(df)

Unnamed: 0,trip_group_id,origin,destination,routes,boardStops,alightStops,feedIds,rideTimes,accessTime,egressTime,transferTime,waitTimes,totalTime,nIterations,feedNames
11,11,0,1,"[209-13168, 804, 805, 90-13168, 96-13168]","[141078, 80128, 80211, 12102, 3611]","[2489, 80122, 80213, 3611, 1574]","[654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[14.0, 18.0, 2.0, 3.0, 46.0]",8.6,7.2,2.3,"[1.4, 4.3, 1.5, 1.9, 2.0]",112.2,2,"[us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_bu, us_ca_lacmta_bu]"
12,12,0,1,"[108-13168, 910-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 65300042, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 12.0, 26.0, 20.0]",1.4,3.8,9.8,"[1.6, 5.3, 2.4, 5.4]",102.8,56,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_bu]"
13,13,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7963, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[16.0, 12.0, 26.0, 12.0, 8.0]",1.3,3.8,10.5,"[1.7, 5.3, 4.4, 4.6, 2.1]",107.8,18,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
14,14,0,1,"[807, 804, 802, 94-13168]","[80706, 80128, 80211, 30001]","[80709, 80122, 80201, 11812]","[654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[9.0, 18.0, 26.0, 20.0]",14.5,3.8,9.2,"[1.5, 1.9, 8.4, 1.5]",113.8,2,"[us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_ra, us_ca_lacmta_bu]"
15,15,0,1,"[108-13168, 910-13168, 802, Antelope Valley Line, 154-13168]","[7962, 2322, 80211, 107, 30003]","[1539, 65300042, 80214, 102, 1326]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404c3dad1a760f...","[15.0, 12.0, 8.0, 19.0, 4.0]",1.4,3.5,10.7,"[1.6, 5.3, 3.4, 4.7, 1.9]",90.5,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Metrolink Train, us_ca_lacmta_bu]"
16,16,0,1,"[108-13168, 4X, 802, 94-13168]","[7962, 373, 80211, 30001]","[1539, 380, 80201, 11812]","[654404a5dad1a760f30e340d, 654404e2dad1a760f30e3431, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 12.4, 26.0, 20.0]",1.4,3.8,17.5,"[1.6, 4.1, 3.5, 2.5]",107.8,20,"[us_ca_lacmta_bu, TORRANCE TRANSI, us_ca_lacmta_ra, us_ca_lacmta_bu]"
17,17,0,1,"[108-13168, 910-13168, 802, 3163, 164-13168]","[7962, 2322, 80211, 3068903, 16428]","[1539, 65300042, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[15.0, 12.0, 26.0, 12.0, 8.0]",1.4,3.8,10.5,"[1.6, 5.3, 4.4, 4.6, 1.1]",105.8,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
18,18,0,1,"[40-13168, Antelope Valley Line, 154-13168]","[141013, 107, 30003]","[652, 102, 1326]","[654404a5dad1a760f30e340d, 654404c3dad1a760f30e3418, 654404a5dad1a760f30e340d]","[53.0, 19.0, 4.0]",13.3,3.5,6.4,"[1.7, 1.7, 1.9]",104.5,12,"[us_ca_lacmta_bu, Metrolink Train, us_ca_lacmta_bu]"
19,19,0,1,"[108-13168, 4X, 802, 3163, 164-13168]","[7962, 373, 80211, 3068903, 16428]","[1539, 380, 80201, 3068909, 11812]","[654404a5dad1a760f30e340d, 654404e2dad1a760f30e3431, 654404b9dad1a760f30e340e, 6544048cdad1a760f...","[15.0, 12.4, 26.0, 12.0, 8.0]",1.4,3.8,9.4,"[1.6, 2.1, 8.4, 4.6, 1.1]",105.8,2,"[us_ca_lacmta_bu, TORRANCE TRANSI, us_ca_lacmta_ra, Burbank Bus: 20, us_ca_lacmta_bu]"
20,20,0,1,"[108-13168, 460-13168, 802, 94-13168]","[7962, 2322, 80211, 30001]","[1539, 5019, 80201, 11812]","[654404a5dad1a760f30e340d, 654404a5dad1a760f30e340d, 654404b9dad1a760f30e340e, 654404a5dad1a760f...","[15.0, 13.0, 26.0, 20.0]",1.4,3.8,9.6,"[1.6, 1.4, 3.5, 2.5]",97.8,20,"[us_ca_lacmta_bu, us_ca_lacmta_bu, us_ca_lacmta_ra, us_ca_lacmta_bu]"


In [17]:
# works, but is it useful?
# One row per trip leg: all per-leg list columns are exploded together,
# then the index is renumbered from 0.
df_exploded = (
    df
    .explode([*array_cols, 'feedNames'])
    .reset_index(drop=True)
)