In [13]:
import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2
from shared_utils.rt_utils import show_full_df

import utils
import altair as alt

In [2]:
# GCS location of the source Excel workbook that is loaded with pd.read_excel in the next cell.
# NOTE(review): the file name suggests OD-pair ridership for FFY24-FFY25 — confirm against the data owner.
path = 'gs://calitp-analytics-data/data-analyses/thruway_intercity_bus/source_data/25.09.08CABusODPairRidershipFFY24-FFY25TD.xlsx'

In [3]:
# Load the workbook into a DataFrame; no sheet_name is given, so pandas reads its default (first) sheet.
source_ridership = pd.read_excel(path)

In [4]:
source_ridership

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue
0,Rt 03,ARN,BKY,12,2023,1,26.33
1,Rt 03,ARN,BKY,1,2024,2,39.00
2,Rt 03,ARN,BKY,2,2024,7,141.59
3,Rt 03,ARN,BKY,3,2024,7,165.14
4,Rt 03,ARN,BKY,4,2024,2,41.92
...,...,...,...,...,...,...,...
20475,Rt 99,TRU,SFC,5,2024,2,79.66
20476,Rt 99,TRU,SFC,6,2024,1,39.83
20477,Rt 99,USF,RLP,6,2025,0,3241.10
20478,Rt 99,USF,RLP,7,2025,0,2495.00


In [6]:
# Tag every row with its origin->destination pair (e.g. "ARN->BKY") for grouping and chart colouring.
source_ridership = source_ridership.assign(
    od=lambda df: df['orig'] + '->' + df['dest']
)

In [10]:
# Total ridership and revenue per route / OD pair over every month present in the file.
all_years_group = (
    source_ridership
    .groupby(['ca_bus_route', 'od'])[['ridership', 'revenue']]
    .agg('sum')
)

In [11]:
all_years_group

Unnamed: 0_level_0,Unnamed: 1_level_0,ridership,revenue
ca_bus_route,od,Unnamed: 2_level_1,Unnamed: 3_level_1
Rt 03,ARN->BKY,19,413.98
Rt 03,ARN->DAV,1,10.00
Rt 03,ARN->HAY,0,200.70
Rt 03,ARN->MTZ,5,155.90
Rt 03,ARN->OAC,1,19.50
...,...,...,...
Rt 99,SUI->SAC,1,10.70
Rt 99,SUI->SFC,347,4572.57
Rt 99,TRN->VOU,0,324.00
Rt 99,TRU->SFC,5,199.15


In [17]:
# Distinct route labels present in the source data.
source_ridership['ca_bus_route'].unique()

array(['Rt 03', 'Rt 06', 'Rt 07', 'Rt 10', 'Rt 15', 'Rt 17', 'Rt 18',
       'Rt 19', 'Rt 1A', 'Rt 1B', 'Rt 1C', 'Rt 20', 'Rt 20 - B', 'Rt 21',
       'Rt 35', 'Rt 39', 'Rt 3R', 'Rt 56', 'Rt 68', 'Rt 99'], dtype=object)

In [23]:
# Rt 1C station codes in the order treated as southbound: determine_direction labels a
# pair 'sb' when the origin comes earlier in this list than the destination.
# NOTE(review): stop order is hand-entered — confirm against the published schedule.
sb_1c = ['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN']

In [27]:
# Map each station code to its 0-based position along the route.
sb_1c = {stn: position for position, stn in enumerate(sb_1c)}

In [28]:
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

### quick test route 1c

In [29]:
# Quick-test subset: Rt 1C rows for a single month (April 2025).
test_1c = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 1C"')

In [31]:
def determine_direction(row, sequence_dict):
    """Classify an origin/destination pair by its travel direction along a route.

    Args:
        row: object exposing ``orig`` and ``dest`` attributes (e.g. a DataFrame
            row passed by ``DataFrame.apply(..., axis=1)``).
        sequence_dict: mapping of station code -> position along the route.

    Returns:
        'sb' when the origin's position is lower than the destination's,
        'nb' when it is higher, and '' otherwise.

    Indexing a plain dict raises KeyError for a station code it does not contain.
    """
    orig_pos = sequence_dict[row.orig]
    dest_pos = sequence_dict[row.dest]
    if orig_pos < dest_pos:
        return 'sb'
    if orig_pos > dest_pos:
        return 'nb'
    return ''

In [35]:
# Label every OD pair 'sb' / 'nb' / '' using the Rt 1C station order.
test_1c = test_1c.assign(
    direction=lambda df: df.apply(determine_direction, axis=1, sequence_dict=sb_1c)
)

In [38]:
# Keep southbound pairs only; rows labelled 'nb' or '' are dropped.
test_1c = test_1c.query('direction == "sb"') 

In [41]:
# Attach the route position of each pair's origin and destination station.
test_1c = test_1c.assign(
    orig_seq=lambda df: df['orig'].apply(lambda stn: sb_1c[stn]),
    dest_seq=lambda df: df['dest'].apply(lambda stn: sb_1c[stn]),
)

In [46]:
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

In [47]:
sb_1c.keys()

dict_keys(['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN'])

In [95]:
def running_ridership(df, sequence_dict):
    """Repeat each OD pair once for every station departure its trip spans.

    For every station except the last one in ``sequence_dict``, selects the
    rows whose ``orig_seq`` is at or before that station and whose
    ``dest_seq`` is after it, and tags them with a ``departing_station``
    label of the form ``"<seq>_<station>"``.

    The sequence number is zero-padded to the width of the largest sequence
    value so that labels sort in route order even with 10+ stations
    (otherwise "10_X" sorts before "2_Y" on a chart axis). Routes with fewer
    than ten stations produce the same labels as without padding.

    Args:
        df: DataFrame with ``orig_seq`` and ``dest_seq`` columns.
        sequence_dict: mapping of station code -> position along the route.

    Returns:
        DataFrame with the columns of ``df`` plus ``departing_station``. It is
        empty when there is no station with a later station after it.
    """
    # Hoisted: the last position and the padding width do not change per station.
    last_seq = max(sequence_dict.values(), default=0)
    pad_width = len(str(last_seq))

    segments = []
    for stn, stn_seq in sequence_dict.items():
        if stn_seq >= last_seq:
            # Nothing departs onward from the final station.
            continue
        spans_segment = (df['orig_seq'] <= stn_seq) & (df['dest_seq'] > stn_seq)
        label = str(stn_seq).zfill(pad_width) + '_' + stn
        segments.append(df.loc[spans_segment].assign(departing_station=label))

    if not segments:
        # pd.concat raises on an empty list; return an empty frame with the expected column.
        return df.iloc[:0].assign(departing_station='')
    return pd.concat(segments)

In [128]:
running_ridership(test_1c, sb_1c)

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od,direction,orig_seq,dest_seq,departing_station
13466,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD
13499,Rt 1C,BFD,NHL,4,2025,296,3420.51,BFD->NHL,sb,0,1,0_BFD
13523,Rt 1C,BFD,SMN,4,2025,51,829.34,BFD->SMN,sb,0,5,0_BFD
13546,Rt 1C,BFD,VNC,4,2025,518,5618.8,BFD->VNC,sb,0,3,0_BFD
13569,Rt 1C,BFD,WES,4,2025,288,4948.75,BFD->WES,sb,0,4,0_BFD
13466,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,1_NHL
13523,Rt 1C,BFD,SMN,4,2025,51,829.34,BFD->SMN,sb,0,5,1_NHL
13546,Rt 1C,BFD,VNC,4,2025,518,5618.8,BFD->VNC,sb,0,3,1_NHL
13569,Rt 1C,BFD,WES,4,2025,288,4948.75,BFD->WES,sb,0,4,1_NHL
13712,Rt 1C,NHL,BUR,4,2025,2,13.0,NHL->BUR,sb,1,2,1_NHL


In [96]:
# One bar per departing station: summed ridership of the southbound Rt 1C OD pairs
# whose trip spans the segment leaving that station, coloured by OD pair.
alt.Chart(running_ridership(test_1c, sb_1c)).mark_bar().encode(
    x='departing_station',
    y='sum(ridership)',
    color='od'
)

### quick test route 19

In [97]:
# Quick-test subset: Rt 19 rows for a single month (April 2025).
test_19 = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 19"')

In [98]:
test_19

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od
11062,Rt 19,BFD,CLM,4,2025,187,3696.32,BFD->CLM
11093,Rt 19,BFD,LCA,4,2025,14,202.6,BFD->LCA
11117,Rt 19,BFD,ONA,4,2025,358,7389.17,BFD->ONA
11140,Rt 19,BFD,PAS,4,2025,206,3348.38,BFD->PAS
11163,Rt 19,BFD,RIV,4,2025,604,14117.81,BFD->RIV
11186,Rt 19,BFD,SNB,4,2025,368,9255.3,BFD->SNB
11209,Rt 19,CLM,BFD,4,2025,199,4002.72,CLM->BFD
11244,Rt 19,CLM,PAS,4,2025,15,82.0,CLM->PAS
11266,Rt 19,CLM,RIV,4,2025,7,41.0,CLM->RIV
11303,Rt 19,LCA,BFD,4,2025,11,159.0,LCA->BFD


In [99]:
# Rt 19 station codes in the order treated as southbound: determine_direction labels a
# pair 'sb' when the origin comes earlier in this list than the destination.
# NOTE(review): stop order is hand-entered — confirm against the published schedule.
sb_19 = ['BFD', 'LCA', 'PAS', 'CLM', 'ONA', 'RIV', 'SNB']

In [100]:
# Map each station code to its 0-based position along the route.
sb_19 = {stn: position for position, stn in enumerate(sb_19)}

In [101]:
sb_19

{'BFD': 0, 'LCA': 1, 'PAS': 2, 'CLM': 3, 'ONA': 4, 'RIV': 5, 'SNB': 6}

In [102]:
# Label every OD pair 'sb' / 'nb' / '' using the Rt 19 station order.
test_19 = test_19.assign(
    direction=lambda df: df.apply(determine_direction, axis=1, sequence_dict=sb_19)
)

In [103]:
# Keep southbound pairs only; rows labelled 'nb' or '' are dropped.
test_19 = test_19.query('direction == "sb"') 

In [104]:
# Attach the route position of each pair's origin and destination station.
test_19 = test_19.assign(
    orig_seq=lambda df: df['orig'].apply(lambda stn: sb_19[stn]),
    dest_seq=lambda df: df['dest'].apply(lambda stn: sb_19[stn]),
)

In [105]:
test_19

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od,direction,orig_seq,dest_seq
11062,Rt 19,BFD,CLM,4,2025,187,3696.32,BFD->CLM,sb,0,3
11093,Rt 19,BFD,LCA,4,2025,14,202.6,BFD->LCA,sb,0,1
11117,Rt 19,BFD,ONA,4,2025,358,7389.17,BFD->ONA,sb,0,4
11140,Rt 19,BFD,PAS,4,2025,206,3348.38,BFD->PAS,sb,0,2
11163,Rt 19,BFD,RIV,4,2025,604,14117.81,BFD->RIV,sb,0,5
11186,Rt 19,BFD,SNB,4,2025,368,9255.3,BFD->SNB,sb,0,6
11266,Rt 19,CLM,RIV,4,2025,7,41.0,CLM->RIV,sb,3,5
11427,Rt 19,ONA,RIV,4,2025,17,86.0,ONA->RIV,sb,4,5
11442,Rt 19,ONA,SNB,4,2025,1,5.0,ONA->SNB,sb,4,6
11487,Rt 19,PAS,CLM,4,2025,18,87.5,PAS->CLM,sb,2,3


In [106]:
# One bar per departing station: summed ridership of the southbound Rt 19 OD pairs
# whose trip spans the segment leaving that station, coloured by OD pair.
alt.Chart(running_ridership(test_19, sb_19)).mark_bar().encode(
    x='departing_station',
    y='sum(ridership)',
    color='od'
)

### ideas

* daily ridership? per trip?
* revenue hours? miles?

### quick test route 1a

In [107]:
# Quick-test subset: Rt 1A rows for a single month (April 2025).
test_1a = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 1A"')

In [118]:
# Rt 1A station codes in the order treated as southbound: determine_direction labels a
# pair 'sb' when the origin comes earlier in this list than the destination.
# The list has 15 entries, so positions run 0-14 (two-digit sequence numbers occur).
# NOTE(review): stop order is hand-entered — confirm against the published schedule.
sb_1a = ['FNO', 'HNF', 'BFD', 'NHL', 'BUR', 'GDL', 'LAX', 'FUL', 'ANA', 'SNA', 'IRV', 'SNC', 'OSD', 'SOL', 'SAN']

In [119]:
# Map each station code to its 0-based position along the route.
sb_1a = {stn: position for position, stn in enumerate(sb_1a)}

In [120]:
sb_1a

{'FNO': 0,
 'HNF': 1,
 'BFD': 2,
 'NHL': 3,
 'BUR': 4,
 'GDL': 5,
 'LAX': 6,
 'FUL': 7,
 'ANA': 8,
 'SNA': 9,
 'IRV': 10,
 'SNC': 11,
 'OSD': 12,
 'SOL': 13,
 'SAN': 14}

In [None]:
# NOTE(review): unexecuted cell showing the Rt 1C sequence inside the Rt 1A section —
# possibly meant to be sb_1a; confirm or remove.
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

In [None]:
# NOTE(review): unexecuted cell referring to Rt 1C inside the Rt 1A section — confirm or remove.
sb_1c.keys()

dict_keys(['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN'])

In [121]:
# Label every OD pair 'sb' / 'nb' / '' using the Rt 1A station order.
test_1a = test_1a.assign(
    direction=lambda df: df.apply(determine_direction, axis=1, sequence_dict=sb_1a)
)

In [122]:
# Keep southbound pairs only; rows labelled 'nb' or '' are dropped.
test_1a = test_1a.query('direction == "sb"') 

In [123]:
# Attach the route position of each pair's origin and destination station.
test_1a = test_1a.assign(
    orig_seq=lambda df: df['orig'].apply(lambda stn: sb_1a[stn]),
    dest_seq=lambda df: df['dest'].apply(lambda stn: sb_1a[stn]),
)

In [124]:
# NOTE(review): running_ridership is also defined in an earlier cell of this notebook;
# this definition shadows it. Keep a single definition once the notebook is tidied.
def running_ridership(df, sequence_dict):
    """Repeat each OD pair once for every station departure its trip spans.

    For every station except the last one in ``sequence_dict``, selects the
    rows whose ``orig_seq`` is at or before that station and whose
    ``dest_seq`` is after it, and tags them with a ``departing_station``
    label of the form ``"<seq>_<station>"``.

    The sequence number is zero-padded to the width of the largest sequence
    value so that labels sort in route order even with 10+ stations
    (otherwise "10_X" sorts before "2_Y" on a chart axis). Routes with fewer
    than ten stations produce the same labels as without padding.

    Args:
        df: DataFrame with ``orig_seq`` and ``dest_seq`` columns.
        sequence_dict: mapping of station code -> position along the route.

    Returns:
        DataFrame with the columns of ``df`` plus ``departing_station``. It is
        empty when there is no station with a later station after it.
    """
    # Hoisted: the last position and the padding width do not change per station.
    last_seq = max(sequence_dict.values(), default=0)
    pad_width = len(str(last_seq))

    segments = []
    for stn, stn_seq in sequence_dict.items():
        if stn_seq >= last_seq:
            # Nothing departs onward from the final station.
            continue
        spans_segment = (df['orig_seq'] <= stn_seq) & (df['dest_seq'] > stn_seq)
        label = str(stn_seq).zfill(pad_width) + '_' + stn
        segments.append(df.loc[spans_segment].assign(departing_station=label))

    if not segments:
        # pd.concat raises on an empty list; return an empty frame with the expected column.
        return df.iloc[:0].assign(departing_station='')
    return pd.concat(segments)

In [125]:
# One bar per departing station: summed ridership of the southbound Rt 1A OD pairs
# whose trip spans the segment leaving that station, coloured by OD pair.
alt.Chart(running_ridership(test_1a, sb_1a)).mark_bar().encode(
    x='departing_station',
    y='sum(ridership)',
    color='od'
)

In [None]:
# NOTE(review): unexecuted cell whose code is identical to the preceding Rt 1A chart cell —
# remove it or change it to the variation that was intended.
alt.Chart(running_ridership(test_1a, sb_1a)).mark_bar().encode(
    x='departing_station',
    y='sum(ridership)',
    color='od'
)