In [1]:
import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2
from shared_utils.rt_utils import show_full_df

import utils
import altair as alt

In [2]:
# Cloud Storage (gs://) location of the Excel workbook that is loaded with
# pd.read_excel into `source_ridership`.
path = 'gs://calitp-analytics-data/data-analyses/thruway_intercity_bus/source_data/25.09.08CABusODPairRidershipFFY24-FFY25TD.xlsx'

In [27]:
# Load the workbook and add an "ORIG->DEST" label column (`od`) for each row.
source_ridership = (
    pd.read_excel(path)
    .assign(od=lambda trips: trips['orig'] + '->' + trips['dest'])
)

In [28]:
source_ridership

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od
0,Rt 03,ARN,BKY,12,2023,1,26.33,ARN->BKY
1,Rt 03,ARN,BKY,1,2024,2,39.00,ARN->BKY
2,Rt 03,ARN,BKY,2,2024,7,141.59,ARN->BKY
3,Rt 03,ARN,BKY,3,2024,7,165.14,ARN->BKY
4,Rt 03,ARN,BKY,4,2024,2,41.92,ARN->BKY
...,...,...,...,...,...,...,...,...
20475,Rt 99,TRU,SFC,5,2024,2,79.66,TRU->SFC
20476,Rt 99,TRU,SFC,6,2024,1,39.83,TRU->SFC
20477,Rt 99,USF,RLP,6,2025,0,3241.10,USF->RLP
20478,Rt 99,USF,RLP,7,2025,0,2495.00,USF->RLP


In [6]:
# Total ridership and revenue per (route, OD pair), summed over every
# month/year row present in source_ridership.
all_years_group = source_ridership.groupby(['ca_bus_route', 'od'])[['ridership', 'revenue']].sum()

In [7]:
all_years_group

Unnamed: 0_level_0,Unnamed: 1_level_0,ridership,revenue
ca_bus_route,od,Unnamed: 2_level_1,Unnamed: 3_level_1
Rt 03,ARN->BKY,19,413.98
Rt 03,ARN->DAV,1,10.00
Rt 03,ARN->HAY,0,200.70
Rt 03,ARN->MTZ,5,155.90
Rt 03,ARN->OAC,1,19.50
...,...,...,...
Rt 99,SUI->SAC,1,10.70
Rt 99,SUI->SFC,347,4572.57
Rt 99,TRN->VOU,0,324.00
Rt 99,TRU->SFC,5,199.15


In [8]:
source_ridership.ca_bus_route.unique()

array(['Rt 03', 'Rt 06', 'Rt 07', 'Rt 10', 'Rt 15', 'Rt 17', 'Rt 18',
       'Rt 19', 'Rt 1A', 'Rt 1B', 'Rt 1C', 'Rt 20', 'Rt 20 - B', 'Rt 21',
       'Rt 35', 'Rt 39', 'Rt 3R', 'Rt 56', 'Rt 68', 'Rt 99'], dtype=object)

In [9]:
def determine_direction(row, sequence_dict):
    """Classify an origin-destination pair by its order in a stop sequence.

    Args:
        row: object exposing ``orig`` and ``dest`` attributes (for example a
            row handed over by ``DataFrame.apply(..., axis=1)``).
        sequence_dict: mapping of station code -> position in the sequence.

    Returns:
        'sb' when the origin's position is lower than the destination's,
        'nb' when it is higher, and '' when neither comparison holds.

    Raises:
        KeyError: if either station code is missing from ``sequence_dict``.
    """
    orig_pos = sequence_dict[row.orig]
    dest_pos = sequence_dict[row.dest]
    if orig_pos < dest_pos:
        return 'sb'
    if orig_pos > dest_pos:
        return 'nb'
    return ''

In [10]:
def running_ridership(df, sequence_dict):
    """Expand OD rows into the rows that are on board when leaving each station.

    For every station whose sequence number is below the highest one in
    ``sequence_dict``, selects the rows of ``df`` with
    ``orig_seq <= station seq < dest_seq`` and tags them with a
    ``departing_station`` label of the form ``"<seq>_<station>"``.

    Args:
        df: DataFrame with comparable ``orig_seq`` and ``dest_seq`` columns.
        sequence_dict: mapping of station code -> position in the sequence.

    Returns:
        DataFrame: the per-station selections concatenated in the iteration
        order of ``sequence_dict``; original index labels are kept, so they
        repeat when a row is on board across several stations. When there is
        no station to depart from (empty or single-position sequence), an
        empty frame with the ``departing_station`` column is returned instead
        of letting ``pd.concat`` fail on an empty list.
    """
    # Hoisted out of the loop: the last position never changes between stations.
    last_seq = max(sequence_dict.values(), default=None)

    running_df = [
        # Boolean masks instead of DataFrame.query, which would need to
        # resolve the @-local variable from inside this comprehension.
        df[(df['orig_seq'] <= stn_seq) & (df['dest_seq'] > stn_seq)]
        .assign(departing_station=str(stn_seq) + '_' + stn)
        for stn, stn_seq in sequence_dict.items()
        if stn_seq < last_seq
    ]
    if not running_df:
        return df.iloc[:0].assign(departing_station='')
    return pd.concat(running_df)

### quick test route 1c

In [11]:
# Route 1C station codes, listed in the order that determine_direction
# treats as 'sb' (earlier entry -> later entry).
sb_1c = ['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN']

In [12]:
# Turn the ordered station list into a {station: position} lookup.
sb_1c = {stn: seq for seq, stn in enumerate(sb_1c)}

In [13]:
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

In [14]:
# Sample used for this quick test: Route 1C rows for April 2025 only.
test_1c = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 1C"')

In [15]:
# Label every OD pair with its direction relative to the sb_1c stop order.
test_1c = test_1c.assign(
    direction=lambda trips: trips.apply(determine_direction, axis=1, sequence_dict=sb_1c)
)

In [16]:
# Keep only the pairs labelled 'sb'; 'nb' pairs and '' (equal positions) are dropped.
test_1c = test_1c.query('direction == "sb"')

In [17]:
# Add the numeric sequence position of each pair's origin and destination,
# which running_ridership filters on.
test_1c = test_1c.assign(
    orig_seq=test_1c['orig'].apply(sb_1c.__getitem__),
    dest_seq=test_1c['dest'].apply(sb_1c.__getitem__),
)

In [18]:
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

In [19]:
sb_1c.keys()

dict_keys(['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN'])

In [20]:
# Scratch data: pair the '0_BFD' label with each integer distance 0..49.
test = [('0_BFD', dist) for dist in range(50)]

In [21]:
# Scratch frame built from `test` (a single station label only). Note that the
# name `df` is rebound further down by pd.concat(dfs).
df = pd.DataFrame(test, columns=['departing_station', 'distance'])

In [22]:
# departing_station label -> (start, end) of the distance span that follows it.
# The spans are expanded with range(start, end), so `end` is exclusive.
# NOTE(review): values are hard-coded and the distance unit is not stated — confirm.
distance_dict = {'0_BFD': (0, 50), '1_NHL': (50, 75), '2_BUR': (75, 82), '3_VNC': (82, 97), '4_WES': (97, 102)}

In [23]:
# One small frame per station: a row for every integer distance in its span.
dfs = []
for stn, (start, end) in distance_dict.items():
    dist_range = range(start, end)
    enum_dist = [(stn, dist) for dist in dist_range]
    dfs.append(pd.DataFrame(enum_dist, columns=['departing_station', 'distance']))

In [24]:
# Stack the per-station frames into one station/distance lookup table.
# Each piece keeps its own 0-based index, so index labels repeat across stations.
df = pd.concat(dfs)

In [25]:
df

Unnamed: 0,departing_station,distance
0,0_BFD,0
1,0_BFD,1
2,0_BFD,2
3,0_BFD,3
4,0_BFD,4
...,...,...
0,4_WES,97
1,4_WES,98
2,4_WES,99
3,4_WES,100


In [26]:
# Repeat each running-ridership row once per distance value of its
# departing_station. DataFrame.merge defaults to an inner join, so rows whose
# departing_station has no entry in `df` are dropped.
with_distance = running_ridership(test_1c, sb_1c).merge(df, on = 'departing_station')

In [152]:
with_distance

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od,direction,orig_seq,dest_seq,departing_station,distance
0,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD,0
1,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD,1
2,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD,2
3,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD,3
4,Rt 1C,BFD,BUR,4,2025,231,3278.95,BFD->BUR,sb,0,2,0_BFD,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
641,Rt 1C,VNC,SMN,4,2025,21,113.65,VNC->SMN,sb,3,5,4_WES,97
642,Rt 1C,VNC,SMN,4,2025,21,113.65,VNC->SMN,sb,3,5,4_WES,98
643,Rt 1C,VNC,SMN,4,2025,21,113.65,VNC->SMN,sb,3,5,4_WES,99
644,Rt 1C,VNC,SMN,4,2025,21,113.65,VNC->SMN,sb,3,5,4_WES,100


In [149]:
# Shared base chart: distance on a quantitative x axis.
base = alt.Chart(with_distance).encode(x='distance:Q')

# Area chart of summed ridership at each distance, split by OD pair.
flow = base.mark_area().encode(
    y='sum(ridership):Q',
    color=alt.Color('od'),
    tooltip=['departing_station', 'od', 'ridership'],
)

In [151]:
flow

In [112]:
# Smallest distance value per departing_station, i.e. where its span starts.
stn_locations = (
    with_distance
    .groupby('departing_station')
    .agg(distance=('distance', 'min'))
    .reset_index()
)

In [113]:
stn_locations

Unnamed: 0,departing_station,distance
0,0_BFD,0
1,1_NHL,50
2,2_BUR,75
3,3_VNC,82
4,4_WES,97


In [96]:
# Text-mark chart with departing_station on a nominal x axis, sorted by the
# `distance` field in ascending order. Only the x channel is encoded here
# (no text or y encoding), and the X2 line below is left disabled.
left = alt.Chart(with_distance).mark_text().encode(
    alt.X('departing_station:N', sort=alt.EncodingSortField('distance', order="ascending"), title=None),
    # alt.X2('distance:Q', title=None)
)

In [97]:
left

### quick test route 19

In [22]:
# Sample used for this quick test: Route 19 rows for April 2025 only.
test_19 = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 19"')

In [23]:
test_19

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od
11062,Rt 19,BFD,CLM,4,2025,187,3696.32,BFD->CLM
11093,Rt 19,BFD,LCA,4,2025,14,202.6,BFD->LCA
11117,Rt 19,BFD,ONA,4,2025,358,7389.17,BFD->ONA
11140,Rt 19,BFD,PAS,4,2025,206,3348.38,BFD->PAS
11163,Rt 19,BFD,RIV,4,2025,604,14117.81,BFD->RIV
11186,Rt 19,BFD,SNB,4,2025,368,9255.3,BFD->SNB
11209,Rt 19,CLM,BFD,4,2025,199,4002.72,CLM->BFD
11244,Rt 19,CLM,PAS,4,2025,15,82.0,CLM->PAS
11266,Rt 19,CLM,RIV,4,2025,7,41.0,CLM->RIV
11303,Rt 19,LCA,BFD,4,2025,11,159.0,LCA->BFD


In [24]:
# Route 19 station codes, listed in the order that determine_direction
# treats as 'sb' (earlier entry -> later entry).
sb_19 = ['BFD', 'LCA', 'PAS', 'CLM', 'ONA', 'RIV', 'SNB']

In [25]:
# Turn the ordered station list into a {station: position} lookup.
sb_19 = {stn: seq for seq, stn in enumerate(sb_19)}

In [26]:
sb_19

{'BFD': 0, 'LCA': 1, 'PAS': 2, 'CLM': 3, 'ONA': 4, 'RIV': 5, 'SNB': 6}

In [27]:
# Label every OD pair with its direction relative to the sb_19 stop order.
test_19 = test_19.assign(
    direction=lambda trips: trips.apply(determine_direction, axis=1, sequence_dict=sb_19)
)

In [28]:
# Keep only the pairs labelled 'sb'; 'nb' pairs and '' (equal positions) are dropped.
test_19 = test_19.query('direction == "sb"')

In [29]:
# Add the numeric sequence position of each pair's origin and destination,
# which running_ridership filters on.
test_19 = test_19.assign(
    orig_seq=test_19['orig'].apply(sb_19.__getitem__),
    dest_seq=test_19['dest'].apply(sb_19.__getitem__),
)

In [30]:
test_19

Unnamed: 0,ca_bus_route,orig,dest,trip_month,trip_year,ridership,revenue,od,direction,orig_seq,dest_seq
11062,Rt 19,BFD,CLM,4,2025,187,3696.32,BFD->CLM,sb,0,3
11093,Rt 19,BFD,LCA,4,2025,14,202.6,BFD->LCA,sb,0,1
11117,Rt 19,BFD,ONA,4,2025,358,7389.17,BFD->ONA,sb,0,4
11140,Rt 19,BFD,PAS,4,2025,206,3348.38,BFD->PAS,sb,0,2
11163,Rt 19,BFD,RIV,4,2025,604,14117.81,BFD->RIV,sb,0,5
11186,Rt 19,BFD,SNB,4,2025,368,9255.3,BFD->SNB,sb,0,6
11266,Rt 19,CLM,RIV,4,2025,7,41.0,CLM->RIV,sb,3,5
11427,Rt 19,ONA,RIV,4,2025,17,86.0,ONA->RIV,sb,4,5
11442,Rt 19,ONA,SNB,4,2025,1,5.0,ONA->SNB,sb,4,6
11487,Rt 19,PAS,CLM,4,2025,18,87.5,PAS->CLM,sb,2,3


In [31]:
# Stacked bars: summed ridership leaving each Route 19 station, coloured by OD pair.
alt.Chart(running_ridership(test_19, sb_19)).mark_bar().encode(
    alt.X('departing_station'),
    alt.Y('sum(ridership)'),
    alt.Color('od'),
)

### ideas

* daily ridership? per trip?
* revenue hours? miles?
* 

### quick test route 1a

In [32]:
# Sample used for this quick test: Route 1A rows for April 2025 only.
test_1a = source_ridership.query('trip_year == 2025 & trip_month == 4 & ca_bus_route == "Rt 1A"')

In [33]:
# Route 1A station codes, listed in the order that determine_direction
# treats as 'sb' (earlier entry -> later entry).
sb_1a = ['FNO', 'HNF', 'BFD', 'NHL', 'BUR', 'GDL', 'LAX', 'FUL', 'ANA', 'SNA', 'IRV', 'SNC', 'OSD', 'SOL', 'SAN']

In [34]:
# Turn the ordered station list into a {station: position} lookup.
sb_1a = {stn: seq for seq, stn in enumerate(sb_1a)}

In [35]:
sb_1a

{'FNO': 0,
 'HNF': 1,
 'BFD': 2,
 'NHL': 3,
 'BUR': 4,
 'GDL': 5,
 'LAX': 6,
 'FUL': 7,
 'ANA': 8,
 'SNA': 9,
 'IRV': 10,
 'SNC': 11,
 'OSD': 12,
 'SOL': 13,
 'SAN': 14}

In [36]:
sb_1c

{'BFD': 0, 'NHL': 1, 'BUR': 2, 'VNC': 3, 'WES': 4, 'SMN': 5}

In [37]:
sb_1c.keys()

dict_keys(['BFD', 'NHL', 'BUR', 'VNC', 'WES', 'SMN'])

In [38]:
# Label every OD pair with its direction relative to the sb_1a stop order.
test_1a = test_1a.assign(
    direction=lambda trips: trips.apply(determine_direction, axis=1, sequence_dict=sb_1a)
)

In [39]:
# Keep only the pairs labelled 'sb'; 'nb' pairs and '' (equal positions) are dropped.
test_1a = test_1a.query('direction == "sb"')

In [40]:
# Add the numeric sequence position of each pair's origin and destination,
# which running_ridership filters on.
test_1a = test_1a.assign(
    orig_seq=test_1a['orig'].apply(sb_1a.__getitem__),
    dest_seq=test_1a['dest'].apply(sb_1a.__getitem__),
)

In [42]:
# Stacked bars: summed ridership leaving each Route 1A station, coloured by OD pair.
# The departing_station labels are strings ("<seq>_<station>"), and the default
# nominal sort is alphabetical, which would place '10_IRV'..'13_SOL' between
# '0_FNO' and '1_HNF'. Pass the labels in route order so bars follow the stops.
alt.Chart(running_ridership(test_1a, sb_1a)).mark_bar().encode(
    x=alt.X(
        'departing_station:N',
        sort=[str(seq) + '_' + stn for stn, seq in sb_1a.items()],
    ),
    y='sum(ridership)',
    color='od',
)

In [43]:
# Stacked bars: summed ridership leaving each Route 1A station, coloured by OD pair.
# The departing_station labels are strings ("<seq>_<station>"), and the default
# nominal sort is alphabetical, which would place '10_IRV'..'13_SOL' between
# '0_FNO' and '1_HNF'. Pass the labels in route order so bars follow the stops.
alt.Chart(running_ridership(test_1a, sb_1a)).mark_bar().encode(
    x=alt.X(
        'departing_station:N',
        sort=[str(seq) + '_' + stn for stn, seq in sb_1a.items()],
    ),
    y='sum(ridership)',
    color='od',
)