In [1]:
pip install shared_utils

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
# Set before the shared_utils import below so the value is already in the
# environment when that package loads.
# NOTE(review): presumably a byte cap for BigQuery queries (800 billion) — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

import branca
import folium
from shared_utils import gtfs_utils_v2


import pandas as pd
import geopandas as gpd 

import datetime as dt
import time


In [None]:
import sys
# Make the sibling segment_speed_utils folder importable so `helpers` resolves.
sys.path.append('../rt_segment_speeds/segment_speed_utils')

# NOTE(review): the wildcard hides which names come from helpers. The cells
# below call import_scheduled_trips and import_scheduled_stops, neither of
# which is defined anywhere else in this notebook.
from helpers import *

In [None]:
# Analysis parameters: the service date passed to the schedule loaders, and
# the text used to locate the agency's feed in the trips table.
analysis_date, agency_name = "2022-03-15", "Salinas"

In [None]:
# Scheduled trips for the analysis date, restricted to the listed identifier
# columns (feed, trip, shape, route and direction keys plus the feed name).
get_trips = import_scheduled_trips(
    analysis_date=analysis_date,
    get_pandas=True,
    columns=[
        "gtfs_dataset_key",
        "feed_key",
        "name",
        "trip_id",
        "shape_id",
        "shape_array_key",
        "route_id",
        "route_key",
        "direction_id",
    ],
)

In [None]:
def compute_feed_key(agency_name, trips=None):
    """Return the feed_key of the first trips row whose name contains agency_name.

    Parameters
    ----------
    agency_name : str
        Text to look for in the ``name`` column. It is matched literally and
        case-sensitively, so characters such as ``.`` or ``(`` in an agency
        name are not interpreted as regular-expression syntax.
    trips : pandas.DataFrame, optional
        Table with ``name`` and ``feed_key`` columns. Defaults to the
        notebook-level ``get_trips`` frame, so existing one-argument calls
        keep working.

    Returns
    -------
    The ``feed_key`` value of the first matching row, or ``None`` when no
    name contains ``agency_name``. If rows from several feeds match, the
    first row in table order wins.
    """
    if trips is None:
        trips = get_trips

    # na=False treats rows with a missing name as non-matching;
    # regex=False makes this a plain substring test.
    name_hits = trips['name'].str.contains(agency_name, na=False, regex=False)
    matching_keys = trips.loc[name_hits, 'feed_key']

    if matching_keys.empty:
        return None
    return matching_keys.iloc[0]

In [None]:
# Resolve the agency's feed_key from the trips table; this is None when no
# feed name contains agency_name, in which case the stop filter below is skipped.
feed_key = compute_feed_key(agency_name)

In [None]:
# Scheduled stops for the analysis date, de-duplicated and re-indexed.
stops_data = (
    import_scheduled_stops(analysis_date)
    .drop_duplicates()
    .reset_index(drop=True)
)

# Narrow to the selected agency's feed when one was found; otherwise keep all stops.
if feed_key is not None:
    stops_data = stops_data.loc[stops_data['feed_key'].isin([feed_key])]

In [None]:
# GCS folder holding the AHSC grant inputs (note the trailing slash).
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/ahsc_grant/'

# Stop-level ridership workbook, read straight from the bucket.
yr_mst_raw = pd.read_excel(GCS_FILE_PATH + 'MST Stop Level Data 9.1.21-8.31.22.xlsx')

In [None]:
# Preview the raw stop-level ridership workbook.
yr_mst_raw

In [None]:
# Column names, dtypes and non-null counts of the raw workbook.
yr_mst_raw.info()

In [None]:
# Stop-name lookup: one row per distinct (Stop_ID, Stop_Name) pair, with the id
# converted to a digit string and renamed to match the stop_id join key.
mst_stopnames = (
    yr_mst_raw
    .drop_duplicates(subset=['Stop_ID', 'Stop_Name'])
    .assign(Stop_ID=lambda names: names['Stop_ID'].astype('int64').astype(str))
    .rename(columns={'Stop_ID': 'stop_id'})
)

In [None]:
# day_cols = {'Weekday': 'weekday_ons', 'Saturday': 'sat_ons', 'Sunday': 'sun_ons'}

# yr_mst_grouped = (yr_mst_raw
#                  >> mutate(Stop_ID = _.Stop_ID.astype('int64').astype(str))
#                  >> mutate(DAY_TYPE = _.Schedule.apply(lambda x: day_cols[x]))
#                  >> group_by(_.Stop_ID, _.DAY_TYPE)
#                  >> summarize(stop_total_ons = _.Yearly_Boardings.sum())
#                 )

In [None]:
# Schedule label in the workbook -> name of the ridership column it becomes.
day_cols = {'Weekday': 'weekday_ons', 'Saturday': 'sat_ons', 'Sunday': 'sun_ons'}

# Convert the stop id to a digit string and tag each row with its day-type column.
# NOTE(review): Schedule values missing from day_cols map to NaN, and rows with
# a NaN DAY_TYPE are dropped by the groupby below.
yr_mst_raw = yr_mst_raw.assign(
    Stop_ID=yr_mst_raw['Stop_ID'].astype('int64').astype('str'),
    DAY_TYPE=yr_mst_raw['Schedule'].map(day_cols),
)

# Total boardings per stop and day type (long format).
yr_mst_grouped = (
    yr_mst_raw
    .groupby(['Stop_ID', 'DAY_TYPE'], as_index=False)['Yearly_Boardings']
    .sum()
    .rename(columns={'Yearly_Boardings': 'stop_total_ons'})
)



In [None]:
# First rows of the long table: one row per (Stop_ID, DAY_TYPE) with summed boardings.
yr_mst_grouped.head(3)

In [None]:
# Reshape to one row per stop with one boardings column per day type, then
# stamp every row with the agency's feed_key and a fixed feed name so the
# table can be joined to the scheduled stops.
yr_mst_grouped_wide = (
    yr_mst_grouped
    .pivot(index='Stop_ID', columns='DAY_TYPE', values='stop_total_ons')
    .reset_index()
    .rename(columns={'Stop_ID': 'stop_id'})
    .assign(feed_key=feed_key, name='Monterey Salinas Schedule')
)

In [None]:
# First rows of the wide table: one row per stop_id, one column per day type.
yr_mst_grouped_wide.head(3)

In [None]:
# Number of stops present in the ridership data (one row per stop_id after the pivot).
len(yr_mst_grouped_wide)

In [None]:
# Keep only the stop attributes needed for matching and mapping.
stops_to_join = stops_data[['feed_key', 'stop_id', 'stop_name', 'geometry']]

# First pass: keep stops whose (feed_key, stop_id) appears in both tables as-is.
yr_mst_joined = stops_to_join.merge(
    yr_mst_grouped_wide,
    how='inner',
    on=['feed_key', 'stop_id'],
)


In [None]:
# Spot-check the stops matched on the unpadded stop_id.
yr_mst_joined.head(3)

In [None]:
# Anti-join: scheduled stops with no ridership row under the same
# (feed_key, stop_id). The merge indicator marks them as 'left_only'.
stops_remainder = (
    stops_to_join
    .merge(
        yr_mst_grouped_wide[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

# How many scheduled stops are still unmatched.
len(stops_remainder)



In [None]:
# Anti-join in the other direction: ridership rows whose (feed_key, stop_id)
# has no counterpart among the scheduled stops.
yr_mst_remainder = (
    yr_mst_grouped_wide
    .merge(
        stops_to_join[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

# How many ridership rows are still unmatched.
len(yr_mst_remainder)


In [None]:
# Left-pad the still-unmatched ridership stop ids to four characters so the
# retry join can match zero-padded ids in the stops table.
# NOTE(review): presumably the scheduled stop ids keep leading zeros that the
# integer ids in the workbook lost — confirm, along with the width of 4.
# The vectorised .str.zfill replaces the per-row lambda and passes missing
# values through instead of raising. Rebinding via assign avoids writing into
# a frame that was produced by filtering another one.
yr_mst_remainder = yr_mst_remainder.assign(
    stop_id=yr_mst_remainder['stop_id'].str.zfill(4)
)

In [None]:
# Second pass: retry the match between the leftover stops and the leftover
# ridership rows, now that the ridership stop ids are zero-padded.
yr_mst_joined_zero = stops_remainder.merge(
    yr_mst_remainder,
    how='inner',
    on=['feed_key', 'stop_id'],
)

# Row count and dtypes of the second-pass matches.
yr_mst_joined_zero.info()

In [None]:
# Scheduled stops that have no ridership row even after zero-padding.
stops_leftovers = (
    stops_remainder
    .merge(
        yr_mst_remainder[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

stops_leftovers


In [None]:
# Ridership rows that match no scheduled stop even after zero-padding.
riders_leftovers = (
    yr_mst_remainder
    .merge(
        stops_remainder[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

In [None]:
# Columns available on the still-unmatched ridership rows.
riders_leftovers.columns

In [None]:
# Columns available on the stop-name lookup.
mst_stopnames.columns

In [None]:
# NOTE(review): repeats the riders_leftovers.columns check from two cells
# earlier — candidate for removal.
riders_leftovers.columns

In [None]:
# Attach the workbook's stop names to the ridership rows that never matched a
# scheduled stop. riders_leftovers carries the four-character zero-padded
# stop_id, while mst_stopnames holds the unpadded digits, so the lookup key is
# padded the same way here. Without this, every id shorter than four digits
# would come back with a missing Stop_Name.
stop_name_lookup = mst_stopnames[['stop_id', 'Stop_Name']].assign(
    stop_id=lambda names: names['stop_id'].str.zfill(4)
)

riders_leftovers_joined = riders_leftovers.merge(
    stop_name_lookup,
    how='left',
    on='stop_id',
)

riders_leftovers_joined


In [None]:
# set tables together
gdfs = [yr_mst_joined_zero,yr_mst_joined]

yr_mst_geo_all = pd.concat(gdfs, ignore_index=True)

yr_mst_geo_all

In [None]:
# fill nan with 0
values = {"sat_ons": 0, "sun_ons": 0, "weekday_ons": 0}
yr_mst_geo_all = yr_mst_geo_all.fillna(value=values)

In [None]:
# Stops whose weekday boardings total is exactly zero, for a sanity check.
mst_0check = yr_mst_geo_all.loc[yr_mst_geo_all['weekday_ons'].eq(0)]

In [50]:
# Map the zero-weekday-boardings stops for a visual check. Every row here has
# weekday_ons == 0, so the colour scale itself carries no information.
mst_0check.explore("weekday_ons", legend=True)

In [31]:
# Export the combined, zero-filled stop table (both the direct matches and the
# zero-padded matches), not only the first-pass yr_mst_joined frame.
# The folder is derived from the GCS_FILE_PATH defined when the workbook was
# loaded rather than rebinding that constant without its trailing slash, which
# would break the read cell if it were re-run afterwards.
yr_mst_geo_all.to_parquet(
    f"{GCS_FILE_PATH.rstrip('/')}/ridership_mst_08_26_2024.parquet"
)