In [1]:
pip install shared_utils

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
# Set before shared_utils is imported below.
# NOTE(review): presumably read by the Cal-ITP tooling as the BigQuery
# bytes-scanned cap (800 GB here) - confirm against the calitp docs.
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

import branca
import folium
from shared_utils import gtfs_utils_v2


import pandas as pd
import geopandas as gpd 

import datetime as dt
import time
import re
import google.auth
import os
import gcsfs
# Default Google credentials (used for the upload token in export_gdf) and a
# GCS filesystem handle used to copy the exported parquet file to the bucket.
credentials, project = google.auth.default()
fs = gcsfs.GCSFileSystem()



In [3]:
import sys
# Make the segment_speed_utils helpers importable from this notebook's location.
sys.path.append('../rt_segment_speeds/segment_speed_utils')

# The star import supplies import_scheduled_trips and import_scheduled_stops,
# which are the helper names called later in this notebook.
from helpers import *

In [4]:
# Analysis inputs: the service date passed to the scheduled trips/stops
# loaders, and the text used to look up the operator's feed_key by trip name.
analysis_date = "2022-06-01"
agency_name = "Salinas"
In [5]:
# Scheduled trips for the analysis date, limited to the identifying columns
# and returned as a pandas DataFrame (get_pandas=True).
get_trips = import_scheduled_trips(
    analysis_date=analysis_date,
    columns=[
        "gtfs_dataset_key",
        "feed_key",
        "name",
        "trip_id",
        "shape_id",
        "shape_array_key",
        "route_id",
        "route_key",
        "direction_id",
    ],
    get_pandas=True,
)

In [6]:
def compute_feed_key(agency_name, trips=None):
    """Return the ``feed_key`` of the first trip row whose ``name`` contains ``agency_name``.

    Args:
        agency_name: Text to look for inside the ``name`` column. It is matched
            as a literal, case-sensitive substring, so characters such as
            ``(`` or ``.`` in an agency name are not interpreted as regex syntax.
        trips: Optional DataFrame with ``name`` and ``feed_key`` columns.
            Defaults to the notebook-level ``get_trips`` table.

    Returns:
        The ``feed_key`` value of the first matching row, or ``None`` when no
        row matches. Rows whose ``name`` is missing never match.
    """
    if trips is None:
        trips = get_trips

    # regex=False: an agency name is plain text, not a pattern.
    # na=False: rows without a name are treated as non-matching.
    is_match = trips['name'].str.contains(agency_name, regex=False, na=False)
    matching_keys = trips.loc[is_match, 'feed_key']

    return None if matching_keys.empty else matching_keys.iloc[0]

In [7]:
# feed_key of the first trip whose name contains agency_name; None if no trip matches.
feed_key = compute_feed_key(agency_name)

In [8]:
# Scheduled stops for the analysis date, de-duplicated and re-indexed.
stops_data = (
    import_scheduled_stops(analysis_date)
    .drop_duplicates()
    .reset_index(drop=True)
)

# Keep only the selected operator's feed; when no feed_key was found the
# table is left unfiltered.
if feed_key is not None:
    stops_data = stops_data.loc[stops_data['feed_key'].isin([feed_key])]

In [9]:
# Inspect the available stop columns before selecting the join fields.
stops_data.columns

Index(['feed_key', 'service_date', 'feed_timezone',
       'first_stop_arrival_datetime_pacific',
       'last_stop_departure_datetime_pacific', 'stop_id', 'stop_key',
       'stop_name', 'stop_event_count', 'route_type_0', 'route_type_1',
       'route_type_2', 'route_type_3', 'route_type_4', 'route_type_5',
       'route_type_6', 'route_type_7', 'route_type_11', 'route_type_12',
       'missing_route_type', 'geometry'],
      dtype='object')

In [10]:
# Bucket folder for this analysis (note the trailing slash) and the raw
# stop-level boardings spreadsheet stored in it.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/ahsc_grant/"
yr_mst_raw = pd.read_excel(
    GCS_FILE_PATH + "MST Stop Level Data 9.1.21-8.31.22.xlsx"
)

In [11]:
# Preview the raw table: one row per stop and Schedule type (Weekday/Saturday/Sunday).
yr_mst_raw

Unnamed: 0,Stop_ID,Stop_Name,Latitude,Longitude,Yearly_Boardings,Schedule
0,2,DELMONTECENTER/GATE1,36.584641,-121.897424,9287,Weekday
1,2,DELMONTECENTER/GATE1,36.584644,-121.897400,2240,Saturday
2,2,DELMONTECENTER/GATE1,36.584670,-121.897378,1740,Sunday
3,3,DELMONTECENTER/GATE2,36.584786,-121.897339,1757,Weekday
4,3,DELMONTECENTER/GATE2,36.584781,-121.897337,616,Saturday
...,...,...,...,...,...,...
2747,9304,MARINATRANSITEXCHANGEGATE4,36.683744,-121.794475,1288,Saturday
2748,9304,MARINATRANSITEXCHANGEGATE4,36.683754,-121.794503,1044,Sunday
2749,9305,MARINATRANSITEXCHANGEGATE5,36.683734,-121.794164,251,Weekday
2750,9305,MARINATRANSITEXCHANGEGATE5,36.683892,-121.794205,616,Saturday


In [12]:
# Check dtypes and null counts of the raw ridership table.
yr_mst_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2752 entries, 0 to 2751
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Stop_ID           2752 non-null   int64  
 1   Stop_Name         2752 non-null   object 
 2   Latitude          2752 non-null   float64
 3   Longitude         2752 non-null   float64
 4   Yearly_Boardings  2752 non-null   int64  
 5   Schedule          2752 non-null   object 
dtypes: float64(2), int64(2), object(2)
memory usage: 129.1+ KB


In [13]:
# One row per (Stop_ID, Stop_Name) pair, with the id stored as a string under
# the key name 'stop_id'.
# NOTE(review): mst_stopnames is rebuilt from scratch in In [16] before it is
# used, so this version appears to be superseded.
mst_stopnames = (
    yr_mst_raw
    .drop_duplicates(subset=['Stop_ID', 'Stop_Name'])
    .assign(Stop_ID=lambda names: names['Stop_ID'].astype('int64').astype(str))
    .rename(columns={'Stop_ID': 'stop_id'})
)

In [14]:
# Schedule label in the spreadsheet -> ridership column name used downstream.
day_cols = dict(Weekday='weekday_ons', Saturday='sat_ons', Sunday='sun_ons')

# Updates yr_mst_raw in place: string stop ids plus a day-type label per row.
yr_mst_raw['Stop_ID'] = yr_mst_raw['Stop_ID'].astype('int64').astype('str')
yr_mst_raw['DAY_TYPE'] = yr_mst_raw['Schedule'].map(day_cols)

# Total yearly boardings per stop and day type (long format).
yr_mst_grouped = yr_mst_raw.groupby(['Stop_ID', 'DAY_TYPE'], as_index=False).agg(
    stop_total_ons=pd.NamedAgg(column='Yearly_Boardings', aggfunc='sum')
)



In [15]:
# Long -> wide: one row per stop, one column per day type
# (sat_ons / sun_ons / weekday_ons), with the key renamed to 'stop_id'.
yr_mst_grouped_wide = (
    yr_mst_grouped
    .set_index(['Stop_ID', 'DAY_TYPE'])['stop_total_ons']
    .unstack('DAY_TYPE')
    .reset_index()
    .rename(columns={'Stop_ID': 'stop_id'})
)

In [16]:
# Stop-name lookup taken from the raw data: the first Stop_Name seen for each
# Stop_ID, with columns renamed to the join-key names ('stop_id', 'stop_name').
mst_stopnames = (
    yr_mst_raw
    .loc[:, ['Stop_ID', 'Stop_Name']]
    .drop_duplicates(subset=['Stop_ID'])
    .assign(Stop_ID=lambda names: names['Stop_ID'].astype(str))
    .rename(columns={'Stop_ID': 'stop_id', 'Stop_Name': 'stop_name'})
)

# Attach the stop name to the wide ridership table (left join keeps every stop).
yr_mst_grouped_wide = pd.merge(
    yr_mst_grouped_wide,
    mst_stopnames[['stop_id', 'stop_name']],
    how='left',
    on='stop_id',
)



In [17]:
# Tag every ridership row with the operator's feed_key (second join key used
# later) and a constant schedule name.
yr_mst_grouped_wide = yr_mst_grouped_wide.assign(
    feed_key=feed_key,
    name='Monterey Salinas Schedule',
)

In [18]:
# Number of distinct stops in the ridership table.
len(yr_mst_grouped_wide)

974

In [19]:
# Spot-check the wide ridership table.
yr_mst_grouped_wide.head(20)

Unnamed: 0,stop_id,sat_ons,sun_ons,weekday_ons,stop_name,feed_key,name
0,1001,112.0,58.0,753.0,FREMONT/HILBYAVENUE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
1,1004,672.0,754.0,8534.0,FREMONT/TRINITYAVENUE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
2,1007,112.0,58.0,753.0,FREMONT/ELMAVENUE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
3,1010,224.0,232.0,1757.0,FREMONT/BROADWAY,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
4,1016,112.0,116.0,1004.0,FREMONT/ECHO,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
5,1022,952.0,812.0,11797.0,ORDGROVE/NOCHEBUENA,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
6,1025,56.0,116.0,1255.0,ORDGROVE/WARING,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
7,1028,56.0,174.0,502.0,ORDGROVE/LUZERN,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
8,1031,56.0,58.0,2259.0,ORDGROVE/MENDOCINO,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule
9,1034,56.0,58.0,4267.0,YOSEMITE/LASALLE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule


In [20]:
# Write a CSV snapshot to the working directory.
# NOTE(review): this runs before stop_id is zero-padded in In [22], so the
# CSV holds the unpadded ids - confirm that is intended.
yr_mst_grouped_wide.to_csv('yr_mst_grouped_wide.csv', index=False)

In [21]:
# Only the join keys, the GTFS stop name and the point geometry are needed
# from the scheduled stops table.
stops_to_join = stops_data[
    ['feed_key', 'stop_id', 'stop_name', 'geometry']
]

In [22]:
# Left-pad ridership stop ids with zeros to 4 characters before joining them
# to the GTFS stop_id values.
yr_mst_grouped_wide = yr_mst_grouped_wide.assign(
    stop_id=lambda riders: riders['stop_id'].astype(str).str.zfill(4)
)

In [23]:
# Match GTFS stops to ridership rows on feed_key + stop_id; the inner join
# keeps only stops present in both tables.
yr_mst_joined = stops_to_join.merge(
    yr_mst_grouped_wide,
    how='inner',
    on=['feed_key', 'stop_id'],
)

In [24]:
# Spot-check the join: stop_name_x comes from the GTFS stops, stop_name_y from the ridership file.
yr_mst_joined.head(5)

Unnamed: 0,feed_key,stop_id,stop_name_x,geometry,sat_ons,sun_ons,weekday_ons,stop_name_y,name
0,118c3a62eab691ac449fe0c1c7505413,2,Del Monte Center / Gate 1,POINT (-169532.243 -157455.258),2240.0,1740.0,9287.0,DELMONTECENTER/GATE1,Monterey Salinas Schedule
1,118c3a62eab691ac449fe0c1c7505413,3,Del Monte Center / Gate 2,POINT (-169524.827 -157441.956),616.0,348.0,1757.0,DELMONTECENTER/GATE2,Monterey Salinas Schedule
2,118c3a62eab691ac449fe0c1c7505413,4,Del Monte Center / Gate 3,POINT (-169673.036 -157470.002),56.0,58.0,502.0,DELMONTECENTER/GATE3,Monterey Salinas Schedule
3,118c3a62eab691ac449fe0c1c7505413,6,6th / Mission Street,POINT (-171687.391 -160632.627),4312.0,4234.0,21837.0,6TH/MISSIONSTREET,Monterey Salinas Schedule
4,118c3a62eab691ac449fe0c1c7505413,11,Northridge Mall,POINT (-147895.267 -143297.134),9128.0,13224.0,40662.0,NORTHRIDGEMALL,Monterey Salinas Schedule


In [25]:
# Number of stops matched on feed_key + stop_id.
len(yr_mst_joined)

926

In [26]:
# Keep the GTFS stop name (stop_name_x) as 'stop_name' and drop the ridership
# file's copy (stop_name_y). Renaming before selecting makes the cell safe to
# re-run: rename() ignores a missing 'stop_name_x', whereas selecting
# 'stop_name_x' a second time would raise a KeyError.
yr_mst_joined = (
    yr_mst_joined
    .rename(columns={'stop_name_x': 'stop_name'})
    [['feed_key', 'stop_id', 'stop_name', 'geometry', 'sat_ons', 'sun_ons', 'weekday_ons', 'name']]
)

In [27]:
# Anti-join: GTFS stops with no ridership row for the same feed_key + stop_id.
stops_remainder = (
    stops_to_join
    .merge(
        yr_mst_grouped_wide[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

len(stops_remainder)



22

In [28]:
# Names of the GTFS stops that found no ridership match.
stops_remainder['stop_name']

188                      Ardennes / Aachen Rd
189            Ardennes / 214 Ardennes Circle
190                Ardennes / Remagen (Fitch)
191                     Ardennes / Tunisia Rd
192                  Ardennes / 2775 Ardennes
193                           Ardennes / Metz
194                      Ardennes / Hatten Rd
469            San Miguel Canyon Rd /  # 1469
470    San Miguel Canyon Rd /  Woodland Hills
492             San Miguel Canyon Rd /  #1335
493             San Miguel Canyon Rd /  #1347
669                        Malmedy / Carentan
670                        Normandy / Malmedy
671                       Normandy / Carentan
672                        Normandy / Salerno
899                     Kit Carson / Bldg 614
900                     Kit Carson / Bldg 422
901                     Fitch / Stilwell -POM
907               CPL Ewing / Presidio Museum
908             Pvt Bolio Rd / Pvt Bolio Gate
909                    Pvt Bolio / Lighthouse
910                   Pvt Bolio Rd

In [29]:
# Anti-join in the other direction: ridership rows with no GTFS stop for the
# same feed_key + stop_id.
yr_mst_remainder = (
    yr_mst_grouped_wide
    .merge(
        stops_to_join[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

len(yr_mst_remainder)


48

In [30]:
# Names of the ridership stops that found no GTFS match.
yr_mst_remainder['stop_name']

22      INTERGARRISON/OTTERSPORTSCT
40      SCHILLING/COUNTYGOVERNMENTC
88        DELMONTE/CONFERENCECENTER
90              CANNERYROW/PRESCOTT
91         OLIVIER/FISHERMAN'SWHARF
92                FRANKLIN/ALVARADO
93                ALVARADO/FRANKLIN
94                 CANNERYROW/DRAKE
95              CANNERYROW/AQUARIUM
96                 CANNERYROW/DRAKE
97                     REESIDE/FOAM
98            WAVE/CR1PARKINGGARAGE
373           LAUREL/AMERICANLEGION
374                 LAUREL/PARKSIDE
386                   LAUREL/MARYAL
388                   LAUREL/MARYAL
391                  LAUREL/BALDWIN
422                    LAUREL/TYLER
423                    LAUREL/DAVIS
426                    LAUREL/TYLER
427                    LAUREL/DAVIS
428                 LAUREL/PARKSIDE
429                     LAUREL/MAIN
430              LAUREL/SANTATERESA
433                  LAUREL/GRANADA
443                  LAUREL/LINWOOD
444                 LAUREL/TAPADERO
446                   LAUREL

In [31]:
# Try to pair the unmatched GTFS stops with the unmatched ridership rows.
# NOTE(review): both inputs are the leftovers of a join on these same keys,
# so this inner join cannot produce rows (the .info() output shows 0 entries).
# If a fallback match was intended, it needs a different key - confirm.
yr_mst_joined_zero = stops_remainder.merge(
    yr_mst_remainder,
    how='inner',
    on=['feed_key', 'stop_id'],
)

yr_mst_joined_zero.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 0 entries
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   feed_key     0 non-null      object  
 1   stop_id      0 non-null      object  
 2   stop_name_x  0 non-null      object  
 3   geometry     0 non-null      geometry
 4   sat_ons      0 non-null      float64 
 5   sun_ons      0 non-null      float64 
 6   weekday_ons  0 non-null      float64 
 7   stop_name_y  0 non-null      object  
 8   name         0 non-null      object  
dtypes: float64(3), geometry(1), object(5)
memory usage: 108.0+ bytes


In [32]:
# Unmatched GTFS stops that also have no counterpart among the unmatched
# ridership rows.
stops_leftovers = (
    stops_remainder
    .merge(
        yr_mst_remainder[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

stops_leftovers


Unnamed: 0,feed_key,stop_id,stop_name,geometry
0,118c3a62eab691ac449fe0c1c7505413,1200,Ardennes / Aachen Rd,POINT (-161583.473 -152082.519)
1,118c3a62eab691ac449fe0c1c7505413,1201,Ardennes / 214 Ardennes Circle,POINT (-161333.901 -152104.620)
2,118c3a62eab691ac449fe0c1c7505413,1202,Ardennes / Remagen (Fitch),POINT (-161134.036 -152218.476)
3,118c3a62eab691ac449fe0c1c7505413,1204,Ardennes / Tunisia Rd,POINT (-160847.861 -152467.362)
4,118c3a62eab691ac449fe0c1c7505413,1206,Ardennes / 2775 Ardennes,POINT (-161076.437 -152693.118)
5,118c3a62eab691ac449fe0c1c7505413,1208,Ardennes / Metz,POINT (-161527.050 -152565.923)
6,118c3a62eab691ac449fe0c1c7505413,1210,Ardennes / Hatten Rd,POINT (-161757.659 -152527.616)
7,118c3a62eab691ac449fe0c1c7505413,2933,San Miguel Canyon Rd / # 1469,POINT (-150146.190 -129409.199)
8,118c3a62eab691ac449fe0c1c7505413,2935,San Miguel Canyon Rd / Woodland Hills,POINT (-150465.744 -128852.457)
9,118c3a62eab691ac449fe0c1c7505413,2977,San Miguel Canyon Rd / #1335,POINT (-150441.815 -129039.087)


In [33]:
# Unmatched ridership rows that also have no counterpart among the unmatched
# GTFS stops.
riders_leftovers = (
    yr_mst_remainder
    .merge(
        stops_remainder[['feed_key', 'stop_id']],
        how='left',
        on=['feed_key', 'stop_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

In [34]:
# Re-attach the ridership file's stop name to the leftover ridership rows.
# riders_leftovers carries stop ids zero-padded to 4 characters (padded in
# In [22]), while mst_stopnames still holds the unpadded strings, so the
# lookup key is padded the same way here; otherwise ids shorter than
# 4 characters would never match and their name would come back as NaN.
riders_leftovers_joined = riders_leftovers.merge(
    mst_stopnames[['stop_id', 'stop_name']].assign(
        stop_id=lambda names: names['stop_id'].astype(str).str.zfill(4)
    ),
    how='left',
    on='stop_id',
)

riders_leftovers_joined


Unnamed: 0,stop_id,sat_ons,sun_ons,weekday_ons,stop_name_x,feed_key,name,stop_name_y
0,1101,0.0,0.0,251.0,INTERGARRISON/OTTERSPORTSCT,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,INTERGARRISON/OTTERSPORTSCT
1,1394,0.0,,0.0,SCHILLING/COUNTYGOVERNMENTC,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,SCHILLING/COUNTYGOVERNMENTC
2,1601,8680.0,8062.0,23343.0,DELMONTE/CONFERENCECENTER,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,DELMONTE/CONFERENCECENTER
3,1603,22624.0,22562.0,74296.0,CANNERYROW/PRESCOTT,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,CANNERYROW/PRESCOTT
4,1604,19936.0,17922.0,33634.0,OLIVIER/FISHERMAN'SWHARF,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,OLIVIER/FISHERMAN'SWHARF
5,1611,1288.0,1856.0,5020.0,FRANKLIN/ALVARADO,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,FRANKLIN/ALVARADO
6,1612,168.0,58.0,502.0,ALVARADO/FRANKLIN,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,ALVARADO/FRANKLIN
7,1614,4480.0,4640.0,20582.0,CANNERYROW/DRAKE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,CANNERYROW/DRAKE
8,1616,28952.0,31204.0,74045.0,CANNERYROW/AQUARIUM,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,CANNERYROW/AQUARIUM
9,1619,2408.0,2726.0,6526.0,CANNERYROW/DRAKE,118c3a62eab691ac449fe0c1c7505413,Monterey Salinas Schedule,CANNERYROW/DRAKE


In [35]:
# Stack the fallback matches on top of the stop_id matches.
# NOTE(review): yr_mst_joined_zero has no rows (see its .info() output), so it
# only contributes its stop_name_x / stop_name_y columns, which are all NaN in
# the result shown below.
gdfs = [yr_mst_joined_zero,yr_mst_joined]

yr_mst_geo_all = pd.concat(gdfs, ignore_index=True)

yr_mst_geo_all

Unnamed: 0,feed_key,stop_id,stop_name_x,geometry,sat_ons,sun_ons,weekday_ons,stop_name_y,name,stop_name
0,118c3a62eab691ac449fe0c1c7505413,0002,,POINT (-169532.243 -157455.258),2240.0,1740.0,9287.0,,Monterey Salinas Schedule,Del Monte Center / Gate 1
1,118c3a62eab691ac449fe0c1c7505413,0003,,POINT (-169524.827 -157441.956),616.0,348.0,1757.0,,Monterey Salinas Schedule,Del Monte Center / Gate 2
2,118c3a62eab691ac449fe0c1c7505413,0004,,POINT (-169673.036 -157470.002),56.0,58.0,502.0,,Monterey Salinas Schedule,Del Monte Center / Gate 3
3,118c3a62eab691ac449fe0c1c7505413,0006,,POINT (-171687.391 -160632.627),4312.0,4234.0,21837.0,,Monterey Salinas Schedule,6th / Mission Street
4,118c3a62eab691ac449fe0c1c7505413,0011,,POINT (-147895.267 -143297.134),9128.0,13224.0,40662.0,,Monterey Salinas Schedule,Northridge Mall
...,...,...,...,...,...,...,...,...,...,...
921,118c3a62eab691ac449fe0c1c7505413,9209,,POINT (-147845.975 -147669.935),1960.0,1914.0,13554.0,,Monterey Salinas Schedule,Salinas Transit Center / Gate 9
922,118c3a62eab691ac449fe0c1c7505413,9301,,POINT (-160096.179 -146574.140),952.0,754.0,7781.0,,Monterey Salinas Schedule,Marina Transit Exchange Gate 1
923,118c3a62eab691ac449fe0c1c7505413,9302,,POINT (-160104.587 -146589.210),896.0,812.0,7279.0,,Monterey Salinas Schedule,Marina Transit Exchange Gate 2
924,118c3a62eab691ac449fe0c1c7505413,9303,,POINT (-160116.937 -146605.095),2184.0,1798.0,7530.0,,Monterey Salinas Schedule,Marina Transit Exchange Gate 3


In [36]:
# Treat a missing boardings count as zero for each day-type column.
values = dict.fromkeys(["sat_ons", "sun_ons", "weekday_ons"], 0)
yr_mst_geo_all = yr_mst_geo_all.fillna(values)

In [37]:
# Stops whose weekday boardings are exactly zero (including filled-in NaNs).
mst_0check = yr_mst_geo_all.loc[yr_mst_geo_all['weekday_ons'].eq(0)]

In [38]:
# Map the zero-weekday-boardings stops, colored by weekday_ons, with a legend.
mst_0check.explore("weekday_ons", legend=True)

In [39]:
def export_gdf(gdf, filename: str):
    """Write ``gdf`` to parquet and upload it to the ``GCS_FILE_PATH`` folder.

    The frame is first written to ``{filename}.parquet`` in the working
    directory, copied to ``{GCS_FILE_PATH}/{filename}.parquet`` through the
    notebook-level ``fs`` filesystem, and the local copy is then deleted.

    Args:
        gdf: Object exposing ``to_parquet(path)`` (here a GeoDataFrame).
        filename: Base file name without the ``.parquet`` extension.

    Returns:
        None. Prints the destination path after a successful upload.

    Raises:
        Any exception raised by ``to_parquet`` or ``fs.put`` propagates; the
        local temporary file is removed even when the upload fails.
    """
    local_path = f"{filename}.parquet"
    # GCS_FILE_PATH is defined both with and without a trailing slash in this
    # notebook; strip it so the object path never contains a doubled "/".
    remote_path = f"{GCS_FILE_PATH.rstrip('/')}/{filename}.parquet"

    gdf.to_parquet(local_path)
    try:
        # NOTE(review): the token keyword is forwarded as in the original
        # call - confirm gcsfs actually uses it for this operation.
        fs.put(local_path, remote_path, token=credentials.token)
    finally:
        # Never leave the temporary parquet file behind, even on a failed upload.
        os.remove(local_path)

    print(f"saved {remote_path}")

    return

In [40]:
# export_gdf joins GCS_FILE_PATH and the file name with "/", so the folder is
# redefined here without the trailing slash used when reading the spreadsheet.
GCS_FILE_PATH  = 'gs://calitp-analytics-data/data-analyses/ahsc_grant'
# Upload the stop_id-matched ridership table (yr_mst_joined) as parquet.
export_gdf(yr_mst_joined, "ridership_mst_08_26_2024")

saved gs://calitp-analytics-data/data-analyses/ahsc_grant/ridership_mst_08_26_2024.parquet
