In [1]:
pip install shared_utils

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

import branca
import folium
from shared_utils import gtfs_utils_v2

import pandas as pd
import geopandas as gpd 

import datetime as dt
import time



In [3]:
import sys
sys.path.append('../rt_segment_speeds/segment_speed_utils')

from helpers import *

In [4]:
# Scheduled stops loaded for "2022-06-01", with exact duplicate rows removed
# and the index renumbered from zero.
stops_data = (
    import_scheduled_stops("2022-06-01")
    .drop_duplicates()
    .reset_index(drop=True)
)

In [5]:
# Keep only the stops of a single feed. The same feed_key is assigned to the
# ridership table later in the notebook so the two can be joined.
metro_stops = stops_data[
    stops_data['feed_key'].eq('06d1f3ac2b0ae5e74424edbbfefa19ed')
]

In [6]:
# GCS prefix for this analysis. The trailing slash matters: the workbook's
# file name is appended directly to this prefix when it is read.
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/ahsc_grant/'

In [7]:
# Read the workbook stored under the GCS prefix; the file name is appended
# straight onto GCS_FILE_PATH, which already ends in a slash.
mar_metro_raw = pd.read_excel(
    GCS_FILE_PATH + 'rider_182_2022_03.xlsx'
)

In [8]:
# Drop the last two rows of the sheet (presumably non-data footer rows —
# TODO confirm against the workbook).
# .copy() yields an independent frame, so the column assignments made in the
# following cells do not write into a slice of the loaded frame
# (avoids pandas' SettingWithCopyWarning).
# NOTE: this cell rebinds mar_metro_raw, so re-running it on its own trims two
# more rows each time; re-run the load cell first.
mar_metro_raw = mar_metro_raw.iloc[:-2, :].copy()

In [9]:
# Normalise STOP_ID to a string: cast to int64 first, then to str.
# (Presumably the int64 step strips a float ".0" suffix so the ids line up
# with the string stop_id values of the scheduled stops — TODO confirm.)
mar_metro_raw['STOP_ID'] = (
    mar_metro_raw['STOP_ID']
    .astype('int64')
    .astype(str)
)

In [10]:
# Translate each DAY_TYPE code into the column name it becomes after the
# later pivot. A code missing from the mapping raises KeyError, which also
# happens if this cell is re-run on already-translated values.
day_cols = {
    'DX': 'weekday_ons',
    'SA': 'sat_ons',
    'SU': 'sun_ons',
}
mar_metro_raw['DAY_TYPE'] = mar_metro_raw['DAY_TYPE'].apply(
    lambda code: day_cols[code]
)

In [11]:
# Total boardings per stop and day type: one row per
# (STOP_ID, STOP_NAME, DAY_TYPE), with Total_Ons summed into stop_total_ons.
mar_metro_grouped = mar_metro_raw.groupby(
    ['STOP_ID', 'STOP_NAME', 'DAY_TYPE'],
    as_index=False,
).agg(stop_total_ons=('Total_Ons', 'sum'))

In [12]:
# Sanity check: row count, dtypes and non-null counts of the grouped totals.
mar_metro_grouped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35655 entries, 0 to 35654
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   STOP_ID         35655 non-null  object 
 1   STOP_NAME       35655 non-null  object 
 2   DAY_TYPE        35655 non-null  object 
 3   stop_total_ons  35655 non-null  float64
dtypes: float64(1), object(3)
memory usage: 1.1+ MB


In [13]:
# Multiplier applied to stop_total_ons to produce the yr_* figures.
# NOTE(review): presumably scales the single-month totals to an annual
# estimate; the derivation of this value is not recorded in the notebook —
# TODO document its source.
ANNUALIZATION_FACTOR = 11.91221463

# feed_key stamped on every ridership row. It must equal the key used to
# filter the scheduled stops, otherwise the join on ['feed_key', 'stop_id']
# matches nothing.
METRO_FEED_KEY = '06d1f3ac2b0ae5e74424edbbfefa19ed'

# Work on a copy so the grouped totals stay available unscaled.
yr_metro_grouped = mar_metro_grouped.copy()
yr_metro_grouped['stop_total_ons'] = (
    yr_metro_grouped['stop_total_ons'] * ANNUALIZATION_FACTOR
)

# Pivot DAY_TYPE values into separate columns (one row per stop id / name).
# pivot_table aggregates with the mean by default; each
# (STOP_ID, STOP_NAME, DAY_TYPE) combination is already unique after the
# groupby, so the values pass through unchanged. Day types a stop lacks
# come out as NaN.
yr_metro_grouped = yr_metro_grouped.pivot_table(
    index=['STOP_ID', 'STOP_NAME'],
    columns='DAY_TYPE',
    values='stop_total_ons',
).reset_index()

# Rename STOP_ID to stop_id so it matches the join key of the scheduled stops
yr_metro_grouped = yr_metro_grouped.rename(columns={'STOP_ID': 'stop_id'})

# Add fixed values for feed_key and name
yr_metro_grouped['feed_key'] = METRO_FEED_KEY
yr_metro_grouped['name'] = 'LA Metro Bus Schedule'


In [14]:
# Sanity check after the pivot: the non-null counts show how many stops have
# a value for each day-type column.
yr_metro_grouped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12159 entries, 0 to 12158
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stop_id      12159 non-null  object 
 1   STOP_NAME    12159 non-null  object 
 2   sat_ons      11749 non-null  float64
 3   sun_ons      11749 non-null  float64
 4   weekday_ons  12157 non-null  float64
 5   feed_key     12159 non-null  object 
 6   name         12159 non-null  object 
dtypes: float64(3), object(4)
memory usage: 665.1+ KB


In [15]:
# Narrow the scheduled stops to the join keys plus the name and geometry
# that should be carried onto the ridership rows.
stops_to_join = metro_stops[
    [
        'feed_key',
        'stop_id',
        'stop_name',
        'geometry',
    ]
]

In [16]:
# Inner join: keep only stops present in BOTH the scheduled stops and the
# ridership table, matched on feed_key + stop_id.
yr_metro_joined = pd.merge(
    left=stops_to_join,
    right=yr_metro_grouped,
    on=['feed_key', 'stop_id'],
    how='inner',
)

In [17]:
# Sanity check on the joined result: row count, dtypes and non-null counts.
yr_metro_joined.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 12151 entries, 0 to 12150
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   feed_key     12151 non-null  object  
 1   stop_id      12151 non-null  object  
 2   stop_name    12151 non-null  object  
 3   geometry     12151 non-null  geometry
 4   STOP_NAME    12151 non-null  object  
 5   sat_ons      11741 non-null  float64 
 6   sun_ons      11741 non-null  float64 
 7   weekday_ons  12151 non-null  float64 
 8   name         12151 non-null  object  
dtypes: float64(3), geometry(1), object(5)
memory usage: 949.3+ KB


In [18]:
# Left join with an indicator column: every scheduled stop is kept, and
# '_merge' records whether a ridership row was found for it.
stops_merged = pd.merge(
    left=stops_to_join,
    right=yr_metro_grouped,
    how='left',
    on=['feed_key', 'stop_id'],
    indicator=True,
)

In [19]:
# Scheduled stops that found no ridership row ('left_only'), with the
# helper indicator column removed.
stops_remainder = (
    stops_merged
    .loc[lambda merged: merged['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

In [20]:
# Number of scheduled-stop rows without ridership data.
stops_remainder.shape[0]

5

In [21]:
# Distinct name / id pairs of the scheduled stops that have no ridership.
(
    stops_remainder[['stop_name', 'stop_id']]
    .drop_duplicates()
)

Unnamed: 0,stop_name,stop_id
2643,Dodger Stadium Express - Union Station West Po...,2155
4787,Dodger Express Loading Zone #3 (Harbor Gateway),63500003
4788,Dodger Express Loading Zone #2 (Union Station),63500004
7273,Dodger Express Loading Zone #1 (Union Station),63500001
11985,Cesar E Chavez / Broadway,63500002


In [22]:
# Right join with an indicator column: every ridership row is kept, and
# '_merge' records whether a scheduled stop was found for it.
yr_metro_merged = pd.merge(
    left=stops_to_join,
    right=yr_metro_grouped,
    how='right',
    on=['feed_key', 'stop_id'],
    indicator=True,
)

In [23]:
# Ridership rows that found no scheduled stop ('right_only'), with the
# helper indicator column removed; the cell displays how many there are.
yr_metro_remainder = (
    yr_metro_merged
    .loc[lambda merged: merged['_merge'] == 'right_only']
    .drop(columns=['_merge'])
)
yr_metro_remainder.shape[0]

8

In [24]:
# Show the ridership rows that found no matching scheduled stop; stop_name
# and geometry are empty because they come from the stops side of the merge.
yr_metro_remainder

Unnamed: 0,feed_key,stop_id,stop_name,geometry,STOP_NAME,sat_ons,sun_ons,weekday_ons,name
6324,06d1f3ac2b0ae5e74424edbbfefa19ed,20400011,,,71ST / CRENSHAW,845.767239,774.293951,,LA Metro Bus Schedule
6819,06d1f3ac2b0ae5e74424edbbfefa19ed,2506,,,DO NOT ANNOUNCE THIS STOP!,0.0,0.0,47.648859,LA Metro Bus Schedule
6820,06d1f3ac2b0ae5e74424edbbfefa19ed,2507,,,DO NOT ANNOUNCE THIS STOP!,0.0,11.912215,11.912215,LA Metro Bus Schedule
7022,06d1f3ac2b0ae5e74424edbbfefa19ed,2741,,,DO NOT ANNOUNCE THIS STOP!,47.648859,35.736644,238.244293,LA Metro Bus Schedule
7275,06d1f3ac2b0ae5e74424edbbfefa19ed,3034,,,DO NOT ANNOUNCE THIS STOP!,0.0,0.0,23.824429,LA Metro Bus Schedule
7486,06d1f3ac2b0ae5e74424edbbfefa19ed,3258,,,DO NOT ANNOUNCE THIS STOP!,59.561073,0.0,262.068722,LA Metro Bus Schedule
7674,06d1f3ac2b0ae5e74424edbbfefa19ed,3513,,,ROSCOE / TOPANGA CANYON,571.786302,238.244293,,LA Metro Bus Schedule
7706,06d1f3ac2b0ae5e74424edbbfefa19ed,3559,,,DO NOT ANNOUNCE THIS STOP!,47.648859,0.0,23.824429,LA Metro Bus Schedule


In [18]:
# Write the inner-joined stops + ridership table to the analysis GCS prefix.
# GCS_FILE_PATH is intentionally not rebound in this cell: redefining it
# without its trailing slash would break the workbook-loading cell on any
# later re-run, because that cell appends the file name directly to the
# prefix. rstrip('/') keeps the output path correct whether or not the
# prefix ends in a slash.
yr_metro_joined.to_parquet(
    f"{GCS_FILE_PATH.rstrip('/')}/ridership_metro_08_26_2024.parquet"
)