In [1]:
import pandas as pd 
import yaml
from shared_utils import portfolio_utils, schedule_rt_utils 
from segment_speed_utils import helpers
from calitp_data_analysis.sql import get_engine
# Engine handle; opened with `.connect()` further down for the NTD ridership SQL query.
db_engine = get_engine()
import gcsfs
from calitp_data_analysis import get_fs
# Filesystem handle; used below (`fs.ls`) to list objects under the GCS project folder.
fs = get_fs()
import geopandas as gpd
import numpy as np

In [2]:
# Notebook display settings: show up to 100 columns, never truncate rows or
# cell text, and render floats with two decimal places.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [3]:
# Root GCS folder for this analysis; sub-folder paths are built by appending to it.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/ahsc_grant/"

In [4]:
# List every object under the 2025 tool-data folder; the parquet files found
# here are read and concatenated in the cells that follow.
fs_list = fs.ls(GCS_FILE_PATH + "tool_data_2025/")

In [5]:
# Keep only the listed objects that can actually be read as parquet.
# NOTE(review): the first listing entry is skipped via [1:] — presumably the
# folder placeholder object; confirm against the bucket listing.
filelist = []
for f in fs_list[1:]:
    try:
        # Probe read: the frame itself is not kept, only whether it parses.
        test_pqt = pd.read_parquet(f"gs://{f}")
    except Exception as err:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the cell, and report the
        # cause together with the file name (not the list produced by split).
        print(f"error on {f.split('tool_data_2025/')[-1]}: {err!r}")
    else:
        filelist.append(f)

In [6]:
# Stack every readable parquet file into one DataFrame. Each file keeps its
# own row index (ignore_index is not set).
df = pd.concat([pd.read_parquet(f"gs://{path}") for path in filelist])

In [7]:
# Coefficient vectors, eight terms each, for three model variants.
# NOTE(review): the names suggest weekday / Saturday / Sunday; which predictor
# each position corresponds to is not shown here — confirm against the model
# specification before reordering or editing values.
wkd_coeff = np.array([
    -0.1610594, 0.0001214, -0.0000173, 0.0224169,
    -0.0152673, -0.0505976, -0.0423512, 0.0111763,
])
sat_coeff = np.array([
    -0.1424400, 0.0001344, -0.0000186, 0.0256008,
    -0.0169793, -0.0408743, -0.0419725, 0.0126354,
])
sun_coeff = np.array([
    -0.1082477, 0.0001477, -0.0000202, 0.0209053,
    -0.0145447, -0.0449611, -0.0502937, 0.0132250,
])

In [9]:
# Load the "UPT" sheet of the NTD adjusted-database workbook stored on GCS.
NTD_ridership1 = pd.read_excel(
    "gs://calitp-analytics-data/data-analyses/2021-Annual-Database-Files/September 2022 Adjusted Database.xlsx",
    sheet_name="UPT",
)

In [10]:
# Show the column labels of the Excel-sourced ridership table.
NTD_ridership1.columns

Index(['5 digit NTD ID', '4 digit NTD ID', 'Agency', 'Active', 'Reporter Type',
       'UZA', 'UZA Name', 'Modes', 'TOS', 'JAN02',
       ...
       'DEC21', 'JAN22', 'FEB22', 'MAR22', 'APR22', 'MAY22', 'JUN22', 'JUL22',
       'AUG22', 'SEP22'],
      dtype='object', length=258)

In [11]:
# Pull the monthly NTD ridership table (with adjustments) into pandas.
# The SQL has no WHERE clause, so every row of the table is returned; any
# filtering is done afterwards in pandas.
query = """
        SELECT
            key, ntd_id, agency, reporter_type, primary_uza_name, primary_uza_code, mode, tos, upt, period_year_month
        FROM 
            cal-itp-data-infra.mart_ntd.dim_monthly_ridership_with_adjustments
    """
with db_engine.connect() as connection:
    NTD_ridership = pd.read_sql(sql=query, con=connection)

In [12]:
# Preview the first five rows returned by the SQL query.
NTD_ridership.head(5)

Unnamed: 0,key,ntd_id,agency,reporter_type,primary_uza_name,primary_uza_code,mode,tos,upt,period_year_month
0,dabeb1fec1d35b2edd94ab5a3e09383f,20120,City of Glens Falls,Building Reporter,"Glens Falls, NY",33598,OR,DO,,2019-10
1,eb66d26c78c7e0f6700e744194d5b1e1,20120,City of Glens Falls,Building Reporter,"Glens Falls, NY",33598,OR,DO,,2022-07
2,df464618e4ddde3ae846bf1c6541dfc6,20120,City of Glens Falls,Building Reporter,"Glens Falls, NY",33598,OR,DO,,2009-12
3,11e7aa69617dfd98450394aa6c789a47,20120,City of Glens Falls,Building Reporter,"Glens Falls, NY",33598,OR,DO,,2017-04
4,0efd4f4862a89bfdf07470caa832dd37,20120,City of Glens Falls,Building Reporter,"Glens Falls, NY",33598,OR,DO,,2020-12


In [13]:
# Inspect column dtypes of the query result; the next cell uses the `.str`
# accessor on period_year_month and primary_uza_name.
NTD_ridership.dtypes

key                   object
ntd_id                object
agency                object
reporter_type         object
primary_uza_name      object
primary_uza_code      object
mode                  object
tos                   object
upt                  float64
period_year_month     object
dtype: object

In [18]:
# Subset to rows whose primary_uza_name ends with ", CA" and whose
# period_year_month starts with "2022". Nulls in either column count as
# non-matching (na=False).
# NOTE(review): a UZA name that does not end in exactly ", CA" (for example a
# multi-state label) is excluded — confirm that is intended.
NTD_ridership_ca_2022 = NTD_ridership.loc[
    NTD_ridership["primary_uza_name"].str.endswith(", CA", na=False)
    & NTD_ridership["period_year_month"].str.startswith("2022", na=False)
]

In [19]:
# Preview up to 20 rows of the California 2022 subset.
NTD_ridership_ca_2022.head(20)

Unnamed: 0,key,ntd_id,agency,reporter_type,primary_uza_name,primary_uza_code,mode,tos,upt,period_year_month
6772,ad9554807e57e5192eb1ce7464bfe5b7,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,333351.0,2022-10
6812,c72e29ba8ff0fa4da2d1efbce1acea07,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,324740.0,2022-11
6828,ab3f132c389e7e2643526e5968ac56b4,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,280812.0,2022-01
6843,390ed68bc28000613cdd9f74d0711e94,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,299272.0,2022-06
6844,c903dd83d96bb6fd294f6a6ee0486a8e,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,326195.0,2022-08
6868,ffaac36ddc84c726305bfd89edf8c95d,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,288582.0,2022-02
6869,9bfef8fe49d4c7dfaafa31d31bd3b251,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,316055.0,2022-04
6960,dbc2d81f929b86c697aec16f32d0c378,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,332782.0,2022-05
6989,7eb791db076eba6784dc602b2ae55089,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,299996.0,2022-07
6995,fd76e55aa72ac15322963650f43caf05,90014,Alameda-Contra Costa Transit District,Full Reporter,"San Francisco--Oakland, CA",78904,RB,DO,320060.0,2022-03
