# fct_monthly_routes

In [20]:
import datetime
import sys

import _operator_grain_route_dir_visuals as _report_operator_visuals
import _sql_query
import google.auth
import pandas as pd
from google.cloud import bigquery
from IPython.display import HTML, Image, Markdown, display, display_html
from loguru import logger
from omegaconf import OmegaConf
from shared_utils import gtfs_utils_v2, portfolio_utils, publish_utils, rt_dates
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SEGMENT_GCS

readable_dict = OmegaConf.load("readable2.yml")

import altair as alt
import geopandas as gpd
from shapely import wkt

In [21]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [22]:
analysis_name = "City and County of San Francisco"

In [23]:
df = _sql_query.download_with_pandas_gbq(
    project="cal-itp-data-infra-staging",
    filename="tiffany_mart_gtfs_rollup.fct_monthly_routes",
)


        SELECT 
            *
        FROM `cal-itp-data-infra-staging`.`tiffany_mart_gtfs_rollup`.`fct_monthly_routes`
        WHERE month_first_day >=  DATE('2025-01-01')
    
Downloading: 100%|[32m██████████[0m|
download time: 0:02:23.418838


In [24]:
df = df.pipe(portfolio_utils.standardize_portfolio_organization_names)

In [25]:
df.columns

Index(['name', 'year', 'month', 'month_first_day', 'route_name',
       'direction_id', 'shape_id', 'shape_array_key', 'n_trips', 'pt_array',
       'analysis_name', 'source_record_id'],
      dtype='object')

In [31]:
df.route_name.unique()

array(['AS Grass Valley - Alta Sierra', 'Route 4', '181 Magnolia Ave',
       ..., '20 UCSC via Main Gate - Delaware',
       'CART Cudahy Area Rapid Transit - Daily Service',
       'Greyhound US0422 New York - State College - Cleveland'],
      dtype=object)

In [35]:
df = df.set_geometry(gpd.GeoSeries.from_wkt(df.pt_array))

TypeError: Expected bytes or string, got numpy.ndarray

In [26]:
route_typologies_df = _sql_query.download_with_pandas_gbq(
    project="cal-itp-data-infra-staging",
    filename="tiffany_mart_gtfs_rollup.fct_monthly_schedule_rt_route_direction_summary",
)


        SELECT 
            *
        FROM `cal-itp-data-infra-staging`.`tiffany_mart_gtfs_rollup`.`fct_monthly_schedule_rt_route_direction_summary`
        WHERE month_first_day >=  DATE('2025-01-01')
    
Downloading: 100%|[32m██████████[0m|
download time: 0:00:08.136121


In [27]:
route_typologies_df.sample()

Unnamed: 0,name,month_first_day,month,year,day_type,route_name,direction_id,route_type,route_color,route_typology,daily_trips_all_day,daily_stop_arrivals_all_day,daily_distinct_stops_all_day,frequency_all_day,daily_service_hours,daily_flex_service_hours,daily_trips_owl,daily_trips_early_am,daily_trips_am_peak,daily_trips_midday,daily_trips_pm_peak,daily_trips_evening,daily_trips_peak,daily_trips_offpeak,frequency_owl,frequency_early_am,frequency_am_peak,frequency_midday,frequency_pm_peak,frequency_evening,frequency_peak,frequency_offpeak,schedule_base64_url,tu_name,vp_name,schedule_name,tu_base64_url,vp_base64_url,tu_num_distinct_updates,daily_tu_num_distinct_updates,daily_tu_num_skipped_stops,daily_tu_num_canceled_stops,daily_tu_num_added_stops,daily_tu_num_scheduled_stops,n_tu_trips,daily_tu_trips,vp_num_distinct_updates,daily_vp_num_distinct_updates,n_vp_trips,daily_vp_trips,n_rt_trips,n_rt_days
22248,Bay Area 511 Muni Schedule,2025-10-01,10,2025,Weekday,1__1 CALIFORNIA,1,3,005B95,bus,188.65,7316.65,7316.65,7.86,109.13,,2.04,16.87,35.26,58.7,53.87,21.91,89.13,99.52,0.51,5.62,11.75,11.74,10.77,5.48,11.14,6.22,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,Bay Area 511 Muni TripUpdates,,Bay Area 511 Muni Schedule,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cGRhdGVzP2FnZW5jeT1TRg==,,1383,1383.0,0.0,0.0,0.0,0.0,9,9.0,,,0,0.0,9,1


In [34]:
route_typologies_df.route_name.unique()

array(['020__20 FSL Route 20', '040__40 Salinas - Rancho Cielo',
       '084__84 King City - Paso Robles', ..., 'SF:14__14 MISSION',
       'Guad Route 41__Route 41 - Guadalupe Local',
       '8f305689-4315-445e-abea-920dbbf0be5e__Toy Story Line 20 Toy Story Line'],
      dtype=object)

In [33]:
df.route_name.unique()

array(['AS Grass Valley - Alta Sierra', 'Route 4', '181 Magnolia Ave',
       ..., '20 UCSC via Main Gate - Delaware',
       'CART Cudahy Area Rapid Transit - Daily Service',
       'Greyhound US0422 New York - State College - Cleveland'],
      dtype=object)

In [28]:
route_typologies_df2 = route_typologies_df[
    [
        "name",
        "month_first_day",
        "route_name",
        "route_type",
        "route_color",
        "route_typology",
    ]
]

In [29]:
m1 = pd.merge(
    df,
    route_typologies_df2,
    on=[
        "name",
        "month_first_day",
        "route_name",
    ],
    how="outer",
    indicator=True,
)

In [30]:
m1._merge.value_counts()

right_only    22276
left_only     12015
both              0
Name: _merge, dtype: int64