# fct_monthly_routes

In [1]:
import datetime
import sys

import _operator_grain_route_dir_visuals as _report_operator_visuals
import _sql_query
import google.auth
import pandas as pd
from google.cloud import bigquery
from IPython.display import HTML, Image, Markdown, display, display_html
from loguru import logger
from omegaconf import OmegaConf
from shared_utils import gtfs_utils_v2, portfolio_utils, publish_utils, rt_dates
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SEGMENT_GCS
from calitp_data_analysis import geography_utils

readable_dict = OmegaConf.load("readable2.yml")

import altair as alt
import geopandas as gpd
import shapely
from typing import Literal, Union

ModuleNotFoundError: No module named 'segment_speed_utils'

In [None]:
# Notebook display settings: show up to 100 columns, do not cap the number of
# rows or the width of a cell, and render floats with two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [None]:
# Organization to focus on; compared against df.analysis_name further down.
analysis_name = "City and County of San Francisco"

In [None]:
# Request the fct_monthly_routes table (dataset tiffany_mart_gtfs_rollup) from
# the staging project through the local _sql_query helper.
df = _sql_query.download_with_pandas_gbq(
    project="cal-itp-data-infra-staging",
    filename="tiffany_mart_gtfs_rollup.fct_monthly_routes",
)

In [None]:
# List the columns that came back.
df.columns

In [None]:
# Look at 3 random values of pt_array, the column later handed to
# convert_to_gdf.
df[["pt_array"]].sample(3)

In [None]:
def convert_to_gdf(
    df: pd.DataFrame,
    geom_col: str,
    geom_type: Literal["point", "line"]
) -> gpd.GeoDataFrame:
    """
    Turn a column of raw geometry values into a GeoDataFrame in EPSG:4326.

    For stops, we want to make pt_geom a point.
    For vp_path and shapes, we want to make pt_array a linestring.

    Args:
        df: table that contains `geom_col`. NOTE: a "geometry" column is
            assigned onto this frame in place, so the caller's object gains
            that column too (a later cell drops "geometry" from df).
        geom_col: column to convert; it is dropped from the returned
            GeoDataFrame.
        geom_type: "point" parses every value as WKT with shapely.wkt.loads;
            "line" passes every value to geography_utils.make_linestring.

    Returns:
        GeoDataFrame whose active geometry column is "geometry", without
        `geom_col`.

    Raises:
        ValueError: if `geom_type` is neither "point" nor "line".
    """
    # Validate before touching df: previously an unsupported value skipped both
    # branches and either failed later with a KeyError on "geometry" or
    # silently reused a "geometry" column left over from an earlier call.
    if geom_type not in ("point", "line"):
        raise ValueError(
            f"geom_type must be 'point' or 'line', got {geom_type!r}"
        )

    if geom_type == "point":
        df["geometry"] = [shapely.wkt.loads(x) for x in df[geom_col]]
    else:
        df["geometry"] = df[geom_col].apply(geography_utils.make_linestring)

    gdf = gpd.GeoDataFrame(
        df.drop(columns=geom_col),
        geometry="geometry",
        crs="EPSG:4326",
    )

    return gdf

In [None]:
# convert_to_gdf's docstring pairs pt_array with linestrings (pt_geom is the
# point column), so request "line" here instead of WKT point parsing.
# The variable name is kept so any later reference to point_df still resolves.
point_df = convert_to_gdf(
    df,
    "pt_array",
    "line",
)

In [None]:
# Re-check the columns: convert_to_gdf assigns "geometry" onto df in place.
df.columns

In [None]:
# Pass df through portfolio_utils.standardize_portfolio_organization_names.
# Presumably this supplies/normalizes analysis_name used by the next filter
# -- TODO confirm against portfolio_utils.
df = df.pipe(portfolio_utils.standardize_portfolio_organization_names)

In [None]:
# Keep only the rows for the organization stored in analysis_name.
sf_only = df.loc[df.analysis_name == analysis_name]

In [None]:
# Narrow further to rows whose month_first_day matches 2025-10-01.
sf_only = sf_only.loc[sf_only.month_first_day == '2025-10-01T00:00:00.000000000']

In [None]:
# Inspect which class sf_only is.
type(sf_only)

In [None]:
# NOTE(review): explore() is a GeoDataFrame method, but sf_only derives from
# df rather than from the GeoDataFrame returned by convert_to_gdf -- confirm
# sf_only is actually a GeoDataFrame at this point.
sf_only.explore()

In [None]:
# Distinct route names across every row of df.
df.route_name.unique()

In [None]:
# Preview 5 random rows without the geometry and pt_array columns.
df.drop(columns = ["geometry", "pt_array"]).sample(5)

In [None]:
# Request fct_monthly_schedule_rt_route_direction_summary from the same
# staging project/dataset through the local _sql_query helper; the
# route_typology column is selected from it further down.
route_typologies_df = _sql_query.download_with_pandas_gbq(
    project="cal-itp-data-infra-staging",
    filename="tiffany_mart_gtfs_rollup.fct_monthly_schedule_rt_route_direction_summary",
)

In [None]:
# Look at one random row.
route_typologies_df.sample()

In [None]:
# Sorted distinct route names in the summary table ...
route_typologies_df[["route_name"]].drop_duplicates().sort_values(by = ["route_name"])

In [None]:
# ... and in df, for comparison ahead of the merge on route_name.
df[["route_name"]].drop_duplicates().sort_values(by = ["route_name"])

In [None]:
# Keep the three merge keys (name, month_first_day, route_name) plus the
# route attributes that get joined onto df.
route_typologies_df2 = route_typologies_df.loc[
    :,
    [
        "name",
        "month_first_day",
        "route_name",
        "route_type",
        "route_color",
        "route_typology",
    ],
]

In [None]:
# Outer-join df with the trimmed typology table on operator name, month and
# route name; indicator=True adds a _merge column recording which side(s)
# each row came from.
m1 = pd.merge(
    left=df,
    right=route_typologies_df2,
    how="outer",
    on=["name", "month_first_day", "route_name"],
    indicator=True,
)

In [None]:
# Count rows per merge outcome (the _merge column added by indicator=True) to
# see how many rows matched on both sides versus only one.
m1._merge.value_counts()