# fct_operator_hourly_summary

In [12]:
import datetime
import sys

import _operator_grain_route_dir_visuals as _report_operator_visuals
import _operator_grain_scheduled_service
import _sql_query
import google.auth
import pandas as pd
from google.cloud import bigquery
from IPython.display import HTML, Image, Markdown, display, display_html
from loguru import logger
from omegaconf import OmegaConf
from shared_utils import gtfs_utils_v2, portfolio_utils, publish_utils, rt_dates
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SEGMENT_GCS

import altair as alt
import geopandas as gpd
from shapely import wkt

from omegaconf import OmegaConf
readable_dict = OmegaConf.load("new_readable.yml")

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
analysis_name = "City and County of San Francisco"

In [4]:
df = _sql_query.download_with_pandas_gbq(
    project="cal-itp-data-infra-staging",
    filename="tiffany_mart_gtfs_rollup.fct_operator_hourly_summary",
)


        SELECT 
            *
        FROM `cal-itp-data-infra-staging`.`tiffany_mart_gtfs_rollup`.`fct_operator_hourly_summary`
        WHERE month_first_day >=  DATE('2025-01-01')
    


  import pkg_resources  # noqa


Downloading: 100%|[32m██████████[0m|
download time: 0:00:01.823251


In [5]:
def prep_hourly_summary() -> pd.DataFrame:
    df = _sql_query.download_with_pandas_gbq(
        project="cal-itp-data-infra-staging",
        filename="tiffany_mart_gtfs_rollup.fct_operator_hourly_summary",
    )
    df2 = (
        df.groupby(["analysis_name", "month_first_day", "day_type", "departure_hour"])
        .agg({"n_trips": "sum"})
        .reset_index()
    )
    df2.columns = df2.columns.str.replace("_", " ").str.title()

    df2 = df2.rename(columns={"Month First Day": "Date"})
    return df2

In [6]:
hourly_summary = prep_hourly_summary()


        SELECT 
            *
        FROM `cal-itp-data-infra-staging`.`tiffany_mart_gtfs_rollup`.`fct_operator_hourly_summary`
        WHERE month_first_day >=  DATE('2025-01-01')
    
Downloading: 100%|[32m██████████[0m|
download time: 0:00:02.241149


In [7]:
hourly_summary.columns

Index(['Analysis Name', 'Date', 'Day Type', 'Departure Hour', 'N Trips'], dtype='object')

In [8]:
hourly_summary.head()

Unnamed: 0,Analysis Name,Date,Day Type,Departure Hour,N Trips
0,Alameda-Contra Costa Transit District,2025-08-01,Saturday,0,14
1,Alameda-Contra Costa Transit District,2025-08-01,Saturday,1,14
2,Alameda-Contra Costa Transit District,2025-08-01,Saturday,2,14
3,Alameda-Contra Costa Transit District,2025-08-01,Saturday,3,18
4,Alameda-Contra Costa Transit District,2025-08-01,Saturday,4,26


In [9]:
hourly_summary["Departure Hour"].unique()

<IntegerArray>
[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
Length: 31, dtype: Int64

In [10]:
sf_only = hourly_summary.loc[hourly_summary["Analysis Name"] == analysis_name]

In [11]:
sf_only.columns

Index(['Analysis Name', 'Date', 'Day Type', 'Departure Hour', 'N Trips'], dtype='object')

'Daily Scheduled Service Hours for '

In [20]:
def create_hourly_summary(df: pd.DataFrame, day_type: str):
    
    chart_dict = readable_dict.hourly_summary
    df2 = df.loc[df["Day Type"] == "Saturday"]
    df2["Date"] = df["Date"].astype(str)
    
    date_list = list(df2["Date"].unique())
    
    date_dropdown = alt.binding_select(
        options=date_list,
        name="Dates: ",
    )
    xcol_param = alt.selection_point(
        fields=["Date"], value=date_list[0], bind=date_dropdown
    )

    chart = (
        (
            alt.Chart(df2)
            .mark_line(size=3)
            .encode(
                x=alt.X(
                    "Departure Hour",
                    title="Departure Hour",
                    axis=alt.Axis(
                        labelAngle=-45,
                    ),
                ),
                y=alt.Y(
                    "N Trips",
                    title="N Trips",
                ),
            )
        )
        .add_params(xcol_param)
        .transform_filter(xcol_param)
    )
    
    bg = _operator_grain_scheduled_service.create_bg_service_chart()
    
    chart = (chart + bg).properties(
    resolve=alt.Resolve(
        scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
    )
)
    chart = _report_operator_visuals.configure_chart(
    chart,
    width=400,
    height=250,
    title=f"{chart_dict.title} {day_type}",
    subtitle=chart_dict.subtitle)
    
    return chart

In [22]:
create_hourly_summary(sf_only, "Saturday")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Date"] = df["Date"].astype(str)


In [23]:
create_hourly_summary(sf_only, "Sunday")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Date"] = df["Date"].astype(str)


In [24]:
create_hourly_summary(sf_only, "Weekday")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["Date"] = df["Date"].astype(str)
