# Redo `service_hours` work due to `portfolio_organization_name` changes 
* [Issue](https://github.com/cal-itp/data-analyses/issues/1500)

In [1]:
import _report_route_dir_visuals
import merge_operator_service
from segment_speed_utils import gtfs_schedule_wrangling, time_series_utils, project_vars
import altair as alt
import calitp_data_analysis.magics
import deploy_portfolio_yaml
import pandas as pd
import yaml
from IPython.display import HTML, Image, Markdown, display, display_html
from shared_utils import gtfs_utils_v2, catalog_utils, rt_dates, rt_utils, publish_utils,  portfolio_utils
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

# Load the chart/readability settings as a plain dict.
# NOTE(review): `readable_dict` is rebound from the same file with
# OmegaConf.load in a later cell, and the chart functions read it with
# attribute access, so this plain-dict version is superseded.
with open("readable2.yml") as f:
    readable_dict = yaml.safe_load(f)
# NOTE(review): rebinds GTFS_DATA_DICT, which is also imported from
# update_vars above; presumably both are the same catalog — confirm.
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [2]:
# Notebook display settings: show up to 100 columns, two-decimal floats,
# and no truncation of rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
from omegaconf import OmegaConf

# Reload the settings file as an OmegaConf config. The chart code below reads
# it with attribute access (e.g. `readable_dict.background_graph`).
readable_dict = OmegaConf.load("readable2.yml")

In [4]:
# Dates passed to merge_operator_service.concatenate_trips below.
analysis_date_list = rt_dates.y2025_dates

In [5]:
# Portfolio organization used as the row filter when reading the report tables.
portfolio_name = "City and County of San Francisco"
# Alternate organization to test with:
# portfolio_name =  "Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)"

In [6]:
# Monthly route-direction schedule/vehicle-position report. The parquet row
# filter loads only rows whose "Portfolio Organization Name" equals
# `portfolio_name`.
ROUTE_DIR_FILE = GTFS_DATA_DICT.digest_tables.monthly_route_schedule_vp_report
route_dir_df = pd.read_parquet(
    f"{RT_SCHED_GCS}{ROUTE_DIR_FILE}.parquet",
    filters=[[("Portfolio Organization Name", "==", portfolio_name)]],
)

## Add `portfolio_organization_name` further up in the pipeline

In [None]:
# Public schedule feeds, requested as a dataframe (get_df=True) so the
# `gtfs_dataset_name` column can be read below.
public_datasets = gtfs_utils_v2.filter_to_public_schedule_gtfs_dataset_keys(
        get_df=True
    )

In [None]:
# Unique public feed names, passed to exclude_private_datasets below.
public_feeds = public_datasets.gtfs_dataset_name.unique().tolist()

In [None]:
# Concatenate trips across all analysis dates, then filter on the "name"
# column using the public feed names.
# NOTE(review): the keyword is `public_gtfs_dataset_keys` but the values passed
# are feed names matched against `col="name"` — confirm that
# exclude_private_datasets only requires the values to match `col`.
df = merge_operator_service.concatenate_trips(analysis_date_list).pipe(
        publish_utils.exclude_private_datasets, 
        col = "name", 
        public_gtfs_dataset_keys = public_feeds
    )


In [None]:
# Mapping handed to standardize_portfolio_organization_names below.
# presumably feed name -> portfolio organization name; verify against the yml.
with open(
    "../_shared_utils/shared_utils/portfolio_organization_name.yml", "r"
) as f:
    PORTFOLIO_ORGANIZATIONS_DICT = yaml.safe_load(f)


In [None]:
df.sample()

In [None]:
# Apply the portfolio organization mapping to the trip table.
# presumably this adds the `portfolio_organization_name` column that the
# aggregation below groups by — confirm in portfolio_utils.
df = df.pipe(
        portfolio_utils.standardize_portfolio_organization_names, 
        PORTFOLIO_ORGANIZATIONS_DICT
    )

In [None]:
df.sample()

In [None]:
# Drop the feed-level `name` column; the aggregation below groups by
# `portfolio_organization_name` instead.
df = df.drop(columns = ["name"])

In [None]:
# Collapse the five weekdays into a single "Weekday" bucket; Saturday and
# Sunday each keep their own bucket.
WEEKDAY_DICT = {
        **{k: "Weekday" for k in ["Monday", "Tuesday", "Wednesday",
                             "Thursday", "Friday"]},
        "Saturday": "Saturday",
        "Sunday": "Sunday"
    }
    
# Derive day name (Monday, Tuesday, etc), departure hour, and month_year, then
# pass WEEKDAY_DICT to add_weekday_weekend_column (expected to supply the
# `weekday_weekend` column grouped on below).
df = df.assign(
        day_type = df.service_date.dt.day_name(),
        # Nullable Int64 (presumably to tolerate missing departure times).
        departure_hour = df.trip_first_departure_datetime_pacific.dt.hour.astype("Int64"),
        # month_year is zero-padded YYYY-MM, e.g. 2024-04 for April 2024
        month_year = (df.service_date.dt.year.astype(str) + 
                      "-" +  df.service_date.dt.month.astype(str).str.zfill(2)),
    ).pipe(
        gtfs_schedule_wrangling.add_weekday_weekend_column, WEEKDAY_DICT
    )
    
    
# Sum service hours per organization, month, day-type bucket
# (Weekday / Saturday / Sunday), and departure hour.
df2 = (
        df.groupby(["portfolio_organization_name", 
                    "month_year", 
                    "weekday_weekend", 
                    "departure_hour"])
        .agg({"service_hours": "sum"})
        .reset_index()
    )
    

In [None]:
df2.head(2)

In [None]:
df2.month_year.unique()

## Load Service Hours

In [None]:
# Pipeline column name -> display name; applied with DataFrame.rename in
# prep_operator_service_hours.
# NOTE(review): the aggregation earlier in this notebook only produces
# `service_hours`; `daily_service_hours` is presumably added by the production
# script that writes the scheduled service hours file — confirm.
readable_col_names = {
    "portfolio_organization_name": "Portfolio Organization Name",
    "month_year": "Month",
    "weekday_weekend": "Weekday or Weekend",
    "departure_hour": "Departure Hour (in Military Time)",
    "service_hours": "Service Hours",
    "daily_service_hours": "Daily Service Hours",
}

In [None]:
f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.scheduled_service_hours}.parquet"

In [7]:
# Catalog entry for the scheduled service hours table, and the name of its
# display-ready copy (same name with a `_report` suffix).
SCHEDULED_SERVICES_FILE = GTFS_DATA_DICT.digest_tables.scheduled_service_hours
SCHEDULED_SERVICES_REPORT = f"{SCHEDULED_SERVICES_FILE}_report"

In [None]:
f"{GTFS_DATA_DICT.digest_tables.dir}{SCHEDULED_SERVICES_FILE}.parquet"

In [None]:
# Read the scheduled service hours table with its pipeline column names.
# NOTE: this rebinds `df`, replacing the trip-level dataframe built earlier.
url = f"{GTFS_DATA_DICT.digest_tables.dir}{SCHEDULED_SERVICES_FILE}.parquet"
    
df = pd.read_parquet(url)

In [None]:
def prep_operator_service_hours() -> pd.DataFrame:
    """
    Build the report-ready scheduled service hours table.

    Reads the scheduled service hours parquet named in
    `GTFS_DATA_DICT.digest_tables`, renames its columns using
    `readable_col_names`, writes the result to the same directory under
    the input name plus a `_report` suffix, and returns the renamed
    dataframe.
    """
    digest_tables = GTFS_DATA_DICT.digest_tables
    input_name = digest_tables.scheduled_service_hours
    report_name = f"{input_name}_report"

    # Swap pipeline column names for their display names.
    report_df = pd.read_parquet(
        f"{digest_tables.dir}{input_name}.parquet"
    ).rename(columns=readable_col_names)

    # Persist the display-ready copy alongside the input file.
    report_df.to_parquet(f"{digest_tables.dir}{report_name}.parquet")
    return report_df

In [None]:
# Returns the renamed table; also writes the `_report` parquet as a side effect.
service_hours_df = prep_operator_service_hours()

In [None]:
service_hours_df.head(2)

In [8]:
# Same expression as the earlier SCHEDULED_SERVICES_REPORT assignment.
SCHEDULED_SERVICES_REPORT = f"{SCHEDULED_SERVICES_FILE}_report"

In [9]:
# Re-read the report table, keeping only rows for `portfolio_name`.
# This rebinds `service_hours_df` (previously the unfiltered table).
# NOTE(review): this reads from RT_SCHED_GCS, while
# prep_operator_service_hours writes to GTFS_DATA_DICT.digest_tables.dir —
# confirm both point at the same location.
service_hours_df = pd.read_parquet(
    f"{RT_SCHED_GCS}{SCHEDULED_SERVICES_REPORT}.parquet",
    filters=[[("Portfolio Organization Name", "==", portfolio_name)]],
)

## Analyze Name to Portfolio Organization Name Relationship

In [None]:
# Same file as `route_dir_df`, read without the portfolio filter so every
# organization is present.
route_dir_df_all_ops = pd.read_parquet(
    f"{RT_SCHED_GCS}{ROUTE_DIR_FILE}.parquet",)

In [None]:
route_dir_df_all_ops.Date.unique()

In [None]:
# Keep a single service date (2025-04-16).
one_date = route_dir_df_all_ops.loc[route_dir_df_all_ops.Date == '2025-04-16T00:00:00.000000000']

In [None]:
one_date.columns

In [None]:
# Distinct (Portfolio Organization Name, Name) pairs for that date.
one_date = one_date[["Portfolio Organization Name", "Name"]].drop_duplicates()

In [None]:
# Scheduled service hours for all operators, with pipeline column names.
# NOTE: rebinds `url`.
url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.scheduled_service_hours}.parquet"

service_hrs_all_ops = pd.read_parquet(url)

In [None]:
service_hrs_all_ops.columns

In [None]:
service_hrs_all_ops.month_year.unique()

In [None]:
# Keep a single month (2024-10).
service_hrs_all_ops = service_hrs_all_ops.loc[service_hrs_all_ops.month_year == '2024-10']

In [None]:
# Distinct `name` values for that month.
service_hrs_all_ops = service_hrs_all_ops[["name"]].drop_duplicates()

In [None]:
# Count distinct "Name" values per portfolio organization; a count above 1
# means several names roll up to one organization.
one_date.groupby(["Portfolio Organization Name"]).agg({"Name":"nunique"})

In [None]:
# Inspect the names listed under one portfolio organization.
one_date.loc[one_date["Portfolio Organization Name"] == "Alameda-Contra Costa Transit District"]

## Make Charts

In [10]:
def create_bg_service_chart() -> alt.Chart:
    """
    Create a shaded background for the Service Hour Chart
    to differentiate between time periods.

    Each band covers the departure hours named in its legend label. Band
    colors are read from `readable_dict.background_graph.colors`.

    Returns:
        alt.Chart: translucent rectangles, one per time period, intended
        to be layered with the service hours line chart.
    """
    specific_chart_dict = readable_dict.background_graph

    # (legend label, band start, band stop) in departure-hour units, listed
    # from midnight onward. Hour 19 (7-7:59PM) belongs to PM Peak and Evening
    # begins at hour 20, so the shaded bands agree with their labels.
    time_periods = [
        ("Owl:12-3:59AM", 0, 3.99),
        ("Early AM:4-6:59AM", 4, 6.99),
        ("AM Peak:7-9:59AM", 7, 9.99),
        ("Midday:10AM-2:59PM", 10, 14.99),
        ("PM Peak:3-7:59PM", 15, 19.99),
        ("Evening:8-11:59PM", 20, 24),
    ]
    period_labels = [label for label, _, _ in time_periods]

    cutoff = pd.DataFrame(
        {
            "start": [start for _, start, _ in time_periods],
            "stop": [stop for _, _, stop in time_periods],
            "Time Period": period_labels,
        }
    )

    chart = (
        alt.Chart(cutoff.reset_index())
        .mark_rect(opacity=0.15)
        .encode(
            x="start",
            x2="stop",
            # Fixed 0-250 extent; scheduled_service_hours configures the
            # layered chart with height=250.
            y=alt.value(0),
            y2=alt.value(250),
            color=alt.Color(
                "Time Period:N",
                # Sort legend by time, 12am starting first.
                sort=period_labels,
                scale=alt.Scale(range=[*specific_chart_dict.colors]),
            ),
        )
    )

    return chart

In [11]:
# Background bands, reused by the ad-hoc layering check below.
bg_chart = create_bg_service_chart()

In [12]:
bg_chart

In [14]:
# Ad-hoc weekday line chart with a hard-coded palette. scheduled_service_hours()
# below makes the same line_chart call but takes colors and tooltips from
# `readable_dict`.
main_chart = _report_route_dir_visuals.line_chart(
    df=service_hours_df.loc[service_hours_df["Weekday or Weekend"] == "Weekday"],
    x_col="Departure Hour (in Military Time)",
    y_col="Daily Service Hours",
    color_col="Month",
    color_scheme=["#5b8efd", "#765fec", "#fcb40e", "#fc5c04", "#dd217d", "#ccbb44"],
    tooltip_cols=[
        "Weekday or Weekend",
    ],
    date_format="",
)

In [15]:
# Layer the lines over the background bands. The color scale is resolved
# independently so the Month colors and the Time Period colors do not share
# one scale.
(main_chart + bg_chart).properties(
    resolve=alt.Resolve(
        scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
    )
)

In [None]:
service_hours_df.columns

In [None]:
service_hours_df.loc[service_hours_df["Weekday or Weekend"] == "Weekday"].sample()

In [16]:
def scheduled_service_hours(
    df: pd.DataFrame, weekday_weekend: str, specific_chart_dict: dict
) -> alt.Chart:
    """
    Chart daily scheduled service hours by departure hour for one day type.

    Args:
        df: service hours table using the readable column names
            ("Weekday or Weekend", "Month", "Daily Service Hours", ...).
        weekday_weekend: value of "Weekday or Weekend" to keep,
            e.g. "Weekday".
        specific_chart_dict: chart settings read by attribute access
            (`colors`, `tooltip`, `title`, `subtitle`).

    Returns:
        The line chart (colored by Month) layered over the time-period
        background, with independently resolved color scales.
    """
    is_day_type = df["Weekday or Weekend"] == weekday_weekend

    # Legend-bound point selection keyed on "Month".
    month_selection = alt.selection_point(fields=["Month"], bind="legend")

    service_lines = _report_route_dir_visuals.line_chart(
        df=df.loc[is_day_type],
        x_col="Departure Hour (in Military Time)",
        y_col="Daily Service Hours",
        color_col="Month",
        color_scheme=list(specific_chart_dict.colors),
        tooltip_cols=list(specific_chart_dict.tooltip),
        date_format="",
    )

    # Shaded time-period bands go behind the lines; keep the two color
    # scales separate.
    background = create_bg_service_chart()
    independent_colors = alt.Resolve(
        scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
    )
    layered = (service_lines + background).properties(resolve=independent_colors)

    styled = _report_route_dir_visuals.configure_chart(
        chart=layered,
        width=400,
        height=250,
        title=specific_chart_dict.title,
        subtitle=specific_chart_dict.subtitle,
    )

    return styled.add_params(month_selection)

In [None]:
service_hours_df["Weekday or Weekend"].unique()

In [17]:
# Weekday chart, using the weekday chart settings from `readable_dict`.
scheduled_service_hours(
    service_hours_df,
    "Weekday",
    readable_dict.weekday_scheduled_service_graph
)

In [None]:
# Sunday chart.
# NOTE(review): this passes the *weekday* chart settings, so the title and
# subtitle will be the weekday ones — confirm whether readable2.yml has a
# Sunday-specific entry that should be used here instead.
scheduled_service_hours(
    service_hours_df,
    "Sunday",
    readable_dict.weekday_scheduled_service_graph
)