## Total Service Hours

* Hourly scheduled service breakdown for weekday, Sat, Sun - April / October 2023
* rt_dates.get_week("apr2023")
* helpers.import_scheduled_trips(
   analysis_date, 
  columns = ["trip_instance_key", "service_hours", "trip_start_time or whatever"],   
  get_pandas = True
)
* See if you can pair this with `time_series_utils` like this example to get whole week in 1 df
* Parse out hour
* Aggregate for weekday, saturday, sunday (3 categories) by hour

In [1]:
import altair as alt
import geopandas as gpd
import pandas as pd
from segment_speed_utils import helpers, time_series_utils
from segment_speed_utils.project_vars import (
    COMPILED_CACHED_VIEWS,
    GTFS_DATA_DICT,
    PROJECT_CRS,
    RT_SCHED_GCS,
    SCHED_GCS,
    SEGMENT_GCS,
)
from shared_utils import catalog_utils, rt_dates, rt_utils

alt.data_transformers.enable("default", max_rows=None)

DataTransformerRegistry.enable('default')

In [2]:
# Notebook display settings: show up to 100 columns, print floats with
# two decimals, and lift the limits on displayed rows and column width.
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
import _report_utils
import _section2_utils
import yaml

# Load the chart color palettes (color_dict) and the chart title/subtitle
# text (readable_dict) from YAML files; both paths are relative to the
# current working directory.
with open("color_palettes.yml") as f:
    color_dict = yaml.safe_load(f)
with open("readable.yml") as f:
    readable_dict = yaml.safe_load(f)

### Get April and October

In [4]:
# Analysis weeks for April and October 2023, requested with exclude_wed=False.
# Presumably each is a list of date strings covering one week -- confirm in rt_dates.
apr_week = rt_dates.get_week(month="apr2023", exclude_wed=False)
oct_week = rt_dates.get_week(month="oct2023", exclude_wed=False)

In [5]:
# NOTE(review): repeats the oct_week assignment from the previous cell; this cell can be removed.
oct_week = rt_dates.get_week(month="oct2023", exclude_wed=False)

### `import_scheduled_trips` with one date first
* Use `trip_first_departure_datetime_pacific` 
* Test manipulating with only one day.

In [6]:
TABLE = GTFS_DATA_DICT.schedule_downloads.trips

In [7]:
# Path to the trips parquet for the first date of the April week.
# NOTE(review): only referenced by the commented-out read_parquet cell below.
FILE = f"{COMPILED_CACHED_VIEWS}{TABLE}_{apr_week[0]}.parquet"

In [8]:
#april_og = pd.read_parquet(FILE)

In [9]:
#april_og = april_og.loc[april_og.name == "Bay Area 511 SamTrans Schedule"].reset_index(
#    drop=True
#)

In [10]:
""" april_df = helpers.import_scheduled_trips(
    analysis_date=apr_week[0],
    columns=[
        "name",
        "service_date",
        "route_long_name",
        "trip_first_departure_datetime_pacific",
        "service_hours",
    ],
)"""

' april_df = helpers.import_scheduled_trips(\n    analysis_date=apr_week[0],\n    columns=[\n        "name",\n        "service_date",\n        "route_long_name",\n        "trip_first_departure_datetime_pacific",\n        "service_hours",\n    ],\n)'

In [11]:
# april_df.head(2)

### Lift [this code](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/merge_data.py#L27) to read in an entire week.

In [12]:
# time_series_utils.concatenate_datasets_across_dates??

In [13]:
def concatenate_trips(
    date_list: list,
) -> pd.DataFrame:
    """
    Read the scheduled trips table for every date in date_list
    and return the rows as a single dataframe ordered by service_date.

    date_list is handed unchanged to
    time_series_utils.concatenate_datasets_across_dates, and only
    the columns needed for the service hour summary are requested.
    """
    trips_table = GTFS_DATA_DICT.schedule_downloads.trips

    # Columns requested from each day's trips file
    keep_cols = [
        "name",
        "service_date",
        "route_long_name",
        "trip_first_departure_datetime_pacific",
        "service_hours",
    ]

    all_dates_df = time_series_utils.concatenate_datasets_across_dates(
        COMPILED_CACHED_VIEWS,
        trips_table,
        date_list,
        data_type="df",
        columns=keep_cols,
    )

    # Order by date and hand back a clean 0..n-1 index
    return all_dates_df.sort_values(["service_date"]).reset_index(drop=True)

In [14]:
april_df = concatenate_trips(apr_week)

In [15]:
def get_day_type(date):
    """
    Return the name of the day of the week ("Monday" through "Sunday")
    for a date.

    date must provide a weekday() method that numbers Monday as 0 and
    Sunday as 6 (as datetime.date and datetime.datetime do).
    """
    weekday_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    day_number = date.weekday()
    return weekday_names[day_number]


In [16]:
def weekday_or_weekend(row):
    """
    Collapse a row's day name into one of three categories.

    Reads row.day_type and returns "Sunday" or "Saturday" when it
    equals that name; any other value is labeled "Weekday".
    """
    day = row.day_type
    for weekend_day in ("Sunday", "Saturday"):
        if day == weekend_day:
            return weekend_day
    return "Weekday"

In [17]:
def total_service_hours(date_list: list, name: str) -> pd.DataFrame:
    """
    Sum scheduled service hours by departure hour for one operator.

    Trips for every date in date_list are combined, filtered to rows whose
    `name` equals the name argument, and then totaled by
    name, month, weekend_weekday and departure_hour.

    Weekday rows are pooled: every date that is not a Saturday or Sunday
    contributes to the same "Weekday" total.
    """
    week_trips = concatenate_trips(date_list)

    # Keep only the requested operator
    operator_trips = week_trips.loc[week_trips.name == name].reset_index(drop=True)

    # Day name (Monday, Tuesday, ...) and its Weekday/Saturday/Sunday category
    operator_trips["day_type"] = operator_trips["service_date"].apply(get_day_type)
    operator_trips["weekend_weekday"] = operator_trips.apply(weekday_or_weekend, axis=1)

    # Hour of day in which each trip first departs
    operator_trips["departure_hour"] = (
        operator_trips.trip_first_departure_datetime_pacific.dt.hour
    )

    # First 7 characters of the stringified date, e.g. 2023-04
    operator_trips["month"] = operator_trips.service_date.astype(str).str.slice(stop=7)

    group_cols = ["name", "month", "weekend_weekday", "departure_hour"]
    hourly_totals = operator_trips.groupby(group_cols)["service_hours"].sum().reset_index()

    return hourly_totals

In [18]:
april_sum = total_service_hours(apr_week, "Bay Area 511 SamTrans Schedule")

In [19]:
def all_months(name: str) -> pd.DataFrame:
    """
    Build the hourly service hour totals for one operator for the
    April 2023 and October 2023 weeks, stacked into one dataframe.

    Column names are passed through _report_utils.replace_column_names
    before the dataframe is returned.
    """
    months = ("apr2023", "oct2023")

    # Look up each month's week of dates (Wednesday included)
    weeks = [rt_dates.get_week(month=m, exclude_wed=False) for m in months]

    # Hourly totals for each week, April first
    monthly_totals = [total_service_hours(week, name) for week in weeks]

    combined = pd.concat(monthly_totals)
    combined.columns = combined.columns.map(_report_utils.replace_column_names)
    return combined

In [20]:
samtrans = all_months("Bay Area 511 SamTrans Schedule")

In [21]:
samtrans.head(2)

Unnamed: 0,Transit Operator,Month,Weekend or Weekday,Departure Hour,Service Hours
0,Bay Area 511 SamTrans Schedule,2023-04,Saturday,0,7.52
1,Bay Area 511 SamTrans Schedule,2023-04,Saturday,1,8.32


In [22]:
def create_bg_service_chart():
    """
    Create a shaded background for the Service Hour Chart
    by Time Period.

    Returns an Altair chart of translucent rectangles, one per time
    period, spanning the hours on the x axis that the period covers.
    """
    # Listed in clock order starting at 12am; reused below so the
    # legend follows the same order as the bands.
    time_periods = [
        "Owl:12-3:59AM",
        "Early AM:4-6:59AM",
        "AM Peak:7-9:59AM",
        "Midday:10AM-2:59PM",
        "PM Peak:3-7:59PM",
        "Evening:8-11:59PM",
    ]

    # Hour bounds (24-hour clock) for each band. PM Peak runs through
    # 19.99 and Evening starts at 20 so the shading agrees with the
    # "3-7:59PM" / "8-11:59PM" labels.
    # NOTE(review): bounds were aligned to the labels -- confirm against
    # the project's time-of-day definition.
    cutoff = pd.DataFrame(
        {
            "start": [0, 4, 7, 10, 15, 20],
            "stop": [3.99, 6.99, 9.99, 14.99, 19.99, 24],
            "time_period": time_periods,
        }
    )

    chart = (
        alt.Chart(cutoff.reset_index())
        .mark_rect(opacity=0.15)
        .encode(
            x="start",
            x2="stop",
            y=alt.value(0),  # pixels from top
            y2=alt.value(250),  # pixels from top
            color=alt.Color(
                "time_period:N",
                sort=time_periods,
                title=_report_utils.labeling("time_period"),
                scale=alt.Scale(range=color_dict["full_color_scale"]),
            ),
        )
    )

    return chart

In [23]:
create_bg_service_chart()

In [24]:
def create_service_hour_chart(df:pd.DataFrame, day_type:str):
    """
    Line chart of Service Hours by Departure Hour for one day category,
    one line per Month, layered with the shaded time period background.

    df: dataframe with the columns "Weekend or Weekday", "Departure Hour",
    "Service Hours" and "Month".
    day_type: value of "Weekend or Weekday" to keep; it is also
    appended to the chart title.
    """
    # Clicking a month in the legend highlights that month's line
    month_picker = alt.selection_point(fields=['Month'], bind='legend')

    # Rows for the requested day category only
    day_df = df.loc[df["Weekend or Weekday"] == day_type].reset_index(drop = True)

    # Title text with the day category appended
    chart_title = readable_dict["daily_scheduled_hour"]["title"] + ' for ' + day_type

    x_axis = alt.X(
        "Departure Hour",
        title=_report_utils.labeling("Departure Hour in Military Time"),
    )
    y_axis = alt.Y("Service Hours")
    month_color = alt.Color(
        "Month",
        scale=alt.Scale(range=color_dict["longest_shortest_route"]),
    )
    # Unselected months fade to 20% opacity
    line_opacity = alt.condition(month_picker, alt.value(1), alt.value(0.2))

    lines = alt.Chart(day_df).mark_line(size=3).encode(
        x=x_axis,
        y=y_axis,
        color=month_color,
        opacity=line_opacity,
        tooltip=list(day_df.columns),
    )
    lines = lines.properties(
        width=400,
        height=250,
        title={
            "text": chart_title,
            "subtitle": readable_dict["daily_scheduled_hour"]["subtitle"],
        },
    )
    lines = lines.add_params(month_picker)

    # Shaded time period bands
    shading = create_bg_service_chart()

    # Layer the two charts; color scales are resolved independently
    # for the two layers.
    layered = lines + shading
    return layered.properties(
        resolve=alt.Resolve(
            scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
        )
    )

In [25]:
create_service_hour_chart(samtrans, "Sunday")

In [26]:
create_service_hour_chart(samtrans, "Saturday")

In [27]:
create_service_hour_chart(samtrans, "Weekday")