## Total Service Hours

* Hourly scheduled service breakdown for weekday, Sat, Sun - April / October 2023
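* Get the dates with `rt_dates.get_week("apr2023")`
* Load each date's trips (swap `trip_start_time` for whichever column holds the trip start time):
  ```python
  helpers.import_scheduled_trips(
      analysis_date,
      columns=["trip_instance_key", "service_hours", "trip_start_time"],
      get_pandas=True,
  )
  ```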
* See if you can pair this with `time_series_utils` like this example to get whole week in 1 df
* Parse out hour
* Aggregate for weekday, saturday, sunday (3 categories) by hour

In [22]:
import altair as alt
import geopandas as gpd
import pandas as pd
from segment_speed_utils import helpers, time_series_utils
from segment_speed_utils.project_vars import (
    COMPILED_CACHED_VIEWS,
    GTFS_DATA_DICT,
    PROJECT_CRS,
    RT_SCHED_GCS,
    SCHED_GCS,
    SEGMENT_GCS,
)
from shared_utils import catalog_utils, rt_dates, rt_utils

# Use Altair's default data transformer with the row cap removed (max_rows=None),
# so charts built from larger DataFrames do not raise MaxRowsError.
alt.data_transformers.enable("default", max_rows=None)

DataTransformerRegistry.enable('default')

In [2]:
# Notebook display settings: show up to 100 columns, format floats with two
# decimals, and do not truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

### Get April and October

In [3]:
# Service dates for the April and October 2023 analysis weeks.
# exclude_wed=False keeps Wednesday in each list.
# (oct_week was previously assigned a second time in a separate, redundant cell.)
apr_week = rt_dates.get_week(month="apr2023", exclude_wed=False)
oct_week = rt_dates.get_week(month="oct2023", exclude_wed=False)

In [5]:
# Inspect the list of April service dates.
apr_week

['2023-04-10',
 '2023-04-11',
 '2023-04-12',
 '2023-04-13',
 '2023-04-14',
 '2023-04-15',
 '2023-04-16']

### `import_scheduled_trips` with one date first
* Use `trip_first_departure_datetime_pacific` 
* Test manipulating with only one day.

In [6]:
# Catalog entry for the scheduled-trips table; used to build the parquet path.
TABLE = GTFS_DATA_DICT.schedule_downloads.trips

In [7]:
# Path to the trips parquet for the first date of the April week.
FILE = f"{COMPILED_CACHED_VIEWS}{TABLE}_{apr_week[0]}.parquet"

In [8]:
# Read the trips parquet directly, with all columns and no filtering
# (presumably as a baseline to compare with helpers.import_scheduled_trips).
april_og = pd.read_parquet(FILE)

In [9]:
# Keep only the SamTrans feed and renumber the rows from zero.
april_og = (
    april_og[april_og["name"] == "Bay Area 511 SamTrans Schedule"]
    .reset_index(drop=True)
)

In [10]:
# Number of distinct trip_ids for SamTrans in the directly-read table.
april_og.trip_id.nunique()

1390

In [11]:
# Load scheduled trips for the first April date through the shared helper,
# reading only the columns needed for an hourly service-hours breakdown.
april_df = helpers.import_scheduled_trips(
    analysis_date=apr_week[0],
    columns=[
        "name",
        "service_date",
        "route_long_name",
        "trip_first_departure_datetime_pacific",
        "service_hours",
    ],
)

In [12]:
# Preview the first two rows (all operators at this point).
april_df.head(2)

Unnamed: 0,name,service_date,route_long_name,trip_first_departure_datetime_pacific,service_hours
0,VCTC GMV Schedule,2023-04-10,Route 11,2023-04-10 06:00:00,0.53
1,VCTC GMV Schedule,2023-04-10,Route 11,2023-04-10 15:40:00,0.6


In [13]:
# Keep only the SamTrans feed and renumber the rows from zero.
april_df = (
    april_df[april_df["name"] == "Bay Area 511 SamTrans Schedule"]
    .reset_index(drop=True)
)

In [14]:
# Check dtypes and null counts; the departure column must be datetime for the .dt accessor.
april_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1381 entries, 0 to 1380
Data columns (total 5 columns):
 #   Column                                 Non-Null Count  Dtype         
---  ------                                 --------------  -----         
 0   name                                   1381 non-null   object        
 1   service_date                           1381 non-null   object        
 2   route_long_name                        1381 non-null   object        
 3   trip_first_departure_datetime_pacific  1381 non-null   datetime64[ns]
 4   service_hours                          1381 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 54.1+ KB


In [15]:
# Derive the hour of first departure and a YYYY-MM month label
# (first seven characters of service_date rendered as a string).
april_df = april_df.assign(
    departure_hour=april_df["trip_first_departure_datetime_pacific"].dt.hour,
    month=april_df["service_date"].astype(str).str[:7],
)

In [16]:
# april_df['month'] = april_df.service_date.astype(str).str.slice(stop=7)

In [29]:
# april_df.loc[april_df.route_long_name == "Colma BART - Brunswick/Templeton"].sort_values(by = ['trip_first_departure_datetime_pacific'])

### Lift [this code](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/merge_data.py#L27) to read in an entire week.

In [27]:
# time_series_utils.concatenate_datasets_across_dates??

In [28]:
def concatenate_trips(
    date_list: list,
    columns: list = None,
) -> pd.DataFrame:
    """
    Read the scheduled-trips table for every date in ``date_list`` and
    stack the results into a single DataFrame.

    Args:
        date_list: service dates to load, e.g. the list returned by
            ``rt_dates.get_week()``.
        columns: columns to read from each date's file. Must include
            ``service_date`` because the result is sorted on it. Defaults to
            name, service_date, route_long_name,
            trip_first_departure_datetime_pacific and service_hours.

    Returns:
        The rows of the trips table for all requested dates, sorted by
        ``service_date`` with a fresh 0..n-1 index.
    """
    if columns is None:
        columns = [
            "name",
            "service_date",
            "route_long_name",
            "trip_first_departure_datetime_pacific",
            "service_hours",
        ]

    # Catalog entry for the trips table; combined with COMPILED_CACHED_VIEWS
    # and each date by the time-series helper to locate the files.
    trips_table = GTFS_DATA_DICT.schedule_downloads.trips

    trips = time_series_utils.concatenate_datasets_across_dates(
        COMPILED_CACHED_VIEWS,
        trips_table,
        date_list,
        data_type="df",
        columns=columns,
    )

    return trips.sort_values(["service_date"]).reset_index(drop=True)

In [25]:
# Stack every date of the April week into one frame (no operator filter applied).
all_april = concatenate_trips(apr_week)

In [26]:
# Row/column count of the combined week of trips.
all_april.shape

(674114, 5)

In [31]:
def total_service_hours(date_list: list, name: str) -> pd.DataFrame:
    """
    Sum scheduled service hours by departure hour for one schedule feed.

    Trips for every date in ``date_list`` are loaded with
    ``concatenate_trips()``, restricted to rows whose ``name`` column equals
    ``name``, and totalled per (name, month, departure_hour).

    Args:
        date_list: service dates to include, e.g. one week from
            ``rt_dates.get_week()``.
        name: value of the ``name`` column identifying the schedule feed.

    Returns:
        DataFrame with columns ``name``, ``month`` (first seven characters of
        the service date, i.e. YYYY-MM), ``departure_hour`` and
        ``service_hours`` (summed over all dates in ``date_list``).
    """
    all_trips = concatenate_trips(date_list)

    # Filter to the requested feed first, so the derived columns are computed
    # on that feed's trips only rather than on every operator's trips.
    operator_trips = all_trips.loc[all_trips.name == name].reset_index(drop=True)

    operator_trips = operator_trips.assign(
        departure_hour=operator_trips.trip_first_departure_datetime_pacific.dt.hour,
        month=operator_trips.service_date.astype(str).str.slice(stop=7),
    )

    # groupby drops NaN keys by default, so trips without a first-departure
    # time (NaN departure_hour) are not counted in any hour.
    hourly_totals = (
        operator_trips.groupby(["name", "month", "departure_hour"])
        .agg({"service_hours": "sum"})
        .reset_index()
    )

    return hourly_totals

In [32]:
# Hourly service-hour totals for SamTrans over the April week.
april_sum = total_service_hours(apr_week, "Bay Area 511 SamTrans Schedule")

In [82]:
def all_months(name: str, months: tuple = ("apr2023", "oct2023")) -> pd.DataFrame:
    """
    Build the hourly service-hour totals for one schedule feed across
    several analysis months.

    Args:
        name: value of the ``name`` column identifying the schedule feed.
        months: month keys accepted by ``rt_dates.get_week()``; must be
            non-empty. Defaults to April and October 2023.

    Returns:
        The ``total_service_hours()`` results for each month's week
        (Wednesday included), stacked in the order given with a fresh
        0..n-1 index.
    """
    monthly_totals = [
        total_service_hours(
            rt_dates.get_week(month=month, exclude_wed=False),
            name,
        )
        for month in months
    ]

    # ignore_index gives the combined frame unique row labels instead of
    # repeating each month's own 0..n-1 index.
    return pd.concat(monthly_totals, ignore_index=True)

In [83]:
# April and October hourly totals for SamTrans; this frame feeds the charts.
samtrans = all_months("Bay Area 511 SamTrans Schedule")

In [46]:
import _report_utils
import _section2_utils
import yaml

# Load the chart color palettes from YAML (path is relative to the notebook's working directory).
with open("color_palettes.yml") as f:
    color_dict = yaml.safe_load(f)

In [74]:
# Load the human-readable chart text (titles/subtitles) from YAML.
with open("readable.yml") as f:
    readable_dict = yaml.safe_load(f)

In [84]:
# Preview the aggregated frame: one row per (name, month, departure_hour).
samtrans.head(2)

Unnamed: 0,name,month,departure_hour,service_hours
0,Bay Area 511 SamTrans Schedule,2023-04,0.0,43.45
1,Bay Area 511 SamTrans Schedule,2023-04,1.0,66.55


In [85]:
# Month labels for the dropdown in descending order (YYYY-MM strings, so newest first).
# Built in one expression instead of creating the list and sorting it in place in a later cell.
dropdown_list = sorted(samtrans.month.unique().tolist(), reverse=True)

In [88]:
# Select-box widget offering the month labels; its display name comes from _report_utils.labeling.
# NOTE(review): 'month ' carries a trailing space — confirm that is intentional.
dropdown = alt.binding_select(options=dropdown_list, name=_report_utils.labeling('month '))

In [90]:
# Point selection on the "month" field, bound to the dropdown widget.
# NOTE(review): no chart in the cells shown here attaches this selector
# (e.g. via add_params), so the dropdown does not filter anything yet.
selector = alt.selection_point(
        name=_report_utils.labeling("month "), fields=["month"], bind=dropdown
    )

In [116]:
# Time-of-day bands expressed in departure hours as [start, stop).
# The previous five bands overlapped (0-4, 3-7, 6-10, ...) and did not match
# the six time-of-day labels used in this notebook. These six bands tile the
# day 0-24 with no gaps or overlaps.
# Rows are in alphabetical label order — the same order as `index_values` —
# so assigning that list as the index stays consistent.
cutoff = pd.DataFrame(
    {
        "start": [7, 4, 20, 10, 0, 15],
        "stop": [10, 7, 24, 15, 4, 20],
    },
    index=[
        "AM Peak:7-9AM",
        "Early AM:4-6AM",
        "Evening:8-11PM",
        "Midday:10AM-2PM",
        "Owl:12-3AM",
        "PM Peak:3-7PM",
    ],
)

In [111]:
# Time-of-day period labels ("name:hour range"), listed alphabetically rather than
# chronologically; intended as row labels for `cutoff` (see the commented-out assignment).
index_values = ['AM Peak:7-9AM', 'Early AM:4-6AM', 'Evening:8-11PM', 'Midday:10AM-2PM', 'Owl:12-3AM', 'PM Peak:3-7PM']

In [113]:
# cutoff.index = index_values

In [117]:
# Draw one translucent rectangle per row of `cutoff`, spanning start..stop on the
# x axis and a fixed 0-300 pixel height, colored by the row's index label.
alt.Chart(
    cutoff.reset_index()
).mark_rect(
    opacity=0.2
).encode(
    x='start',
    x2='stop',
    y=alt.value(0),  # pixels from top
    y2=alt.value(300),  # pixels from top
    color='index:N'
)

In [97]:
# Line chart of total scheduled service hours by departure hour, one line per
# month; axis titles, colors and chart title text come from the shared
# labeling helper and the two YAML dictionaries.
(
    alt.Chart(samtrans)
    .mark_line(size=3)
    .encode(
        x=alt.X("departure_hour", title=_report_utils.labeling("departure_hour")),
        y=alt.Y("service_hours", title=_report_utils.labeling("service_hours")),
        color=alt.Color(
            "month",
            scale=alt.Scale(range=color_dict["longest_shortest_route"]),
        ),
        tooltip=samtrans.columns.tolist(),
    )
    .properties(
        width=400,
        height=250,
        title={
            "text": readable_dict["total_scheduled_hours"]["title"],
            "subtitle": readable_dict["total_scheduled_hours"]["subtitle"],
        },
    )
)