# Quarterly Performance Objective - Historical Comparisons

Let's compare quarterly metrics for PMAC.

* Get % change on total service hours by on_shn/intersects_shn
* Get % change on unique routes by on_shn/intersects_shn


Only show first 2 groups for historical comparisons

In [1]:
import altair as alt
import geopandas as gpd
import pandas as pd

from IPython.display import HTML
from typing import Literal

import B1_report_metrics as report_metrics
from shared_utils import rt_dates, rt_utils
from shared_utils import calitp_color_palette as cp
from update_vars import BUS_SERVICE_GCS

#alt.renderers.enable("html")



In [2]:
def concatenate_summary_tables_across_dates(
    summary_category: Literal["service_hours", "delay"], rt_dates_dict: dict) -> pd.DataFrame:
    
    df = pd.DataFrame()
    
    for quarter, date in rt_dates_dict.items():
    
        path = rt_utils.check_cached(
            f"routes_categorized_with_delay_{date}.parquet", 
            BUS_SERVICE_GCS,
            subfolder = ""
        )

        if path is not None:
            date_df = gpd.read_parquet(path)
            plot_df = date_df[date_df._merge=="both"]
            
            if summary_category=="service_hours":
                summary = report_metrics.get_service_hours_summary_table(plot_df)
            
            elif summary_category == "delay":
                summary = report_metrics.get_delay_summary_table(plot_df)

            summary = summary.assign(
                service_date = date,
                #year = int(quarter.split('_')[1]),
                #qtr = quarter.split('_')[0],
            )

            df = pd.concat([df, summary], axis=0)
        else:
            continue
        
        return df


In [3]:
service_hours_df = concatenate_summary_tables_across_dates(
    "service_hours", rt_dates.PMAC)
delay_df = concatenate_summary_tables_across_dates(
    "delay", rt_dates.PMAC)

In [4]:
test2 = service_hours_df.assign(
    service_date = "2022-08-17"
)

In [5]:
df = pd.concat([service_hours_df,test2], axis=0)

In [7]:
def reshape_to_long(df: pd.DataFrame, 
                    summary_category: Literal["service_hours", "delay"]
                   ) -> pd.DataFrame: 
    if summary_category == "service_hours":
        value_vars = ["total_service_hours", "unique_route",  "service_hrs_per_route",
                      "pct_total_service_hours", "pct_unique_route", 
                     ]
        
    elif summary_category == "delay":
        value_vars = ["delay_hours", "unique_route", "delay_hours_per_route",
                      "pct_delay_hours", "pct_unique_route"]
        
    df_long = pd.melt(
        df, 
        id_vars = ["category", "service_date"],
        value_vars = value_vars)
    
    return df_long

In [9]:
service_hours_df2 = reshape_to_long(df, summary_category="service_hours")
delay_df2 = reshape_to_long(delay_df, summary_category="delay")

In [12]:
from shared_utils import styleguide

In [26]:
def base_bar(df: pd.DataFrame, x_col: str, y_col: str) -> alt.Chart:
    bar = (alt.Chart(df)
           .mark_bar(size=50)
           .encode(
               x=alt.X(f"{x_col}:T"),
               y=alt.Y(f"{y_col}:Q")
           )
          )
    
    return bar

def time_series_bar_by_category(df: pd.DataFrame, 
                                variable: str = "total_service_hours", 
                                x_col: str = "quarter(service_date)", 
                                y_col: str = "value") -> alt.Chart: 
    
    subset = df[(df.variable == variable)]
    on_shn = subset[(subset.category=="On SHN")]
    intersects_shn = subset[(subset.category=="Intersects SHN")]
    
    bar1 = base_bar(on_shn, x_col, y_col)
    bar2 = base_bar(intersects_shn, x_col, y_col)
    
    bar1 = styleguide.apply_chart_config(bar1).interactive()
    #bar2 = styleguide.apply_chart_config(bar2)
    
    #combined = bar1 | bar2
    
    return bar1 

In [29]:
service_hours_df2[service_hours_df2.variable=="total_service_hours"]

Unnamed: 0,category,service_date,variable,value
0,On SHN,2022-05-04,total_service_hours,15636.0
1,Intersects SHN,2022-05-04,total_service_hours,53232.0
2,Other,2022-05-04,total_service_hours,29484.0
3,On SHN,2022-08-17,total_service_hours,15636.0
4,Intersects SHN,2022-08-17,total_service_hours,53232.0
5,Other,2022-08-17,total_service_hours,29484.0


In [89]:
chart = (alt.Chart(
    service_hours_df2[(service_hours_df2.variable=="total_service_hours") & 
                      (service_hours_df2.category=="On SHN")
                     ])
         .mark_bar(size=10)
         .encode(
             x=alt.X("service_date:O"),
             y=alt.Y("sum(value):Q"),
         ).properties(width=100, height=100)
        )

In [90]:
chart2 = (alt.Chart(
    service_hours_df2[(service_hours_df2.variable=="total_service_hours") & 
                      (service_hours_df2.category=="Intersects SHN")
                     ])
         .mark_bar(size=10)
         .encode(
             x=alt.X("service_date:O"),
             y=alt.Y("sum(value):Q"),
         ).properties(width=100, height=100)
        )

In [95]:
combined = alt.hconcat(chart, chart2).properties(
    title="title").resolve_scale(x='shared', y='shared')

In [96]:
combined

In [37]:
time_series_bar_by_category(service_hours_df2,
                            variable = "total_service_hours",
                            x_col = "quarter(service_date)",
                            y_col = "value"
                           )

In [None]:
def change_from_prior(current: pd.DataFrame, prior: pd.DataFrame, 
                      col: str) -> pd.DataFrame:
    keep_cols = ["category", col]
    
    current2 = current[keep_cols].rename(columns={col: "current"})
    prior2 = prior[keep_cols].rename(columns={col: "prior"})
                  
    df = pd.merge(current2, prior2, on = "category", how = "inner")
    
    df = df.assign(
        change = df.current - df.prior,
        pct_change = (df.current - df.prior).divide(df.prior)
    )
        
    df_style = (pmac_utils.sort_by_column(df)
           .style.format(
               subset=['current', 'prior', 'change'], 
               **{'formatter': '{:,}'})
                .format(
                    subset=['pct_change'],
                    **{'formatter': '{:,.3f}'}
                )
                .set_properties(
                    subset=['current', 'prior', 'change', 'pct_change'], 
                    **{'text-align': 'center'})
                .set_properties(
                    subset='category',
                    **{'text-align': 'left'}
                )
                .set_table_styles([dict(selector='th', 
                                        props=[('text-align', 'center')])
                                        ])
           .hide(axis="index")
           .to_html()
          )
    
    display(HTML(df_style))
    
    return df

In [None]:
service_hours_df = change_from_prior(dfs["Q2_2022"], dfs["Q1_2022"], "total_service_hours")

In [None]:
unique_route_df = change_from_prior(dfs["Q2_2022"], dfs["Q1_2022"], "unique_route")

In [None]:
def make_grouped_bar_chart(df: pd.DataFrame, 
                           x_col: str, y_col: str, 
                           group_col: str = "category"
                          ) -> alt.Chart:
    
    chart = (alt.Chart(df)
             .mark_bar()
             .encode(
                 x=alt.X(x_col, title=""),
                 y=alt.Y(y_col, 
                         title=y_col),
                 column=alt.Column(group_col, 
                                   title=group_col,
                                   sort=["On SHN", "Intersects SHN"]
                                  ),
                 color=alt.Color(f"{x_col}:N", scale=alt.Scale(
                     range=cp.CALITP_CATEGORY_BRIGHT_COLORS))
             )
            )
    
    return chart

In [None]:
include_me = ["On SHN", "Intersects SHN"]

In [None]:
make_grouped_bar_chart(
    service_hours_df[service_hours_df.category.isin(include_me)], 
    x_col="qtr", y_col="total_service_hours", group_col="category")