# PMAC: Historical Comparisons

Let's compare quarterly metrics for PMAC.

* Get % change on total service hours by parallel/on_shn/other
* Get % change on unique routes by parallel/on_shn/other


Only show the first 2 groups for historical comparisons.

TODO: move all PMAC-related scripts into their own folder, rename the performance objectives, and rework everything into 1 report.



3 new groups:
* The previous parallel definition was made for Chad Edison, and we adapted it; for this performance objective, let's narrowly define a new set of groups.

* On SHN: service running directly on the SHN.
* Affected by SHN: service within a mile of the SHN. We can consider moving or supplementing this service, but the SHN can still impact it and cause bottlenecks where traffic is getting on/off the highway.
* Other.

On the speedmaps, the interface where local streets intersect with highway on-ramps shows up as a bottleneck.

The objective is trying to say that we want to run more transit ON the SHN!

In [1]:
import altair as alt
import pandas as pd

from IPython.display import HTML

import pmac_utils
from shared_utils import rt_dates
from shared_utils import calitp_color_palette as cp
from setup_tract_charts import labeling

# Select Altair's "html" renderer for displaying charts.
alt.renderers.enable("html")



RendererRegistry.enable('html')

In [2]:
# Build one summary table per PMAC quarter, keyed by the quarter label
# used in rt_dates.PMAC.
dfs = {}

for key, date in rt_dates.PMAC.items():
    quarter_df = pmac_utils.flag_parallel_intersecting_routes(date)

    # In D2_pmac, only keep subset where _merge=="both"
    # If it's not able to merge correctly at itp_id-route_id level, let's drop those
    plot_df = quarter_df[quarter_df._merge == "both"]
    summary = pmac_utils.get_summary_table(plot_df).assign(
        qtr=key,
        service_date=date,
    )

    dfs[key] = summary

# Stack every quarter into one long table with a single concat. Growing a
# DataFrame inside the loop re-copies all prior rows each iteration, and
# concatenating onto an empty DataFrame triggers a FutureWarning in
# pandas >= 2.1. pd.concat raises on an empty list, so fall back to an
# empty frame when there are no quarters.
df = pd.concat(list(dfs.values()), axis=0) if dfs else pd.DataFrame()

In [3]:
def clean_df_for_viz(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare the stacked ('long') quarterly summary for bar charts.

    The `qtr` column is split on "_": the first piece replaces `qtr`
    and the second piece is cast to int and stored in a new `year` column.
    `category` values are swapped for their display labels; values
    missing from the label mapping become NaN.

    Returns a new DataFrame; the input is left untouched.
    """
    display_labels = {
        "parallel": "Parallel",
        "on_shn": "On SHN",
        "other": "Other",
    }

    # Split once and reuse both pieces.
    qtr_parts = df.qtr.str.split("_", expand=True)

    viz_df = df.copy()
    viz_df["year"] = qtr_parts[1].astype(int)
    viz_df["qtr"] = qtr_parts[0]
    viz_df["category"] = df.category.map(display_labels)

    return viz_df

# Replace the stacked table with its chart-ready version.
df = clean_df_for_viz(df)

In [4]:
# Sanity check: total service hours in each quarter's summary table.
for quarter, summary_table in dfs.items():
    total_hours = summary_table.total_service_hours.sum()
    print(f"{quarter}: {total_hours}")

Q1_2022: 91271
Q2_2022: 107676


In [5]:
# Inspect the Q1 2022 summary table.
dfs["Q1_2022"]

Unnamed: 0,category,total_service_hours,unique_route,pct_total_service_hours,pct_unique_route,qtr,service_date
0,parallel,69435,1977,0.761,0.751,Q1_2022,2022-02-08
2,on_shn,1229,59,0.013,0.022,Q1_2022,2022-02-08
1,other,20607,596,0.226,0.226,Q1_2022,2022-02-08


In [6]:
def change_from_prior(current: pd.DataFrame, prior: pd.DataFrame,
                      col: str) -> pd.DataFrame:
    """
    Compare one metric between two quarterly summary tables.

    Both tables are reduced to `category` plus `col`, inner-merged on
    `category`, and given two derived columns:
    `change` (current - prior) and `pct_change` (change / prior).

    As a side effect, the comparison is passed through
    pmac_utils.sort_by_column, styled, and displayed as an HTML table.

    Returns the merged comparison table (unstyled).
    """
    subset_cols = ["category", col]
    value_cols = ["current", "prior", "change"]

    current_vals = current[subset_cols].rename(columns={col: "current"})
    prior_vals = prior[subset_cols].rename(columns={col: "prior"})

    comparison = pd.merge(
        current_vals, prior_vals, on="category", how="inner"
    )

    delta = comparison.current - comparison.prior
    comparison = comparison.assign(
        change=delta,
        pct_change=delta.divide(comparison.prior),
    )

    # Number formatting: thousands separators for counts, 3 decimals for
    # the proportional change.
    styler = pmac_utils.sort_by_column(comparison).style
    styler = styler.format(formatter="{:,}", subset=value_cols)
    styler = styler.format(formatter="{:,.3f}", subset=["pct_change"])

    # Alignment: numbers and headers centered, category labels left.
    styler = styler.set_properties(
        subset=value_cols + ["pct_change"], **{"text-align": "center"}
    )
    styler = styler.set_properties(
        subset="category", **{"text-align": "left"}
    )
    styler = styler.set_table_styles(
        [{"selector": "th", "props": [("text-align", "center")]}]
    )

    table_html = styler.hide(axis="index").to_html()
    display(HTML(table_html))

    return comparison

In [7]:
# Q2 2022 vs. Q1 2022: total service hours by category.
service_hours_df = change_from_prior(
    current=dfs["Q2_2022"],
    prior=dfs["Q1_2022"],
    col="total_service_hours",
)

category,current,prior,change,pct_change
parallel,71816,69435,2381,0.034
on_shn,1409,1229,180,0.146
other,34451,20607,13844,0.672


In [8]:
# Q2 2022 vs. Q1 2022: unique routes by category.
unique_route_df = change_from_prior(
    current=dfs["Q2_2022"],
    prior=dfs["Q1_2022"],
    col="unique_route",
)

category,current,prior,change,pct_change
parallel,1997,1977,20,0.01
on_shn,66,59,7,0.119
other,850,596,254,0.426


In [9]:
def make_grouped_bar_chart(df: pd.DataFrame,
                           x_col: str, y_col: str,
                           group_col: str = "category"
                          ) -> alt.Chart:
    """
    Build a bar chart with one column facet per value of `group_col`.

    Within each facet, `x_col` is on the x-axis (untitled) and also
    drives the bar color as a nominal field; `y_col` sets the bar height.
    Axis and facet titles come from `labeling`. Facets are ordered
    Parallel, On SHN, Other.
    """
    x_axis = alt.X(x_col, title="")
    y_axis = alt.Y(y_col, title=labeling(y_col))

    facet_columns = alt.Column(
        group_col,
        title=labeling(group_col),
        sort=["Parallel", "On SHN", "Other"],
    )

    bar_colors = alt.Color(
        f"{x_col}:N",
        scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
    )

    return alt.Chart(df).mark_bar().encode(
        x=x_axis,
        y=y_axis,
        column=facet_columns,
        color=bar_colors,
    )



In [10]:
# Total service hours per quarter, one facet per category.
make_grouped_bar_chart(
    df=df,
    x_col="qtr",
    y_col="total_service_hours",
    group_col="category",
)

In [11]:
# Unique routes per quarter, one facet per category.
make_grouped_bar_chart(
    df=df,
    x_col="qtr",
    y_col="unique_route",
    group_col="category",
)