# Historical Trends

<b>01 - Increase total amount of service on the SHN and reliability of that service by 2024</b>

## Routes on the State Highway Network (SHN)
Transit routes along the SHN can be categorized into 3 groups:
1. **On SHN** - where at least 20% of the transit route runs along the SHN (within 50 ft).
2. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN.
3. **Other** - all other transit routes.

## Metrics
* service hours, service hours per route
* delay hours, delay hours per route

The metrics are shown for transit routes in the **On SHN** and **Intersects SHN** categories.

In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp.magics
import geopandas as gpd
import pandas as pd

import C1_report_metrics as report_metrics
from shared_utils import rt_dates, geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide
from bus_service_utils import chart_utils
from update_vars import BUS_SERVICE_GCS, get_filename

In [2]:
def quarterly_summary_long(analysis_date: str) -> pd.DataFrame:
    """
    Build the long-format service hours summary for one analysis date.

    Reads the categorized routes parquet for `analysis_date`, asks
    `report_metrics` for the service hours summary table, and melts
    every column except `category` into variable / value pairs.

    Returns a long df with `category`, `variable`, `value` and a
    `service_date` column holding `analysis_date`.
    """
    categorized_routes_path = get_filename(
        f"{BUS_SERVICE_GCS}routes_categorized_", analysis_date, "v2")
    categorized_routes = gpd.read_parquet(categorized_routes_path)

    service_wide = report_metrics.get_service_hours_summary_table(
        categorized_routes)

    # Every column other than the category label is a metric to melt.
    metric_cols = [col for col in service_wide.columns if col != "category"]
    service_long = pd.melt(
        service_wide, id_vars="category", value_vars=metric_cols)

    # NOTE: delay metrics are currently disabled. When they return, build
    # the delay summary table (renaming its unique_route column to
    # delay_unique_route), melt it the same way, and add it to this list.
    long_tables = [service_long]

    stacked = pd.concat(long_tables, axis=0)
    return stacked.assign(service_date=analysis_date)

In [3]:
def concatenate_summary_across_dates(
    rt_dates_dict: dict,
    summary_dataset: str) -> pd.DataFrame:
    """
    Stack the long summary tables for several quarters into one df.

    rt_dates_dict: maps a quarter label of the form "Q2_2022" to the
        service date analyzed for that quarter.
    summary_dataset: "summary" stacks `quarterly_summary_long` output,
        "district" stacks `district_breakdown_long` output.

    Returns the stacked long df with three extra columns:
    `quarter` (e.g. "Q2"), `year` (int) and `year_quarter`
    rewritten year-first (e.g. "2022 Q2").

    Raises ValueError if `summary_dataset` is not one of the two
    supported names, or if `rt_dates_dict` is empty.
    """
    valid_datasets = ("summary", "district")
    if summary_dataset not in valid_datasets:
        raise ValueError(
            f"summary_dataset must be one of {valid_datasets}, "
            f"got {summary_dataset!r}"
        )
    if not rt_dates_dict:
        raise ValueError("rt_dates_dict is empty; no quarters to concatenate")

    # Flip to {service_date: quarter_label} so service_date can be mapped
    # back to its quarter after the per-date tables are stacked.
    rt_dates_reversed = {value: key for key, value in rt_dates_dict.items()}

    # Collect first and concatenate once, rather than growing a df inside
    # the loop starting from an empty DataFrame.
    quarterly_frames = []
    for date in rt_dates_reversed:
        # The builder is looked up at call time on purpose: in the notebook
        # the district function is defined in a later cell than the first
        # "summary" call.
        if summary_dataset == "summary":
            one_quarter = quarterly_summary_long(date)
        else:
            one_quarter = district_breakdown_long(date)
        quarterly_frames.append(one_quarter)

    df = pd.concat(quarterly_frames, axis=0)

    df = df.assign(
        year_quarter = df.service_date.map(rt_dates_reversed)
    )

    # "Q2_2022" -> column 0 is the quarter, column 1 is the year.
    quarter_and_year = df.year_quarter.str.split('_', expand=True)
    df = df.assign(
        quarter = quarter_and_year[0],
        year = quarter_and_year[1].astype(int),
    )

    # Get it to be year first
    df = df.assign(
        year_quarter = df.year.astype(str) + ' ' + df.quarter
    )

    return df

In [4]:
# Quarter label -> service date, taken from rt_dates.PMAC with the
# "Q1_2022" entry left out.
quarterly_metrics_dict = {
    quarter_label: service_date
    for quarter_label, service_date in rt_dates.PMAC.items()
    if quarter_label != "Q1_2022"
}

# Long table of the per-category summary metrics for every remaining quarter.
summary_df = concatenate_summary_across_dates(
    quarterly_metrics_dict,
    summary_dataset="summary",
)

#summary_df = report_metrics.concatenate_summary_across_dates(
#    quarterly_metrics_dict, summary_dataset = "summary")

In [5]:
def get_statewide_averages(df: pd.DataFrame) -> pd.DataFrame:
    """
    Recompute the quarterly metrics over all categories combined.

    Takes the long summary df (columns include `variable`, `value`,
    `year_quarter`, `service_date`, `year`, `quarter`), aggregates the
    service hours and unique route rows per quarter, derives
    service hours per route from those totals, and returns the result
    in long format again with `category` set to "All".
    """
    group_cols = ["year_quarter", "service_date", "year", "quarter"]

    # Only the raw totals are aggregated; the per-route average is derived
    # again below instead of being combined across categories.
    # NOTE: delay_hours / delay_unique_route belong here too once delay
    # metrics are re-enabled.
    summable_vars = ["service_hours", "unique_route"]
    rows_to_sum = df[df.variable.isin(summable_vars)]

    totals_long = geography_utils.aggregate_by_geography(
        rows_to_sum,
        group_cols + ["variable"],
        sum_cols = ["value"],
    )

    # Wide layout (one column per variable) so the ratio can be computed.
    totals_wide = pd.pivot(
        totals_long,
        index = group_cols,
        columns = "variable",
        values = "value",
    ).reset_index()

    hours_per_route = (
        totals_wide.service_hours.divide(totals_wide.unique_route)
    ).round(2)

    totals_wide = totals_wide.assign(
        service_hours_per_route = hours_per_route,
        category = "All",
    )

    # pivot leaves the columns axis named "variable"; blank it out.
    # https://stackoverflow.com/questions/55027108/pandas-rename-index
    totals_wide.columns.name = ""

    # Back to long: everything that is not an identifier column is a metric.
    id_cols = group_cols + ["category"]
    metric_cols = [col for col in totals_wide.columns if col not in id_cols]

    return pd.melt(
        totals_wide,
        id_vars = id_cols,
        var_name = "variable",
        value_vars = metric_cols,
    )

In [6]:
# Recompute the quarterly metrics across all categories (category == "All").
statewide_avg = get_statewide_averages(summary_df)

In [7]:
# chart utils
# Shared chart dimensions, passed as chart_height / chart_width to
# quarterly_bar_for_category in the chart cells below.
HEIGHT = 250
WIDTH = 200

def base_quarterly_bar(df: pd.DataFrame, variable: str,
                       x_col: str, y_col: str) -> alt.Chart:
    """
    Return an unstyled bar chart of `y_col` against `x_col`.

    `x_col` is encoded as ordinal with no axis title; `y_col` is encoded
    as quantitative and titled with `chart_utils.labeling(variable)`.
    Color, tooltip and sizing are left to the caller.
    """
    # The x values are pre-built quarter strings: Altair's own quarter
    # formatting (axis format 'Q%q-%Y') came out weird, so it is not used.
    x_encoding = alt.X(f"{x_col}:O", title = None)
    y_encoding = alt.Y(f"{y_col}:Q", title = chart_utils.labeling(variable))

    return alt.Chart(df).mark_bar().encode(x=x_encoding, y=y_encoding)

In [8]:
def quarterly_bar_for_category(
    df: pd.DataFrame,
    variable_list: list = None,
    category: str = "On SHN",
    x_col: str = "year_quarter",
    y_col: str = "value",
    chart_height: int = 200, chart_width: int = 500,
) -> alt.Chart:
    """
    Plot a quarterly metric for the routes in one category.
    Ex: for all routes on SHN, show service hours by quarter.

    df: long df with `variable`, `category`, `year`, `quarter`,
        `year_quarter` and `value` columns.
    variable_list: metrics to plot; defaults to
        ["service_hours", "delay_hours"]. Only the first entry is charted
        for now (see NOTE below).
    category: value of `df.category` to keep.
    x_col, y_col: columns used for the x and y encodings.
    chart_height, chart_width: dimensions passed to the chart properties.

    Returns the styled chart.

    Raises ValueError if `variable_list` is empty or if `df` has no rows
    for the requested variable and category.
    """
    if variable_list is None:
        variable_list = ["service_hours", "delay_hours"]
    if len(variable_list) == 0:
        raise ValueError("variable_list must contain at least one variable")

    # NOTE: a second bar chart for variable_list[1] (the delay metric) used
    # to be concatenated next to this one; it is disabled along with the
    # delay metrics, so only the first variable is plotted.
    var1 = variable_list[0]
    df1 = df[(df.variable == var1) & (df.category == category)]

    # Fail with a readable message instead of producing a blank chart or an
    # out-of-bounds positional lookup on an empty selection.
    if df1.empty:
        raise ValueError(
            f"No rows found for variable {var1!r} in category {category!r}"
        )

    color_dict = {
        "On SHN": cp.CALITP_CATEGORY_BRIGHT_COLORS[4],
        "Intersects SHN": cp.CALITP_CATEGORY_BRIGHT_COLORS[3],
        "All": cp.CALITP_CATEGORY_BRIGHT_COLORS[0],
        "service_hours": cp.CALITP_CATEGORY_BRIGHT_COLORS[4], # light blue
        "delay_hours": cp.CALITP_CATEGORY_BRIGHT_COLORS[1], # light orange
        "service_hours_per_route": cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # med blue
        "delay_hours_per_route": cp.CALITP_CATEGORY_BOLD_COLORS[1], # dark orange
    }

    tooltip = ['year', 'quarter', 'year_quarter',
               'variable', 'category', 'value']

    bar1 = (base_quarterly_bar(df1, var1, x_col, y_col)
            .encode(color = alt.value(color_dict[var1]),
                    tooltip = tooltip)
            .properties(title={
                "text": f"{chart_utils.labeling(var1)}",
                "subtitle": f"{category}"
            }, width = chart_width, height = chart_height)
            .interactive()
           )

    space = 0 if var1 == "service_hours" else 25

    # hconcat is kept (with a single chart) so the second bar chart can be
    # slotted back in beside bar1 later.
    combined = (styleguide.apply_chart_config(alt.hconcat(bar1,
                                                          spacing=space))
                .resolve_scale(y="independent")
               )

    return combined

## All Routes

In [10]:
# Statewide charts: rows of statewide_avg carry the category label "All".
category = "All"

# s1: average service hours per route, by quarter.
# NOTE: "delay_hours_per_route" is left out while delay metrics are disabled.
var_list = ["service_hours_per_route"]
s1 = quarterly_bar_for_category(
    statewide_avg, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)

# s2: total service hours, by quarter.
# NOTE: "delay_hours" is left out while delay metrics are disabled.
var_list = ["service_hours"]
s2 = quarterly_bar_for_category(
    statewide_avg, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)

In [11]:
# Display: service hours per route by quarter, all routes.
s1

In [12]:
# Display: service hours by quarter, all routes.
s2

## Routes on SHN

In [13]:
# Charts for the routes categorized as "On SHN".
category = "On SHN"

# o1: average service hours per route, by quarter.
# NOTE: "delay_hours_per_route" is left out while delay metrics are disabled.
var_list = ["service_hours_per_route"]
o1 = quarterly_bar_for_category(
    summary_df, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)

# o2: total service hours, by quarter.
# NOTE: "delay_hours" is left out while delay metrics are disabled.
var_list = ["service_hours"]
o2 = quarterly_bar_for_category(
    summary_df, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)

In [14]:
# Display: service hours per route by quarter, routes on the SHN.
o1

In [15]:
# Display: service hours by quarter, routes on the SHN.
o2

## Routes Intersecting SHN

In [16]:
# Charts for the routes categorized as "Intersects SHN".
category = "Intersects SHN"

# i1: average service hours per route, by quarter.
# NOTE: "delay_hours_per_route" is left out while delay metrics are disabled.
var_list = ["service_hours_per_route"]
i1 = quarterly_bar_for_category(
    summary_df, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)

# i2: total service hours, by quarter.
# NOTE: "delay_hours" is left out while delay metrics are disabled.
var_list = ["service_hours"]
i2 = quarterly_bar_for_category(
    summary_df, var_list, category,
    x_col="year_quarter", y_col="value",
    chart_height=HEIGHT, chart_width=WIDTH,
)


In [17]:
# Display: service hours per route by quarter, routes intersecting the SHN.
i1

In [18]:
# Display: service hours by quarter, routes intersecting the SHN.
i2

## Routes on SHN by District

In [19]:
def district_breakdown_long(analysis_date: str) -> pd.DataFrame:
    """
    Build the long-format by-district breakdown for one analysis date.

    Reads the categorized routes parquet for `analysis_date`, asks
    `report_metrics.by_district_on_shn_breakdown` for the service hours
    and unique route breakdown, and melts every column except `district`
    into variable / value pairs.

    Returns a long df with `district`, `variable`, `value` and a
    `service_date` column holding `analysis_date`.
    """
    categorized_routes_path = get_filename(
        f"{BUS_SERVICE_GCS}routes_categorized_", analysis_date, "v2")
    categorized_routes = gpd.read_parquet(categorized_routes_path)

    district_wide = report_metrics.by_district_on_shn_breakdown(
        categorized_routes, sum_cols = ["service_hours", "unique_route"])

    # Every column other than the district label is a metric to melt.
    metric_cols = [col for col in district_wide.columns if col != 'district']
    district_long = pd.melt(
        district_wide, id_vars="district", value_vars=metric_cols)

    # NOTE: delay metrics are currently disabled. When they return, build
    # the same breakdown with sum_cols ["delay_hours", "unique_route"]
    # (renaming unique_route to delay_unique_route), melt it the same way,
    # and add it to this list.
    long_tables = [district_long]

    stacked = pd.concat(long_tables, axis=0)
    return stacked.assign(service_date=analysis_date)

In [20]:
# Long table of the by-district breakdown, stacked over the quarters in
# quarterly_metrics_dict.
district_df = concatenate_summary_across_dates(
    quarterly_metrics_dict, summary_dataset="district")
                                              
#district_df = report_metrics.concatenate_summary_across_dates(
#    quarterly_metrics_dict, summary_dataset = "district")

In [21]:
def facet_by_district(df: pd.DataFrame, variable: str) -> alt.Chart:
    """
    Bar chart of one variable by quarter, faceted into one row per district.

    df: long df with `district`, `year_quarter`, `variable` and `value`
        columns.
    variable: value of `df.variable` to plot, e.g. "avg_service_hours".

    Returns the faceted chart, colored by district with the legend hidden.
    """
    subset = df[df.variable == variable]

    # "avg_service_hours" -> "Avg Service Hours". Built once so the y-axis
    # title and the facet title always match; .title() must apply to the
    # whole label, not just to the replacement string.
    variable_label = variable.replace('_', ' ').title()

    bar = (alt.Chart(subset)
          .mark_bar()
          .encode(
              x=alt.X("year_quarter:O"),
              y = alt.Y("value:Q", title=variable_label),
              color = alt.Color("district:N", title=None,
                                scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS +
                                   cp.CALITP_CATEGORY_BOLD_COLORS), legend=None),
              tooltip = ["district", "year_quarter", "value", "variable"]
          ).facet(facet="district:N", columns = 1, spacing=10,
                  title = variable_label)
          .interactive()
         )

    return bar

In [22]:
# Average service hours by quarter, one facet per district.
# NOTE: the matching "avg_delay_hours" chart (bar2) is left out while delay
# metrics are disabled; hconcat stays so it can sit beside bar1 again later.
bar1 = facet_by_district(district_df, "avg_service_hours")

district_chart = styleguide.apply_chart_config(alt.hconcat(bar1))
district_chart