# Quarterly Mass Transit Performance Objective

**01 - Increase total amount of service on the SHN and reliability of that service by 2024**

Metrics: total service hours, average service hours, and number of transit routes (n, %).

## Routes on the State Highway Network (SHN)

Transit routes along the SHN can be categorized into 3 groups:
1. **On SHN** - where at least 20% of the transit route runs the SHN (within 50 ft) 
2. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN.
3. **Other** - all other transit routes.

In [1]:
import branca
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import HTML, Markdown

import B1_report_metrics as report_metrics
import B2_report_charts as report_charts
from update_vars import ANALYSIS_DATE, BUS_SERVICE_GCS, CURRENT_QUARTER
from shared_utils import geography_utils, portfolio_utils
from shared_utils import calitp_color_palette as cp
from bus_service_utils import chart_utils

hq_catalog = intake.open_catalog("../high_quality_transit_areas/*.yml")
catalog = intake.open_catalog("*.yml")



In [25]:
from shared_utils import rt_dates

In [26]:
rt_dates.PMAC

{'Q1_2022': '2022-02-08',
 'Q2_2022': '2022-05-04',
 'Q3_2022': '2022-08-17',
 'Q4_2022': '2022-10-12'}

In [2]:
ANALYSIS_DATE = "2022-05-04"
df = gpd.read_parquet(
    f"{BUS_SERVICE_GCS}routes_categorized_with_delay_{ANALYSIS_DATE}.parquet")

# Can remove this once 5/4 is re-run
df = df.rename(columns = {"total_service_hours": "service_hours"})

In [3]:
def route_type_names(row): 
    if row.route_type in ['0', '1', '2']:
        return "Rail"
    elif row.route_type == '3':
        return "Bus"
    elif row.route_type == '4':
        return "Ferry"
    else:
        return "Unknown"

    
# Some interest in excluding modes like rail from District 4
df = df.assign(
    route_type_name = df.apply(lambda x: route_type_names(x), axis=1),
    delay_hours = round(df.delay_seconds / 60 ** 2, 2)
).drop(columns = "delay_seconds")


#df[df.category=="on_shn"].route_type_name.value_counts()
# This shows that only Bus and Unknown are present for on_shn

In [None]:
#df.route_type_name.value_counts()
#df[(df.route_type_name=="Unknown")].calitp_itp_id.value_counts()
#df[df.category=="intersects_shn"].route_type_name.value_counts()

In [4]:
# Should I subset to df[df._merge=="both"]?
# both means that it found a corresponding match in itp_id-route_id 
# since it's been aggregated up to route_id level (shape_id can mismatch more easily)
# Decide here, this is the subset of data I will use for rest of notebook
plot_df = df[df._merge=="both"]

## Statewide Stats  

* How many service hours are scheduled for a typical weekday for (1)?

In [5]:
summary = report_metrics.get_service_hours_summary_table(plot_df)  

In [6]:
all_hours = geography_utils.aggregate_by_geography(
    summary.assign(category="All"),
    group_cols = ["category"],
    sum_cols = ["unique_route", "service_hours"]
)

In [7]:
STATEWIDE_HOURS = all_hours.service_hours.iloc[0]
FORMATTED_HOURS = f'{STATEWIDE_HOURS:,}' 

display(
    Markdown(
        f"### {CURRENT_QUARTER.replace('_', ' ')} ({ANALYSIS_DATE}): "
        f"{FORMATTED_HOURS} total service hours statewide"
    )
)

### Q3 2022 (2022-05-04): 98,352 total service hours statewide

In [8]:
service_cols_dict = {
    "category": "Category",
    "service_hours": "Service Hours",
    "pct_total_service_hours": "% Service Hours",
    "unique_route": "# Routes",
    "pct_unique_route": "% Routes",
    "service_hrs_per_route": "Service Hours per Route",
}

summary_styled = portfolio_utils.style_table(
    summary, 
    rename_cols = service_cols_dict, 
    integer_cols = ["Service Hours", "# Routes"],
    one_decimal_cols = ["Service Hours per Route"],
    left_align_cols = "first",
    center_align_cols = "all",
    custom_format_cols = {'{:.1%}': ["% Service Hours", "% Routes"]},
    display_table = True
)

Category,Service Hours,# Routes,pct_service_hours,% Routes,Service Hours per Route
On SHN,15636,657,0.159,24.2%,23.8
Intersects SHN,53232,1503,0.541,55.4%,35.42
Other,29484,551,0.3,20.3%,53.51


## Reliability (Delay)

Be careful here, since delay is not merged onto every route. 

Need apples to apples comparison across quarters.

In [9]:
delay_df = plot_df[plot_df.merge_delay=="both"]
delay_summary = report_metrics.get_delay_summary_table(delay_df)

In [10]:
delay_cols_dict = {
    "category": "Category",
    "delay_hours": "Total Delay Hours",
    "pct_delay_hours": "% Delay Hours",
    "unique_route": "# Routes",
    "pct_unique_route": "% Routes",
    "delay_hours_per_route": "Delay Hours per Route",
}

delay_summary_styled = portfolio_utils.style_table(
    delay_summary, 
    rename_cols = delay_cols_dict, 
    integer_cols = ["Total Delay Hours", "# Routes"],
    two_decimal_cols = ["Delay Hours per Route"],
    left_align_cols = "first",
    center_align_cols = "all",
    custom_format_cols = {'{:.1%}': ["% Delay Hours", "% Routes"]},
    display_table = True
)

Category,Total Delay Hours,# Routes,Delay Hours per Route,% Delay Hours,% Routes
On SHN,750,231,3.25,16.1%,20.3%
Intersects SHN,2620,669,3.92,56.3%,58.7%
Other,1282,240,5.34,27.6%,21.1%


In [11]:
# Have some rows where district is missing
# focus on just the on_shn category and do district breakdown
#plot_df[(plot_df.District.isna())].category.value_counts()


# Chart utils
WIDTH = 300
HEIGHT = 200

In [12]:
by_district_service = report_metrics.by_district_on_shn_breakdown(
    plot_df, ["service_hours", "unique_route"])

bar_total = (report_charts.make_bar(by_district_service, "service_hours")
             .properties(width=WIDTH, height=HEIGHT)  
            )
bar_avg = (report_charts.make_bar(by_district_service, "avg_service_hours")
           .properties(width=WIDTH, height=HEIGHT)
          )

service_hours_chart = report_charts.configure_hconcat_charts(
    [bar_total, bar_avg], 
    x_scale="independent", 
    y_scale="independent", 
    chart_title="Service Hours by District")

service_hours_chart



In [18]:
by_district_delay = report_metrics.by_district_on_shn_breakdown(
    plot_df, ["delay_hours", "unique_route"]
)

bar_total = (report_charts.make_bar(by_district_delay, "delay_hours")
             .properties(width=WIDTH, height=HEIGHT)      
            )
bar_avg = (report_charts.make_bar(by_district_delay, "avg_delay_hours")
           .properties(width=WIDTH, height=HEIGHT)
          )

delay_hours_chart = report_charts.configure_hconcat_charts(
    [bar_total, bar_avg], 
    x_scale="independent", 
    y_scale="independent", 
    chart_title="Delay Hours by District")

delay_hours_chart



## Map of Routes by Category

In [21]:
def prep_data_for_viz(df: gpd.GeoDataFrame):
    gdf = report_metrics.clean_up_category_values(df)
    
    # line must fall within CA
    ca = hq_catalog.ca_boundary.read().to_crs(f"EPSG: {gdf.crs.to_epsg()}")

    gdf = gpd.sjoin(
        gdf,
        ca,
        how = "inner",
        predicate = "within",
    ).drop(columns= ["index_right"])

    # Buffer to style the line, project to WGS84 for folium
    gdf = gdf.assign(
        geometry = (gdf.geometry.to_crs(geography_utils.CA_StatePlane)
                    .buffer(250).simplify(tolerance=100)
                    .to_crs(geography_utils.WGS84)
                   )
    )
    
    # Drop columns that shouldn't get displayed in tooltip
    drop_cols = ["_merge", "merge_delay", "State"]
    
    gdf2 = gdf.drop(columns = drop_cols)
    
    return gdf2


drop_cols = ["unique_route"]
gdf = prep_data_for_viz(df.drop(columns = drop_cols))

### All Routes (modes: rail, bus, ferry, unknown)

In [None]:
route_map = gdf.explore(
    "category", 
    categorical=True, 
    cmap = [cp.CALITP_CATEGORY_BRIGHT_COLORS[0], 
            cp.CALITP_CATEGORY_BRIGHT_COLORS[1],
            cp.CALITP_CATEGORY_BRIGHT_COLORS[2]
    ],
    tiles = "Carto DB Positron"
)

route_map

### Rail / Ferry / Unknown Routes

In [24]:
include = ["Rail", "Ferry", "Unknown"]

route_map2 = gdf[gdf.route_type_name.isin(include)].explore(
    "category", 
    categorical=True, 
    cmap = [cp.CALITP_CATEGORY_BRIGHT_COLORS[0], 
            cp.CALITP_CATEGORY_BRIGHT_COLORS[1],
            cp.CALITP_CATEGORY_BRIGHT_COLORS[2]
    ],
    tiles = "Carto DB Positron"
)

route_map2

### Bus Routes

In [None]:
include = ["Bus"]

route_map3 = gdf[gdf.route_type_name.isin(include)].explore(
    "category", 
    categorical=True, 
    cmap = [cp.CALITP_CATEGORY_BRIGHT_COLORS[0], 
            cp.CALITP_CATEGORY_BRIGHT_COLORS[1],
            cp.CALITP_CATEGORY_BRIGHT_COLORS[2]
    ],
    tiles = "Carto DB Positron"
)

route_map3