# PMAC presentation

**MT.PO.01: increase total amount of service on the SHN and reliability of that service by 2024**

#### Selected Dates
* Q1: 2/8/22
* Q2: 5/4/22

1. Routes on SHN
   a. Parallel routes (1 mi corridor) - how many routes and agencies, and what share of all CA transit agency-routes?
   b. Intersecting routes (everything that is not parallel) - how many routes and agencies, and what share of all CA transit agency-routes?
   c. Intersecting routes that are not parallel but actually travel on the SHN (50 ft buffer) for some portion of the route - a subset of (b): how many routes and agencies, and what share of all CA transit agency-routes?

2. How many routes on SHN, breakdown by district
3. How many service hours are scheduled for a typical weekday for (1)?
4. How many of these agencies that have parallel routes on SHN also have GTFS RT?
   Match at the agency level (use `isin` on `itp_id`) rather than route by route, because most agencies that provide GTFS RT do so for the majority of their routes.

In [1]:
import altair as alt
import pandas as pd

import pmac_utils
from D1_pmac_routes import ANALYSIS_DATE
#ANALYSIS_DATE = "2022-02-08"
from utils import IMG_PATH
from shared_utils import geography_utils, styleguide
from shared_utils import calitp_color_palette as cp



In [2]:
# Load the route-level table for ANALYSIS_DATE. Later cells read its
# `_merge`, `itp_id`, `category` and `District` columns.
df = pmac_utils.flag_parallel_intersecting_routes(ANALYSIS_DATE)

In [3]:
# How many rows fall into each merge-indicator bucket
# (both / right_only / left_only).
df["_merge"].value_counts()

both          2949
right_only      32
left_only        0
Name: _merge, dtype: int64

In [4]:
# Which agencies (itp_id) account for the rows flagged `right_only`.
df.loc[df["_merge"] == "right_only", "itp_id"].value_counts()

105    17
294     5
314     3
194     3
127     2
282     1
246     1
Name: itp_id, dtype: int64

In [5]:
# NOTE: a count of unique route_id does not line up exactly with the
# is_parallel / is_on_shn / is_other flags. The original note suggests
# sticking with one definition throughout to stay consistent.
# Only rows flagged `both` in `_merge` are summarized.
summary = pmac_utils.get_summary_table(df.loc[df["_merge"] == "both"])

# Display order: category first, then each metric next to its percent share.
col_order = [
    "category",
    "unique_route", "pct_unique_route",
    "total_service_hours", "pct_total_service_hours",
]

summary[col_order]

Unnamed: 0,category,unique_route,pct_unique_route,total_service_hours,pct_total_service_hours
0,parallel,1667,56.5,59465,54.4
1,other,721,24.4,30399,27.8
2,on_shn,561,19.0,19527,17.9


In [7]:
# Relabel every category as "All" so the aggregation below produces a single
# row of totals across parallel / on_shn / other.
all_categories = summary.assign(category="All")

geography_utils.aggregate_by_geography(
    all_categories,
    group_cols=["category"],
    sum_cols=["unique_route", "total_service_hours"],
)

Unnamed: 0,category,total_service_hours,unique_route
0,All,109391,2949


In [8]:
# Rows with a missing District are never parallel routes (only `other` and
# `on_shn` show up here), so the district breakdown below is restricted to
# parallel routes.
df.loc[df["District"].isna(), "category"].value_counts()

other     733
on_shn    564
Name: category, dtype: int64

In [9]:
def by_district_parallel_breakdown(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize parallel routes by District.

    Keeps only rows whose `category` is "parallel", sums
    `total_service_hours` and `unique_route` per District, adds the
    percent columns produced by `pmac_utils.add_percent`, sorts by
    District, and rounds the two percent columns to one decimal place.
    """
    parallel_routes = df[df.category == "parallel"]

    district_totals = geography_utils.aggregate_by_geography(
        parallel_routes,
        group_cols=["District"],
        sum_cols=["total_service_hours", "unique_route"],
    )
    # Service hours are cast to whole numbers before shares are computed.
    district_totals = district_totals.astype({"total_service_hours": int})

    with_shares = pmac_utils.add_percent(
        district_totals,
        ["total_service_hours", "unique_route"],
    )
    ordered = with_shares.sort_values("District")

    # Round only the percent columns; every other column is left untouched.
    return ordered.round({
        "pct_total_service_hours": 1,
        "pct_unique_route": 1,
    })

In [10]:
# District-level totals and percent shares, parallel routes only.
by_district = by_district_parallel_breakdown(df)
by_district

Unnamed: 0,District,total_service_hours,unique_route,pct_total_service_hours,pct_unique_route
4,1.0,662,53,1.1,3.1
7,2.0,474,52,0.8,3.1
3,3.0,2153,124,3.6,7.4
0,4.0,27451,537,46.1,31.9
9,5.0,1472,98,2.5,5.8
6,6.0,1343,74,2.3,4.4
1,7.0,11023,397,18.5,23.6
5,8.0,2193,68,3.7,4.0
8,9.0,182,18,0.3,1.1
2,10.0,1558,91,2.6,5.4


In [11]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Raleway');
@import url('https://fonts.googleapis.com/css?family=Nunito+Sans');
@import url('https://fonts.googleapis.com/css?family=Bitter');
</style>

In [12]:
def base_bar(df: pd.DataFrame) -> alt.Chart:
    """
    Return a bar chart of `df` with District (nominal) on the x-axis.

    Only the x encoding is set here.
    """
    district_axis = alt.X("District:N", title="District")
    return alt.Chart(df).mark_bar().encode(x=district_axis)


def make_bar(df: pd.DataFrame, y_col: str) -> None:
    """
    Build, save and display a labeled bar chart of `y_col` by District.

    The chart is written to f"{IMG_PATH}pmac_{y_col}.png" and then shown
    with `display`. Nothing is returned.

    Parameters
    ----------
    df : table with a `District` column and the `y_col` column.
    y_col : column to plot on the y-axis. "total_service_hours" gets
        whole-number labels and 1,000 of headroom above the tallest bar;
        any other column gets one-decimal labels and 5 of headroom.
    """
    y_title = y_col.replace('_', ' ').title()

    if y_col == "total_service_hours":
        value_format = ",.0f"
        y_buffer = 1_000
    else:
        value_format = ",.1f"
        y_buffer = 5

    # Headroom above the tallest bar so its value label is not clipped.
    y_max = df[y_col].max() + y_buffer

    bar = base_bar(df).encode(
        y=alt.Y(
            f"{y_col}:Q",
            title=y_title,
            scale=alt.Scale(domain=[0, y_max]),
        ),
        color=alt.Color(
            "District:N",
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
        ),
    )

    # https://stackoverflow.com/questions/54015250/altair-setting-constant-label-color-for-bar-chart
    text = (
        bar
        .mark_text(align="center", baseline="bottom", color="black", dy=-5)
        .encode(
            text=alt.Text(y_col, format=value_format),
            # The text layer inherits the bars' alt.Color encoding, which
            # supersedes the mark-level color, so pin the labels to black
            # in the encoding as well.
            color=alt.value("black"),
        )
    )

    chart = bar + text

    chart = (
        styleguide.preset_chart_config(chart)
        .properties(title={
            "text": f"{y_title} by District",
            "subtitle": "Parallel Routes",
        })
        .configure_axis(grid=False)
    )

    chart.save(f"{IMG_PATH}pmac_{y_col}.png")

    display(chart)

In [13]:
# Average service hours per unique route in each district, rounded to one
# decimal place.
by_district = by_district.assign(
    avg_service_hours=lambda d: (
        d["total_service_hours"] / d["unique_route"]
    ).round(1)
)

In [14]:
# Save and display one bar chart per district-level metric.
metrics = ["total_service_hours", "avg_service_hours"]

for metric in metrics:
    make_bar(by_district, metric)