# PMAC presentation

**MT.PO.01: increase total amount of service on the SHN and reliability of that service by 2024**

#### Selected Dates
* Q1: 2/8/22
* Q2: 5/4/22

1. Routes on the SHN
   - (a) Parallel routes (within a 1 mi corridor): how many routes, how many agencies, and what share of all CA transit agency-routes?
   - (b) Intersecting routes (everything that is not parallel): how many routes, how many agencies, and what share of all CA transit agency-routes?
   - (c) Intersecting routes that are not parallel but actually travel on the SHN (50 ft buffer) for some portion of the route. This is a subset of (b): how many routes, how many agencies, and what share of all CA agency-routes?

2. How many routes are on the SHN, broken down by district?
3. How many service hours are scheduled on a typical weekday for the routes in (1)?
4. How many of the agencies that have parallel routes on the SHN also have GTFS RT?
   Use `isin` on `itp_id` (agency-level, not route-specific), because most agencies that provide GTFS RT do so for the majority of their routes.

In [1]:
import altair as alt
import geopandas as gpd
import pandas as pd

import utils
import pmac_utils
from D1_pmac_routes import TRAFFIC_OPS_GCS, ANALYSIS_DATE
from shared_utils import geography_utils, gtfs_utils, styleguide
from shared_utils import calitp_color_palette as cp 



In [2]:
# Trips file that carries service hours, for the analysis date
# (read from utils.GCS_FILE_PATH).
trips_with_hrs = pd.read_parquet(
    path=f"{utils.GCS_FILE_PATH}trips_with_hrs_{ANALYSIS_DATE}.parquet"
)

# Trips file for the same analysis date (read from TRAFFIC_OPS_GCS).
trips = pd.read_parquet(
    path=f"{TRAFFIC_OPS_GCS}trips_{ANALYSIS_DATE}.parquet"
)

In [3]:
# Build the analysis frame from the two trip tables.
# NOTE(review): the helper lives in pmac_utils and is not shown in this notebook;
# the cells below rely on its result having `_merge`, `category`, `District`,
# `itp_id`, `total_service_hours` and `unique_route` columns.
df = pmac_utils.flag_parallel_intersecting_routes(
    trips, trips_with_hrs, ANALYSIS_DATE)

In [4]:
# Sanity check on the merge: count rows by their `_merge` status.
df["_merge"].value_counts()

both          2949
right_only     256
left_only        0
Name: _merge, dtype: int64

In [5]:
# Which operators (itp_id) account for the rows that only appear on the right side?
df.loc[df["_merge"] == "right_only", "itp_id"].value_counts()

4      67
294    43
314    22
194    21
282    17
105    17
368    11
235    11
279    10
310     9
280     6
110     5
218     4
246     3
127     3
106     3
264     2
356     1
10      1
Name: itp_id, dtype: int64

In [6]:
# Initial summary table: total service hours and unique routes per category,
# restricted to rows that matched on both sides of the merge.
# Percent columns are added in a later cell.
summary = geography_utils.aggregate_by_geography(
    df.loc[df["_merge"] == "both"],
    group_cols=["category"],
    sum_cols=["total_service_hours", "unique_route"],
)

In [7]:
# Display the per-category totals before percents are added.
summary

Unnamed: 0,category,total_service_hours,unique_route
0,parallel,51001.624722,1667
1,other,40883.458889,1050
2,on_shn,6625.646667,232


In [8]:
# Add percents
def add_percent(df, col_list):
    """Return a copy of `df` with a percent-of-total column for each listed column.

    For every column `c` in `col_list`:
      * `pct_{c}` is added, holding each row's share of the column total,
        expressed in percent and rounded to one decimal place;
      * `c` itself is rounded to the nearest whole number.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the numeric columns named in `col_list`.
    col_list : list of str
        Names of the numeric columns to convert.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched so the calling
        cell can be re-run safely.
    """
    out = df.copy()

    for c in col_list:
        # Scale to percent *before* rounding. Rounding the fraction first and
        # then multiplying by 100 reintroduces float noise (e.g. 14.299999...).
        out[f"pct_{c}"] = (out[c] / out[c].sum() * 100).round(1)
        out[c] = out[c].round(0)

    return out

# NOTE: unique route_id is not exactly the same as is_parallel,
# is_on_shn, is_other.
# Maybe stick with that to be consistent.

In [9]:
# Attach percent-of-total columns for both metrics.
summary = add_percent(
    summary,
    ["total_service_hours", "unique_route"],
)

# Presentation order: route counts first, then service hours.
col_order = [
    'category',
    'unique_route',
    'pct_unique_route',
    'total_service_hours',
    'pct_total_service_hours',
]

summary[col_order]

Unnamed: 0,category,unique_route,pct_unique_route,total_service_hours,pct_total_service_hours
0,parallel,1667,56.5,51002.0,51.8
1,other,1050,35.6,40883.0,41.5
2,on_shn,232,7.9,6626.0,6.7


In [10]:
# Grand total across all categories: relabel every row as "All" and aggregate.
# `summary` was already rounded by add_percent, so hours here are the sum of
# the rounded per-category values.
geography_utils.aggregate_by_geography(
    summary.assign(category="All"),
    group_cols=["category"],
    sum_cols=["unique_route", "total_service_hours"],
)

Unnamed: 0,category,total_service_hours,unique_route
0,All,98511.0,2949


In [11]:
# Rows with no District are never parallel routes, so the district
# breakdown below is limited to the parallel category.
df.loc[df["District"].isna(), "category"].value_counts()

other     1157
on_shn     239
Name: category, dtype: int64

In [12]:
# Service hours and unique routes for parallel routes, one row per District.
by_district = geography_utils.aggregate_by_geography(
    df.loc[df["category"] == "parallel"],
    group_cols=["District"],
    sum_cols=["total_service_hours", "unique_route"],
)

# Add percent-of-total columns, then order the rows by District.
by_district = add_percent(
    by_district, ["total_service_hours", "unique_route"]
).sort_values("District")

# Re-round the percent columns to one decimal place.
by_district = by_district.assign(**{
    pct_col: by_district[pct_col].round(1)
    for pct_col in ["pct_total_service_hours", "pct_unique_route"]
})

by_district

Unnamed: 0,District,total_service_hours,unique_route,pct_total_service_hours,pct_unique_route
5,1.0,663.0,53,1.3,2.9
8,2.0,475.0,52,0.9,2.9
3,3.0,2153.0,124,4.2,6.9
0,4.0,21241.0,653,41.4,36.1
9,5.0,1401.0,77,2.7,4.3
7,6.0,1343.0,74,2.6,4.1
1,7.0,11149.0,419,21.7,23.2
6,8.0,2194.0,68,4.3,3.8
4,9.0,187.0,19,0.4,1.1
2,10.0,1571.0,92,3.1,5.1


In [13]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Raleway');
@import url('https://fonts.googleapis.com/css?family=Nunito+Sans');
@import url('https://fonts.googleapis.com/css?family=Bitter');
</style>

In [14]:
def base_bar(df):
    """Return an Altair bar chart of `df` with District on the x-axis.

    Only the x encoding (District, treated as nominal) is set here; the
    caller is expected to add the remaining encodings.
    """
    district_axis = alt.X("District:N", title="District")
    return alt.Chart(df).mark_bar().encode(x=district_axis)

def make_bar(df, y_col):
    """Draw, save and display a labelled bar chart of `y_col` by District.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a `District` column and the column named by `y_col`.
    y_col : str
        Column plotted on the y-axis. "total_service_hours" gets whole-number
        labels and 1,000 units of headroom above the tallest bar; any other
        column gets one-decimal labels and 5 units of headroom.

    Side effects: writes `pmac_{y_col}.png` under `utils.IMG_PATH` and shows
    the chart via `display` (expected to be provided by the notebook).
    Returns None.
    """
    pretty_name = y_col.replace("_", " ").title()

    # Label format and axis headroom depend on the scale of the metric.
    is_total_hours = y_col == "total_service_hours"
    label_format = ",.0f" if is_total_hours else ",.1f"
    headroom = 1_000 if is_total_hours else 5
    y_upper = df[y_col].max() + headroom

    y_axis = alt.Y(
        f"{y_col}:Q",
        title=pretty_name,
        scale=alt.Scale(domain=[0, y_upper]),
    )
    district_color = alt.Color(
        "District:N",
        scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
    )
    bars = base_bar(df).encode(y=y_axis, color=district_color)

    # Value labels sit just above each bar. The colour is pinned with
    # alt.value because the mark-level colour would otherwise be overridden
    # by the District colour encoding inherited from the bars.
    # https://stackoverflow.com/questions/54015250/altair-setting-constant-label-color-for-bar-chart
    labels = bars.mark_text(
        align="center", baseline="bottom", color="black", dy=-5
    ).encode(
        text=alt.Text(y_col, format=label_format),
        color=alt.value("black"),
    )

    title_spec = {
        "text": f"{pretty_name} by District",
        "subtitle": "Parallel Routes",
    }
    layered = bars + labels
    chart = (
        styleguide.preset_chart_config(layered)
        .properties(title=title_spec)
        .configure_axis(grid=False)
    )

    chart.save(f"{utils.IMG_PATH}pmac_{y_col}.png")

    display(chart)

In [15]:
# Average service hours per unique route in each District, to one decimal.
by_district = by_district.assign(
    avg_service_hours=lambda d: (
        d["total_service_hours"] / d["unique_route"]
    ).round(1)
)

In [16]:
# One chart per metric; make_bar saves each as a PNG and displays it.
metrics = ["total_service_hours", "avg_service_hours"]

for metric in metrics:
    make_bar(by_district, metric)