# PMAC presentation

**MT.PO.01: increase total amount of service on the SHN and reliability of that service by 2024**

1. Routes on SHN
<br>**a. parallel routes (1 mi corridor)** - how many routes, service hours (n, %)
<br>**b. on SHN routes** - travel on the SHN for some portion, but don't meet the threshold above (50 ft buffer); how many routes, service hours (n, %)
<br>**c. other routes** (everything not parallel), how many routes, service hours (n, %)

2. How many routes on SHN, breakdown by district
3. How many service hours are scheduled for a typical weekday for (1)?
4. How many of the agencies that have parallel routes on the SHN also have GTFS RT?
Use `isin` to match on `itp_id` (agency level) rather than on specific routes, because most agencies that provide GTFS RT do so for the majority of their routes.

In [None]:
import altair as alt
import branca
import geopandas as gpd
import intake
import pandas as pd

import pmac_utils
from D1_pmac_routes import ANALYSIS_DATE, add_route_geom
from utils import IMG_PATH
from shared_utils import geography_utils, styleguide, map_utils
from shared_utils import calitp_color_palette as cp

# Intake catalog built from the YAML files under ../high_quality_transit_areas/.
# Its `ca_boundary` entry is read later in prep_data_for_viz().
hq_catalog = intake.open_catalog("../high_quality_transit_areas/*.yml")

In [None]:
# Load the route table for ANALYSIS_DATE. Later cells rely on its `_merge`,
# `itp_id`, `category` and `District` columns.
# NOTE(review): exact schema comes from pmac_utils - confirm there.
df = pmac_utils.flag_parallel_intersecting_routes(ANALYSIS_DATE)

In [None]:
# Count rows per `_merge` value (values such as "both" and "right_only"
# are used in the following cells).
df._merge.value_counts()

In [None]:
# For rows flagged "right_only", count how many belong to each itp_id.
df[df._merge=="right_only"].itp_id.value_counts()

In [None]:
# Open question from the author: should we subset to df[df._merge=="both"]?
# "both" means a corresponding itp_id-route_id match was found; the data has
# been aggregated up to route_id level because shape_id can mismatch more easily.
# Decision: yes. This subset is the data used for the rest of the notebook.
plot_df = df[df._merge=="both"]

In [None]:
# Author's caveat: counting unique route_id is not exactly the same thing as
# counting the is_parallel / is_on_shn / is_other flags. The summary table is
# used as-is here, possibly to stay consistent (the original note is tentative).
summary = pmac_utils.get_summary_table(plot_df)

# Display order: category first, then each metric next to its percentage.
col_order = [
    "category",
    "unique_route",
    "pct_unique_route",
    "total_service_hours",
    "pct_total_service_hours",
]

summary.loc[:, col_order]

In [None]:
# Collapse the summary into a single "All" group: every row's category is
# relabelled "All", then unique_route and total_service_hours are aggregated
# over that one group (presumably summed, given the `sum_cols` argument -
# confirm in geography_utils).
all_hours = geography_utils.aggregate_by_geography(
    summary.assign(category="All"),
    group_cols = ["category"],
    sum_cols = ["unique_route", "total_service_hours"]
)

#all_hours

In [None]:
# Rows with a missing District are never parallel routes,
# so ignore them and do the district breakdown for parallel routes only.
#plot_df[(plot_df.District.isna())].category.value_counts()

In [None]:
def by_district_parallel_breakdown(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarize parallel routes by District.

    Keeps only rows whose `category` is "parallel", aggregates
    `total_service_hours` and `unique_route` per District (the helper
    presumably sums them - confirm in geography_utils), casts the service
    hours to int, adds percentage columns via pmac_utils.add_percent,
    sorts by District, and rounds the percentage columns to 1 decimal.
    """
    metric_cols = ("total_service_hours", "unique_route")
    pct_cols = ["pct_total_service_hours", "pct_unique_route"]

    parallel_only = df[df.category == "parallel"]

    district_totals = geography_utils.aggregate_by_geography(
        parallel_only,
        group_cols=["District"],
        sum_cols=list(metric_cols),
    )
    district_totals = district_totals.astype({"total_service_hours": int})

    with_shares = pmac_utils.add_percent(district_totals, list(metric_cols))
    with_shares = with_shares.sort_values("District")

    # Round both percentage columns in one assignment.
    with_shares[pct_cols] = with_shares[pct_cols].round(1)

    return with_shares

In [None]:
# District-level breakdown of parallel routes, computed from the
# `_merge == "both"` subset (plot_df).
by_district = by_district_parallel_breakdown(plot_df)
#by_district

In [None]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Raleway');
@import url('https://fonts.googleapis.com/css?family=Nunito+Sans');
@import url('https://fonts.googleapis.com/css?family=Bitter');
</style>

In [None]:
def base_bar(df: pd.DataFrame) -> alt.Chart:
    """
    Return a bar-mark chart over `df` with only the x-axis encoded:
    `District` as a nominal field, titled "District".
    Callers add the y and color encodings.
    """
    district_axis = alt.X("District:N", title="District")
    return alt.Chart(df).mark_bar().encode(x=district_axis)


def make_bar(df: pd.DataFrame, y_col: str) -> None:
    """
    Display a bar chart of `y_col` by District for parallel routes.

    Intended for total service hours or average service hours by district.
    The chart is rendered with `display`; nothing is returned.

    Parameters
    ----------
    df : table with a `District` column and a numeric `y_col` column.
    y_col : name of the column to plot. "total_service_hours" is labelled
        with no decimals; any other column is labelled with one decimal.
    """
    y_title = y_col.replace("_", " ").title()

    # y_buffer is headroom added above the tallest bar so the value label
    # drawn over it stays inside the plot area.
    if y_col == "total_service_hours":
        value_format = ",.0f"
        y_buffer = 1_400
    else:
        value_format = ",.1f"
        y_buffer = 10

    y_max = df[y_col].max() + y_buffer

    # The tooltip is encoded on the bar layer so that hovering a bar shows it;
    # the text layer below is derived from `bar` and inherits the tooltip too.
    bar = base_bar(df).encode(
        y=alt.Y(
            f"{y_col}:Q",
            title=y_title,
            scale=alt.Scale(domain=[0, y_max]),
            axis=None,
        ),
        color=alt.Color(
            "District:N",
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
            legend=None,
        ),
        tooltip=[
            "District:N",
            alt.Tooltip(f"{y_col}:Q", title=y_col.replace("_", " ")),
        ],
    )

    # https://stackoverflow.com/questions/54015250/altair-setting-constant-label-color-for-bar-chart
    # The label color must be set as an encoding (alt.value): a color given as
    # a mark property would be superseded by the inherited alt.Color encoding.
    text = (
        bar.mark_text(align="center", baseline="bottom", dy=-5)
        .encode(
            text=alt.Text(f"{y_col}:Q", format=value_format),
            color=alt.value("black"),
        )
    )

    chart = bar + text

    chart = (
        styleguide.preset_chart_config(chart)
        .properties(title={
            "text": f"{y_title} by District",
            "subtitle": "Parallel Routes",
        })
        .configure_axis(grid=False)
        .configure_view(strokeWidth=0)
        .interactive()
    )

    #chart.save(f"{IMG_PATH}pmac_{y_col}.png")

    display(chart)

In [None]:
# Average service hours per route in each district:
# total_service_hours / unique_route, rounded to 1 decimal.
by_district = by_district.assign(
    avg_service_hours=lambda d: (
        d.total_service_hours.divide(d.unique_route).round(1)
    )
)

# One chart per metric.
metrics = ["total_service_hours", "avg_service_hours"]

for m in metrics:
    make_bar(by_district, m)

## Map of Parallel / On SHN / Other Routes

In [None]:
def prep_data_for_viz() -> "gpd.GeoDataFrame":
    """
    Build the GeoDataFrame of categorized routes used for the folium map.

    Steps:
    1. Load categorized routes with line geometry for ANALYSIS_DATE.
    2. Add `category2`, a numeric code for `category`
       (parallel=0, on_shn=1, other=2); any other category maps to NaN.
    3. Keep only routes that fall entirely within the CA boundary.
    4. Simplify and buffer the lines so they are easier to see, then
       project to WGS84 for folium.
    """
    # This categorized_routes_with_geom pulls from merge_routelines_trips
    # and adds route's line geom
    categorized_routes_with_geom = add_route_geom(ANALYSIS_DATE)

    category_labeling = {
        "parallel": 0,
        "on_shn": 1,
        "other": 2,
    }

    gdf = categorized_routes_with_geom.assign(
        category2=categorized_routes_with_geom.category.map(category_labeling),
    )

    # line must fall within CA
    # Reproject the boundary with the routes' CRS object itself rather than
    # rebuilding an "EPSG: <code>" string: to_epsg() returns None when the CRS
    # has no matching EPSG code, which would produce an invalid CRS string.
    ca = hq_catalog.ca_boundary.read().to_crs(gdf.crs)

    gdf = gpd.sjoin(
        gdf,
        ca,
        how="inner",
        predicate="within",
    ).drop(columns=["index_right"])

    # Buffer to style the line, project to WGS84 for folium.
    # NOTE(review): tolerance=100 and buffer(300) are in the units of the
    # routes' CRS (presumably feet or meters) - confirm against add_route_geom.
    gdf = gdf.assign(
        geometry=gdf.geometry.simplify(tolerance=100).buffer(300)
    ).to_crs(geography_utils.WGS84)

    return gdf


# Routes with category codes and buffered WGS84 geometry, ready for the map.
gdf = prep_data_for_viz()

In [None]:
# Range of the numeric category codes; used as the bounds of the color scale.
MIN_VALUE, MAX_VALUE = gdf.category2.min(), gdf.category2.max()

# One color per category code, taken from the first three bright palette colors.
category_colors = [cp.CALITP_CATEGORY_BRIGHT_COLORS[i] for i in range(3)]

# Author's notes on getting all 3 colors to display: the thresholds needed
# experimenting with, and `index` should NOT be set to the sorted unique
# category values (previously tried as CAT_VALUES). Here `index` is left unset
# and only vmin / vmax are supplied.
# NOTE(review): the original note also mentioned adding a value beyond
# max_value so that max_value is displayed; the code does not do that - confirm
# all three colors render as expected.
colorscale = branca.colormap.StepColormap(
    colors=category_colors,
    vmin=MIN_VALUE,
    vmax=MAX_VALUE,
)

# Column name -> label shown in the map popup and tooltip.
POPUP_DICT = {
    "itp_id": "ITP ID",
    "category": "Category",
    "route_id": "Route ID",
    "total_service_hours": "Daily Service Hours",
    "route_length_mi": "Route Length (mi)",
    "District": "District",
}

m = map_utils.make_folium_choropleth_map(
    gdf,
    plot_col="category2",
    popup_dict=POPUP_DICT,
    tooltip_dict=POPUP_DICT,
    fig_width=500,
    fig_height=600,
    colorscale=colorscale,
    title="Parallel / On SHN / Other Transit Routes",
)

m