# Interactive Stats

Use a dropdown to check how many routes are parallel to or intersect a highway, summarized by highway or by operator.

In [None]:
import branca
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import geography_utils, map_utils
from shared_utils import calitp_color_palette as cp

# Open the intake catalog from the YAML file(s) matching "*.yml".
catalog = intake.open_catalog("*.yml")

# Relative folders for image output and data input.
IMG_PATH = "./img/"
DATA_PATH = "./data/"

In [None]:
# Load the parallel/intersecting table and the highways table from DATA_PATH.
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
highways= gpd.read_parquet(f"{DATA_PATH}highways.parquet")

In [None]:
# Preview the first two rows.
gdf.head(2)
# TODO: add the operator name from agencies.yml so the map can display it?

## Aggregate to highway or operator

Display these stats alongside an interactive map.

Ref: [aggregation-examples.ipynb](./aggregation-examples.ipynb) for how to get to the right unit of analysis.

### Viability
* A route is only viable if its end-to-end trip takes no more than 2x the car trip time.
* We can identify all the parallel routes, but only an even smaller subset of them is viable.

In [None]:
# First, aggregate once to get rid of edge cases where RouteType differs
# 110 in LA County is both Interstate and State Highway
# Make sure other highway characteristics are correctly grabbed (max or sum)
def extra_highway_aggregation(gdf):
    """Collapse duplicate highway rows so each group key appears once.

    Rows are grouped by Route, County, District, route_id and total_routes,
    which merges records that differ only in columns outside that key
    (for example, the same highway listed under more than one RouteType).

    Parameters
    ----------
    gdf : pandas.DataFrame or geopandas.GeoDataFrame
        Must contain the grouping columns plus NB, SB, EB, WB, route_length,
        pct_route, pct_highway, highway_length and parallel.

    Returns
    -------
    pandas.DataFrame
        One row per group with a fresh integer index. The direction flags
        and ``parallel`` hold the group maximum; the lengths and percentages
        hold the group sum, with ``pct_highway`` capped at 1. Columns that
        are neither grouped on nor aggregated (e.g. geometry) are dropped.
    """
    group_cols = ["Route", "County", "District", "route_id", "total_routes"]
    agg_rules = {
        "NB": "max",
        "SB": "max",
        "EB": "max",
        "WB": "max",
        "route_length": "sum",
        "pct_route": "sum",
        "pct_highway": "sum",
        "highway_length": "sum",
        "parallel": "max",
    }

    gdf2 = gdf.groupby(group_cols).agg(agg_rules).reset_index()

    # Summing pct_highway over merged rows can push it above 1, so cap it
    # back at 1. clip() is vectorized (no row-wise apply) and also works
    # when the aggregated frame has no rows.
    gdf2 = gdf2.assign(pct_highway=gdf2.pct_highway.clip(upper=1))

    return gdf2

def extra_operator_aggregation(gdf):
    """Reduce the table to one row per (itp_id, County, route_id).

    The ``parallel`` value kept for each route is the maximum over its rows,
    so a route counts as parallel if it is parallel to any highway Route.
    Returns a frame with the three key columns plus ``parallel`` and a fresh
    integer index.
    """
    route_keys = ["itp_id", "County", "route_id"]

    per_route = gdf.groupby(route_keys).agg({"parallel": "max"})

    return per_route.reset_index()

In [None]:
def aggregate(df, by="operator"):
    """Summarize how many unique routes are parallel, by operator or highway.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Table accepted by ``extra_operator_aggregation`` (when
        ``by="operator"``) or ``extra_highway_aggregation`` (when
        ``by="highway"``).
    by : {"operator", "highway"}, default "operator"
        Unit of analysis. "operator" groups on itp_id and County; "highway"
        groups on Route, County, District and the four direction flags.

    Returns
    -------
    DataFrame
        The grouping columns plus ``num_parallel``, ``unique_route_id`` and
        ``pct_parallel`` (num_parallel / unique_route_id, rounded to 3
        places), sorted by the grouping columns with a fresh index.

    Raises
    ------
    ValueError
        If ``by`` is neither "operator" nor "highway".
    """
    if by == "operator":
        group_cols = ["itp_id", "County"]
        df = extra_operator_aggregation(df)
    elif by == "highway":
        group_cols = ["Route", "County", "District",
                      "NB", "SB", "EB", "WB"]
        df = extra_highway_aggregation(df)
    else:
        # Fail early with a clear message; otherwise group_cols would be
        # unbound and the error would surface further down.
        raise ValueError(
            f"by must be 'operator' or 'highway', got {by!r}"
        )

    # NOTE(review): presumably this returns the summed `parallel` and the
    # distinct count of `route_id` per group -- confirm in shared_utils.
    df2 = geography_utils.aggregate_by_geography(
        df,
        group_cols = group_cols,
        sum_cols = ["parallel"],
        nunique_cols = ["route_id"]
    )

    # Calculate % parallel, then give the count columns descriptive names
    df2 = (df2.assign(
            pct_parallel = df2.parallel.divide(df2.route_id).round(3)
        ).rename(columns = {
            "route_id": "unique_route_id",
            "parallel": "num_parallel",
        }).sort_values(group_cols).reset_index(drop=True)
    )

    return df2

In [None]:
# Build both summary tables from the full dataset.
operator_stats = aggregate(gdf, by="operator")
hwy_stats = aggregate(gdf, by="highway")

In [None]:
# Spot-check the summaries for ITP IDs 4 and 182 and for Routes 110 and 580.
for i in [4, 182]:
    display(operator_stats[operator_stats.itp_id==i])
for r in [110, 580]:
    display(hwy_stats[hwy_stats.Route==r])

In [None]:
import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display

In [None]:
def display_operator_stats(df, select_col = "itp_id", operator_name = 182):
    """Show summary tables and a map for one operator.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Full table; must contain ``select_col``, Route and County plus the
        columns ``aggregate`` and ``make_transit_map`` need.
    select_col : str, default "itp_id"
        Column used to pick out the operator's rows.
    operator_name : default 182
        Value of ``select_col`` to display.

    Displays the top rows (by pct_parallel) of the operator summary and of
    the summary for the highways the operator touches, then draws the map.
    Returns None.
    """
    # Subset for the operator
    operator_df = df[df[select_col] == operator_name]

    # Nothing to summarize or map; make_transit_map would otherwise fail
    # when it reads the first row of an empty frame.
    if operator_df.empty:
        display(Markdown(f"### No rows found for ITP ID: {operator_name}"))
        return

    operator_stats = (aggregate(operator_df, by="operator")
                      .sort_values("pct_parallel", ascending=False)
                      .reset_index(drop=True)
                     )

    # Grab the highways that it intersects with. Match on (Route, County)
    # pairs: testing the two columns independently would also pull in a
    # Route from a county where this operator only touches other Routes.
    route_county_pairs = operator_df[["Route", "County"]].drop_duplicates()
    hwy_df = df.merge(route_county_pairs, on=["Route", "County"], how="inner")

    hwy_stats = (aggregate(hwy_df, by="highway")
                 .sort_values("pct_parallel", ascending=False)
                 .reset_index(drop=True)
                )

    display(Markdown(f"### Summary Stats for ITP ID: {operator_name}"))
    display(operator_stats.head())

    display(Markdown(f"### Summary Stats for Highways that ITP ID {operator_name} Intersects"))
    display(hwy_stats.head())

    make_transit_map(operator_df, hwy_df)

In [None]:
def make_transit_map(operator_df, hwy_df):
    """Draw a two-layer folium map of highways and an operator's routes.

    Parameters
    ----------
    operator_df : geopandas.GeoDataFrame
        The operator's rows; supplies the "Transit Routes" layer, the map
        center and the ITP ID shown in the title.
    hwy_df : geopandas.GeoDataFrame
        Rows used as-is for the "Highways" layer.

    The figure is displayed inline; nothing is returned.
    """
    # Keep only the columns the map needs
    plot_columns = [
        "itp_id", "route_id",
        "Route", "County", "District", "RouteType",
        "pct_route", "pct_highway", "parallel",
        "geometry",
    ]
    transit_layer = operator_df[plot_columns].reset_index(drop=True)

    # Buffer, then simplify to shrink the geometries: the folium map
    # otherwise produces an HTML file too large to check in.
    # NOTE(review): 200 / 100 are in the CRS units of operator_df -- confirm.
    slim_geometry = transit_layer.geometry.buffer(200).simplify(tolerance=100)
    transit_layer = transit_layer.assign(geometry=slim_geometry)

    # Popup / tooltip labels for each layer
    highway_labels = {
        "Route": "Highway Route",
        "RouteType": "Route Type",
        "County": "County",
    }
    transit_labels = {
        "itp_id": "Operator ITP ID",
        "route_id": "Route ID",
        "pct_route": "% overlapping route",
        "pct_highway": "% overlapping highway",
    }

    # Color scales for each layer
    highway_colors = branca.colormap.StepColormap(colors=["black", "gray"])
    transit_colors = branca.colormap.StepColormap(
        colors=[
            cp.CALITP_CATEGORY_BRIGHT_COLORS[0],  # blue
            cp.CALITP_CATEGORY_BRIGHT_COLORS[1],  # orange
        ],
    )

    # Center on the centroid of the first transit row rather than a county
    # centroid, which would be too zoomed out from the transit routes.
    wgs84_centroids = transit_layer.to_crs(geography_utils.WGS84).geometry.centroid
    first_centroid = wgs84_centroids.iloc[0]

    layers = {
        "Highways": {
            "df": hwy_df,
            "plot_col": "Route",
            "popup_dict": highway_labels,
            "tooltip_dict": highway_labels,
            "colorscale": highway_colors,
        },
        "Transit Routes": {
            "df": transit_layer,
            "plot_col": "parallel",
            "popup_dict": transit_labels,
            "tooltip_dict": transit_labels,
            "colorscale": transit_colors,
        },
    }

    legend = {
        "legend_url": (
            "https://raw.githubusercontent.com/cal-itp/data-analyses/"
            "main/bus_service_increase/"
            "img/legend_intersecting_parallel.png"
        ),
        "legend_bottom": 85,
        "legend_left": 5,
    }

    # folium expects [lat, lon]
    map_center = [round(first_centroid.y, 2), round(first_centroid.x, 2)]
    map_title = f"Parallel vs Intersecting Lines for {transit_layer.itp_id.iloc[0]}"

    fig = map_utils.make_folium_multiple_layers_map(
        layers,
        fig_width=700,
        fig_height=700,
        zoom=11,
        centroid=map_center,
        title=map_title,
        legend_dict=legend,
    )

    # Saving the map as HTML under IMG_PATH is currently disabled.
    display(fig)

In [None]:
def display_highway_stats(df, select_col = "Route", hwy_name = 5):
    """Show summary tables for one highway and the operators on it.

    Filters ``df`` to rows where ``select_col`` equals ``hwy_name``,
    summarizes them by highway, then summarizes by operator every row
    belonging to an itp_id that appears on that highway. The top rows of
    each table (by pct_parallel, descending) are displayed under a markdown
    heading. Returns None.
    """
    def ranked(stats):
        # Highest share of parallel routes first, with a clean index
        return (stats
                .sort_values("pct_parallel", ascending=False)
                .reset_index(drop=True))

    # Rows on the selected highway
    on_highway = df[df[select_col] == hwy_name]
    highway_summary = ranked(aggregate(on_highway, by="highway"))

    # All rows for operators that appear on this highway
    is_touching_operator = df.itp_id.isin(on_highway.itp_id)
    touching_operators = df[is_touching_operator]
    operator_summary = ranked(aggregate(touching_operators, by="operator"))

    display(Markdown(f"### Summary Stats for Highway Route: {hwy_name}"))
    display(highway_summary.head())

    display(Markdown(f"### Summary Stats for Operators Route {hwy_name} Intersects"))
    display(operator_summary.head())
    

In [None]:
# Example: summary tables for Route 5.
display_highway_stats(gdf, "Route", 5)

In [None]:
# Example: summary tables and map for ITP ID 182.
display_operator_stats(gdf, "itp_id", 182)

In [None]:
## Think about how to wrap above interactive widget into a function
# Be able to select agency, county, district, etc
def interactive_widget(df, select_col):
    """Show a dropdown over ``df[select_col]`` that drives the stats display.

    The dropdown lists the sorted unique values of ``select_col``. Each time
    the selection changes, and once on creation, the output area is cleared
    and redrawn: "Route" uses ``display_highway_stats`` and "itp_id" uses
    ``display_operator_stats``. Any other ``select_col`` draws nothing.
    """
    # Which display function handles which column
    renderers = {
        "Route": display_highway_stats,
        "itp_id": display_operator_stats,
    }
    render = renderers.get(select_col)

    picker = widgets.Dropdown(
        description=f"{select_col.title()}",
        options=df[select_col].sort_values().unique().tolist(),
    )
    panel = widgets.Output()

    display(picker)
    display(panel)

    def refresh(*_change):
        # Wipe the previous selection's output before drawing the new one
        panel.clear_output()
        with panel:
            if render is not None:
                render(df, select_col, picker.value)

    picker.observe(refresh, names="value")

    # Draw once for the initial dropdown value
    refresh()

In [None]:
# Dropdown keyed on operator (itp_id).
interactive_widget(gdf, "itp_id")

In [None]:
# Dropdown keyed on highway Route.
interactive_widget(gdf, "Route")