# Interactive Stats

Provide a dropdown to check how many routes are parallel or intersecting, summarized by highway or by operator.

In [None]:
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp

# Open the intake catalog(s) described by the YAML files matching "*.yml".
# NOTE(review): the glob is relative, so it presumably resolves against the
# notebook's working directory -- confirm.
catalog = intake.open_catalog("*.yml")

# Relative folders used by this notebook. DATA_PATH is prefixed onto the
# parquet file name when the input table is read.
IMG_PATH = "./img/"
DATA_PATH = "./data/"

In [None]:
# Load the parallel-or-intersecting routes table from the local data folder.
# The aggregation cells in this notebook treat each row as one
# itp_id-county-route_id observation.
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")

## Aggregate to highway or operator

Display these stats along with an interactive map.

See [aggregation-examples.ipynb](./aggregation-examples.ipynb) for how to get to the right unit of analysis.

### Viability
* A route is only viable if its end-to-end trip takes no more than 2x the car trip time.
* We can identify all the parallel routes, but only an even smaller subset of them is viable.

In [None]:
# Each row of gdf is one itp_id-county-route_id observation. Roll it up to
# itp_id-county, because an operator can run service across county boundaries.
operator_group_cols = ["itp_id", "County"]

# Per group, the helper is asked to sum `parallel` and to take the number of
# unique `route_id` values (as its `sum_cols` / `nunique_cols` arguments name).
operator_stats = geography_utils.aggregate_by_geography(
    gdf,
    operator_group_cols,
    sum_cols=["parallel"],
    nunique_cols=["route_id"],
)

In [None]:
hwy_group_cols = ["Route", "County", "District", "NB", "SB", "EB", "WB"]

# First, aggregate once to one row per highway (Route-County-District) and
# transit route, which gets rid of edge cases where RouteType differs:
# 110 in LA County is both Interstate and State Highway.
# The other highway characteristics are combined explicitly: direction flags
# and `parallel` take the max, lengths and percentages are summed.
gdf2 = (
    gdf.groupby(["Route", "County", "District", "route_id", "total_routes"])
    .agg(
        {
            "NB": "max",
            "SB": "max",
            "EB": "max",
            "WB": "max",
            "route_length": "sum",
            "pct_route": "sum",
            "pct_highway": "sum",
            "highway_length": "sum",
            "parallel": "max",
        }
    )
    .reset_index()
)

# Because pct_highway was summed, values can be > 1; cap it back at 1.
# Series.clip is vectorized (no Python-level call per row), keeps NaN as NaN,
# and also works when the frame is empty, unlike a row-wise apply.
# NOTE(review): pct_route is summed the same way but is not capped -- confirm
# that is intended.
gdf2 = gdf2.assign(pct_highway=gdf2["pct_highway"].clip(upper=1))



In [None]:
# Roll the one-row-per-highway-and-route table up to the highway level:
# per group, sum `parallel` and take the number of unique `route_id` values
# (as the helper's `sum_cols` / `nunique_cols` arguments name).
hwy_stats = geography_utils.aggregate_by_geography(
    gdf2,
    group_cols=hwy_group_cols,
    sum_cols=["parallel"],
    nunique_cols=["route_id"],
)

# Spot-check Route 110, the edge case that motivated the first aggregation.
# Kept as the last expression so the notebook displays it.
hwy_stats.loc[hwy_stats["Route"] == 110]

In [None]:
def aggregate(df, aggregate_by="operator"):
    """Aggregate route-level rows to the operator or highway level.

    Args:
        df: Table to aggregate. It must contain the grouping columns for the
            chosen level plus the `parallel` and `route_id` columns.
        aggregate_by: "operator" groups by itp_id and County; "highway"
            groups by Route, County, District and the NB/SB/EB/WB columns.

    Returns:
        The result of `geography_utils.aggregate_by_geography` for the chosen
        grouping, with `parallel` passed as a sum column and `route_id` as a
        nunique column.

    Raises:
        ValueError: If `aggregate_by` is neither "operator" nor "highway".
    """
    if aggregate_by == "operator":
        group_cols = ["itp_id", "County"]
    elif aggregate_by == "highway":
        group_cols = ["Route", "County", "District", "NB", "SB", "EB", "WB"]
    else:
        # Fail early with a clear message instead of hitting an unbound
        # `group_cols` further down.
        raise ValueError(
            "aggregate_by must be 'operator' or 'highway', "
            f"got {aggregate_by!r}"
        )

    # Hand the aggregated table back to the caller; without the return the
    # function would always evaluate to None.
    return geography_utils.aggregate_by_geography(
        df,
        group_cols=group_cols,
        sum_cols=["parallel"],
        nunique_cols=["route_id"],
    )

In [None]:
import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display

In [None]:
def summary_stats(df, select_col, place):
    """Placeholder for per-selection summary statistics (not implemented).

    NOTE(review): the original definition had no body, which is a syntax
    error. Presumably this is meant to summarize the rows of `df` where
    `select_col` equals `place` -- confirm the intended behavior before
    filling it in.

    Raises:
        NotImplementedError: Always, until the body is written.
    """
    raise NotImplementedError("summary_stats has not been implemented yet")


In [None]:
## Think about how to wrap above interactive widget into a function
# Be able to select agency, county, district, etc
def interactive_widget(df, select_col):
    """Show a dropdown of `select_col` values and redraw output on change.

    The dropdown lists the unique values of `df[select_col]` in sorted order.
    The output area below it is cleared and refilled by calling
    `summarize_and_plot(df, select_col, <selected value>)`, once right away
    for the initial selection and again whenever the selection changes.

    Args:
        df: Table that supplies the dropdown options and is passed through
            to `summarize_and_plot`.
        select_col: Name of the column to pick values from; its title-cased
            form is used as the dropdown label.
    """
    choices = df[select_col].sort_values().unique().tolist()
    picker = widgets.Dropdown(
        description=select_col.title(),
        options=choices,
    )
    panel = widgets.Output()

    def _refresh(*_):
        # Wipe the previous selection's output before drawing the new one.
        panel.clear_output()
        with panel:
            summarize_and_plot(df, select_col, picker.value)

    display(picker)
    display(panel)

    # Redraw on every change of the selected value, and draw once up front so
    # the panel is not empty before the first interaction.
    picker.observe(_refresh, names="value")
    _refresh()