# Interactive Stats

Be able to do dropdown to check how many routes are parallel or intersecting by highway or operator.

In [1]:
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp

catalog = intake.open_catalog("*.yml")

IMG_PATH = "./img/"
DATA_PATH = "./data/"



In [2]:
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")

## Aggregate to highway or operator

Display these stats along with interactive map.

Ref: [aggregation-examples.ipynb](./aggregation-examples.ipynb) for how to get to right unit of analysis.

### Viability
* for a route, it's only viable if the end to end trip is no more than 2x car trip time
* can have all the parallel routes, but even smaller subset is viable

In [4]:
# First, aggregate once to get rid of edge cases where RouteType differs
# 110 in LA County is both Interstate and State Highway
# Make sure other highway characteristics are correctly grabbed (max or sum)
def extra_highway_aggregation(gdf):
    gdf2 = (gdf.groupby(["Route", "County", "District",
                         "route_id", "total_routes"])
            .agg({
                "NB": "max",
                "SB": "max", 
                "EB": "max",
                "WB": "max",
                "route_length": "sum",
                "pct_route": "sum",
                "pct_highway": "sum",
                "highway_length": "sum",
                "parallel": "max",
            }).reset_index()
    )

    # Now we took sum for pct_highway, values can be > 1, set it back to 1 max again.
    gdf2 = gdf2.assign(
        pct_highway = gdf2.apply(lambda x: 1 if x.pct_highway > 1 
                                 else x.pct_highway, axis=1)
    )

    return gdf2

In [30]:
def aggregate(df, by="operator"):
    if by=="operator":
        group_cols = ["itp_id", "County"]
    elif by=="highway":
        group_cols = ["Route", "County", "District", 
                      "NB", "SB", "EB", "WB"]
        df = extra_highway_aggregation(df)
    
    df2 = (geography_utils.aggregate_by_geography(
            df,
            group_cols = group_cols,
            sum_cols = ["parallel"],
            nunique_cols = ["route_id"]
        )
    )
    
    # Calculate % parallel
    df2 = (df2.assign(
            pct_parallel = df2.parallel.divide(df2.route_id).round(3)
        ).rename(columns = {
            "route_id": "unique_route_id",
            "parallel": "num_parallel",
        }).sort_values(group_cols).reset_index(drop=True)
    )
    
    return df2

In [31]:
operator_stats = aggregate(gdf, by="operator")
hwy_stats = aggregate(gdf, by="highway")

In [60]:
operator_stats.pct_parallel.describe()

count    462.000000
mean       0.345617
std        0.487885
min        0.000000
25%        0.000000
50%        0.000000
75%        0.643000
max        3.500000
Name: pct_parallel, dtype: float64

In [61]:
hwy_stats.pct_parallel.describe()

count    506.000000
mean       0.121146
std        0.156735
min        0.000000
25%        0.000000
50%        0.059500
75%        0.198750
max        1.000000
Name: pct_parallel, dtype: float64

In [32]:
for i in [4, 182]:
    display(operator_stats[operator_stats.itp_id==i])
for r in [110, 580]:
    display(hwy_stats[hwy_stats.Route==r])

Unnamed: 0,itp_id,County,num_parallel,unique_route_id,pct_parallel
0,4,ALA,109,119,0.916
1,4,CC,16,30,0.533
2,4,LAS,1,2,0.5
3,4,MOD,1,4,0.25
4,4,SCL,0,4,0.0
5,4,SF,0,19,0.0
6,4,SHA,1,1,1.0
7,4,SIE,0,1,0.0
8,4,SIS,0,1,0.0
9,4,SM,0,1,0.0


Unnamed: 0,itp_id,County,num_parallel,unique_route_id,pct_parallel
222,182,LA,49,120,0.408
223,182,ORA,0,4,0.0
224,182,VEN,0,1,0.0


Unnamed: 0,Route,County,District,NB,SB,EB,WB,num_parallel,unique_route_id,pct_parallel
284,110,LA,7,1,1,0,0,38,164,0.232


Unnamed: 0,Route,County,District,NB,SB,EB,WB,num_parallel,unique_route_id,pct_parallel
489,580,ALA,4,0,0,1,1,53,259,0.205
490,580,CC,4,0,0,1,1,10,88,0.114
491,580,MRN,4,0,0,1,1,4,70,0.057
492,580,SJ,10,0,0,1,1,0,8,0.0


In [33]:
import ipywidgets as widgets
from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display

In [53]:
def display_operator_stats(df, select_col = "itp_id", operator_name = 182):
    # Subset for the operator
    operator_df = df[df[select_col]==operator_name]
    operator_stats = (aggregate(operator_df, by="operator")
                      .sort_values("pct_parallel", ascending=False)
                      .reset_index(drop=True)
                     )
    
    # Grab the highways that it intersects with
    hwy_df = df[(df.Route.isin(operator_df.Route) & 
                 (df.County.isin(operator_df.County))
                )]
    hwy_stats = (aggregate(hwy_df, by="highway")
                 .sort_values("pct_parallel", ascending=False)
                 .reset_index(drop=True)
                )

    display(Markdown(f"### Summary Stats for ITP ID: {operator_name}"))
    display(operator_stats)
    
    display(Markdown(f"### Summary Stats for Highways that ITP ID {operator_name} Intersects"))
    display(hwy_stats)

In [57]:
def display_highway_stats(df, select_col = "Route", hwy_name = 5):
    # Subset for highways
    hwy_df = df[df[select_col]==hwy_name]
    hwy_stats = (aggregate(hwy_df, by="highway")
                 .sort_values("pct_parallel", ascending=False)
                 .reset_index(drop=True)
                )    
    
    # Grab the operators that hwys intersect with
    operator_df = df[(df.itp_id.isin(hwy_df.itp_id))]
    operator_stats = (aggregate(operator_df, by="operator")
                      .sort_values("pct_parallel", ascending=False)
                      .reset_index(drop=True)
                     )
    
    display(Markdown(f"### Summary Stats for Highway Route: {hwy_name}"))
    display(hwy_stats)

    display(Markdown(f"### Summary Stats for Operators Route {hwy_name} Intersects"))
    display(operator_stats)
    

In [59]:
display_highway_stats(gdf, "Route", 5)

### Summary Stats for Highway Route: 5

Unnamed: 0,Route,County,District,NB,SB,EB,WB,num_parallel,unique_route_id,pct_parallel
0,5,SIS,2,1,1,0,0,2,5,0.4
1,5,TEH,2,1,1,0,0,5,13,0.385
2,5,SJ,10,1,1,0,0,10,45,0.222
3,5,SHA,2,1,1,0,0,4,21,0.19
4,5,SAC,3,1,1,0,0,12,76,0.158
5,5,YOL,3,1,1,0,0,1,7,0.143
6,5,ORA,12,1,1,0,0,7,67,0.104
7,5,LA,7,1,1,0,0,13,155,0.084
8,5,SD,11,1,1,0,0,6,90,0.067
9,5,FRE,6,1,1,0,0,0,3,0.0


### Summary Stats for Operators Route 5 Intersects

Unnamed: 0,itp_id,County,num_parallel,unique_route_id,pct_parallel
0,6,ALA,14,4,3.500
1,11,AMA,8,6,1.333
2,110,SOL,22,20,1.100
3,394,ORA,1,1,1.000
4,56,SF,1,1,1.000
...,...,...,...,...,...
229,83,SHA,0,2,0.000
230,13,AMA,0,1,0.000
231,101,YOL,0,2,0.000
232,110,CC,0,4,0.000


In [54]:
display_operator_stats(gdf, "itp_id", 182)

### Summary Stats for ITP ID: 182

Unnamed: 0,itp_id,County,num_parallel,unique_route_id,pct_parallel
0,182,LA,49,120,0.408
1,182,ORA,0,4,0.0
2,182,VEN,0,1,0.0


### Summary Stats for Highways that ITP ID 182 Intersects

Unnamed: 0,Route,County,District,NB,SB,EB,WB,num_parallel,unique_route_id,pct_parallel
0,118,VEN,7,0,0,1,1,18,34,0.529
1,101,VEN,7,1,1,0,0,44,97,0.454
2,23,VEN,7,1,1,0,0,19,45,0.422
3,110,LA,7,1,1,0,0,38,164,0.232
4,14,LA,7,1,1,1,1,10,45,0.222
5,134,LA,7,0,0,1,1,14,66,0.212
6,60,LA,7,0,0,1,1,15,72,0.208
7,210,LA,7,0,0,1,1,15,79,0.19
8,90,LA,7,0,0,1,1,4,22,0.182
9,72,LA,7,1,1,0,0,3,18,0.167


In [None]:
## Think about how to wrap above interactive widget into a function
# Be able to select agency, county, district, etc
def interactive_widget(df, select_col):

    dropdown = widgets.Dropdown(
        description=f"{select_col.title()}",
        options=df[select_col].sort_values().unique().tolist(),
    )
    output = widgets.Output()

    display(dropdown)
    display(output)

    def on_selection(*args):
        output.clear_output()
        with output:
            summarize_and_plot(df, select_col, dropdown.value)

    dropdown.observe(on_selection, names="value")
    on_selection()