# Charts for bus service opportunities by tract and CalEnviroScreen / Density Scores

* Density and CalEnviroScreen both grouped as low/med/high
* Provide some charts to accompany maps
* Heatmaps to show distribution?
* Scatterplot of pop density vs service density

In [None]:
import intake
import pandas as pd

from shared_utils import portfolio_utils
import setup_tract_charts

# Open the intake data catalog described by the YAML file(s) matching "*.yml";
# the processed tract-level dataset is read from this catalog further down.
catalog = intake.open_catalog("*.yml")

In [None]:
def import_processed_data()-> pd.DataFrame:
    """
    Import processed bus stop times data by census tract.
    
    Clean up columns, dtypes, labeling for making visualizations.
    
    Steps:
    * read `bus_stop_times_by_tract` from the module-level `catalog`
    * fill missing counts with 0 and cast them to int
    * bin `popjobs_sq_mi` into terciles (`popjobdensity_group`, 1-3)
    * compute `arrivals_per_1k_pj` (arrivals per 1,000 residents + jobs)
      and bin it into terciles (`arrivals_group_pj`, 1-3, nullable Int64)
    * simplify geometries to keep the map small
    * drop tracts with Population <= 10
    
    Returns the cleaned table. The frame carries a `geometry` column that is
    simplified here, so it is presumably a GeoDataFrame -- verify against the
    catalog entry.
    """
    df = catalog.bus_stop_times_by_tract.read()
    
    fill_with_zero = [
        "total_arrivals", "n_stops", "n_operators",
        "num_jobs", "Population",
        "num_pop_jobs"
    ]
    
    df[fill_with_zero] = df[fill_with_zero].fillna(0).astype(int)
    
    # num_pop_jobs was just zero-filled, so it can legitimately be 0.
    # Dividing by 0 would give inf (or NaN for 0 / 0); inf would then be
    # ranked by qcut as the *highest* service tercile. Mask zero denominators
    # so those tracts get a NaN rate and are left out of the binning instead.
    pop_jobs = df.num_pop_jobs.where(df.num_pop_jobs != 0)
    
    df = df.assign(
        popjobdensity_group = pd.qcut(df.popjobs_sq_mi, q=3, labels=False) + 1,
        arrivals_per_1k_pj = (df.total_arrivals / pop_jobs) * 1_000,
    )

    # Nullable Int64 keeps the group an integer even where the rate is NaN
    df = df.assign(
        arrivals_group_pj = (pd.qcut(df.arrivals_per_1k_pj, q=3, labels=False) + 1).astype("Int64"),
    )
    
    # Map getting too big, use simplify to decrease size
    # (tolerance is in the units of the layer's CRS -- TODO confirm CRS)
    df = df.assign(
        geometry = df.geometry.simplify(tolerance=0.005)
    )
    
    # Address small numbers issue
    # NOTE(review): the terciles above are computed before this filter, so
    # low-population tracts still influence the cut points -- confirm intended.
    df = df[df.Population > 10].reset_index(drop=True)
    
    return df


def aggregate_generate_stats(
    df: pd.DataFrame, 
    group_cols: list
) -> pd.DataFrame:
    """
    Aggregate tract-level data by `group_cols` and derive per-1k rates.
    
    Grouping is delegated to `portfolio_utils.aggregate_by_geography`, with
    the stop / operator / population / job / arrival columns passed as
    `sum_cols` and `Tract` passed as `count_cols`. The aggregated table is
    cast to int, sorted by `group_cols`, and then gains two columns:
    
    * `arrivals_per_1k_pj`: total_arrivals per 1,000 num_pop_jobs
    * `stops_per_1k_pj`: n_stops per 1,000 num_pop_jobs
    
    Any column whose name contains "sq_mi", "per_1k" or "ptile" is rounded
    to 2 decimals before the table is returned.
    """
    per_unit = 1_000
    columns_to_sum = [
        "n_stops", "n_operators", "Population",
        "num_jobs", "num_pop_jobs", "total_arrivals",
    ]
    rounded_name_tags = ("sq_mi", "per_1k", "ptile")
    
    aggregated = portfolio_utils.aggregate_by_geography(
        df,
        group_cols = group_cols,
        sum_cols = columns_to_sum,
        count_cols = ["Tract"],
    )
    aggregated = aggregated.astype("int").sort_values(group_cols)
    
    # Rates are expressed per 1,000 residents + jobs in the group
    pop_jobs_total = aggregated.num_pop_jobs
    aggregated = aggregated.assign(**{
        "arrivals_per_1k_pj": (aggregated.total_arrivals / pop_jobs_total) * per_unit,
        "stops_per_1k_pj": (aggregated.n_stops / pop_jobs_total) * per_unit,
    })
    
    columns_to_round = [
        name for name in aggregated.columns
        if any(tag in name for tag in rounded_name_tags)
    ]
    aggregated[columns_to_round] = aggregated[columns_to_round].round(2)
    
    return aggregated

In [None]:
# Load the cleaned tract-level table once; the cells below all work from `df`.
df = import_processed_data()

In [None]:
## Create 3 sets of aggregated datasets to use for charts
# (1) By popjobdensity - equity
# Aggregate first, then swap the numeric density group for its legend label
# so the chart shows readable category names.
by_density_equity = aggregate_generate_stats(
    df, group_cols=["popjobdensity_group", "equity_group"]
).pipe(
    lambda stats: stats.assign(
        popjobdensity_group=stats["popjobdensity_group"].map(
            setup_tract_charts.LEGEND_LABELS
        )
    )
)

In [None]:
# Bars: arrivals per 1k pop/jobs for each density group, split by equity group.
# y_max is set 10 above the largest arrivals_per_1k_pj value in the table.
setup_tract_charts.grouped_bar_chart(
    by_density_equity,
    x_col = "popjobdensity_group",
    y_col = "arrivals_per_1k_pj",
    color_col = "popjobdensity_group",
    grouped_col = "equity_group",
    y_max = by_density_equity["arrivals_per_1k_pj"].max() + 10,
    chart_title = "Arrivals per 1k by Pop / Job Density",
)

## Opportunity Tracts

Tracts in the lower right quadrant, but pick out the most extreme cases.

Tracts with > 75th ptile for pop/job density & < 25th ptile for service density.

Especially the medium-need tracts: these would be the ones dragging down the average for the middle yellow bar.

In [None]:
# Grab the ones with density above 75th percentile
# and in the lower 25th percentile for service density
p75 = df.popjobs_sq_mi.quantile(0.75)
p25 = df.arrivals_per_1k_pj.quantile(0.25)

# Flag opportunity tracts: dense (pop + jobs per sq mi above p75) but poorly
# served (arrivals per 1k pop/jobs below p25).
# Column-wise comparison instead of a row-by-row apply: same result, far
# faster. Comparisons against NaN are False, so tracts with a missing value
# in either column are never flagged.
df = df.assign(
    is_oppor_tract = (df.popjobs_sq_mi > p75) & (df.arrivals_per_1k_pj < p25)
)

In [None]:
# Interactive map showing only the tracts flagged as opportunity tracts
df.loc[df.is_oppor_tract.eq(True)].explore(
    column = "is_oppor_tract",
    categorical = True,
    tiles = "CartoDB Positron",
)