In [None]:
%%html
<script>
    // Auto-run hook: every time the kernel reports ready, execute the cells
    // below this one and then ask the notebook to save itself.
    // NOTE(review): `require`, 'base/js/namespace' and the 'jupyter-notebook:*'
    // actions look like classic Notebook front-end APIs; presumably this does
    // nothing (or errors) in JupyterLab — confirm the target front end.
    require(
        ['base/js/namespace', 'jquery'], 
        function(jupyter, $) {
            // The handler is bound to the kernel_ready.Kernel event, so it
            // presumably fires again after a kernel restart, not only on first load.
            $(jupyter.events).on("kernel_ready.Kernel", function () {
                console.log("Auto-running all cells-below...");
                // Only cells BELOW this one are run, so this cell has to stay first.
                jupyter.actions.call('jupyter-notebook:run-all-cells-below');
                // NOTE(review): the save is requested immediately after the run
                // is triggered; presumably the cells have not finished yet, so the
                // saved file may lack fresh outputs — confirm.
                jupyter.actions.call('jupyter-notebook:save-notebook');
            });
        }
    );
</script>

In [None]:
import pandas as pd
import sys
import re
import geopandas as gpd
import contextily as cx
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar

from ipywidgets import interact, interact_manual
import ipywidgets as widgets
from IPython.display import HTML, display
import warnings 


# Put the directory two levels up on the import path; presumably this is what
# lets the `sage_one_tree_planted` import below resolve — confirm the layout.
sys.path.append("../..")

# Raise pandas' display limits so wide/long result tables are not truncated.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_colwidth", 100)

# Silence every warning for the rest of the notebook session.
warnings.filterwarnings("ignore")

# Widen widget labels so long parameter names fit next to the controls.
display(
    HTML(
        '''<style>
    .widget-label { min-width: 20ex !important; }
</style>'''
    )
)

from  sage_one_tree_planted.data.climate_and_economic_justice_dataset import ClimateAndEconomicJusticeDataset

In [None]:
def _is_disadvantaged_group(df):
    return df[df["Identified as disadvantaged"] == True]

def _compute_mean(df, columns):
    return df.groupby(["County Name", "State/Territory"])[columns].mean().reset_index()

def _remove_boolen_filter(columns):
    res = []
    for column in columns:
        if not re.search("Greater.+", column):
            res.append(column)
    return res

def plot_on_usa_map(df_tmp: pd.DataFrame, locations) -> None:
    """Draw the counties listed in ``df_tmp`` on top of a web-map basemap.

    Parameters
    ----------
    df_tmp : pd.DataFrame
        One row per county to highlight, identified by the columns
        "County Name" and "State/Territory" (frames that already use the
        "CF"/"SF" column names are accepted as well). The frame is not modified.
    locations : GeoDataFrame
        County shapes carrying "CF" and "SF" columns — presumably the county
        and state names from the shapefile; confirm against the data.

    Returns
    -------
    None
        The figure is produced as a side effect. When no shape matches, a short
        message is printed and nothing is drawn.
    """
    # Rename on a copy so the caller's table (which is usually also displayed
    # to the user) keeps its original column names.
    county_keys = df_tmp.rename(columns={"County Name": "CF", "State/Territory": "SF"})

    # Match on (state, county) PAIRS. Testing the two columns independently
    # would also select a same-named county in any other selected state.
    wanted_pairs = set(zip(county_keys["SF"], county_keys["CF"]))
    is_wanted = pd.Series(
        [pair in wanted_pairs for pair in zip(locations["SF"], locations["CF"])],
        index=locations.index,
        dtype=bool,
    )
    usa_filtered_locations = locations[is_wanted]
    if usa_filtered_locations.empty:
        # Nothing to draw; skip the reprojection and the basemap download.
        print("No matching county shapes found; nothing to plot.")
        return

    # Reproject to Web Mercator (EPSG:3857) before adding the basemap.
    usa_filtered_locations_wm = usa_filtered_locations.to_crs(epsg=3857)
    ax = usa_filtered_locations_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
    cx.add_basemap(ax)
    # EPSG:3857 coordinates are in metres, hence one unit per metre.
    ax.add_artist(ScaleBar(1))

In [None]:
# Load the Climate and Economic Justice screening data.
# NOTE(review): fetch_data() presumably downloads the files on first use — confirm.
c = ClimateAndEconomicJusticeDataset()
c.fetch_data()
df2 = pd.read_csv(c.screening_data / "communities-2022-05-06-1838GMT.csv")

# Census tracts flagged as disadvantaged (same filter as the helper defined above).
df_disadvantaged_communities = _is_disadvantaged_group(df2)

# Number of census tracts per (county, state). Despite the name this is a
# Series named "Census tract ID", indexed by the two grouping keys.
total_census_tracts_df = df2.groupby(["County Name", "State/Territory"])["Census tract ID"].count()

# County shapes used by plot_on_usa_map.
USA_LOCATIONS = gpd.read_file(c.shape_file / "usa.shp")

# Candidate criteria columns: everything after the first three columns
# (presumably the tract id, county and state identifiers — confirm).
COLUMNS_FILTER = df2.columns.tolist()[3:]

STATES = list(df2["State/Territory"].unique())

# Identifying Disadvantaged Counties in the USA

## Methodology 

After starting to work with the data available on https://screeningtool.geoplatform.gov/en/methodology, we wanted to get a better idea of which states and counties are most disadvantaged. 

We also wanted to give users the ability to explore those locations interactively based on user-set thresholds. 

Starting with the basics, we've created a tool that lets you identify which counties contain the most disadvantaged communities (Census Tracts) according to the disadvantaged criteria defined by the census. 


A census tract is identified as disadvantaged in one or more categories of criteria: 

IF the census tract is above the threshold for one or more environmental or climate indicators 

AND the census tract is above the threshold for the socioeconomic indicators 
(c.f. https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html) 




#### These results are for the USA as a whole. If you'd like to zoom in on an individual state, scroll down! 
##### Legend
- **topN** - the number of counties to show
- **min_census_tracts** - the minimum total number of census tracts that a county must contain in order to be displayed
- **Rank By** - By default, we rank each county by the percentage of disadvantaged census tracts that it contains. If you'd like to rank by the absolute number of disadvantaged census tracts instead, choose "Total tracts" from the dropdown.
- **show_map** A toggle to display the counties on a map of the USA, N.B. this can be a bit slow. 

In [None]:
@interact(
    topN=(10, 500, 10),
    min_census_tracts=(1, 1000, 5),
    use_percent=widgets.Dropdown(
        options=[("Percentage of tracts", True), ("Total tracts", False)],
        description='Rank by',
    ),
)
def show_disadvantaged_counties(topN=10, min_census_tracts=150, use_percent=True, show_map=False):
    """Rank counties by how many of their census tracts are disadvantaged.

    Reads the module-level ``df2``, ``df_disadvantaged_communities``,
    ``total_census_tracts_df`` and ``USA_LOCATIONS``.

    Parameters
    ----------
    topN : int
        Maximum number of counties to return.
    min_census_tracts : int
        Counties with fewer total census tracts than this are dropped.
    use_percent : bool
        True ranks by the percentage of a county's tracts that are
        disadvantaged; False ranks by the absolute number of such tracts.
    show_map : bool
        When True, the selected counties are also drawn on a map.

    Returns
    -------
    pd.DataFrame
        Columns "County Name", "State/Territory", "Total Census Tracts" and the
        ranking column, sorted from highest to lowest.
    """
    county_keys = ["County Name", "State/Territory"]

    if use_percent:
        rank_column = "Percent of Census Tracts Identified as Disadvantaged"
        # Count tracts per (county, state, flag); the rows where the flag is
        # True then hold the number of disadvantaged tracts of that county.
        flag_counts = df2.groupby(county_keys + ["Identified as disadvantaged"])[
            ["Identified as disadvantaged"]
        ].count()
        # Rename before reset_index so the count column does not collide with
        # the index level of the same name.
        flag_counts.columns = ["count"]
        summary = (
            flag_counts.join(total_census_tracts_df)
            .reset_index()
            .rename(columns={"Census tract ID": "Total Census Tracts"})
        )
        summary[rank_column] = 100 * summary["count"] / summary["Total Census Tracts"]
        summary = summary[summary["Identified as disadvantaged"] == True]
    else:
        rank_column = "Total Census Tracts Identified as Disadvantaged"
        summary = (
            df_disadvantaged_communities.groupby(county_keys)["Identified as disadvantaged"]
            .count()
            .to_frame()
            .join(total_census_tracts_df)
            .rename(columns={
                "Census tract ID": "Total Census Tracts",
                "Identified as disadvantaged": rank_column,
            })
            .reset_index()
        )

    ranked = summary.sort_values(rank_column, ascending=False)
    large_enough = ranked[ranked["Total Census Tracts"] >= min_census_tracts]
    final_df = large_enough.head(topN).reset_index(drop=True)[
        county_keys + ["Total Census Tracts", rank_column]
    ]

    if show_map:
        # Both ranking modes need the shapes; hand over a copy so the plotting
        # helper can never alter the table returned to the user.
        plot_on_usa_map(final_df.copy(), locations=USA_LOCATIONS)
    return final_df

# Identifying Disadvantaged Counties by Criteria in the USA

#### These results are for the USA as a whole. If you'd like to zoom in on an individual state, scroll down! 
##### Legend
- **column** - the criteria for disadvantaged census tracts

In [None]:
# Rebuild the list instead of calling ``.remove`` so this cell can be re-run
# safely: ``.remove`` raises ValueError once the entry is already gone.
COLUMNS_FILTER = _remove_boolen_filter(
    [column for column in COLUMNS_FILTER if column != "Identified as disadvantaged"]
)

# Per-county mean of every remaining criterion, over disadvantaged tracts only.
df_mean = _compute_mean(df=df_disadvantaged_communities, columns=COLUMNS_FILTER)

In [None]:
@interact
def _rank_by_column(column=COLUMNS_FILTER, topN=10, state=STATES, show_map=False):
    """Show the counties of ``state`` with the highest mean value of ``column``.

    Reads the module-level ``df_mean`` and ``USA_LOCATIONS``.

    Parameters
    ----------
    column : str
        Criterion (a column of ``df_mean``) to rank by.
    topN : int
        Rank cut-off; counties ranked worse than this are dropped.
    state : str
        "State/Territory" value to restrict the ranking to.
    show_map : bool
        When True, the selected counties are also drawn on a map.

    Returns
    -------
    pd.DataFrame
        The selected rows of ``df_mean`` plus a "max_rank" column, best rank first.
    """
    df_tmp = df_mean[df_mean["State/Territory"] == state].copy()
    # Rank 1 is the HIGHEST value: the default ascending rank would make
    # "top N" return the N lowest-valued counties instead. With method="max",
    # tied counties all receive the worst rank of their tie group.
    df_tmp["max_rank"] = df_tmp[column].rank(method="max", ascending=False)
    df_final = df_tmp[df_tmp["max_rank"] <= topN].sort_values("max_rank")
    if show_map:
        # Hand over a copy so the plotting helper can never alter the table
        # returned to the user.
        plot_on_usa_map(df_final.copy(), locations=USA_LOCATIONS)
    return df_final