# Identifying Disadvantaged Counties in the USA

## Methodology 

After starting to work with the data available on https://screeningtool.geoplatform.gov/en/methodology, we wanted to get a better idea of which states and counties are most disadvantaged. 

We also wanted to give users the ability to explore those locations interactively based on user-set thresholds. 

Starting with the basics, we've created a tool that lets you identify which counties contain the most disadvantaged communities according to any of the available disadvantaged criteria. (cf. https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html) 


In [134]:
import pandas as pd
import sys

from ipywidgets import interact, interact_manual
import ipywidgets as widgets
from IPython.display import HTML, display


# Extend the import path two directories up; this must run before the project
# import further down (presumably that is where sage_one_tree_planted lives).
sys.path.append("../..")

# Raise pandas' display limits so wide/long result tables are not truncated.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_colwidth", 100)

# Inject CSS that gives widget labels a minimum width of 20ex.
display(HTML("""<style>
    .widget-label { min-width: 20ex !important; }
</style>"""))

from sage_one_tree_planted.data.climate_and_economic_justice_dataset import (
    ClimateAndEconomicJusticeDataset,
)

# `c` is reused by later cells (its `screening_data` attribute is used to
# locate the CSV). NOTE(review): fetch_data() presumably downloads or caches
# the dataset locally; confirm against the class.
c = ClimateAndEconomicJusticeDataset()
c.fetch_data()

In [138]:
# Load the screening-tool export located under the dataset's screening_data path.
df2 = pd.read_csv(c.screening_data / "communities-2022-05-06-1838GMT.csv")

# Keep only the rows whose "Identified as disadvantaged" flag is True.
df_disadvantaged_communities = df2.loc[df2["Identified as disadvantaged"] == True]

# Top 100 counties by number of flagged rows.
# NOTE(review): this expression is neither assigned nor the last statement of
# the cell, so with IPython's default display settings its result is not
# shown; confirm whether it should be displayed, assigned, or removed.
(
    df_disadvantaged_communities
    .groupby(["County Name", "State/Territory"])["Identified as disadvantaged"]
    .count()
    .to_frame()
    .sort_values("Identified as disadvantaged", ascending=False)
    .head(100)
)

# Count of non-null "Census tract ID" values per (county, state). Despite the
# `_df` suffix this is a Series indexed by county name and state/territory.
total_census_tracts_df = (
    df2.groupby(["County Name", "State/Territory"])["Census tract ID"].count()
)

@interact(top_results=(10, 500, 10), total_census_tracts=(1, 1000, 1))
def show_more_disadvantaged_counties(top_results=10,  total_census_tracts=2, normalize=False):
    """Rank counties by their number, or share, of disadvantaged census tracts.

    Counties with no tract flagged as disadvantaged never appear in the
    result, in either mode.

    Args:
        top_results: Maximum number of counties to return.
        total_census_tracts: Minimum number of census tracts a county must
            have to be included in the ranking.
        normalize: When False, rank by the raw count of disadvantaged tracts.
            When True, rank by the percentage of the county's tracts that are
            flagged as disadvantaged.

    Returns:
        A DataFrame with one row per county, sorted in descending order of the
        chosen metric and indexed 0..n-1. With ``normalize=False`` the
        "Identified as disadvantaged" column holds the count of flagged
        tracts; with ``normalize=True`` the metric column is
        "Percent of Census Tracts Identified as Disadvantaged". Both modes
        include "County Name", "State/Territory" and "Total Census Tracts".
    """
    county_keys = ["County Name", "State/Territory"]
    flag_column = "Identified as disadvantaged"
    total_column = "Total Census Tracts"

    if not normalize:
        ranked = (
            df_disadvantaged_communities.groupby(county_keys)[flag_column]
            .count()
            .to_frame()
            .join(total_census_tracts_df)
            .rename(columns={"Census tract ID": total_column})
            .sort_values(flag_column, ascending=False)
            .reset_index()
        )
    else:
        percent_column = "Percent of Census Tracts Identified as Disadvantaged"

        # The flag is both a grouping key and the counted column, so the
        # result column is renamed to "count"; otherwise the reset_index()
        # below would collide with the index level of the same name.
        flag_counts = df2.groupby(county_keys + [flag_column])[[flag_column]].count()
        flag_counts.columns = ["count"]

        with_totals = flag_counts.join(total_census_tracts_df)
        with_totals[percent_column] = (
            100 * with_totals["count"] / with_totals["Census tract ID"]
        )
        with_totals = with_totals.reset_index().rename(
            columns={"Census tract ID": total_column}
        )

        # Keep only the rows describing the flagged (True) share per county.
        # `== True` is an elementwise pandas comparison, not a Python truth test.
        ranked = with_totals.loc[
            with_totals[flag_column] == True,  # noqa: E712
            county_keys + [total_column, percent_column],
        ].sort_values(percent_column, ascending=False)

    large_enough = ranked[ranked[total_column] >= total_census_tracts]
    # drop=True discards the pre-filter row labels instead of inserting them
    # as a stray "index" column, so both modes return a clean 0-based ranking.
    return large_enough.reset_index(drop=True).head(top_results)

interactive(children=(IntSlider(value=10, description='top_results', max=500, min=10, step=10), IntSlider(valu…