In [7]:
import geopandas as gpd
import json
import pandas as pd

In [8]:
# Load the lookup table that maps a state's full name to its two-character
# abbreviation. The encoding is given explicitly so the file is read the same
# way regardless of the platform's default locale encoding.
with open('assets/state_codes.json', "r", encoding="utf-8") as file:
    state_codes = json.load(file)

In [9]:
# Build the county map table and export it to assets/map_df.csv.
map_df = gpd.read_file("../data_raw/shapefiles/historicalcounties")

# Give the name columns short labels: NHGISNAM -> county, STATENAM -> state.
map_df = map_df.rename(columns={'NHGISNAM': 'county', 'STATENAM': 'state'})

# Two-character state code looked up from the full state name via state_codes.
# Names that are not keys of state_codes are left unchanged.
map_df['state_abrev'] = map_df['state'].replace(state_codes)

# Simplify the map geometry (to make it run faster).
# NOTE(review): buffer(0) is presumably here to repair geometries that
# simplification left invalid, and the 0.01 tolerance is presumably in the
# units of the shapefile's CRS - confirm both.
map_df["geometry"] = map_df["geometry"].simplify(0.01).buffer(0)

# Convert the Geo_FIPS strings to integers. int() already ignores leading
# zeros, so no manual stripping is needed; stripping them by hand turned an
# all-zero code into "" and made int() raise ValueError.
map_df["Geo_FIPS"] = map_df["Geo_FIPS"].map(int)

map_df.to_csv('assets/map_df.csv')

In [60]:
# County-level population table.
# The CSV's second line holds the column names (header=1). Rows with no value
# in SE_T001_001 are discarded, that column becomes the integer 'Population',
# Geo_FIPS is kept as text, and only the four columns used later are retained.
county_pop_data_raw = pd.read_csv("../data_raw/census_data/countyPopulation.csv", header=1)
county_pop_data = (
    county_pop_data_raw
    .loc[county_pop_data_raw["SE_T001_001"].notna()]
    .astype({"SE_T001_001": "int", "Geo_FIPS": "str"})
    .rename(columns={'SE_T001_001': 'Population', "Geo_name": "County"})
    .loc[:, ["Geo_FIPS", "Population", "County", "Geo_STUSAB"]]
)

In [63]:
# Load the state-name -> two-character-abbreviation lookup so this cell can be
# run on its own. The encoding is given explicitly so the read does not depend
# on the platform's default locale encoding.
with open('assets/state_codes.json', "r", encoding="utf-8") as file:
    state_codes = json.load(file)

# Display the mapping for a quick visual check.
state_codes

{'New Hampshire': 'NH',
 'Vermont': 'VT',
 'Rhode Island': 'RI',
 'Connecticut': 'CT',
 'New York': 'NY',
 'New Jersey': 'NJ',
 'Pennsylvania': 'PA',
 'Delaware': 'DE',
 'Maryland': 'MD',
 'Virginia': 'VA',
 'North Carolina': 'NC',
 'South Carolina': 'SC',
 'Georgia': 'GA',
 'Massachusetts': 'MA'}

In [74]:
# --- Debt aggregated per county ---------------------------------------------
# For every (county, state) pair: number of rows ('count') and the sum of the
# '6p_total' column.
debt_by_county = (
    pd.read_csv("../data_clean/final_data_CD.csv")[["Group State", "Group County", '6p_total']]
    .groupby(by=["Group County", "Group State"])
    .agg(['size', 'sum'])
    .reset_index()
)
# After reset_index the columns are, in order: the two group keys, then the
# size and the sum of '6p_total'. Assigning flat names replaces the two-level
# column index directly.
debt_by_county.columns = ['county', 'state', 'count', '6p_total']

# --- County FIPS code and population ----------------------------------------
# Same census file layout as elsewhere in the notebook: column names are on
# the second line (header=1); rows without a SE_T001_001 value are dropped.
county_pop_data_raw = pd.read_csv("../data_raw/census_data/countyPopulation.csv", header=1)
county_geo_fips = (
    county_pop_data_raw[county_pop_data_raw["SE_T001_001"].notna()]
    .astype({"SE_T001_001": "int", "Geo_FIPS": "str"})
    [["Geo_FIPS", "Geo_name", 'Geo_STUSAB', "SE_T001_001"]]
    .rename(columns={"Geo_name": "county", 'Geo_STUSAB': 'state', "SE_T001_001": 'population'})
)

# Inner join (the pd.merge default): counties present in only one of the two
# tables are dropped from the result.
county_debt_geo = pd.merge(debt_by_county, county_geo_fips, on=["county", 'state'])

# state debt aggregation
# Sum only the numeric measures. A bare .sum() is also applied to the text
# columns 'county' and 'Geo_FIPS', which pandas >= 2.0 concatenates into
# meaningless strings instead of dropping.
state_debt_geo = (
    county_debt_geo
    .groupby('state', as_index=False)[['count', '6p_total', 'population']]
    .sum()
)

county_debt_geo

Unnamed: 0,county,state,count,6p_total,Geo_FIPS,population
0,Abbeville County,SC,4,190.025533,4500010,9197
1,Accomack County,VA,1,78.330000,5100010,13959
2,Albany County,NY,81,49580.460000,3600010,75980
3,Allegany County,MD,1,755.550000,2400010,4809
4,Allegheny County,PA,11,12204.070000,4200030,10203
...,...,...,...,...,...,...
132,Windsor County,VT,5,44.060000,5000270,15740
133,Worcester County,MA,9,8412.157733,2500270,56764
134,Worcester County,MD,5,8468.770000,2400470,11640
135,York County,MA,5,2161.090000,2500275,29078
