In [10]:
import pandas as pd
import numpy as np

# Prep the regression data

### Compile the county dataframes

In [2]:
# Base years for the ACS census and ATF dealer files read in the compile loop.
# The CDC death files are joined with a two-year lag (base year + 2), so base
# years 2009-2019 pair with the 2011-2021 death files.
years = [str(base_year) for base_year in range(2009, 2020)]

In [3]:
# Counties are kept only when their two-digit state FIPS code is listed here
# (51 codes; presumably the 50 states plus DC -- confirm against the FIPS table).
states = (
    "01 02 04 05 06 08 09 10 11 12 13 15 16 17 18 19 20 "
    "21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 "
    "38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56"
).split()

In [4]:
# Start both homicide frames empty; the compile loop fills them with the
# stacked per-year county rows.
county_fire_hom, county_nonfire_hom = pd.DataFrame(), pd.DataFrame()

In [5]:
# Compile the data frames
# One merged county frame is built per base year and collected in a list; the
# lists are concatenated once after the loop. Concatenating onto the running
# result inside the loop re-copied every earlier year on each pass and appended
# duplicate rows whenever this cell was re-run without resetting the frames.
fire_hom_frames = []
nonfire_hom_frames = []
for year in years:
    # The death file joined to this base year is the one for base year + 2
    year_lag = str(int(year)+2)
    # Start with population to get all counties
    population = pd.read_csv(
        '../../data/processed/census/acs5_'+year+'_population_counties.csv',
        dtype={"geoid":'str',"state":'str'},
        usecols=["geoid","universe","state"],
    )
    population = population.rename(columns={"geoid":"fips","universe":"population"})
    population = population.loc[population["state"].isin(states)]
    # Add in the dealers (left join: counties without a dealer row are kept, with NaN)
    dealers = pd.read_csv('../../data/processed/atf-ffl-list/'+year+'-ffl-list-counties.csv', dtype={"fips":'str'})
    df_county = population.merge(dealers, on="fips", how="left")
    # Add in race / ethnicity as a percentage of the race table's universe
    race = pd.read_csv(
        '../../data/processed/census/acs5_'+year+'_race_counties.csv',
        dtype={"geoid":'str'},
        usecols=["geoid","universe","white_alone","black_alone","latino_alone","asians_all"],
    )
    race["white_pct"] = (race["white_alone"] / race["universe"]) * 100
    race["black_pct"] = (race["black_alone"] / race["universe"]) * 100
    race["latino_pct"] = (race["latino_alone"] / race["universe"]) * 100
    race["asian_pct"] = (race["asians_all"] / race["universe"]) * 100
    race = race.rename(columns={"geoid":"fips"})
    race = race[["fips","white_pct","black_pct","latino_pct","asian_pct"]]
    df_county = df_county.merge(race, on="fips", how="left")
    # Add in percent poverty
    poverty = pd.read_csv(
        '../../data/processed/census/acs5_'+year+'_poverty_counties.csv',
        dtype={"geoid":'str'},
        usecols=["geoid","universe","income_past12months_below_poverty_level"],
    )
    poverty["poverty_pct"] = (poverty["income_past12months_below_poverty_level"] / poverty["universe"]) * 100
    poverty = poverty.rename(columns={"geoid":"fips"})
    poverty = poverty[["fips","poverty_pct"]]
    df_county = df_county.merge(poverty, on="fips", how="left")
    # Add in median income
    income = pd.read_csv(
        '../../data/processed/census/acs5_'+year+'_medianhouseholdincome_counties.csv',
        dtype={"geoid":'str'},
        usecols=["geoid","median"],
    )
    income = income.rename(columns={"geoid":"fips","median":"income"})
    df_county = df_county.merge(income, on="fips", how="left")
    # Set the year (the base year, still a string at this point)
    df_county['year'] = year
    # Get the death data and isolate firearm homicides and non-firearm homicides
    df_deaths = pd.read_csv('../../data/processed/cdc-data/mult'+year_lag+'_grouped_rates_counties.csv', dtype={"fips":'str'})
    df_deaths_trim = df_deaths[["fips","type_manner","total_rate","deaths"]]
    fire_hom_deaths = df_deaths_trim.loc[df_deaths_trim["type_manner"] == "firearm-homicide"]
    nonfire_hom_deaths = df_deaths_trim.loc[df_deaths_trim["type_manner"] == "non-firearm-homicide"]
    # Left joins keep every county; counties with no matching death row get NaN
    df_county_fire_hom = df_county.merge(fire_hom_deaths, on="fips", how="left")
    df_county_nonfire_hom = df_county.merge(nonfire_hom_deaths, on="fips", how="left")
    # Queue this year's data for the final stack
    fire_hom_frames.append(df_county_fire_hom)
    nonfire_hom_frames.append(df_county_nonfire_hom)

# Stack all years at once; fall back to an empty frame if no years were processed
county_fire_hom = pd.concat(fire_hom_frames) if fire_hom_frames else pd.DataFrame()
county_nonfire_hom = pd.concat(nonfire_hom_frames) if nonfire_hom_frames else pd.DataFrame()

In [6]:
# Get most recent: the 2023 ATF dealer-by-county file, with FIPS codes read as strings
df_county_2023 = pd.read_csv(
    '../../data/processed/atf-ffl-list/2023-ffl-list-counties.csv',
    dtype={"fips": 'str'},
)

### Format homicide data for regression analysis

In [7]:
# The compile loop stores year as a string label; convert it to a number
county_fire_hom = county_fire_hom.assign(year=pd.to_numeric(county_fire_hom['year']))

In [11]:
# Treat missing rate/death/dealer values as zero (the left joins in the compile
# loop can leave NaN where a county has no matching row).
for zero_fill_col in ('total_rate', 'deaths', 'ffl_per_100mi', 'neighbor_ffl_per_100mi'):
    county_fire_hom[zero_fill_col] = county_fire_hom[zero_fill_col].fillna(0)

# Inverse hyperbolic sine of the rate. total_rate is NaN-free after the fill
# above, so the transformed column needs no separate fill.
county_fire_hom["total_rate_hs"] = np.arcsinh(county_fire_hom["total_rate"])

# Natural logs of population and median income
county_fire_hom["ln_population"] = np.log(county_fire_hom["population"])
county_fire_hom["ln_income"] = np.log(county_fire_hom["income"])

In [12]:
# Write the prepared firearm-homicide frame to CSV without the row index
county_fire_hom.to_csv("../../data/processed/county_firearm_homicides_dealers.csv", index=False)

In [13]:
# Index by (fips, year); set_index returns a new frame, so county_fire_hom
# itself keeps fips and year as ordinary columns.
fire_hom = county_fire_hom.set_index(['fips', 'year'])

In [14]:
# set_index moved year into the index; add it back as a categorical column
fire_hom_years = fire_hom.index.get_level_values('year').tolist()
fire_hom = fire_hom.assign(year=pd.Categorical(fire_hom_years))

In [15]:
# The compile loop stores year as a string label; convert it to a number
county_nonfire_hom = county_nonfire_hom.assign(year=pd.to_numeric(county_nonfire_hom['year']))

In [16]:
# Treat missing rate/death/dealer values as zero (the left joins in the compile
# loop can leave NaN where a county has no matching row).
for zero_fill_col in ('total_rate', 'deaths', 'ffl_per_100mi', 'neighbor_ffl_per_100mi'):
    county_nonfire_hom[zero_fill_col] = county_nonfire_hom[zero_fill_col].fillna(0)

# Inverse hyperbolic sine of the rate; the trailing fill is kept as a guard,
# although total_rate is already NaN-free at this point.
county_nonfire_hom["total_rate_hs"] = np.arcsinh(county_nonfire_hom["total_rate"]).fillna(0)

# Natural logs of population and median income
county_nonfire_hom["ln_population"] = np.log(county_nonfire_hom["population"])
county_nonfire_hom["ln_income"] = np.log(county_nonfire_hom["income"])

In [17]:
# Write the prepared non-firearm-homicide frame to CSV without the row index
county_nonfire_hom.to_csv("../../data/processed/county_nonfirearm_homicides_dealers.csv", index=False)

In [18]:
# Index by (fips, year); set_index returns a new frame, so county_nonfire_hom
# itself keeps fips and year as ordinary columns.
non_fire_hom = county_nonfire_hom.set_index(['fips', 'year'])

In [19]:
# set_index moved year into the index; add it back as a categorical column
non_fire_hom_years = non_fire_hom.index.get_level_values('year').tolist()
non_fire_hom = non_fire_hom.assign(year=pd.Categorical(non_fire_hom_years))

## County dealer changes
Have any counties/neighbors seen this kind of increase OR reduction? 

In [54]:
# Keep only the first (2009) and last (2019) base years for a before/after comparison
compare = county_fire_hom.loc[county_fire_hom["year"].isin([2009, 2019])]

In [55]:
# compare = compare.reset_index(level="fips")

In [43]:
# Reshape to one row per county, with one column per (metric, year) pair.
# NOTE(review): state_y and countyname are not created in the visible cells by
# name; presumably they come from the dealer file merge -- confirm.
# This overwrites `compare`, so the cell cannot be re-run without first
# re-running the cell that filters `compare` to the two years.
compare = pd.pivot(
    data=compare,
    index=['fips','state_y','countyname'],
    columns='year',
    values=['count','neighbor_ffl_per_100mi','total_rate','poverty_pct','black_pct','deaths'],
)

In [44]:
# Move the pivot's index levels back into ordinary columns
compare = compare.reset_index()

In [45]:
# Flatten the pivoted columns: three id columns, then an old/new pair per metric.
# Assumes the pivot emits metrics in the order they were requested with years
# ascending inside each metric, so "_old" is the earlier year and "_new" the later.
compare.columns = ['fips','state','countyname'] + [
    metric + suffix
    for metric in ('count','neighbor_ffl_per_100mi','total_rate','poverty','black','deaths')
    for suffix in ('_old','_new')
]

In [46]:
# New-minus-old change for each county. Note that ffl_difference is computed
# from the neighbor dealer density columns, while count_difference uses the
# county's own dealer count.
compare = compare.assign(
    ffl_difference=compare['neighbor_ffl_per_100mi_new'] - compare['neighbor_ffl_per_100mi_old'],
    homicide_difference=compare['total_rate_new'] - compare['total_rate_old'],
    count_difference=compare['count_new'] - compare['count_old'],
)

In [47]:
# compare[compare["ffl_difference"] >= 1].sort_values("ffl_difference", ascending=False).head(40)

In [1]:
# compare[compare["state"] == "GA"].sort_values("ffl_difference", ascending=False).head(30)

## Calculate unit change in dealers

### National change in dealers

In [261]:
# Total area summed over counties, counting each fips once.
# NOTE(review): `county` is not defined in any cell visible here; it must come
# from a cell elsewhere in the notebook or from leftover kernel state -- confirm
# that this cell still runs after Restart & Run All.
unique_counties = county.drop_duplicates(subset="fips")
us_county_area = unique_counties["area"].sum()

In [262]:
# Total county area divided by 100 -- presumably the number of 100-square-mile
# units, matching the *_per_100mi columns; confirm the units of `area`.
us_county_area/100

35726.82099452933

In [263]:
# Percentage increase in dealers compared to current count of dealers
# 78787 is used as the current dealer count and us_county_area/100 as the
# number of added dealers. The result is a fraction (0.45 means 45%), and the
# expression reduces algebraically to added_dealers / current_dealers; the
# original operation order is kept so the floating-point result is unchanged.
current_dealers = 78787
added_dealers = us_county_area/100
((added_dealers + current_dealers) - current_dealers) / current_dealers

0.4534608627632645