In [10]:
import pandas as pd
import numpy as np

# Prep the regression data

### Compile the county dataframes

In [2]:
# These are the years of CDC data that we have (with a one year lag so 2014-2021)
years = [
    '2009','2010','2011','2012','2013','2014','2015','2016','2017','2018',
    '2019'
]

In [3]:
# Filter the counties to just these states - assigned by FIPS code
states = ["01","02","04","05","06","08","09","10","11","12","13","15","16","17","18","19","20",
          "21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37",
          "38","39","40","41","42","44","45","46","47","48","49","50","51","53","54","55","56"]

In [4]:
county_fire_hom = pd.DataFrame()
county_nonfire_hom = pd.DataFrame()

In [5]:
# Compile the data frames
for year in years:
    year_lag = str(int(year)+2)
    # Start with population to get all counties
    population = pd.read_csv('../../data/processed/census/acs5_'+year+'_population_counties.csv', dtype={"geoid":'str',"state":'str'}, usecols=["geoid","universe","state"])
    population.rename(columns={"geoid":"fips","universe":"population"}, inplace=True)
    population = population.loc[population["state"].isin(states)]
    # Add in the dealers
    dealers = pd.read_csv('../../data/processed/atf-ffl-list/'+year+'-ffl-list-counties.csv', dtype={"fips":'str'})
    df_county = population.merge(dealers, on="fips", how="left")
    # Add in race / ethnicity
    race = pd.read_csv('../../data/processed/census/acs5_'+year+'_race_counties.csv', dtype={"geoid":'str'}, usecols=["geoid","universe","white_alone","black_alone","latino_alone","asians_all"])
    race["white_pct"] = (race["white_alone"] / race["universe"]) * 100
    race["black_pct"] = (race["black_alone"] / race["universe"]) * 100
    race["latino_pct"] = (race["latino_alone"] / race["universe"]) * 100
    race["asian_pct"] = (race["asians_all"] / race["universe"]) * 100
    race.rename(columns={"geoid":"fips"}, inplace=True)
    race = race[["fips","white_pct","black_pct","latino_pct","asian_pct"]]
    df_county = df_county.merge(race, on="fips", how="left")
    # Add in percent poverty
    poverty = pd.read_csv('../../data/processed/census/acs5_'+year+'_poverty_counties.csv', dtype={"geoid":'str'}, usecols=["geoid","universe","income_past12months_below_poverty_level"])
    poverty["poverty_pct"] = (poverty["income_past12months_below_poverty_level"] / poverty["universe"]) * 100
    poverty.rename(columns={"geoid":"fips"}, inplace=True)
    poverty = poverty[["fips","poverty_pct"]]
    df_county = df_county.merge(poverty, on="fips", how="left")
    # Add in median income
    income = pd.read_csv('../../data/processed/census/acs5_'+year+'_medianhouseholdincome_counties.csv', dtype={"geoid":'str'}, usecols=["geoid","median"])
    income.rename(columns={"geoid":"fips","median":"income"}, inplace=True)
    df_county = df_county.merge(income, on="fips", how="left")
    # Set the year
    df_county['year'] = year
    # Get the death data and isolate firearm homicides and non-firearm homicides
    df_deaths = pd.read_csv('../../data/processed/cdc-data/mult'+year_lag+'_grouped_rates_counties.csv', dtype={"fips":'str'})
    df_deaths_trim = df_deaths[["fips","type_manner","total_rate","deaths"]]
    fire_hom_deaths = df_deaths_trim.loc[df_deaths_trim["type_manner"] == "firearm-homicide"]
    nonfire_hom_deaths = df_deaths_trim.loc[df_deaths_trim["type_manner"] == "non-firearm-homicide"]
    df_county_fire_hom = df_county.merge(fire_hom_deaths, on="fips", how="left")
    df_county_nonfire_hom = df_county.merge(nonfire_hom_deaths, on="fips", how="left")
    # Add this year's data onto the full dataframes
    county_fire_hom = pd.concat([county_fire_hom, df_county_fire_hom])
    county_nonfire_hom = pd.concat([county_nonfire_hom, df_county_nonfire_hom])

In [6]:
# Get most recent
df_county_2023 = pd.read_csv('../../data/processed/atf-ffl-list/2023-ffl-list-counties.csv', dtype={"fips":'str'})

### Format homicide data for regression analysis

In [7]:
county_fire_hom['year'] = pd.to_numeric(county_fire_hom['year'])

In [11]:
county_fire_hom['total_rate'] = county_fire_hom['total_rate'].fillna(0)
county_fire_hom["total_rate_hs"] = np.arcsinh(county_fire_hom["total_rate"]) 
# county_fire_hom['total_rate_hs'] = county_fire_hom['total_rate_hs'].fillna(0)
county_fire_hom["ln_population"] = np.log(county_fire_hom["population"]) 
county_fire_hom["ln_income"] = np.log(county_fire_hom["income"]) 
county_fire_hom['deaths'] = county_fire_hom['deaths'].fillna(0)
county_fire_hom['ffl_per_100mi'] = county_fire_hom['ffl_per_100mi'].fillna(0)
county_fire_hom['neighbor_ffl_per_100mi'] = county_fire_hom['neighbor_ffl_per_100mi'].fillna(0)

In [12]:
county_fire_hom.to_csv("../../data/processed/county_firearm_homicides_dealers.csv", index=False)

In [13]:
fire_hom = county_fire_hom.set_index(['fips', 'year'])

In [14]:
fire_hom_years = fire_hom.index.get_level_values('year').to_list()
fire_hom['year'] = pd.Categorical(fire_hom_years)

In [15]:
county_nonfire_hom['year'] = pd.to_numeric(county_nonfire_hom['year'])

In [16]:
county_nonfire_hom['total_rate'] = county_nonfire_hom['total_rate'].fillna(0)
county_nonfire_hom["total_rate_hs"] = np.arcsinh(county_nonfire_hom["total_rate"]) 
county_nonfire_hom['total_rate_hs'] = county_nonfire_hom['total_rate_hs'].fillna(0)
county_nonfire_hom["ln_population"] = np.log(county_nonfire_hom["population"]) 
county_nonfire_hom["ln_income"] = np.log(county_nonfire_hom["income"]) 
county_nonfire_hom['deaths'] = county_nonfire_hom['deaths'].fillna(0)
county_nonfire_hom['ffl_per_100mi'] = county_nonfire_hom['ffl_per_100mi'].fillna(0)
county_nonfire_hom['neighbor_ffl_per_100mi'] = county_nonfire_hom['neighbor_ffl_per_100mi'].fillna(0)

In [17]:
county_nonfire_hom.to_csv("../../data/processed/county_nonfirearm_homicides_dealers.csv", index=False)

In [18]:
non_fire_hom = county_nonfire_hom.set_index(['fips', 'year'])

In [19]:
non_fire_hom_years = non_fire_hom.index.get_level_values('year').to_list()
non_fire_hom['year'] = pd.Categorical(non_fire_hom_years)

## County dealer changes
Have any counties/neighbors seen this kind of increase OR reduction? 

In [54]:
compare =  county_fire_hom[(county_fire_hom["year"] == 2009) | (county_fire_hom["year"] == 2019)]

In [55]:
# compare = compare.reset_index(level="fips")

In [56]:
compare[["year"]]

Unnamed: 0,year
0,2009
1,2009
2,2009
3,2009
4,2009
...,...
3137,2019
3138,2019
3139,2019
3140,2019


In [43]:
compare = pd.pivot(compare, index=['fips','state_y','countyname'], columns='year', values=['count','neighbor_ffl_per_100mi','total_rate','poverty_pct','black_pct','deaths'])

In [44]:
compare = compare.reset_index()

In [45]:
compare.columns = ['fips','state','countyname','count_old','count_new','neighbor_ffl_per_100mi_old','neighbor_ffl_per_100mi_new','total_rate_old','total_rate_new','poverty_old','poverty_new','black_old','black_new','deaths_old','deaths_new']

In [46]:
compare["ffl_difference"] = compare['neighbor_ffl_per_100mi_new'] - compare['neighbor_ffl_per_100mi_old']
compare["homicide_difference"] = compare['total_rate_new'] - compare['total_rate_old']
compare["count_difference"] = compare['count_new'] - compare['count_old']

In [47]:
# compare[compare["ffl_difference"] >= 1].sort_values("ffl_difference", ascending=False).head(40)

In [48]:
(7.457147 - 4.436458)/ 4.436458	

0.6808785296738975

In [49]:
(1.355914 - 0.771046)/ 0.771046

0.7585384010811288

In [57]:
(0.406911 - 0.585429) / 0.406911

-0.43871509986213186

In [51]:
compare[compare["state"] == "GA"].sort_values("ffl_difference", ascending=False).head(30)

Unnamed: 0,fips,state,countyname,count_old,count_new,neighbor_ffl_per_100mi_old,neighbor_ffl_per_100mi_new,total_rate_old,total_rate_new,poverty_old,poverty_new,black_old,black_new,deaths_old,deaths_new,ffl_difference,homicide_difference,count_difference
445,13117,GA,Forsyth County,16.0,41.0,7.141596,13.71709,0.585429,0.406911,5.430386,5.685719,2.890537,3.317673,1.0,1.0,6.575494,-0.178518,25.0
420,13067,GA,Cobb County,65.0,105.0,8.264927,14.398372,3.068076,4.983607,9.416887,9.101824,22.269166,26.721005,21.0,38.0,6.133446,1.91553,40.0
394,13013,GA,Barrow County,12.0,24.0,6.668553,12.640392,0.0,4.869853,13.493133,12.611064,11.217611,11.695003,0.0,4.0,5.971839,4.869853,12.0
465,13157,GA,Jackson County,16.0,28.0,6.184843,11.960395,3.344593,5.417191,14.691739,11.283658,7.446627,6.630331,2.0,4.0,5.775552,2.072598,12.0
447,13121,GA,Fulton County,48.0,67.0,8.610761,14.271539,14.760782,28.075873,15.403794,14.366028,42.477622,43.612044,134.0,296.0,5.660778,13.315091,19.0
454,13135,GA,Gwinnett County,53.0,103.0,7.517013,13.02487,1.76129,4.428021,9.520431,10.676425,19.831219,27.019188,14.0,42.0,5.507857,2.666731,50.0
435,13097,GA,Douglas County,15.0,24.0,9.219631,14.593358,0.0,7.664437,10.909719,12.819571,33.128154,45.926484,0.0,11.0,5.373728,7.664437,9.0
456,13139,GA,Hall County,18.0,45.0,5.360283,10.581338,1.68269,3.470449,13.764467,13.935342,6.393337,7.076666,3.0,7.0,5.221055,1.78776,27.0
431,13089,GA,DeKalb County,31.0,34.0,9.375391,14.415341,3.7681,12.258876,15.399136,15.118184,53.572286,53.436369,26.0,93.0,5.03995,8.490776,3.0
415,13057,GA,Cherokee County,23.0,46.0,7.123451,12.060497,1.419675,3.05163,7.121458,7.476255,5.358962,6.707068,3.0,8.0,4.937045,1.631955,23.0


## Calculate unit change in dealers

### National change in dealers

In [261]:
us_county_area = county.drop_duplicates(subset="fips")["area"].sum()

In [262]:
us_county_area/100

35726.82099452933

In [263]:
# Percentage increase in dealers compared to current count of dealers
(((us_county_area/100) + 78787) - 78787) / 78787

0.4534608627632645

### Median county change in dealers

In [264]:
county[["year","area","count","neighbor_area","neighbor_count"]].describe()

Unnamed: 0,year,area,count,neighbor_area,neighbor_count
count,121959.0,121937.0,121959.0,121755.0,121959.0
mean,2014.058077,1166.052304,28.609197,8009.141244,178.943538
std,3.163369,3685.632424,42.132458,17515.066712,158.999128
min,2009.0,2.046193,0.0,174.81558,0.0
25%,2011.0,443.255554,9.0,3312.555422,87.0
50%,2014.0,632.525976,17.0,4603.674824,136.0
75%,2017.0,943.814048,33.0,6753.922143,220.0
max,2019.0,147872.48475,1061.0,411228.951801,1838.0


In [265]:
med_area = 632.525976	
med_neighbor_area = 4603.674824	
med_county_increase = med_area/100
med_county_neighbor_increase = med_neighbor_area/100
med_count = 17
med_neighbor_count = 136
med_change = round((((med_county_increase + med_count) - med_count) / med_count) * 100)
med_neighbor_change = round((((med_county_neighbor_increase + med_neighbor_count) - med_neighbor_count) / med_neighbor_count) * 100)
print("A one unit increase in dealers per 100 square miles for the median county would mean " +  str(med_county_increase) + " more dealers, a " + str(med_change) + " percent increase.",
      "A one unit increase in dealers per 100 square miles for the median county and neighboring counties would mean " +str(med_county_neighbor_increase) + " more, a " + str(med_neighbor_change) + " percent increase.")

A one unit increase in dealers per 100 square miles for the median county would mean 6.32525976 more dealers, a 37 percent increase. A one unit increase in dealers per 100 square miles for the median county and neighboring counties would mean 46.036748239999994 more, a 34 percent increase.


### Dealer change in LA county

In [274]:
df_county_2023.loc[df_county_2023["countyname"] == "Los Angeles County",["area","count","neighbor_area","neighbor_count"]]

Unnamed: 0,area,count,neighbor_area,neighbor_count
205,4101.014423,339.0,35025.069608,974.0


In [271]:
med_area = 4101.014423
med_neighbor_area = 35025.069608
med_county_increase = med_area/100
med_county_neighbor_increase = med_neighbor_area/100
med_count = 339.0
med_neighbor_count = 974.0
med_change = round((((med_county_increase + med_count) - med_count) / med_count) * 100)
med_neighbor_change = round((((med_county_neighbor_increase + med_neighbor_count) - med_neighbor_count) / med_neighbor_count) * 100)
print("A one unit increase in dealers per 100 square miles for Los Angeles from 2023 data would mean " +  str(med_county_increase) + " more dealers, a " + str(med_change) + " percent increase.",
      "A one unit increase in dealers per 100 square miles for Los Angeles and neighboring counties would mean " +str(med_county_neighbor_increase) + " more, a " + str(med_neighbor_change) + " percent increase.")

A one unit increase in dealers per 100 square miles for Los Angeles from 2023 data would mean 41.010144229999995 more dealers, a 12 percent increase. A one unit increase in dealers per 100 square miles for Los Angeles and neighboring counties would mean 350.25069607999995 more, a 36 percent increase.


In [272]:
la_area = 4101.014423
ventura_area = 1857.520152
kern_area = 8161.896772
san_bernardino_area = 20105.623097
orange_area = 799.015164

In [273]:
la_area + ventura_area + kern_area + san_bernardino_area + orange_area

35025.069608

## Summary

**National increase in dealers:** A one unit increase in dealers per 100 square miles nationally would mean 35,727 more dealers, a 45% increase from 2023.

**Median county increase in dealers:** A one unit increase in dealers for the median county would mean 6.3 more dealers, a 37% increase. A one unit increase in dealers for the median county and neighboring counties would mean 46 more, a 34% increase.

**Los Angeles increase in dealers:** A one unit increase in dealers for the county would mean 41 more dealers, a 12% increase. A one unit increase in dealers for the county and neighboring counties would mean 350 more, a 36% increase. Monterey Park is 7.67 sq mi. Los Angeles County is 4,101 sq mi. and LA plus the neighboring counties is 35,025 sq mi.

**Impact on the homicide rate:** On average, for any given county, one additional dealer per 100 square miles results in an extremely small impact on the firearm homicide rate, an increase of 0.046. If there is one additional dealer per 100 square miles in a county and its neighboring counties, the effect is multiplied by 10. Still fairly small but potentially significant. As expected, the FFLs per 100 square miles does not have any impact on non-firearm homicide rates.

**Impact on certain communities:** The effect is especially significant for counties with large Black populations (3.4x), already high homicide rates (4.7x), and a large population in poverty (6.7x). California counties that fall into these categories are:

*Note: Large Black counties are defined as those with more than 50,000 populaton and at least 20% Black in 2009. High poverty are those with more than 50,000 population and at least 20% residents in poverty in 2009. High homicide are those with more than 50,000 population and a rate of 6 firearm homicides per 100,000 people or more in 2009.*