# Clean FFL lists

In [1]:
import pandas as pd
import os

## Load reference files

In [44]:
# County reference table. Every column is read as text, so any numeric
# column (e.g. the `area` used further down) arrives as a string.
counties = pd.read_csv(
    "../../data/processed/counties-reference.csv",
    dtype=str,
)

In [66]:
# Hand-made county adjacency table, read with every column as text.
# The per-year loop below joins it on `neighbor_fips` and groups by `fips`.
neighbors = pd.read_csv(
    "../../data/handmade/neighbor-counties.csv",
    dtype=str,
)

## Clean, merge, and group FFL lists for each year

In [68]:
# License types that allow regular "dealing" in firearms:
#   01 - Firearms Dealer
#   02 - Pawn Broker
#   07 - Manufacture of Firearms
#   08 - Importer of Firearms
# `limited_dealer_types` is the narrower subset (dealers and pawn brokers only);
# `dealer_types` extends it with manufacturers and importers.
limited_dealer_types = ["01", "02"]
dealer_types = limited_dealer_types + ["07", "08"]

In [69]:
# Years to process, as strings ('2003' through '2023' inclusive) because they
# are spliced directly into file paths.
years = [str(year_number) for year_number in range(2003, 2024)]

In [77]:
# For every year in `years`: clean that year's FFL list, attach state/county
# names, and write four CSVs (clean list, county summary, state summary,
# license-type summary) to the processed atf-ffl-list folder.
#
# NOTE(review): `state_fips` and `county_fips` are not defined in the cells
# visible here. They must be loaded before this cell runs -- `state_fips`
# with columns state / statefp / state_name, `county_fips` with columns
# statefp / countyfp / countyname.

source_dir = '../../data/source/atf-ffl-list/'
output_dir = '../../data/processed/atf-ffl-list/'
excluded_states = ["PR", "VI", "GU", "MP"]
density_cols = ["ffl_per_100mi", "neighbor_ffl_per_100mi"]

# `counties` is read with dtype=str, so "area" is text. Convert it once, on a
# copy, so the density arithmetic below is numeric instead of raising a
# TypeError (division) or concatenating strings (sums).
county_areas = counties.copy()
county_areas["area"] = pd.to_numeric(county_areas["area"])

for year in years:
    df = pd.read_csv(f"{source_dir}{year}/{year}-ffl-list.csv", dtype=str)
    # Normalise the headers: spaces become underscores, everything lower case
    df.columns = df.columns.str.replace(" ", "_").str.lower()
    # Keep only the dealer license types and drop licensees located in
    # PR, VI, GU and MP (no other jurisdiction, e.g. DC, is filtered here).
    # .copy() so the column assignments below act on an independent frame.
    keep = df["lic_type"].isin(dealer_types) & ~df["premise_state"].isin(excluded_states)
    df = df.loc[keep].copy()
    # Clean the zip codes: keep only the first five characters
    df['premise_zip_code'] = df['premise_zip_code'].str[:5]
    df['mail_zip_code'] = df['mail_zip_code'].str[:5]
    # Add state FIPS
    df = df.merge(
        state_fips[["state", "statefp", "state_name"]],
        left_on="premise_state", right_on="state", how="left",
    )
    # Add county name
    df = df.merge(
        county_fips[["statefp", "countyfp", "countyname"]],
        left_on=["statefp", "lic_cnty"], right_on=["statefp", "countyfp"], how="left",
    )
    # Save clean FFL list (written before `fips` is added, so the clean file
    # does not contain that column)
    df.to_csv(f"{output_dir}{year}-ffl-list-clean.csv", index=False)

    # Create county FIPS code
    df["fips"] = df["statefp"] + df["countyfp"]
    # Count licensees per county and attach the counts to the reference table
    county_counts = df.groupby(["fips"]).size().reset_index(name="count")
    county_df = county_areas.merge(county_counts, on="fips", how="left")
    # A county absent from the FFL list has zero licensees. Fill this *before*
    # the neighbor arithmetic: NaN + neighbor total would be NaN and would
    # later be zeroed out, discarding the neighbors' dealers.
    county_df["count"] = county_df["count"].fillna(0)

    # Total area and licensee count over each county's neighbors
    neighbor_df = neighbors.merge(
        county_df[["fips", "area", "count"]],
        left_on="neighbor_fips", right_on="fips", how="left",
    )
    neighbor_df = neighbor_df.groupby("fips_x").agg({"area": "sum", "count": "sum"}).reset_index()
    neighbor_df.columns = ["fips", "neighbor_area", "neighbor_count"]

    # Density per 100 units of area (presumably square miles -- confirm the
    # unit of `area` in the counties reference file)
    county_df["ffl_per_100mi"] = (county_df["count"] / county_df["area"]) * 100
    county_df = county_df.merge(neighbor_df, on="fips", how="left")
    # Counties with no rows in `neighbors` contribute nothing extra, so the
    # combined figures below fall back to the county's own values.
    county_df[["neighbor_area", "neighbor_count"]] = (
        county_df[["neighbor_area", "neighbor_count"]].fillna(0)
    )
    # From here on the neighbor_* columns describe the county PLUS its neighbors
    county_df["neighbor_count"] = county_df["count"] + county_df["neighbor_count"]
    county_df["neighbor_area"] = county_df["area"] + county_df["neighbor_area"]
    county_df["neighbor_ffl_per_100mi"] = (county_df["neighbor_count"] / county_df["neighbor_area"]) * 100
    # Undefined densities (missing area, or 0 / 0) are reported as 0
    county_df[density_cols] = county_df[density_cols].fillna(0)
    # Adjusted density: the larger of the county's own density and the
    # density of the county combined with its neighbors
    county_df["adj_ffl_per_100mi"] = county_df[density_cols].max(axis=1)
    # Save county-level summary
    county_df.to_csv(f"{output_dir}{year}-ffl-list-counties.csv", index=False)

    # Create and save state-level group
    state_df = df.groupby(["state", "statefp"]).size().reset_index(name="count")
    state_df.to_csv(f"{output_dir}{year}-ffl-list-states.csv", index=False)

    # Create and save license-type group
    lic_df = df.groupby(["lic_type"]).size().reset_index(name="count")
    lic_df.to_csv(f"{output_dir}{year}-ffl-list-license-types.csv", index=False)