# Analysis of ATF trace data

In [None]:
# National time to crime, 2006-2021 (2006-2016 entered manually from reports)
# Number of guns recovered in under a year, 2017-2022
# Guns purchased in state, 2021

In [None]:
import pandas as pd
import altair as alt

## Load and combine general data

In [None]:
# Load one "general numbers" table per year, 2017-2021. The count and
# time-to-crime columns are read as text; they are cleaned and converted to
# numbers in a later cell.
general_dtypes = {"all_traces": str, "source_traces": str, "national_ttc": str, "state_ttc": str}
general_path = "./data/processed/atf-trace-data/general-numbers-{}.csv"
general_17, general_18, general_19, general_20, general_21 = (
    pd.read_csv(general_path.format(year), dtype=general_dtypes)
    for year in range(2017, 2022)
)

In [None]:
# Stack the yearly tables into a single frame (original row labels are kept).
general_frames = [general_17, general_18, general_19, general_20, general_21]
general = pd.concat(general_frames)

In [None]:
# Preview the first rows of the combined table.
general.head()

In [None]:
# Values of the "state" column to exclude from the analysis. The labels are
# matched exactly as written here (presumably truncated names such as
# "Puerto" for Puerto Rico — confirm against the CSVs).
drop_list = ['Guam','Puerto','U.S.']

In [None]:
# Drop the excluded labels, then take an independent copy of the result.
# Copying *after* the filter (rather than before it) means the column
# assignments in the following cells write to a standalone frame instead of a
# derived slice, which avoids pandas' SettingWithCopyWarning.
general = general.loc[~general["state"].isin(drop_list)].copy()

In [None]:
# The Texas 2019 value spilled over too many lines, so set it by hand.
# It is written as text with a thousands separator, like the other raw values.
is_texas_2019 = (general["state"] == "Texas") & (general["year"] == 2019)
general.loc[is_texas_2019, "source_traces"] = "23,597"

In [None]:
# Total traces: remove thousands separators, then convert to numbers.
general["all_traces"] = pd.to_numeric(general["all_traces"].str.replace(",", ""))

# Source traces: remove commas and spaces, trim any remaining outer
# whitespace, then convert to numbers.
source_traces_text = (
    general["source_traces"].str.replace(",", "").str.replace(" ", "").str.strip()
)
general["source_traces"] = pd.to_numeric(source_traces_text)

# Time-to-crime columns only need trimming before conversion.
for ttc_column in ("state_ttc", "national_ttc"):
    general[ttc_column] = pd.to_numeric(general[ttc_column].str.strip())

## Time to crime

In [None]:
# The national figure is repeated on every state's row, so any single state
# can be used to read it off; Alabama is used here.
is_alabama = general["state"] == "Alabama"
nat_ttc = general.loc[is_alabama, ["year", "national_ttc"]]

In [None]:
# Earlier years are filled in with the national time to crime pulled manually
# from reports, keyed by year.
older_ttc_by_year = {
    2006: 10.17,
    2007: 10.33,
    2008: 10.39,
    2009: 10.77,
    2010: 10.94,
    2011: 11.2,
    2012: 11.2,
    2013: 11.08,
    2014: 10.88,
    2015: 10.48,
    2016: 9.79,
}
more = pd.DataFrame(
    {
        "year": list(older_ttc_by_year),
        "national_ttc": list(older_ttc_by_year.values()),
    }
)
# Manual years first, followed by the years taken from the loaded tables.
nat_ttc = pd.concat([more, nat_ttc])

In [None]:
# Parse the four-digit year into a datetime so it can be charted on a
# temporal axis.
nat_ttc["year"] = pd.to_datetime(nat_ttc["year"], format='%Y')

In [None]:
# Display the national time-to-crime table (manual years plus loaded years).
nat_ttc

In [None]:
# Bar chart of the national time-to-crime figure, one bar per year.
year_axis = alt.X('year:T', title="Year", axis=alt.Axis(format="%Y"))
ttc_axis = alt.Y('national_ttc:Q', title="Time-to-crime (in years)")
national_ttc_chart = alt.Chart(nat_ttc).mark_bar(size=14).encode(x=year_axis, y=ttc_axis)
national_ttc_chart.properties(title="National average time-to-crime")

## Guns recovered less than a year from purchase

In [None]:
# The time_to_crime labels that together make up "recovered in under a year".
# A later cell keeps only rows whose label is one of these.
less_than_year_categories = ["Under 3 Months", "3 Months to Under 7 Months", "7 Months to Under 1 Year"]

In [None]:
# Load the yearly time-to-crime tables for 2017-2022 with every column as text.
# NOTE(review): these paths start with "./_workspace/gun-dealers-2023-analysis/"
# while the other loaders in this notebook read from "./data/processed/..." —
# confirm which working directory is intended.
ttc_path = './_workspace/gun-dealers-2023-analysis/data/processed/atf-trace-data/time-to-crime-{}.csv'
df_2017, df_2018, df_2019, df_2020, df_2021, df_2022 = (
    pd.read_csv(ttc_path.format(year), dtype=str) for year in range(2017, 2023)
)

In [None]:
# Complete the "District of" label to "District of Columbia" in the 2017-2019
# tables. The negative lookahead skips values that already end in " Columbia",
# so re-running this cell (or loading already-complete data) no longer
# produces "District of Columbia Columbia".
for yearly_ttc in (df_2017, df_2018, df_2019):
    yearly_ttc["state"] = yearly_ttc["state"].str.replace(
        r"District of(?! Columbia)", "District of Columbia", regex=True
    )

In [None]:
# Stack the six yearly time-to-crime tables into one frame.
ttc_frames = [df_2017, df_2018, df_2019, df_2020, df_2021, df_2022]
under_year = pd.concat(ttc_frames)

In [None]:
# Replace any line breaks inside the time_to_crime labels with a space.
ttc_labels = under_year["time_to_crime"]
under_year["time_to_crime"] = ttc_labels.str.replace("\n", " ")

In [None]:
# List the distinct time_to_crime labels in the combined table.
under_year.time_to_crime.unique()

In [None]:
# Remove Guam, Puerto (Rico), and U.S.
# The filtered result is copied so that the column assignment in the next
# cell writes to a standalone frame rather than a derived slice (avoids
# pandas' SettingWithCopyWarning).
under_year = under_year.loc[~under_year["state"].isin(['Guam','Puerto','U.S.'])].copy()

In [None]:
# Counts were read as text: remove thousands separators, then convert to numbers.
count_text = under_year["count"].str.replace(",", "")
under_year["count"] = pd.to_numeric(count_text)

In [None]:
# Keep only the rows whose label is one of the under-one-year categories.
is_under_a_year = under_year["time_to_crime"].isin(less_than_year_categories)
under_year_df = under_year.loc[is_under_a_year]

In [None]:
# Total under-one-year count per year, shown as a two-column table.
count_by_year = under_year_df.groupby(["year"])["count"].sum()
count_by_year.reset_index()

In [None]:
# Horizontal bar chart of the yearly under-one-year totals.
yearly_under_year = under_year_df.groupby(["year"])["count"].sum().reset_index()
under_year_chart = alt.Chart(yearly_under_year).mark_bar().encode(
    x=alt.X('count:Q', title=""),
    y=alt.Y('year:N', title=""),
)
under_year_chart.properties(title="Guns recovered and traced in under a year")

## Source states

In [None]:
# Load one source-states table per year, 2017-2021, with pandas' default dtypes.
source_path = "./data/processed/atf-trace-data/source-states-{}.csv"
source_17, source_18, source_19, source_20, source_21 = (
    pd.read_csv(source_path.format(year)) for year in range(2017, 2022)
)

In [None]:
# Stack the yearly source-state tables into one frame.
source_frames = [source_17, source_18, source_19, source_20, source_21]
source = pd.concat(source_frames)

In [None]:
# Replace double spaces in source-state names with a single space.
source_state_names = source["source_state"]
source["source_state"] = source_state_names.str.replace("  ", " ")

In [None]:
# Sum of the "count" column for each year.
source.groupby("year")["count"].sum()

In [None]:
# Attach the general numbers for the same state and year to each source-state
# row. A left join keeps every source row even when there is no match.
source_merged = pd.merge(source, general, on=["state", "year"], how="left")

In [None]:
# Each row's count divided by its source_traces value. The result is a plain
# ratio; it is not multiplied by 100.
source_merged["percent"] = source_merged["count"].div(source_merged["source_traces"])

In [None]:
# Preview the first rows of the merged table.
source_merged.head()

In [None]:
# How many guns recovered in California came from California?
recovered_in_ca = source_merged["state"] == "California"
sourced_from_ca = source_merged["source_state"] == "California"
source_merged.loc[recovered_in_ca & sourced_from_ca]

### In state

In [None]:
# Rows where the source state matches the state itself, limited to 2021.
same_state = source_merged["source_state"] == source_merged["state"]
in_2021 = source_merged["year"] == 2021
instate = source_merged.loc[same_state & in_2021]

In [None]:
# Show the in-state rows ordered from the highest "percent" value to the lowest.
instate_columns = ["state", "count", "percent", "state_ttc"]
instate[instate_columns].sort_values(by="percent", ascending=False)