In [1]:
import pandas as pd

State populations retrieved from https://www.census.gov/programs-surveys/popest/technical-documentation/research/evaluation-estimates/2020-evaluation-estimates/2010s-state-total.html

In [2]:
# Input file locations (relative to the notebook's working directory).
gun_violence_filepath = "data/gunviolence.pickle"
policy_filepath = "data/policy_metadata.csv"
population_filepath = "data/populations.xlsx"

# NOTE(review): unpickling can execute arbitrary code -- only load pickles from a trusted source.
gun_violence_df = pd.read_pickle(gun_violence_filepath)
policy_df = pd.read_csv(policy_filepath)
# header=3 picks the spreadsheet row holding the column names; rows 62-67 are skipped
# (presumably footnotes below the table -- TODO confirm against the workbook).
population_df = pd.read_excel(
    population_filepath,
    header=3,
    skiprows=list(range(62, 68)),
)

In [3]:
# Preprocess population data: reshape the wide spreadsheet (one column per year)
# into long format with one row per (state, year) and columns state / year / population.
population_df = (
    population_df
    # The 'July 1' column is used as the 2020 estimate.
    .rename(columns={'July 1': 2020})
    .dropna()
    # Strip punctuation from the region labels. The pattern is a raw string so that
    # \w and \s are not parsed as (invalid) string escapes, and regex=True is explicit:
    # newer pandas treats the pattern as a literal string by default, which would leave
    # the labels uncleaned and make the later merges on 'state' miss every row.
    .assign(state=lambda frame: frame['Unnamed: 0'].str.replace(r'[^\w\s]', '', regex=True))
    .melt(id_vars=['state'], value_vars=[2014, 2015, 2016, 2017, 2018, 2019, 2020])
    .rename(columns={'variable': 'year', 'value': 'population'})
)
# 'year' and 'state' intentionally stay as ordinary columns: later cells merge on them
# with on=['year', 'state']. (The previous set_index call discarded its result.)
population_df.head()

  population_df['state'] = population_df['Unnamed: 0'].str.replace('[^\w\s]','')


Unnamed: 0,state,year,population
0,United States,2014,318386329.0
1,Northeast,2014,56021339.0
2,Midwest,2014,67765576.0
3,South,2014,119666248.0
4,West,2014,74933166.0


In [4]:
# Preview the first five incident records.
gun_violence_df.head()

Unnamed: 0,index,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,...,participant_type,sources,state_house_district,state_senate_district,year,suicide,mass shooting,gang,wounded,dead
0,278,95289,2014-01-01,Michigan,Muskegon,300 block of Monroe Avenue,0,0,http://www.gunviolencearchive.org/incident/95289,http://www.mlive.com/news/muskegon/index.ssf/2...,...,0::Victim,http://www.mlive.com/news/muskegon/index.ssf/2...,92.0,34.0,2014,False,False,False,False,False
1,279,92401,2014-01-01,New Jersey,Newark,Central Avenue,0,0,http://www.gunviolencearchive.org/incident/92401,http://www.nj.com/essex/index.ssf/2014/01/newa...,...,,http://www.nj.com/essex/index.ssf/2014/01/newa...,29.0,29.0,2014,False,False,False,False,False
2,280,92383,2014-01-01,New York,Queens,113th Avenue,1,0,http://www.gunviolencearchive.org/incident/92383,http://www.timesledger.com/stories/2014/2/firs...,...,0::Victim||1::Subject-Suspect,http://www.timesledger.com/stories/2014/2/firs...,33.0,14.0,2014,True,False,False,False,True
3,281,92142,2014-01-01,New York,Brooklyn,St. Johns Place,0,1,http://www.gunviolencearchive.org/incident/92142,http://www.nydailynews.com/new-york/nyc-crime/...,...,0::Victim||1::Subject-Suspect,http://www.nydailynews.com/new-york/nyc-crime/...,43.0,20.0,2014,False,False,False,True,False
4,282,95261,2014-01-01,Missouri,Springfield,Beverly Hills and Temple,0,1,http://www.gunviolencearchive.org/incident/95261,http://www.ozarksfirst.com/story/deputies-6-ye...,...,0::Victim||1::Subject-Suspect,http://www.ozarksfirst.com/story/deputies-6-ye...,131.0,30.0,2014,False,False,False,True,False


In [5]:
# Preview the first five policy records.
policy_df.head()

Unnamed: 0,year,state,category,sub_category,policies_implemented
0,2014,Alabama,Ammunition regulations,Background checks,3
1,2014,Alabama,Ammunition regulations,Prohibitors,0
2,2014,Alabama,Assault weapons and large-capacity magazines,Assault weapons ban,0
3,2014,Alabama,Assault weapons and large-capacity magazines,Large capacity magazine ban,0
4,2014,Alabama,Background checks,Gun shows,0


# Correlation Between Incident Types and Policy Categories

### All Incidents

In [6]:
# Number of incidents per (year, state).
incident_counts = gun_violence_df.groupby(['year', 'state']).size().to_frame('gun_violence_incidents')
# Total policies implemented per (year, state, category).
policy_totals = policy_df.groupby(['year', 'state', 'category'])[['policies_implemented']].sum()

# Left join (DataFrame.join default) on the shared index levels year/state: every
# (year, state) incident count is repeated once per policy category.
all_incidents = incident_counts.join(policy_totals).reset_index()

# Attach population by key instead of copying a merged column back positionally.
# The row-count check fails loudly if population_df has duplicate (year, state) rows,
# which would otherwise silently duplicate incident rows.
n_rows = len(all_incidents)
all_incidents = all_incidents.merge(population_df, on=['year', 'state'], how='left')
assert len(all_incidents) == n_rows, "population_df has duplicate (year, state) rows"

all_incidents['incidents_per_capita'] = all_incidents.gun_violence_incidents / all_incidents.population
all_incidents

Unnamed: 0,year,state,category,gun_violence_incidents,policies_implemented,population,incidents_per_capita
0,2014,Alabama,Ammunition regulations,1318,3,4843737.0,0.000272
1,2014,Alabama,Assault weapons and large-capacity magazines,1318,0,4843737.0,0.000272
2,2014,Alabama,Background checks,1318,0,4843737.0,0.000272
3,2014,Alabama,Buyer regulations,1318,1,4843737.0,0.000272
4,2014,Alabama,Child access prevention,1318,0,4843737.0,0.000272
...,...,...,...,...,...,...,...
3245,2018,Wyoming,Immunity,14,1,579054.0,0.000024
3246,2018,Wyoming,Possession regulations,14,3,579054.0,0.000024
3247,2018,Wyoming,Preemption,14,0,579054.0,0.000024
3248,2018,Wyoming,Prohibitions for high-risk gun possession,14,1,579054.0,0.000024


In [7]:
# Correlate the numeric columns only. Selecting them explicitly keeps this working on
# pandas versions where DataFrame.corr() no longer drops string columns (state, category)
# automatically and raises instead.
all_incidents.select_dtypes(include='number').corr()

Unnamed: 0,year,gun_violence_incidents,policies_implemented,population,incidents_per_capita
year,1.0,-0.19366,0.012441,0.008346,-0.345511
gun_violence_incidents,-0.19366,1.0,0.177305,0.734572,0.323411
policies_implemented,0.012441,0.177305,1.0,0.263511,-0.08376
population,0.008346,0.734572,0.263511,1.0,-0.160069
incidents_per_capita,-0.345511,0.323411,-0.08376,-0.160069,1.0


In [8]:
# Pearson correlation between policies implemented and incidents per capita, selected by
# column label rather than by position in the correlation matrix.
all_incidents['policies_implemented'].corr(all_incidents['incidents_per_capita'])

-0.08375990726385779

In [9]:
# Accumulator for the per-category correlations; later cells append further
# incident types to the same dict before it is pivoted into a table.
policy_correlations = {'category': [], 'incidence_type': [], 'corr': []}
corr_categories = set(all_incidents.category)

for category in corr_categories:
    df_category = all_incidents[all_incidents.category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('all_incidents')
    # Label-based Pearson correlation: avoids relying on the column positions of
    # DataFrame.corr() and on it silently dropping the string columns.
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

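Possession regulations policies have a correlation of 0.5! (NOTE: this figure does not match the stored results table below, where possession regulations correlate at roughly -0.02 with all incidents per capita; re-verify before relying on it.)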

### Suicides

In [10]:
# Suicide incidents per (year, state), joined with policies implemented per category.
incident_counts = (gun_violence_df[gun_violence_df.suicide]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'category'])[['policies_implemented']].sum()
suicides = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(suicides)
suicides = suicides.merge(population_df, on=['year', 'state'], how='left')
assert len(suicides) == n_rows, "population_df has duplicate (year, state) rows"

suicides['incidents_per_capita'] = suicides.gun_violence_incidents / suicides.population

for category in corr_categories:
    df_category = suicides[suicides.category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('suicides')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Non-suicides

In [11]:
# Non-suicide incidents per (year, state), joined with policies implemented per category.
# `!= True` is kept so that any non-True flag value counts as a non-suicide.
incident_counts = (gun_violence_df[gun_violence_df.suicide != True]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'category'])[['policies_implemented']].sum()
non_suicides = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(non_suicides)
non_suicides = non_suicides.merge(population_df, on=['year', 'state'], how='left')
assert len(non_suicides) == n_rows, "population_df has duplicate (year, state) rows"

non_suicides['incidents_per_capita'] = non_suicides.gun_violence_incidents / non_suicides.population

for category in corr_categories:
    df_category = non_suicides[non_suicides.category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('non_suicides')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Mass Shootings

In [12]:
# Mass-shooting incidents per (year, state), joined with policies implemented per category.
incident_counts = (gun_violence_df[gun_violence_df['mass shooting']]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'category'])[['policies_implemented']].sum()
mass_shooting = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(mass_shooting)
mass_shooting = mass_shooting.merge(population_df, on=['year', 'state'], how='left')
assert len(mass_shooting) == n_rows, "population_df has duplicate (year, state) rows"

mass_shooting['incidents_per_capita'] = mass_shooting.gun_violence_incidents / mass_shooting.population

for category in corr_categories:
    df_category = mass_shooting[mass_shooting.category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('mass_shooting')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Storing Data

In [13]:
# Reshape the accumulated records into a table: one row per policy category,
# one column per incident type, cells holding the correlation.
correlation_records = pd.DataFrame(policy_correlations)
policy_correlation_df = correlation_records.pivot(
    index='category',
    columns='incidence_type',
    values='corr',
)
policy_correlation_df

incidence_type,all_incidents,mass_shooting,non_suicides,suicides
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ammunition regulations,-0.015377,-0.010841,-0.026483,0.0273
Assault weapons and large-capacity magazines,-0.119296,-0.131916,-0.100461,-0.157692
Background checks,-0.094928,-0.135149,-0.048042,-0.237733
Buyer regulations,-0.1351,-0.120317,-0.093434,-0.250143
Child access prevention,-0.089089,-0.122454,-0.044811,-0.224082
Dealer regulations,-0.100895,-0.124211,-0.054113,-0.241937
Domestic violence,-0.174739,-0.100594,-0.138953,-0.259826
Gun trafficking,-0.065531,-0.094347,-0.032881,-0.165109
Immunity,-0.124037,-0.070757,-0.061771,-0.314155
Possession regulations,-0.023057,0.041204,-0.008034,-0.070533


In [14]:
# Persist the category-level correlation table.
policy_correlation_df.to_csv(path_or_buf='data/incident_policy_category_corr.csv')

# Correlation Between Incident Types and Policy Subcategories

### All Incidents

In [15]:
# All incidents per (year, state), joined with policies implemented per sub-category.
incident_counts = gun_violence_df.groupby(['year', 'state']).size().to_frame('gun_violence_incidents')
policy_totals = policy_df.groupby(['year', 'state', 'sub_category'])[['policies_implemented']].sum()
all_incidents = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(all_incidents)
all_incidents = all_incidents.merge(population_df, on=['year', 'state'], how='left')
assert len(all_incidents) == n_rows, "population_df has duplicate (year, state) rows"

all_incidents['incidents_per_capita'] = all_incidents.gun_violence_incidents / all_incidents.population

# Start a fresh accumulator for the sub-category analysis. The 'category' key is kept
# (it now holds sub-category names) because the pivot cell indexes on it.
policy_correlations = {'category': [], 'incidence_type': [], 'corr': []}
corr_categories = set(all_incidents.sub_category)

for category in corr_categories:
    df_category = all_incidents[all_incidents.sub_category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('all_incidents')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Suicides

In [16]:
# Suicide incidents per (year, state), joined with policies implemented per sub-category.
incident_counts = (gun_violence_df[gun_violence_df.suicide]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'sub_category'])[['policies_implemented']].sum()
suicides = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(suicides)
suicides = suicides.merge(population_df, on=['year', 'state'], how='left')
assert len(suicides) == n_rows, "population_df has duplicate (year, state) rows"

suicides['incidents_per_capita'] = suicides.gun_violence_incidents / suicides.population

for category in corr_categories:
    df_category = suicides[suicides.sub_category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('suicides')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Non-Suicides

In [17]:
# Non-suicide incidents per (year, state), joined with policies implemented per sub-category.
# `!= True` is kept so that any non-True flag value counts as a non-suicide.
incident_counts = (gun_violence_df[gun_violence_df.suicide != True]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'sub_category'])[['policies_implemented']].sum()
non_suicides = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(non_suicides)
non_suicides = non_suicides.merge(population_df, on=['year', 'state'], how='left')
assert len(non_suicides) == n_rows, "population_df has duplicate (year, state) rows"

non_suicides['incidents_per_capita'] = non_suicides.gun_violence_incidents / non_suicides.population

for category in corr_categories:
    df_category = non_suicides[non_suicides.sub_category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('non_suicides')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Mass Shootings

In [18]:
# Mass-shooting incidents per (year, state), joined with policies implemented per sub-category.
incident_counts = (gun_violence_df[gun_violence_df['mass shooting']]
                   .groupby(['year', 'state']).size().to_frame('gun_violence_incidents'))
policy_totals = policy_df.groupby(['year', 'state', 'sub_category'])[['policies_implemented']].sum()
mass_shooting = incident_counts.join(policy_totals).reset_index()

# Attach population by key; the row-count check guards against duplicate
# (year, state) rows in population_df silently duplicating incident rows.
n_rows = len(mass_shooting)
mass_shooting = mass_shooting.merge(population_df, on=['year', 'state'], how='left')
assert len(mass_shooting) == n_rows, "population_df has duplicate (year, state) rows"

mass_shooting['incidents_per_capita'] = mass_shooting.gun_violence_incidents / mass_shooting.population

for category in corr_categories:
    df_category = mass_shooting[mass_shooting.sub_category == category]
    policy_correlations['category'].append(category)
    policy_correlations['incidence_type'].append('mass_shooting')
    # Label-based correlation instead of positional .corr().iloc[2, 4].
    policy_correlations['corr'].append(
        df_category['policies_implemented'].corr(df_category['incidents_per_capita']))

### Storing Data

In [19]:
# Reshape the accumulated records into a table: one row per policy sub-category
# (stored under the 'category' key), one column per incident type.
correlation_records = pd.DataFrame(policy_correlations)
policy_correlation_df = correlation_records.pivot(
    index='category',
    columns='incidence_type',
    values='corr',
)
policy_correlation_df

incidence_type,all_incidents,mass_shooting,non_suicides,suicides
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Age restrictions,-0.120659,-0.191894,-0.070802,-0.267905
Alcohol,-0.024184,-0.043451,-0.030679,0.004323
Assault weapons ban,-0.110243,-0.11611,-0.092476,-0.146999
Background check records,-0.087924,-0.127064,-0.049695,-0.2019
Background checks,-0.014622,-0.017982,-0.034538,0.058881
Background checks - mental health records,0.012181,-0.072383,0.055094,-0.141676
Background checks - state records,-0.149136,-0.129185,-0.106458,-0.264459
Background checks through permits,-0.101808,-0.133766,-0.04816,-0.266799
Background checks time limit,-0.045844,-0.000179,-0.023302,-0.114454
Bulk purchase limit,-0.080827,0.002876,-0.098342,-0.000303


In [20]:
# Persist the sub-category-level correlation table.
policy_correlation_df.to_csv(path_or_buf='data/incident_policy_subcategory_corr.csv')