In [3]:
import pandas as pd

# Correlations

In [4]:
# Load the pre-computed gun-violence table from its pickle file.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
gun_violence_metadata = pd.read_pickle("gunViolenceMetadata.pickle")
# Echo the frame as the cell output. The rendered result shows a (state, year)
# index with columns suicide, mass_shooting, gang, non_suicide and all_incidents;
# the values look like per-capita rates -- TODO confirm against the producer notebook.
gun_violence_metadata

Unnamed: 0_level_0,Unnamed: 1_level_0,suicide,mass_shooting,gang,non_suicide,all_incidents
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,2014,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
Alabama,2015,0.000006,8.239263e-07,0.000000e+00,0.000207,0.000213
Alabama,2016,0.000007,3.082092e-06,6.164184e-07,0.000262,0.000269
Alabama,2017,0.000008,1.435018e-06,2.050025e-07,0.000291,0.000300
Alabama,2018,0.000002,1.022155e-06,0.000000e+00,0.000069,0.000071
...,...,...,...,...,...,...
Wyoming,2014,0.000007,0.000000e+00,0.000000e+00,0.000103,0.000110
Wyoming,2015,0.000005,0.000000e+00,0.000000e+00,0.000331,0.000338
Wyoming,2016,0.000009,0.000000e+00,0.000000e+00,0.000208,0.000217
Wyoming,2017,0.000003,0.000000e+00,0.000000e+00,0.000153,0.000157


In [5]:
# Read the policy table, keep only the columns this analysis needs, key it by
# (year, state) and attach the incident columns from gun_violence_metadata.
# Both frames carry `state` and `year` index levels, so the join aligns on those
# level names even though the level order differs between the two frames.
policy_columns = ['year', 'state', 'sub_category', 'percent_policies_implemented']
policy_metadata = (
    pd.read_pickle('policyMetadata.pickle')
    .loc[:, policy_columns]
    .set_index(['year', 'state'])
    .join(gun_violence_metadata)
)
policy_metadata

Unnamed: 0_level_0,Unnamed: 1_level_0,sub_category,percent_policies_implemented,suicide,mass_shooting,gang,non_suicide,all_incidents
year,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014,Alabama,Prohibitors,0.00,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
2014,Alabama,Background checks,0.75,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
2014,Alabama,Large capacity magazine ban,0.00,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
2014,Alabama,Assault weapons ban,0.00,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
2014,Alabama,Background checks - mental health records,0.00,0.000005,4.129043e-07,6.193565e-07,0.000267,0.000272
...,...,...,...,...,...,...,...,...
2018,Wyoming,Alcohol,0.00,0.000009,0.000000e+00,0.000000e+00,0.000016,0.000024
2018,Wyoming,Relinquishment of weapons,0.00,0.000009,0.000000e+00,0.000000e+00,0.000016,0.000024
2018,Wyoming,Mental Health,0.00,0.000009,0.000000e+00,0.000000e+00,0.000016,0.000024
2018,Wyoming,Felony,1.00,0.000009,0.000000e+00,0.000000e+00,0.000016,0.000024


In [6]:
# Pairwise correlation matrix between policy coverage and every incident column.
# The text column `sub_category` is excluded explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises on
# string data, so the bare `.corr()` call breaks on current pandas releases.
policy_metadata.select_dtypes(include='number').corr()

Unnamed: 0,percent_policies_implemented,suicide,mass_shooting,gang,non_suicide,all_incidents
percent_policies_implemented,1.0,-0.188042,-0.003477,0.167008,-0.057244,-0.063171
suicide,-0.188042,1.0,0.085022,-0.046737,0.534864,0.561303
mass_shooting,-0.003477,0.085022,1.0,0.241538,0.529415,0.521857
gang,0.167008,-0.046737,0.241538,1.0,0.184249,0.178877
non_suicide,-0.057244,0.534864,0.529415,0.184249,1.0,0.999494
all_incidents,-0.063171,0.561303,0.521857,0.178877,0.999494,1.0


In [7]:
# Correlation of `percent_policies_implemented` with each incident column, as a
# two-column frame (`index`, `percent_policies_implemented`).
# - select_dtypes keeps the call working on pandas >= 2.0, where corr() raises
#   on the string column `sub_category` instead of silently dropping it.
# - The row is picked by label rather than by position so the result does not
#   depend on `percent_policies_implemented` being the first numeric column.
(
    policy_metadata
    .select_dtypes(include='number')
    .corr()
    .loc['percent_policies_implemented']
    .drop('percent_policies_implemented')  # remove the trivial self-correlation of 1.0
    .reset_index()
)

Unnamed: 0,index,percent_policies_implemented
0,suicide,-0.188042
1,mass_shooting,-0.003477
2,gang,0.167008
3,non_suicide,-0.057244
4,all_incidents,-0.063171


In [8]:
# For every policy sub-category, correlate `percent_policies_implemented` with
# the incident columns, then keep only the column named by `incidence_type`.
# Result: `correlation_df` with columns ['correlation', 'category'].
incidence_type = 'all_incidents'

subcategory_corrs = []
# .unique() yields sub-categories in first-appearance order, so the row order is
# the same on every run (iterating a set() gave a different order between runs).
for subcategory in policy_metadata['sub_category'].unique():
    subcategory_data = policy_metadata[policy_metadata['sub_category'] == subcategory]
    subcategory_corr = (
        subcategory_data
        # corr() on pandas >= 2.0 raises on the string column instead of dropping it
        .select_dtypes(include='number')
        .corr()
        # label-based selection: independent of the numeric column order
        .loc['percent_policies_implemented']
        .drop('percent_policies_implemented')
        .rename('correlation')
        .rename_axis('incidence_type')
        .reset_index()
        .assign(category=subcategory)
    )
    subcategory_corrs.append(subcategory_corr)

correlation_df = pd.concat(subcategory_corrs)
# Filter and drop without inplace=True so the filtered slice is never mutated.
correlation_df = (
    correlation_df[correlation_df['incidence_type'] == incidence_type]
    .drop(columns=['incidence_type'])
)
correlation_df

Unnamed: 0,correlation,category
4,0.015074,Drugs
4,-0.041572,Gun trafficking
4,-0.014622,Background checks
4,-0.095201,Safety training
4,-0.116175,Waiting period
4,-0.244537,Registration
4,-0.090064,Stand your ground
4,-0.126642,Licensing
4,-0.045844,Background checks time limit
4,0.111575,Campus carry


# Cluster Data

In [10]:
# Load the cluster assignments from their pickle file. Judging by the file name
# these come from a 3-cluster PCA-based run -- TODO confirm with the notebook
# that produced the pickle.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
policy_clusters = pd.read_pickle('policyClusters_3_PCA.pickle')

In [11]:
# Join the cluster assignment onto the policy table, average policy coverage per
# (category, cluster), and attach each category's correlation.

# Index policy_clusters by (state, year) only if that has not happened yet.
# The original in-place set_index made this cell fail with a KeyError when it
# was run a second time, because the columns had already moved into the index.
if not {'state', 'year'}.issubset(policy_clusters.index.names):
    policy_clusters = policy_clusters.set_index(['state', 'year'])

cluster_data = (
    policy_metadata
    .join(policy_clusters)
    .loc[:, ['sub_category', 'percent_policies_implemented', 'cluster']]
    .rename(columns={'sub_category': 'category'})
)

# Rename the clusters to strings for readability ("cluster 0", "cluster 1", ...).
# assign() builds a new frame, which avoids the chained-assignment warning that
# comes from setting a column on a column subset. na_action='ignore' leaves
# unmatched rows as NaN so they are dropped by the groupby below rather than
# being turned into a spurious "cluster nan" label.
cluster_data = cluster_data.assign(
    cluster=cluster_data['cluster'].map(lambda c: f"cluster {c}", na_action='ignore')
)

# Mean of `percent_policies_implemented` per category and cluster, reshaped to
# one row per category and one column per cluster.
cluster_data = (
    cluster_data
    .groupby(['category', 'cluster'])['percent_policies_implemented']
    .mean()
    .unstack('cluster')
)

# Attach the per-category correlation and order the rows by ascending correlation.
table_data = (
    cluster_data
    .join(correlation_df.set_index('category')[['correlation']])
    .reset_index()
    .sort_values(by='correlation')
)

In [12]:
# Display the per-category table: mean `percent_policies_implemented` in each
# cluster next to the category's correlation, sorted by ascending correlation.
table_data

Unnamed: 0,category,cluster 0,cluster 1,cluster 2,correlation
34,Registration,0.014205,0.064103,0.428571,-0.244537
27,Misdemeanor crimes,0.090097,0.43956,0.657143,-0.222615
36,Reporting,0.034091,0.25641,0.8,-0.191727
18,Gun violence restraining orders,0.019886,0.141026,0.214286,-0.188981
6,Background checks - state records,0.0625,0.679487,0.785714,-0.149136
31,Preemption,0.018939,0.042735,0.809524,-0.14228
14,Fingerprinting,0.034091,0.102564,1.0,-0.132898
22,Large capacity magazine ban,0.001894,0.042735,0.647619,-0.13022
24,Licensing,0.028409,0.34188,0.580952,-0.126642
48,Violent Misdemeanor,0.039773,0.205128,0.714286,-0.125246
