In [706]:
# importing pandas
import pandas as pd
# importing geopandas
import geopandas as gpd
# importing datetime
import datetime
# importing numpy
import numpy as np
# importing matplotlib
import matplotlib.pyplot as plt
# importing seaborn
import seaborn as sns

### MASS SHOOTING (data cleaning)

In [572]:
# Load the mass-shooting incident data from CSV into `df`.
# NOTE: the shape is not inspected in this cell.
df = pd.read_csv('massshootingdata.csv')

In [573]:
# Total suspects per incident, whatever their outcome
# (injured, killed, or arrested).
df["suspects"] = (
    df["# Subjects-Suspects Injured"]
    + df["# Subjects-Suspects Killed"]
    + df["# Subjects-Suspects Arrested"]
)
# True when at least one suspect was killed in the incident.
df["suspectkilled"] = df["# Subjects-Suspects Killed"] > 0

# The raw suspect counts are now folded into the two columns above; remove
# them together with the other columns this analysis does not keep.
df = df.drop(
    columns=[
        "# Subjects-Suspects Injured",
        "# Subjects-Suspects Killed",
        "# Subjects-Suspects Arrested",
        "Operations",
        "Address",
    ]
)


In [574]:
# Map the source column headers to the short names that every later cell
# uses ("id", "date", "state", "area", "injuries", "fatalities").
rename_dict = {
    "Incident ID": "id",
    "Incident Date": "date",
    "State": "state",
    "City Or County": "area",
    "# Victims Injured": "injuries",
    "# Victims Killed": "fatalities",
}
# Reassign the renamed copy rather than using `inplace=True`, so the cell
# does not mutate an object that another name may still reference.
df = df.rename(columns=rename_dict)

In [575]:
# Total victims per incident = injured victims + killed victims.
df["tot_victims"] = df["injuries"] + df["fatalities"]

In [576]:
# English month name -> month number, for dates written like "January 1, 2021".
month_num = {
    "January": 1,
    "February": 2,
    "March": 3,
    "April": 4,
    "May": 5,
    "June": 6,
    "July": 7,
    "August": 8,
    "September": 9,
    "October": 10,
    "November": 11,
    "December": 12
}


def string_to_date(row):
    """Parse a row's textual date and record its year as a separate field.

    Parameters
    ----------
    row : pandas.Series
        One row whose ``"date"`` entry is a string of the form
        ``"<Month name> <day>, <year>"`` (for example ``"January 1, 2021"``).

    Returns
    -------
    pandas.Series
        A copy of ``row`` in which ``"date"`` is a ``datetime.date`` and a
        ``"year"`` entry holds the year as an ``int``. ``row`` itself is not
        modified.

    Raises
    ------
    KeyError
        If the month name is not a key of ``month_num``.
    IndexError
        If the string has fewer than three whitespace-separated parts.
    ValueError
        If the day or year part is not an integer, or the parts do not form
        a valid calendar date.
    """
    # Work on a copy: pandas does not support functions passed to
    # ``DataFrame.apply`` mutating the object they are handed.
    parsed = row.copy()
    # ``split()`` without an argument tolerates repeated or surrounding
    # whitespace, which ``split(" ")`` would turn into empty parts.
    parts = row["date"].split()
    month = month_num[parts[0]]
    day = int(parts[1].rstrip(","))  # the day carries a trailing comma
    year = int(parts[2])
    parsed["date"] = datetime.date(year, month, day)
    parsed["year"] = year
    return parsed
    
# Run string_to_date on every row (axis=1). The rows it returns form the new
# frame, so `df` gains the "year" column that the function adds.
df = df.apply(string_to_date, axis=1)

In [577]:
# Keep only incidents from 2021 and 2022, then renumber the rows from 0
# without keeping the old index as a column.
in_study_years = (df["year"] >= 2021) & (df["year"] <= 2022)
df = df.loc[in_study_years].reset_index(drop=True)

### GUN LAWS (data cleaning)

In [636]:
# Load the gun-law table from CSV; later cells rely on its "year" and
# "state" columns.
gun_laws_df = pd.read_csv("gunlaws.csv")

In [637]:
# Keep only the 2020 rows; with a single year left, the "year" column is
# redundant and is removed.
gun_laws_df = gun_laws_df[gun_laws_df["year"] == 2020].drop(columns="year")

In [638]:
# Display (rows, columns) of the 2020 gun-law table.
gun_laws_df.shape

(50, 136)

In [639]:
# Use the state name as the row index, then order the rows by state.
gun_laws_df = gun_laws_df.set_index("state")
gun_laws_df = gun_laws_df.sort_values(by="state")

In [640]:
# Preview the last five rows of the state-indexed table.
gun_laws_df.tail()

Unnamed: 0_level_0,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,violenth,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Virginia,1,1,1,1,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,25
Washington,1,1,1,1,0,0,0,0,0,0,...,1,1,0,1,1,0,1,0,0,55
West Virginia,1,1,1,1,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,18
Wisconsin,1,1,1,1,0,1,0,0,0,0,...,0,1,1,1,0,0,0,0,1,23
Wyoming,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7


In [641]:
# Gun-law columns kept for the analysis; "lawtotal" is the column plotted
# later as the number of laws.
columns = [
    "age18longgunsale",
    "assaultlist",
    "capuses",
    "ccbackground",
    "college",
    "danger",
    "elementary",
    "felony",
    "fingerprint",
    "immunity",
    "nosyg",
    "opencarrypermitl",
    "statechecksh",
    "universalpermit",
    "lawtotal",
]

In [642]:
# Narrow the table to the selected columns, then show its (rows, columns).
gun_laws_df = gun_laws_df[columns]
gun_laws_df.shape

(50, 15)

In [635]:
# The "lawtotal" column on its own, as a Series.
# NOTE(review): not referenced again in the cells shown -- confirm it is needed.
gun_total_S = gun_laws_df["lawtotal"]

### SHAPEFILES (data cleaning)

In [570]:
# Load the states shapefile with GeoPandas.
shape_df = gpd.read_file("shapefiles/states.shp")

In [496]:
# Rows to exclude from the shape table. The names must match the shapefile's
# NAME values exactly, because `drop` raises a KeyError for a missing label.
territories = [
    "American Samoa",
    "Commonwealth of the Northern Mariana Islands",
    "Puerto Rico",
    "United States Virgin Islands",
    "District of Columbia",
    "Guam",
]

# Keep name, abbreviation and geometry under friendlier column names ...
selected_shapes = shape_df.loc[:, ["NAME", "STUSPS", "geometry"]]
renamed_shapes = selected_shapes.rename(
    columns={"NAME": "state", "STUSPS": "abbreviation"}
)
# ... then index by state, remove the excluded rows and order by state.
shape_df = (
    renamed_shapes
    .set_index("state")
    .drop(index=territories)
    .sort_values(by="state")
)

### POPULATION (data cleaning)

In [497]:
# Load the population table from CSV; the next cell uses its "NAME" and
# "POPESTIMATE2019" columns.
pop_df = pd.read_csv('populationdata.csv')

In [498]:
# Reduce the population table to one "population" column (the 2019 estimate),
# indexed by state name.
name_and_estimate = pop_df.loc[:, ["NAME", "POPESTIMATE2019"]]
pop_df = name_and_estimate.rename(
    columns={"NAME": "state", "POPESTIMATE2019": "population"}
).set_index("state")

### BRINGING IT ALL TOGETHER!

In [520]:
# Attach population to each state's shape row. `join` matches on the index
# (the state name in both frames) and, by default, keeps every row of shape_df.
states_df = shape_df.join(pop_df)

In [500]:
# Disabled: incident-level merge of `df` with `states_df`, newest first.
# Every line of the statement must stay commented out; leaving the
# continuation lines live makes the cell fail with an IndentationError.
# combo_df = (pd.merge(df, states_df, on="state")
#             .sort_values(by="date", ascending=False)
#             .reset_index()
#             .drop("index", axis=1))

In [501]:
# states_df_large = shape_df.join(pop_df.join(gun_laws_df))

In [502]:
# Disabled: incident-level merge of `df` with `states_df_large`, newest first.
# Every line of the statement must stay commented out; leaving the
# continuation lines live makes the cell fail with an IndentationError.
# combo_df_large = (pd.merge(df, states_df_large, on="state")
#                  .sort_values(by="date", ascending=False)
#                  .reset_index()
#                  .drop("index", axis=1))

In [503]:
# combo_df

Unnamed: 0,id,date,state,area,injuries,fatalities,suspects,suspectkilled,tot_victims,year,abbreviation,geometry,population,lawtotal
0,2492253,2022-12-31,Alabama,Mobile,7,1,2,False,8,2022,AL,"POLYGON ((-9841333.855 3579579.495, -9841349.6...",4903185,10.0
1,2491656,2022-12-31,Arizona,Phoenix,9,0,0,False,9,2022,AZ,"POLYGON ((-12761162.105 4147165.875, -12761214...",7278717,8.0
2,2490925,2022-12-30,Texas,Humble,3,2,0,False,5,2022,TX,"POLYGON ((-11473106.144 3889876.760, -11473097...",28995881,18.0
3,2491054,2022-12-30,Tennessee,Memphis,3,1,0,False,4,2022,TN,"POLYGON ((-9945959.215 4370089.033, -9944979.2...",6829174,22.0
4,2489614,2022-12-28,New York,Bronx,3,1,2,False,4,2022,NY,"MULTIPOLYGON (((-8866092.533 5160809.769, -886...",19453561,77.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306,1893754,2021-01-03,Florida,Miami,6,0,0,False,6,2021,FL,"MULTIPOLYGON (((-9734892.689 3569910.909, -973...",21477737,30.0
1307,1892114,2021-01-01,Iowa,Sioux City,4,1,3,False,5,2021,IA,"POLYGON ((-10312404.419 4952179.681, -10312552...",3155070,24.0
1308,1892704,2021-01-01,Texas,Amarillo,3,1,1,False,4,2021,TX,"POLYGON ((-11473106.144 3889876.760, -11473097...",28995881,18.0
1309,1892034,2021-01-01,Arkansas,Fort Smith,6,0,2,False,6,2021,AR,"POLYGON ((-10515267.713 4101325.818, -10515269...",3017804,11.0


In [734]:
# Sum of victims (injured + killed) per state over the filtered incidents.
tot_victims = df.groupby("state")["tot_victims"].sum()

In [735]:
# Build the per-state table: victim totals + gun-law columns, then
# shape/population data, all matched on "state".
# NOTE(review): pd.merge defaults to an inner join, so a state missing from
# any input (for example one with no incidents in `df`) is dropped from
# combo_df -- confirm that is intended.
victims_df = pd.merge(left=tot_victims, right=gun_laws_df, on="state")
combo_df = pd.merge(left=victims_df, right=states_df, on="state")

In [738]:
# victims per capita
combo_df["vpc"] = combo_df.tot_victims / combo_df.population

In [739]:
# Sum of fatalities per state.
# NOTE(review): a later cell recomputes the same Series as `tot_fatal`, and
# this name is not used in the cells shown -- consider keeping only one.
tot_fatalities = df.groupby("state")["fatalities"].sum()


In [740]:
# Preview the first five rows of the combined per-state table (now with "vpc").
combo_df.head()

Unnamed: 0_level_0,tot_victims,age18longgunsale,assaultlist,capuses,ccbackground,college,danger,elementary,felony,fingerprint,immunity,nosyg,opencarrypermitl,statechecksh,universalpermit,lawtotal,abbreviation,geometry,population,vpc
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Alabama,182,0,0,0,1,0,1,0,0,0,0,0,0,0,0,10,AL,"POLYGON ((-9841333.855 3579579.495, -9841349.6...",4903185,3.7e-05
Alaska,5,1,0,0,0,0,0,1,1,0,0,0,0,0,0,3,AK,"MULTIPOLYGON (((-15108567.654 8339788.089, -15...",731545,7e-06
Arizona,111,0,0,0,0,0,1,0,1,0,0,0,0,0,0,8,AZ,"POLYGON ((-12761162.105 4147165.875, -12761214...",7278717,1.5e-05
Arkansas,70,0,0,0,1,1,1,1,1,0,0,1,0,0,0,11,AR,"POLYGON ((-10515267.713 4101325.818, -10515269...",3017804,2.3e-05
California,467,1,1,1,1,1,1,1,1,1,1,1,1,1,1,111,CA,"MULTIPOLYGON (((-13060108.516 3854208.959, -13...",39512223,1.2e-05


In [741]:
# Deaths per capita: per-state fatality totals divided by population.
# pandas aligns the two Series on their index labels before dividing.
tot_fatal = df.groupby("state")["fatalities"].sum()
combo_df["fpc"] = tot_fatal / combo_df["population"]

In [742]:
# Preview the first five rows again, now including the "fpc" column.
combo_df.head()

Unnamed: 0_level_0,tot_victims,age18longgunsale,assaultlist,capuses,ccbackground,college,danger,elementary,felony,fingerprint,...,nosyg,opencarrypermitl,statechecksh,universalpermit,lawtotal,abbreviation,geometry,population,vpc,fpc
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,182,0,0,0,1,0,1,0,0,0,...,0,0,0,0,10,AL,"POLYGON ((-9841333.855 3579579.495, -9841349.6...",4903185,3.7e-05,6e-06
Alaska,5,1,0,0,0,0,0,1,1,0,...,0,0,0,0,3,AK,"MULTIPOLYGON (((-15108567.654 8339788.089, -15...",731545,7e-06,1e-06
Arizona,111,0,0,0,0,0,1,0,1,0,...,0,0,0,0,8,AZ,"POLYGON ((-12761162.105 4147165.875, -12761214...",7278717,1.5e-05,3e-06
Arkansas,70,0,0,0,1,1,1,1,1,0,...,1,0,0,0,11,AR,"POLYGON ((-10515267.713 4101325.818, -10515269...",3017804,2.3e-05,4e-06
California,467,1,1,1,1,1,1,1,1,1,...,1,1,1,1,111,CA,"MULTIPOLYGON (((-13060108.516 3854208.959, -13...",39512223,1.2e-05,3e-06


In [722]:
# Use the same resolution (dots per inch) for displayed and saved figures.
FIGURE_DPI = 300
plt.rcParams["figure.dpi"] = FIGURE_DPI
plt.rcParams["savefig.dpi"] = FIGURE_DPI

In [753]:
# Plot styling applied to the figures that follow.
sns.set_context("notebook")
sns.set_theme(style="whitegrid", palette="autumn")

# Scatter of victims per capita against the law count, with a fitted
# regression line and a 95% confidence band.
g = sns.lmplot(
    data=combo_df,
    x="lawtotal",
    y="vpc",
    fit_reg=True,
    ci=95,
)
g.set(title="Victims Per Capita vs. Number of Laws")

g.ax.set(xlabel='Number of Laws', ylabel='Victims Per Capita')

In [754]:
# Scatter of deaths per capita against the law count, with a fitted
# regression line and a 95% confidence band.
g = sns.lmplot(
    data=combo_df,
    x="lawtotal",
    y="fpc",
    fit_reg=True,
    ci=95,
)
g.set(title="Deaths Per Capita vs. Number of Laws")

g.ax.set(xlabel='Number of Laws', ylabel='Deaths Per Capita')