In [204]:
# importing pandas
import pandas as pd
# importing datetime
import datetime
# importing geopandas
import geopandas as gpd

In [205]:
# Load the mass shooting incidents CSV; the trailing expression displays
# the frame's (rows, columns).
df = pd.read_csv(filepath_or_buffer="massshootingdata.csv")
df.shape

(2000, 11)

In [206]:
# Number of suspects per incident: injured + killed + arrested.
# NOTE(review): if one suspect can appear in more than one of these source
# columns (e.g. injured and arrested), this sum counts them twice -- confirm
# against the dataset's column definitions.
df["suspects"] = (
    df["# Subjects-Suspects Injured"]
    + df["# Subjects-Suspects Killed"]
    + df["# Subjects-Suspects Arrested"]
)

# Boolean flag: True when at least one suspect was killed.
# A vectorized comparison replaces the per-element Python lambda.
df["suspectkilled"] = df["# Subjects-Suspects Killed"] > 0

# Drop the raw suspect count columns (summarized above) together with the
# "Operations" and "Address" columns, which are not used further.
df = df.drop(
    columns=[
        "# Subjects-Suspects Injured",
        "# Subjects-Suspects Killed",
        "# Subjects-Suspects Arrested",
        "Operations",
        "Address",
    ]
)


In [207]:
# Rename the source columns to short lowercase names so later cells can
# refer to them as e.g. df["injuries"] or df["date"].
rename_dict = {
    "Incident ID": "id",
    "Incident Date": "date",
    "State": "state",
    "City Or County": "area",
    "# Victims Injured": "injuries",
    "# Victims Killed": "fatalities",
}
# Reassign instead of mutating in place, matching the `df = df....` pattern
# used by the other transformation cells.
df = df.rename(columns=rename_dict)

In [208]:
# Total victims per incident = injured victims + killed victims.
df["tot_victims"] = df["injuries"].add(df["fatalities"])

In [209]:
# Lookup from the full English month name to its 1-based month number.
month_num = {
    "January": 1,
    "February": 2,
    "March": 3,
    "April": 4,
    "May": 5,
    "June": 6,
    "July": 7,
    "August": 8,
    "September": 9,
    "October": 10,
    "November": 11,
    "December": 12
}


def string_to_date(row):
    """Return a copy of ``row`` with ``date`` parsed and ``year`` added.

    Parameters
    ----------
    row : pandas.Series
        One record whose ``date`` entry is either a string of the form
        ``"<Month name> <day>, <year>"`` (e.g. ``"December 31, 2022"``) or a
        value that is already a ``datetime.date``.

    Returns
    -------
    pandas.Series
        A new Series in which ``date`` is a ``datetime.date`` and ``year`` is
        that date's calendar year. The input ``row`` is not modified.

    Raises
    ------
    KeyError
        If the month name is not a key of ``month_num``.
    IndexError
        If the date string has fewer than three whitespace-separated parts.
    ValueError
        If the day or year is not an integer, or the date is invalid.
    """
    # DataFrame.apply does not support functions that mutate the object they
    # are handed, so all writes go to a copy.
    parsed_row = row.copy()
    raw = row["date"]

    if isinstance(raw, datetime.date):
        # Already converted (for example when the cell is re-run): normalize
        # datetime-like values to a plain date and just refresh ``year``.
        if isinstance(raw, datetime.datetime):
            raw = raw.date()
        parsed_row["date"] = raw
        parsed_row["year"] = raw.year
        return parsed_row

    # split() with no argument tolerates repeated or surrounding whitespace.
    parts = raw.split()
    month = month_num[parts[0]]
    day = int(parts[1].rstrip(","))  # the day token carries a trailing comma
    year = int(parts[2])

    parsed_row["date"] = datetime.date(year, month, day)
    parsed_row["year"] = year
    return parsed_row
    
# Run string_to_date on every row (axis=1 is the same as axis="columns").
df = df.apply(func=string_to_date, axis=1)

In [210]:
# Keep only incidents from 2021 through 2022 (both bounds inclusive) and
# renumber the rows from 0. reset_index(drop=True) discards the old index
# directly instead of materializing it as an "index" column and dropping it.
df = df.loc[df["year"].between(2021, 2022)].reset_index(drop=True)

In [211]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,id,date,state,area,injuries,fatalities,suspects,suspectkilled,tot_victims,year
0,2492253,2022-12-31,Alabama,Mobile,7,1,2,False,8,2022
1,2491656,2022-12-31,Arizona,Phoenix,9,0,0,False,9,2022
2,2491054,2022-12-30,Tennessee,Memphis,3,1,0,False,4,2022
3,2490925,2022-12-30,Texas,Humble,3,2,0,False,5,2022
4,2489614,2022-12-28,New York,Bronx,3,1,2,False,4,2022


In [212]:
# Load the gun-law table from gunlaws.csv.
gun_laws_df = pd.read_csv(filepath_or_buffer="gunlaws.csv")

In [213]:
# Keep only the rows for 2020; the year column is then constant, so drop it.
gun_laws_df = (
    gun_laws_df
    .loc[gun_laws_df["year"].eq(2020)]
    .drop(columns="year")
)

In [214]:
# Display the remaining column names as an array.
gun_laws_df.columns.values

array(['state', 'felony', 'invcommitment', 'invoutpatient', 'danger',
       'drugmisdemeanor', 'alctreatment', 'alcoholism', 'relinquishment',
       'violent', 'violenth', 'violentpartial', 'dealer', 'dealerh',
       'recordsall', 'recordsallh', 'recordsdealer', 'recordsdealerh',
       'reportall', 'reportallh', 'reportdealer', 'reportdealerh',
       'purge', 'residential', 'theft', 'security', 'inspection',
       'ammlicense', 'ammrecords', 'permit', 'permith', 'fingerprint',
       'training', 'permitlaw', 'registration', 'registrationh',
       'defactoreg', 'defactoregh', 'ammpermit', 'ammrestrict',
       'age21handgunsale', 'age18longgunsale', 'age21longgunsaled',
       'age21longgunsale', 'age21handgunpossess', 'age18longgunpossess',
       'age21longgunpossess', 'loststolen', 'amm18', 'amm21h',
       'universal', 'universalh', 'gunshow', 'gunshowh',
       'universalpermit', 'universalpermith', 'backgroundpurge',
       'ammbackground', 'threedaylimit', 'mentalhealth', 

In [215]:
# Use the state name as the row index, then preview the first five rows.
gun_laws_df = gun_laws_df.set_index(keys="state")
gun_laws_df.head(n=5)

Unnamed: 0_level_0,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,violenth,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,1,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,10
Alaska,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
Arizona,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
Arkansas,1,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
California,1,1,0,1,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,111


In [220]:
# Read the states shapefile into a GeoDataFrame.
shape_df = gpd.read_file(filename="shapefiles/states.shp")

In [221]:
# Keep only the name, abbreviation and geometry columns, give the first two
# lowercase names, and index the result by state name.
shape_df = shape_df[["NAME", "STUSPS", "geometry"]]
shape_df = shape_df.rename(columns={"NAME": "state", "STUSPS": "abbreviation"})
shape_df = shape_df.set_index("state")

In [222]:
# Preview the first five rows of the state geometries.
shape_df.head(n=5)

Unnamed: 0_level_0,abbreviation,geometry
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Hawaii,HI,"MULTIPOLYGON (((-17361760.204 2164544.588, -17..."
Arkansas,AR,"POLYGON ((-10515267.713 4101325.818, -10515269..."
New Mexico,NM,"POLYGON ((-12138963.727 4106855.170, -12138964..."
Montana,MT,"POLYGON ((-12727480.235 5886876.586, -12727567..."
New York,NY,"MULTIPOLYGON (((-8866092.533 5160809.769, -886..."
