In [135]:
# importing pandas
import pandas as pd
# importing datetime
import datetime
# importing geopandas
import geopandas as gpd

In [136]:
# Load the mass-shooting incident records from the local CSV file.
df = pd.read_csv('massshootingdata.csv')
# Show (rows, columns) of the freshly loaded frame as the cell output.
df.shape

(2000, 11)

In [137]:
# Drop columns that are not needed for the analysis.
# errors="ignore" skips any label that is already absent, so the cell can be
# re-run safely. It also means a missing "Operations" column no longer stops
# "Address" from being dropped: the previous try/except aborted on the first
# KeyError and silently left the remaining column in place.
df = df.drop(columns=["Operations", "Address"], errors="ignore")

In [138]:
# Total number of suspects per incident: injured + killed + arrested.
df["suspects"] = (
    df["# Subjects-Suspects Injured"]
    + df["# Subjects-Suspects Killed"]
    + df["# Subjects-Suspects Arrested"]
)
# Boolean flag: True when at least one suspect was killed.
df["suspectkilled"] = df["# Subjects-Suspects Killed"] > 0

# The raw per-category suspect counts are no longer needed once the
# derived columns exist.
df.drop(
    columns=[
        "# Subjects-Suspects Injured",
        "# Subjects-Suspects Killed",
        "# Subjects-Suspects Arrested",
    ],
    inplace=True,
)


In [139]:
# Map the verbose column headers of the raw export to short, lowercase
# names that are easier to type and read in later cells.
rename_dict = {
    "Incident ID": "id",
    "Incident Date": "date",
    "State": "state",
    "City Or County": "area",
    "# Victims Injured": "injuries",
    "# Victims Killed": "fatalities",
}
# Labels that are not keys of rename_dict keep their current name.
df = df.rename(columns=rename_dict)

In [140]:
# Total victims per incident = injured victims + killed victims.
df["tot_victims"] = df["injuries"].add(df["fatalities"])

In [141]:
# English month name -> calendar month number (1-12), built in calendar order.
month_num = {
    name: number
    for number, name in enumerate(
        [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ],
        start=1,
    )
}


def string_to_date(row):
    """Replace a row's textual date with a ``datetime.date`` and add its year.

    The ``date`` entry is split on single spaces. The first piece is looked
    up in ``month_num``, the second is parsed as the day after stripping any
    trailing comma, and the third is parsed as the year.

    Args:
        row: A row object (such as the Series handed over by
            ``DataFrame.apply`` along the columns axis) whose ``date`` entry
            is a string.

    Returns:
        The same ``row`` object, with ``date`` overwritten by a
        ``datetime.date`` and a new integer ``year`` entry.

    Raises:
        KeyError: If the first piece is not a key of ``month_num``.
        IndexError: If the text has fewer than three space-separated pieces.
        ValueError: If the day or year piece is not an integer, or the
            resulting date is not a valid calendar date.
    """
    tokens = row["date"].split(" ")
    month = month_num[tokens[0]]
    # The day token carries the comma that separates it from the year.
    day = int(tokens[1].rstrip(","))
    year = int(tokens[2])
    row["date"] = datetime.date(year, month, day)
    row["year"] = year
    return row
    
# Run string_to_date once per row (axis=1 passes each row as a Series), so
# every row gets a parsed "date" and a new "year" entry.
df = df.apply(string_to_date, axis=1)

In [142]:
# Keep only incidents from 2021 through 2022 (both bounds inclusive) and
# renumber the remaining rows from 0, discarding the old index labels.
df = df.loc[df["year"].between(2021, 2022)].reset_index(drop=True)

In [143]:
# Preview the first rows of the cleaned incident frame.
df.head()

Unnamed: 0,id,date,state,area,injuries,fatalities,suspects,suspectkilled,tot_victims,year
0,2492253,2022-12-31,Alabama,Mobile,7,1,2,False,8,2022
1,2491656,2022-12-31,Arizona,Phoenix,9,0,0,False,9,2022
2,2491054,2022-12-30,Tennessee,Memphis,3,1,0,False,4,2022
3,2490925,2022-12-30,Texas,Humble,3,2,0,False,5,2022
4,2489614,2022-12-28,New York,Bronx,3,1,2,False,4,2022


In [144]:
# Load the gun-law data set from the local CSV file into its own frame.
gun_laws_df = pd.read_csv("gunlaws.csv")

In [145]:
# Keep only the rows recorded for 2020; after that filter the "year" column
# holds a single value, so it is dropped.
# Filtering and dropping in one chained expression builds a fresh frame. The
# previous in-place drop acted on the result of a .loc boolean slice, which
# can trigger pandas' SettingWithCopyWarning.
gun_laws_df = gun_laws_df.loc[gun_laws_df["year"] == 2020].drop(columns="year")

In [146]:
# List the column names of the gun-law frame as a NumPy array.
gun_laws_df.columns.values

array(['state', 'felony', 'invcommitment', 'invoutpatient', 'danger',
       'drugmisdemeanor', 'alctreatment', 'alcoholism', 'relinquishment',
       'violent', 'violenth', 'violentpartial', 'dealer', 'dealerh',
       'recordsall', 'recordsallh', 'recordsdealer', 'recordsdealerh',
       'reportall', 'reportallh', 'reportdealer', 'reportdealerh',
       'purge', 'residential', 'theft', 'security', 'inspection',
       'ammlicense', 'ammrecords', 'permit', 'permith', 'fingerprint',
       'training', 'permitlaw', 'registration', 'registrationh',
       'defactoreg', 'defactoregh', 'ammpermit', 'ammrestrict',
       'age21handgunsale', 'age18longgunsale', 'age21longgunsaled',
       'age21longgunsale', 'age21handgunpossess', 'age18longgunpossess',
       'age21longgunpossess', 'loststolen', 'amm18', 'amm21h',
       'universal', 'universalh', 'gunshow', 'gunshowh',
       'universalpermit', 'universalpermith', 'backgroundpurge',
       'ammbackground', 'threedaylimit', 'mentalhealth', 

In [147]:
# Renumber the rows from 0 and discard the old index labels instead of
# keeping them as an extra "index" column.
gun_laws_df = gun_laws_df.reset_index(drop=True)

In [148]:
# Preview the first rows of the filtered gun-law frame.
gun_laws_df.head()

Unnamed: 0,state,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
0,Alabama,0,1,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,10
1,Alaska,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
2,Arizona,1,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
3,Arkansas,1,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
4,California,1,1,0,1,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,111
