In [204]:
# importing pandas
import pandas as pd
# importing geopandas (shapefile handling) and datetime (date parsing)
import geopandas as gpd
import datetime

### MASS SHOOTING (data cleaning)

In [229]:
# Load the raw mass-shooting incident table from CSV, then display its
# (rows, columns) shape as a quick check that the import worked.
df = pd.read_csv('massshootingdata.csv')
df.shape

(2000, 11)

In [230]:
# Derive two suspect-related columns, then drop the raw columns that are no
# longer needed:
#   suspects      - injured + killed + arrested suspect counts for the incident
#   suspectkilled - True when at least one suspect was killed
# NOTE(review): a suspect who was both injured and arrested would presumably be
# counted twice in `suspects` -- confirm how the source tallies these columns.
df = (
    df.assign(
        suspects=lambda frame: (frame["# Subjects-Suspects Injured"]
                                + frame["# Subjects-Suspects Killed"]
                                + frame["# Subjects-Suspects Arrested"]),
        # Vectorised comparison instead of a per-element map(lambda ...).
        suspectkilled=lambda frame: frame["# Subjects-Suspects Killed"] > 0,
    )
    .drop(columns=["# Subjects-Suspects Injured", "# Subjects-Suspects Killed",
                   "# Subjects-Suspects Arrested", "Operations", "Address"])
)


In [231]:
# Map the verbose source headers to the short names used by the rest of the
# notebook.
rename_dict = {
    "Incident ID": "id",
    "Incident Date": "date",
    "State": "state",
    "City Or County": "area",
    "# Victims Injured": "injuries",
    "# Victims Killed": "fatalities",
}
# Rebind df rather than using inplace=True, matching the other cells that
# reassign df after each transformation.
df = df.rename(columns=rename_dict)

In [232]:
# Total victims per incident: injured plus killed.
df["tot_victims"] = df["injuries"].add(df["fatalities"])

In [233]:
# Lookup from English month name to its 1-12 month number, used by
# string_to_date below.
month_num = {
    "January": 1,
    "February": 2,
    "March": 3,
    "April": 4,
    "May": 5,
    "June": 6,
    "July": 7,
    "August": 8,
    "September": 9,
    "October": 10,
    "November": 11,
    "December": 12
}


def string_to_date(row):
    """Parse a row's "Month D, YYYY" date string and add a year field.

    Parameters
    ----------
    row : pandas.Series
        One record whose ``"date"`` entry is a string such as
        ``"December 31, 2022"``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` in which ``"date"`` is a ``datetime.date`` and a
        ``"year"`` entry holds the integer year. The input row is not modified.

    Raises
    ------
    KeyError
        If the month name is not a key of ``month_num``.
    IndexError
        If the string has fewer than three whitespace-separated tokens.
    ValueError
        If the day or year token is not an integer, or the resulting
        year/month/day is not a valid calendar date.
    """
    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by DataFrame.apply.
    parsed = row.copy()
    # split() with no argument tolerates repeated or surrounding whitespace,
    # which split(" ") would turn into empty tokens.
    parts = row["date"].split()
    month = month_num[parts[0]]
    # The day token carries the trailing comma of "Month D, YYYY".
    day = int(parts[1].rstrip(','))
    year = int(parts[2])
    parsed["date"] = datetime.date(year, month, day)
    parsed["year"] = year
    return parsed
    
# Run string_to_date on every row (axis=1): the "date" strings become
# datetime.date objects and a "year" column is added.
df = df.apply(string_to_date, axis=1)

In [234]:
# Keep only incidents from 2021 and 2022 (both bounds inclusive) and renumber
# the rows from 0. reset_index(drop=True) discards the old index directly
# instead of materialising it as an "index" column and dropping it afterwards.
df = (df.loc[df["year"].between(2021, 2022)]
      .reset_index(drop=True))

In [235]:
# Preview the first rows of the cleaned mass-shooting frame.
df.head()

Unnamed: 0,id,date,state,area,injuries,fatalities,suspects,suspectkilled,tot_victims,year
0,2492253,2022-12-31,Alabama,Mobile,7,1,2,False,8,2022
1,2491656,2022-12-31,Arizona,Phoenix,9,0,0,False,9,2022
2,2491054,2022-12-30,Tennessee,Memphis,3,1,0,False,4,2022
3,2490925,2022-12-30,Texas,Humble,3,2,0,False,5,2022
4,2489614,2022-12-28,New York,Bronx,3,1,2,False,4,2022


### GUN LAWS (data cleaning)

In [276]:
# Load the raw gun-laws table from CSV.
gun_laws_df = pd.read_csv("gunlaws.csv")

In [277]:
# Keep only the rows recorded for 2020; the year column is then constant, so
# it is removed.
gun_laws_df = gun_laws_df[gun_laws_df["year"] == 2020].drop(columns=["year"])

In [278]:
# List the column names that remain after dropping "year".
gun_laws_df.columns.values

array(['state', 'felony', 'invcommitment', 'invoutpatient', 'danger',
       'drugmisdemeanor', 'alctreatment', 'alcoholism', 'relinquishment',
       'violent', 'violenth', 'violentpartial', 'dealer', 'dealerh',
       'recordsall', 'recordsallh', 'recordsdealer', 'recordsdealerh',
       'reportall', 'reportallh', 'reportdealer', 'reportdealerh',
       'purge', 'residential', 'theft', 'security', 'inspection',
       'ammlicense', 'ammrecords', 'permit', 'permith', 'fingerprint',
       'training', 'permitlaw', 'registration', 'registrationh',
       'defactoreg', 'defactoregh', 'ammpermit', 'ammrestrict',
       'age21handgunsale', 'age18longgunsale', 'age21longgunsaled',
       'age21longgunsale', 'age21handgunpossess', 'age18longgunpossess',
       'age21longgunpossess', 'loststolen', 'amm18', 'amm21h',
       'universal', 'universalh', 'gunshow', 'gunshowh',
       'universalpermit', 'universalpermith', 'backgroundpurge',
       'ammbackground', 'threedaylimit', 'mentalhealth', 

In [279]:
# Index the laws table by state name, order the rows by that index level in
# ascending order, then preview the result.
gun_laws_df = (
    gun_laws_df
    .set_index("state")
    .sort_values("state")
)
gun_laws_df.head()

Unnamed: 0_level_0,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,violenth,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,1,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,10
Alaska,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
Arizona,1,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
Arkansas,1,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
California,1,1,0,1,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,111


### SHAPEFILES (data cleaning)

In [318]:
# Read the state boundary shapefile with geopandas.
shape_df = gpd.read_file("shapefiles/states.shp")

In [319]:
# Non-state areas to remove from the shapefile rows. Despite the variable
# name, the list also contains the District of Columbia.
territories = [
    "American Samoa",
    "Commonwealth of the Northern Mariana Islands",
    "Puerto Rico",
    "United States Virgin Islands",
    "District of Columbia",
    "Guam",
]
# Keep only name, abbreviation and geometry; index by state name; drop the
# non-state rows; order by state.
shape_df = (
    shape_df
    .loc[:, ["NAME", "STUSPS", "geometry"]]
    .rename(columns={"NAME": "state", "STUSPS": "abbreviation"})
    .set_index("state")
    .drop(index=territories)
    .sort_values("state")
)

In [320]:
# Preview the first rows of the cleaned state-geometry frame.
shape_df.head()

Unnamed: 0_level_0,abbreviation,geometry
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,AL,"POLYGON ((-9841333.855 3579579.495, -9841349.6..."
Alaska,AK,"MULTIPOLYGON (((-15108567.654 8339788.089, -15..."
Arizona,AZ,"POLYGON ((-12761162.105 4147165.875, -12761214..."
Arkansas,AR,"POLYGON ((-10515267.713 4101325.818, -10515269..."
California,CA,"MULTIPOLYGON (((-13060108.516 3854208.959, -13..."


In [322]:
# Show which state names remain in the index after the drop above.
shape_df.index

Index(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
       'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
       'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
       'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype='object', name='state')

In [323]:
# Check the row count of the cleaned frame (one row per remaining state).
shape_df.shape

(50, 2)