In [58]:
# Necessary imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
import scipy.stats as stats

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
%matplotlib inline

In [59]:
# Read data in from mass shooting dataset

df = pd.read_csv('data/Mass_Shootings_Dataset_Ver 5.csv', encoding = "ISO-8859-1")

In [60]:
# List columns
df.columns

Index(['S#', 'Title', 'Location', 'Date', 'Incident Area',
       'Open/Close Location', 'Target', 'Cause', 'Summary', 'Fatalities',
       'Injured', 'Total victims', 'Policeman Killed', 'Age',
       'Employeed (Y/N)', 'Employed at', 'Mental Health Issues', 'Race',
       'Gender', 'Latitude', 'Longitude'],
      dtype='object')

In [61]:
# Drop unneeded columns
df = df.drop(labels = ['Open/Close Location', 'Target', 'Cause', 'Summary', 
                       'Employed at', 'Mental Health Issues', 'Race', 
                      'Gender', 'Latitude', 'Longitude', 'Employeed (Y/N)'], axis = 1)

In [62]:
# List columns again
df.columns

Index(['S#', 'Title', 'Location', 'Date', 'Incident Area', 'Fatalities',
       'Injured', 'Total victims', 'Policeman Killed', 'Age'],
      dtype='object')

In [138]:
# Show dataframe
df.head(100)

Unnamed: 0,S#,Title,Location,Date,Incident Area,Fatalities,Injured,Total victims,Policeman Killed,Age
0,1,Texas church mass shooting,"Sutherland Springs, TX",2017-11-05,Church,26,20,46,0.0,26
1,2,Walmart shooting in suburban Denver,"Thornton, CO",2017-11-01,Wal-Mart,3,0,3,0.0,47
2,3,Edgewood businees park shooting,"Edgewood, MD",2017-10-18,Remodeling Store,3,3,6,0.0,37
3,4,Las Vegas Strip mass shooting,"Las Vegas, NV",2017-10-01,Las Vegas Strip Concert outside Mandala Bay,59,527,585,1.0,64
4,5,San Francisco UPS shooting,"San Francisco, CA",2017-06-14,UPS facility,3,2,5,0.0,38
5,6,Pennsylvania supermarket shooting,"Tunkhannock, PA",2017-06-07,Weis grocery,3,0,3,,24
6,7,Florida awning manufacturer shooting,"Orlando, Florida",2017-06-05,manufacturer Fiamma Inc.,5,0,5,,45
7,8,Rural Ohio nursing home shooting,"Kirkersville, Ohio",2017-05-12,a nursing home,3,0,3,1.0,43
8,9,Fresno downtown shooting,"Fresno, California",2017-04-18,a street in downtown,3,0,3,,39
9,10,Fort Lauderdale airport shooting,"Fort Lauderdale, Florida",2017-01-06,baggage claim area of the airport,5,6,11,,26


In [64]:
# Convert Date column to a datetime object
df['Date'] = pd.to_datetime(df['Date'])

In [65]:
start_date = pd.to_datetime('1/1/2009')
end_date = pd.to_datetime('4/24/2019')
mask = (df['Date'] > start_date) & (df['Date'] <= end_date)

In [66]:
df.shape

(323, 10)

In [67]:
df = df.loc[mask]

In [68]:
df.shape

(207, 10)

In [69]:
df.head(50)

Unnamed: 0,S#,Title,Location,Date,Incident Area,Fatalities,Injured,Total victims,Policeman Killed,Age
0,1,Texas church mass shooting,"Sutherland Springs, TX",2017-11-05,Church,26,20,46,0.0,26.0
1,2,Walmart shooting in suburban Denver,"Thornton, CO",2017-11-01,Wal-Mart,3,0,3,0.0,47.0
2,3,Edgewood businees park shooting,"Edgewood, MD",2017-10-18,Remodeling Store,3,3,6,0.0,37.0
3,4,Las Vegas Strip mass shooting,"Las Vegas, NV",2017-10-01,Las Vegas Strip Concert outside Mandala Bay,59,527,585,1.0,64.0
4,5,San Francisco UPS shooting,"San Francisco, CA",2017-06-14,UPS facility,3,2,5,0.0,38.0
5,6,Pennsylvania supermarket shooting,"Tunkhannock, PA",2017-06-07,Weis grocery,3,0,3,,24.0
6,7,Florida awning manufacturer shooting,"Orlando, Florida",2017-06-05,manufacturer Fiamma Inc.,5,0,5,,45.0
7,8,Rural Ohio nursing home shooting,"Kirkersville, Ohio",2017-05-12,a nursing home,3,0,3,1.0,43.0
8,9,Fresno downtown shooting,"Fresno, California",2017-04-18,a street in downtown,3,0,3,,39.0
9,10,Fort Lauderdale airport shooting,"Fort Lauderdale, Florida",2017-01-06,baggage claim area of the airport,5,6,11,,26.0


In [73]:
df['Location'].fillna(df['Title'], inplace = True)
df
# df[df['Location'].isnull()]

Unnamed: 0,S#,Title,Location,Date,Incident Area,Fatalities,Injured,Total victims,Policeman Killed,Age
0,1,Texas church mass shooting,"Sutherland Springs, TX",2017-11-05,Church,26,20,46,0.0,26
1,2,Walmart shooting in suburban Denver,"Thornton, CO",2017-11-01,Wal-Mart,3,0,3,0.0,47
2,3,Edgewood businees park shooting,"Edgewood, MD",2017-10-18,Remodeling Store,3,3,6,0.0,37
3,4,Las Vegas Strip mass shooting,"Las Vegas, NV",2017-10-01,Las Vegas Strip Concert outside Mandala Bay,59,527,585,1.0,64
4,5,San Francisco UPS shooting,"San Francisco, CA",2017-06-14,UPS facility,3,2,5,0.0,38
5,6,Pennsylvania supermarket shooting,"Tunkhannock, PA",2017-06-07,Weis grocery,3,0,3,,24
6,7,Florida awning manufacturer shooting,"Orlando, Florida",2017-06-05,manufacturer Fiamma Inc.,5,0,5,,45
7,8,Rural Ohio nursing home shooting,"Kirkersville, Ohio",2017-05-12,a nursing home,3,0,3,1.0,43
8,9,Fresno downtown shooting,"Fresno, California",2017-04-18,a street in downtown,3,0,3,,39
9,10,Fort Lauderdale airport shooting,"Fort Lauderdale, Florida",2017-01-06,baggage claim area of the airport,5,6,11,,26


In [None]:
# Clean particularly pesky rows

df.Location.iloc[75] = 'Gloucester, VA'
df.Location.iloc[146] = 'Harleysville, Pennsylvania'
df.Location.iloc[175] = 'Albuquerque, New Mexico'

In [125]:
def clean_Location(uncleaned, states_dict):
    word_list = uncleaned.split(',')
#     if len(word_list) > 1:
#         word_list[1] = word_list[1].strip()
#         word_list[1] = word_list[1]
    print(word_list)
    return input("put the correct string for the location here!")

In [119]:
states_abrv = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", 
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
states = ["Alabama","Alaska","Arizona","Arkansas","California","Colorado", 
              "Connecticut","Delaware","Florida","Georgia","Hawaii","Idaho","Illinois", 
              "Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland", 
              "Massachusetts","Michigan","Minnesota","Mississippi","Missouri","Montana", 
              "Nebraska","Nevada","New Hampshire","New Jersey","New Mexico","New York",
              "North Carolina","North Dakota","Ohio","Oklahoma","Oregon","Pennsylvania", 
              "Rhode Island","South Carolina","South Dakota","Tennessee","Texas","Utah", 
              "Vermont","Virginia","Washington","West Virginia","Wisconsin","Wyoming"]
states_dict = dict(zip(states,states_abrv))

In [129]:
a = clean_Location('Sutherland Springs, TX', states_dict)

['Sutherland Springs', ' TX']
put the correct string for the location here!Sutherland Springs, TX


'Sutherland Springs, TX'

In [123]:
count = 0
for index, row in df.iterrows():
    print(str(count) + ' ' + row['Location'] + ' converts to ') 
    print(clean_Location(row['Location'], states_dict))
    count += 1

0 Sutherland Springs, TX converts to 
['Sutherland Springs', 'TX']
1 Thornton, CO converts to 
['Thornton', 'CO']
2 Edgewood, MD converts to 
['Edgewood', 'MD']
3 Las Vegas, NV converts to 
['Las Vegas', 'NV']
4 San Francisco, CA converts to 
['San Francisco', 'CA']
5 Tunkhannock, PA converts to 
['Tunkhannock', 'PA']
6 Orlando, Florida converts to 
['Orlando', 'Florida']
7 Kirkersville, Ohio converts to 
['Kirkersville', 'Ohio']
8 Fresno, California converts to 
['Fresno', 'California']
9 Fort Lauderdale, Florida converts to 
['Fort Lauderdale', 'Florida']
10 Burlington, WA converts to 
['Burlington', 'WA']
11 Baton Rouge, LA converts to 
['Baton Rouge', 'LA']
12 Dallas, Texas converts to 
['Dallas', 'Texas']
13 Orlando, Florida converts to 
['Orlando', 'Florida']
14 Ferguson, Missouri converts to 
['Ferguson', 'Missouri']
15 Forestville, Maryland Drive-by converts to 
['Forestville', 'Maryland Drive-by']
16 Halifax County, VA converts to 
['Halifax County', 'VA']
17 Tire-Slashing rev

In [133]:
for index, row in df.iterrows():
    row['Location'] = clean_Location(row['Location'], states_dict)

['Sutherland Springs', ' TX']
put the correct string for the location here!Sutherland Springs, TX
['Thornton', ' CO']
put the correct string for the location here!Thornton, CO
['Edgewood', ' MD']
put the correct string for the location here!Edgewood, MD
['Las Vegas', ' NV']
put the correct string for the location here!Las Vegas, NV
['San Francisco', ' CA']
put the correct string for the location here!San Francisco, CA
['Tunkhannock', ' PA']
put the correct string for the location here!Tunkhannock, PA
['Orlando', ' Florida']
put the correct string for the location here!Orlando, FL
['Kirkersville', ' Ohio']
put the correct string for the location here!Kirkersville, OH
['Fresno', ' California']
put the correct string for the location here!Fresno, CA
['Fort Lauderdale', ' Florida']
put the correct string for the location here!Fort Lauderdale, FL
['Burlington', ' WA']
put the correct string for the location here!Burlington, WA
['Baton Rouge', ' LA']
put the correct string for the location h

put the correct string for the location here!Colerain Township, OH
['Flagstaff', ' Arizona']
put the correct string for the location here!Flagstaff, AZ
['Roseburg', ' Oregon']
put the correct string for the location here!Roseburg, OR
['Inglis', ' Florida']
put the correct string for the location here!Inglis, FL
['Greenville', ' Georgia']
put the correct string for the location here!Greenville, GA
['Platte', ' South Dakota']
put the correct string for the location here!Platte, SD
['Clearbrook (Greenwood)', ' Minnesota']
put the correct string for the location here!Clearbrook, MN
['Bristol', ' Tennessee']
put the correct string for the location here!Bristol, TN
['Roanoke', ' Virginia']
put the correct string for the location here!Roanoke, VA
['Rochester', ' New York']
put the correct string for the location here!Rochester, NY
['Houston', ' Texas']
put the correct string for the location here!Houston, TX
['Lafayette', ' Louisiana']
put the correct string for the location here!Lafayette, L

['Tulsa', ' Oklahoma']
put the correct string for the location here!Tulsa, OK
['Oakland', ' California']
put the correct string for the location here!Oakland, CA
['Chardon', ' Ohio']
put the correct string for the location here!Chardon, OH
['Norcross', ' Georgia']
put the correct string for the location here!Norcross, GA
['Birmingham', ' Alabama']
put the correct string for the location here!Birmingham, AL
['Seal Beach', ' California']
put the correct string for the location here!Seal Beach, CA
['Carson City', ' Nevada']
put the correct string for the location here!Carson City, NV
['Grand Rapids', ' Michigan']
put the correct string for the location here!Grand Rapids, MI
['Opelika', ' Alabama']
put the correct string for the location here!Opelika, AL
['Youngstown', ' Ohio']
put the correct string for the location here!Youngstown, OH
['Tucson', ' Arizona']
put the correct string for the location here!Tucson, AZ
['Manchester', ' Connecticut']
put the correct string for the location here!

In [136]:
df.head(50)

Unnamed: 0,S#,Title,Location,Date,Incident Area,Fatalities,Injured,Total victims,Policeman Killed,Age
0,1,Texas church mass shooting,"Sutherland Springs, TX",2017-11-05,Church,26,20,46,0.0,26.0
1,2,Walmart shooting in suburban Denver,"Thornton, CO",2017-11-01,Wal-Mart,3,0,3,0.0,47.0
2,3,Edgewood businees park shooting,"Edgewood, MD",2017-10-18,Remodeling Store,3,3,6,0.0,37.0
3,4,Las Vegas Strip mass shooting,"Las Vegas, NV",2017-10-01,Las Vegas Strip Concert outside Mandala Bay,59,527,585,1.0,64.0
4,5,San Francisco UPS shooting,"San Francisco, CA",2017-06-14,UPS facility,3,2,5,0.0,38.0
5,6,Pennsylvania supermarket shooting,"Tunkhannock, PA",2017-06-07,Weis grocery,3,0,3,,24.0
6,7,Florida awning manufacturer shooting,"Orlando, Florida",2017-06-05,manufacturer Fiamma Inc.,5,0,5,,45.0
7,8,Rural Ohio nursing home shooting,"Kirkersville, Ohio",2017-05-12,a nursing home,3,0,3,1.0,43.0
8,9,Fresno downtown shooting,"Fresno, California",2017-04-18,a street in downtown,3,0,3,,39.0
9,10,Fort Lauderdale airport shooting,"Fort Lauderdale, Florida",2017-01-06,baggage claim area of the airport,5,6,11,,26.0
