In [177]:
# Necessary imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
import scipy.stats as stats

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
%matplotlib inline

In [178]:
# Pull in police shooting information
popo = pd.read_csv('data/PoliceKillingsUS.csv', encoding = "ISO-8859-1")

In [179]:
# strip whitespace from city name and make the name lowercase
popo.city = popo.city.str.strip()
popo.city = popo.city.str.lower()

# Do the same with state except upper
popo.state = popo.state.str.strip()
popo.state = popo.state.str.upper()

In [180]:
# Let's see what we're working with
popo.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,02/01/15,shot,gun,53.0,M,A,shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,02/01/15,shot,gun,47.0,M,W,aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,03/01/15,shot and Tasered,unarmed,23.0,M,H,wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,04/01/15,shot,toy weapon,32.0,M,W,san francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,04/01/15,shot,nail gun,39.0,M,H,evans,CO,False,attack,Not fleeing,False


In [181]:
# Create new column with combined city&state info to join on
popo['city_and_state'] = popo.city.astype('str') + ', ' + popo.state.astype('str')

In [182]:
# Be super anal and strip whitespace from city_&_state column
popo.city_and_state = popo.city_and_state.str.strip()

In [183]:
popo.head(10)

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,city_and_state
0,3,Tim Elliot,02/01/15,shot,gun,53.0,M,A,shelton,WA,True,attack,Not fleeing,False,"shelton, WA"
1,4,Lewis Lee Lembke,02/01/15,shot,gun,47.0,M,W,aloha,OR,False,attack,Not fleeing,False,"aloha, OR"
2,5,John Paul Quintero,03/01/15,shot and Tasered,unarmed,23.0,M,H,wichita,KS,False,other,Not fleeing,False,"wichita, KS"
3,8,Matthew Hoffman,04/01/15,shot,toy weapon,32.0,M,W,san francisco,CA,True,attack,Not fleeing,False,"san francisco, CA"
4,9,Michael Rodriguez,04/01/15,shot,nail gun,39.0,M,H,evans,CO,False,attack,Not fleeing,False,"evans, CO"
5,11,Kenneth Joe Brown,04/01/15,shot,gun,18.0,M,W,guthrie,OK,False,attack,Not fleeing,False,"guthrie, OK"
6,13,Kenneth Arnold Buck,05/01/15,shot,gun,22.0,M,H,chandler,AZ,False,attack,Car,False,"chandler, AZ"
7,15,Brock Nichols,06/01/15,shot,gun,35.0,M,W,assaria,KS,False,attack,Not fleeing,False,"assaria, KS"
8,16,Autumn Steele,06/01/15,shot,unarmed,34.0,F,W,burlington,IA,False,other,Not fleeing,True,"burlington, IA"
9,17,Leslie Sapp III,06/01/15,shot,toy weapon,47.0,M,B,knoxville,PA,False,attack,Not fleeing,False,"knoxville, PA"


In [184]:
# Pull in city to county mapping
city_county_map = pd.read_pickle('data/clean_city_county.pkl')

In [185]:
# Let's check it out!
city_county_map.head()

Unnamed: 0,city,city_ascii,state_id,state_name,county_fips,county_name,density,id,city_and_state
0,prairie ridge,Prairie Ridge,WA,Washington,53053,Pierce,1349.8,1840037882,"prairie ridge, WA"
1,edison,Edison,WA,Washington,53057,Skagit,127.4,1840017314,"edison, WA"
2,packwood,Packwood,WA,Washington,53041,Lewis,213.9,1840025265,"packwood, WA"
3,wautauga beach,Wautauga Beach,WA,Washington,53035,Kitsap,261.7,1840037725,"wautauga beach, WA"
4,harper,Harper,WA,Washington,53035,Kitsap,342.1,1840037659,"harper, WA"


In [186]:
city_county_map.dtypes

city               object
city_ascii         object
state_id           object
state_name         object
county_fips         int64
county_name        object
density           float64
id                  int64
city_and_state     object
dtype: object

In [187]:
# grab just the county and city_and_state column from city_county_map
joiner = city_county_map[['county_name', 'city_and_state']]
joiner.dtypes

county_name       object
city_and_state    object
dtype: object

In [188]:
# Let's join the counties onto the shootings dataframe on the city_and_state column
popo2 = pd.merge(popo, joiner,
                left_on = 'city_and_state',
                right_on = 'city_and_state')

In [189]:
# Check it out:
popo2.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,city_and_state,county_name
0,3,Tim Elliot,02/01/15,shot,gun,53.0,M,A,shelton,WA,True,attack,Not fleeing,False,"shelton, WA",Mason
1,4,Lewis Lee Lembke,02/01/15,shot,gun,47.0,M,W,aloha,OR,False,attack,Not fleeing,False,"aloha, OR",Washington
2,890,Phyllis Ilene Jepsen,02/10/15,shot,knife,55.0,F,W,aloha,OR,True,other,Not fleeing,False,"aloha, OR",Washington
3,5,John Paul Quintero,03/01/15,shot and Tasered,unarmed,23.0,M,H,wichita,KS,False,other,Not fleeing,False,"wichita, KS",Sedgwick
4,765,Nicholas Garner,22/08/15,shot,vehicle,26.0,M,W,wichita,KS,False,attack,Car,False,"wichita, KS",Sedgwick


In [191]:
# Clean the counties up by deleting '(city)' where it appears
popo2['Counties'] = popo2.county_name.apply(lambda x: ' '.join(x.split()[:-1]) + ' City' \
                                            if x.split()[-1] == '(city)' else x)

In [192]:
# Reassign the clean column to the old column and drop the clean column 
# (so we have no duplicates)
popo2['county_name'] = popo2['Counties']
popo2.drop(labels = 'Counties', axis = 1, inplace = True)

In [221]:
# Check to see if '(city)' appears at all
# It shouldn't!
for index, row in popo2.iterrows():
    if 'City' in row.county_name.split():
        print(row.county_name)

Chesapeake City County
Chesapeake City County
Baltimore City County
Baltimore City County
Baltimore City County
Baltimore City County
Baltimore City County
Baltimore City County
Baltimore City County
Baltimore City County
Portsmouth City County
Portsmouth City County
Winchester City County
Emporia City County
Newport News City County
Newport News City County
Richmond City County
Richmond City County
Richmond City County
Richmond City County
Carson City City County
Carson City City County
Virginia Beach City County
Virginia Beach City County
Harrisonburg City County
Suffolk City County
Norfolk City County
Norfolk City County
Norfolk City County
Norfolk City County
Norfolk City County
Norfolk City County
Norfolk City County
Roanoke City County
Hopewell City County
Fredericksburg City County
Falls Church City County
Alexandria City County


In [194]:
# Include the word 'County' at the end of each county name
popo2['county_name'] = popo2.county_name.apply(lambda x: x + ' County')

In [200]:
popo2.sample(50)

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,city_and_state,county_name
1791,1696,Thomas Vandemark,06/07/16,shot,gun,65.0,M,W,rush springs,OK,False,other,Not fleeing,True,"rush springs, OK",Grady County
1628,1412,Jose Perez,26/03/16,shot,meat cleaver,39.0,M,H,lowell,MA,True,other,Not fleeing,False,"lowell, MA",Middlesex County
624,2806,TK TK,26/07/17,shot,gun,,M,,san antonio,TX,False,attack,Not fleeing,False,"san antonio, TX",Bexar County
2106,2382,James Leroy Marker,26/02/17,shot,gun,66.0,M,W,floral city,FL,False,other,Car,False,"floral city, FL",Citrus County
1093,2466,Daniel Hendrix,29/03/17,shot,gun,26.0,M,W,chattanooga,TN,False,other,Not fleeing,False,"chattanooga, TN",Hamilton County
1216,2790,Anthony Benavidez,19/07/17,shot,knife,24.0,M,,santa fe,NM,False,other,Not fleeing,False,"santa fe, NM",Santa Fe County
328,1518,Deresha Armstrong,05/05/16,shot,gun,26.0,F,B,orlando,FL,False,attack,Not fleeing,False,"orlando, FL",Orange County
857,420,Joshua Green,28/04/15,shot,gun,27.0,M,W,marion,IL,True,attack,Not fleeing,False,"marion, IL",Williamson County
1421,1036,Justin D. McHenry,29/11/15,shot,gun,22.0,M,W,celina,OH,False,attack,Not fleeing,False,"celina, OH",Mercer County
1293,868,Jeffrey Blood,25/09/15,shot,gun,45.0,M,W,wilhoit,AZ,False,attack,Not fleeing,False,"wilhoit, AZ",Yavapai County


In [202]:
# popo2 = popo2.drop(labels = 'county_and_state', axis = 1)
popo2['county_and_state'] = popo2['county_name'] + ', ' + popo2['state']

In [203]:
popo2.head(20)

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,city_and_state,county_name,county_and_state
0,3,Tim Elliot,02/01/15,shot,gun,53.0,M,A,shelton,WA,True,attack,Not fleeing,False,"shelton, WA",Mason County,"Mason County, WA"
1,4,Lewis Lee Lembke,02/01/15,shot,gun,47.0,M,W,aloha,OR,False,attack,Not fleeing,False,"aloha, OR",Washington County,"Washington County, OR"
2,890,Phyllis Ilene Jepsen,02/10/15,shot,knife,55.0,F,W,aloha,OR,True,other,Not fleeing,False,"aloha, OR",Washington County,"Washington County, OR"
3,5,John Paul Quintero,03/01/15,shot and Tasered,unarmed,23.0,M,H,wichita,KS,False,other,Not fleeing,False,"wichita, KS",Sedgwick County,"Sedgwick County, KS"
4,765,Nicholas Garner,22/08/15,shot,vehicle,26.0,M,W,wichita,KS,False,attack,Car,False,"wichita, KS",Sedgwick County,"Sedgwick County, KS"
5,1843,Caleb J. Douglas,01/09/16,shot,gun,18.0,M,W,wichita,KS,False,other,Car,True,"wichita, KS",Sedgwick County,"Sedgwick County, KS"
6,2437,Kevin C. Perry,18/03/17,shot,gun,25.0,M,W,wichita,KS,False,attack,Not fleeing,False,"wichita, KS",Sedgwick County,"Sedgwick County, KS"
7,8,Matthew Hoffman,04/01/15,shot,toy weapon,32.0,M,W,san francisco,CA,True,attack,Not fleeing,False,"san francisco, CA",San Francisco County,"San Francisco County, CA"
8,182,Amilcar Perez-Lopez,26/02/15,shot,knife,21.0,M,H,san francisco,CA,False,other,Not fleeing,False,"san francisco, CA",San Francisco County,"San Francisco County, CA"
9,221,Alice Brown,17/03/15,shot,vehicle,24.0,F,W,san francisco,CA,True,other,Not fleeing,False,"san francisco, CA",San Francisco County,"San Francisco County, CA"


In [204]:
shootings_by_county = pd.DataFrame(popo2['county_and_state'].value_counts())

In [205]:
shootings_by_county = shootings_by_county.reset_index().rename(columns={'index': 'County_and_State', 'county_and_state': 'num_shootings'})

In [206]:
shootings_by_county.head()

Unnamed: 0,County_and_State,num_shootings
0,"Los Angeles County, CA",106
1,"Maricopa County, AZ",65
2,"Cook County, IL",33
3,"Harris County, TX",32
4,"San Bernardino County, CA",30


In [218]:
for index, row in shootings_by_county.iterrows():
    if 'City' in row.County_and_State.split():
        print(index, row.County_and_State)

50 Baltimore City County, MD
58 Norfolk City County, VA
110 Richmond City County, VA
206 Virginia Beach City County, VA
232 Portsmouth City County, VA
250 Newport News City County, VA
291 Chesapeake City County, VA
298 Carson City County, NV
358 Harrisonburg City County, VA
359 Roanoke City County, VA
457 Hopewell City County, VA
533 Emporia City County, VA
577 Fredericksburg City County, VA
613 Suffolk City County, VA
684 Winchester City County, VA
697 Falls Church City County, VA
767 Alexandria City County, VA


In [217]:
shootings_by_county.set_value(index = 298, col = 'County_and_State', 
                              value = 'Carson City County, NV')


  


Unnamed: 0,County_and_State,num_shootings
0,"Los Angeles County, CA",106
1,"Maricopa County, AZ",65
2,"Cook County, IL",33
3,"Harris County, TX",32
4,"San Bernardino County, CA",30
5,"Clark County, NV",28
6,"Riverside County, CA",27
7,"Miami-Dade County, FL",23
8,"Orange County, CA",23
9,"San Diego County, CA",22


In [224]:
# Rename this column for merging later
shootings_by_county.rename(columns={'County_and_State': 'County'}, inplace=True)

In [227]:
# Send it to a pickle
shootings_by_county.to_pickle('data/shootings_by_county_state.pkl')

In [228]:
shootings_by_county.head(60)

Unnamed: 0,County,num_shootings
0,"Los Angeles County, CA",106
1,"Maricopa County, AZ",65
2,"Cook County, IL",33
3,"Harris County, TX",32
4,"San Bernardino County, CA",30
5,"Clark County, NV",28
6,"Riverside County, CA",27
7,"Miami-Dade County, FL",23
8,"Orange County, CA",23
9,"San Diego County, CA",22
