In [1]:
#Dependencies
import pandas as pd
import os

## Police Deaths: Store CSV into DataFrame

In [16]:
policies_path = os.path.join("Resources/police_deaths.csv")
police_df = pd.read_csv(policies_path)

In [17]:
#States Abbreviations source: https://www.ssa.gov/international/coc-docs/states.html
states_path = os.path.join("Resources/states_abbrev.csv")
states_df = pd.read_csv(states_path, encoding="ISO-8859-1")

In [18]:
police_df.head()

Unnamed: 0,person,dept,eow,cause
0,Constable Darius Quimby,"Albany County Constable's Office, NY","EOW: Monday, January 3, 1791",Cause of Death: Gunfire
1,Sheriff Cornelius Hogeboom,"Columbia County Sheriff's Office, NY","EOW: Saturday, October 22, 1791",Cause of Death: Gunfire
2,Deputy Sheriff Isaac Smith,"Westchester County Sheriff's Department, NY","EOW: Thursday, May 17, 1792",Cause of Death: Gunfire
3,Marshal Robert Forsyth,United States Department of Justice - United S...,"EOW: Saturday, January 11, 1794",Cause of Death: Gunfire
4,Sheriff Robert Maxwell,"Greenville County Sheriff's Office, SC","EOW: Sunday, November 12, 1797",Cause of Death: Gunfire


In [19]:
states_df.head()

Unnamed: 0,state,state_code
0,ALABAMA,AL
1,ALASKA,AK
2,AMERICAN SAMOA,AS
3,ARIZONA,AZ
4,ARKANSAS,AR


## Police Deaths: Cleaning DF

In [20]:
#Extract state of dept & year of eow column 
police_df["state"] = police_df.dept.str.split(", ").str[-1]
police_df["year"] = police_df.eow.str.split(",").str[-1]

In [21]:
#Convert String of year column to Int
police_df["year"] = police_df["year"].astype(int)

In [22]:
#Filter years above 1999
police_df = police_df[police_df["year"]>=2000]

In [23]:
#Reset Index
police_df.reset_index(drop=True)

Unnamed: 0,person,dept,eow,cause,state,year
0,Deputy Sheriff Ernest Martin Hull,"Greene County Sheriff's Office, NC","EOW: Sunday, January 2, 2000",Cause of Death: Automobile accident,NC,2000
1,Patrol Officer James Clinton Bryant,"Broxton Police Department, GA","EOW: Monday, January 3, 2000",Cause of Death: Gunfire,GA,2000
2,"Staff Sergeant Clyde Almond ""Tub"" Merritt","Coffee County Sheriff's Office, GA","EOW: Monday, January 3, 2000",Cause of Death: Gunfire,GA,2000
3,Inspector Kirk B. Brookbush,"San Francisco Police Department, CA","EOW: Tuesday, January 11, 2000",Cause of Death: Aircraft accident,CA,2000
4,Police Officer James Francis Dougherty,"San Francisco Police Department, CA","EOW: Tuesday, January 11, 2000",Cause of Death: Aircraft accident,CA,2000
...,...,...,...,...,...,...
2852,K9 Bruno,"Amarillo Police Department, TX","EOW: Sunday, June 12, 2016",Cause of Death: Accidental,TX,2016
2853,K9 Lazer,United States Department of Homeland Security ...,"EOW: Monday, June 20, 2016",Cause of Death: Heat exhaustion,US,2016
2854,K9 Tyson,"Fountain County Sheriff's Office, IN","EOW: Monday, June 27, 2016",Cause of Death: Heat exhaustion,IN,2016
2855,K9 Credo,"Long Beach Police Department, CA","EOW: Tuesday, June 28, 2016",Cause of Death: Gunfire (Accidental),CA,2016


In [24]:
#DataFrame of States from Police DF
police_states = pd.DataFrame({
    "state" : police_df["state"].unique()})


In [25]:
#Join States in Police DF with States DF

states = pd.merge(police_states, states_df, left_on="state", right_on="state_code", how="left")

In [60]:
states.head()

Unnamed: 0,state_x,state_y,state_code
0,NC,NORTH CAROLINA,NC
1,GA,GEORGIA,GA
2,CA,CALIFORNIA,CA
3,PA,PENNSYLVANIA,PA
4,IN,INDIANA,IN


In [44]:
# We found some observations that are not states
police_df.loc[(police_df['state']=='RR') | (police_df['state']=='TR') | (police_df['state']=='US')]

Unnamed: 0,person,dept,eow,cause,state,year
20009,Deputy U.S. Marshal Peter Purdy Hillman,United States Department of Justice - United S...,"EOW: Thursday, June 8, 2000",Cause of Death: Automobile accident,US,2000
20021,Police Officer Kelmer Harwin One Feather,"Oglala Sioux Tribal Police, TR","EOW: Saturday, July 1, 2000",Cause of Death: Assault,TR,2000
20028,"Postal Inspector Robert Francis Jones, Jr.","United States Postal Inspection Service, US","EOW: Friday, July 14, 2000",Cause of Death: Automobile accident,US,2000
20044,Military Police Officer Brian Thomas Gleason,"United States Army Military Police Corps, US","EOW: Wednesday, August 9, 2000",Cause of Death: Automobile accident,US,2000
20059,Senior Customs Inspector Richard Anthony Forde,United States Department of the Treasury - Cus...,"EOW: Tuesday, September 12, 2000",Cause of Death: Motorcycle accident,US,2000
...,...,...,...,...,...,...
22728,Special Agent Scott McGuire,United States Department of Homeland Security ...,"EOW: Sunday, January 24, 2016",Cause of Death: Vehicular assault,US,2016
22756,Border Patrol Agent Jose Daniel Barraza,United States Department of Homeland Security ...,"EOW: Monday, April 18, 2016",Cause of Death: Automobile accident,US,2016
22767,Deportation Officer Brian Beliso,United States Department of Homeland Security ...,"EOW: Wednesday, June 8, 2016",Cause of Death: Heart attack,US,2016
22774,Officer Bradley Wayne Treat,United States Department of Agriculture - Fore...,"EOW: Wednesday, June 29, 2016",Cause of Death: Animal related,US,2016


In [64]:
# Count people per state
ag_police = police_df[['state', 'person']].groupby(['state']).agg(['count'])
#Reset Index
ag_police = ag_police.reset_index()
ag_police.head()

Unnamed: 0_level_0,state,person
Unnamed: 0_level_1,Unnamed: 1_level_1,count
0,AK,11
1,AL,66
2,AR,33
3,AS,1
4,AZ,62


## Police Involved Fatalities: Store CSV into DataFrame

In [49]:
policies_path = os.path.join("Resources/police_fatalities.csv")
fatalities_df = pd.read_csv(policies_path, encoding= 'unicode_escape')

In [125]:
fatalities_df.head(10)

Unnamed: 0,UID,Name,Age,Gender,Race,Date,City,State,Manner_of_death,Armed,Mental_illness,Flee,Year
0,133,Karen O. Chin,44.0,Female,Asian,5/4/2000,Alameda,CA,Shot,,False,False,2000
1,169,Chyraphone Komvongsa,26.0,Male,Asian,6/2/2000,Fresno,CA,Shot,,False,False,2000
2,257,Ming Chinh Ly,36.0,Male,Asian,8/13/2000,Rosemead,CA,Shot,Gun,False,False,2000
3,483,Kinh Quoc Dao,29.0,Male,Asian,2/9/2001,Valley Glen,CA,Shot,Gun,False,False,2001
4,655,Vanpaseuth Phaisouphanh,25.0,Male,Asian,6/10/2001,Riverside,CA,Shot,Knife,False,False,2001
5,668,Bernardo Ancheta Caberto,55.0,Male,Asian,6/23/2001,Henderson,NV,Shot,Knife,False,False,2001
6,677,Cuong Tran,33.0,Male,Asian,6/30/2001,Syracuse,NY,Shot,,True,False,2001
7,678,Sengsadaphet Phongsavanh,29.0,Male,Asian,7/1/2001,Beaverton,OR,Shot,Knife,True,False,2001
8,686,Nam Quoc Nguyen,21.0,Male,Asian,7/6/2001,Garden Grove,CA,Shot,Gun,False,False,2001
9,736,Rosa Hammer,27.0,Female,Asian,8/9/2001,Gorst,WA,Shot,Gun,False,False,2001


## Police Involved Fatalities: Cleaning DF

In [123]:
#Extract year of Date column 
#fatalities_df.dtypes
fatalities_df['Year'] = fatalities_df['Date'].str[-4:]
fatalities_df.head()

Unnamed: 0,UID,Name,Age,Gender,Race,Date,City,State,Manner_of_death,Armed,Mental_illness,Flee,Year
0,133,Karen O. Chin,44.0,Female,Asian,5/4/2000,Alameda,CA,Shot,,False,False,2000
1,169,Chyraphone Komvongsa,26.0,Male,Asian,6/2/2000,Fresno,CA,Shot,,False,False,2000
2,257,Ming Chinh Ly,36.0,Male,Asian,8/13/2000,Rosemead,CA,Shot,Gun,False,False,2000
3,483,Kinh Quoc Dao,29.0,Male,Asian,2/9/2001,Valley Glen,CA,Shot,Gun,False,False,2001
4,655,Vanpaseuth Phaisouphanh,25.0,Male,Asian,6/10/2001,Riverside,CA,Shot,Knife,False,False,2001


In [65]:
# Count people per state
ag_fatalities = fatalities_df[['State', 'Name']].groupby(['State']).agg(['count'])
#Reset Index
ag_fatalities = ag_fatalities.reset_index()
ag_fatalities.head()

Unnamed: 0_level_0,State,Name
Unnamed: 0_level_1,Unnamed: 1_level_1,count
0,AK,73
1,AL,326
2,AR,70
3,AZ,585
4,CA,2913


# Join Police DF with Fatalities DF by state

In [110]:
final_state = pd.merge(ag_police, ag_fatalities, left_on="state", right_on="State", how="left")
final_state.head()

Unnamed: 0_level_0,state,person,State,Name
Unnamed: 0_level_1,Unnamed: 1_level_1,count,Unnamed: 3_level_1,count
0,AK,11,AK,73.0
1,AL,66,AL,326.0
2,AR,33,AR,70.0
3,AS,1,,
4,AZ,62,AZ,585.0


In [111]:
final_state = final_state.drop('State', 1)
final_state.head()

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,state,person,Name
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count
0,AK,11,73.0
1,AL,66,326.0
2,AR,33,70.0
3,AS,1,
4,AZ,62,585.0


In [112]:
# Renaming columns
final_state = final_state.rename(columns={'state': 'State', 'person': 'Police', 'Name': 'Civil'})
final_state.head()

Unnamed: 0_level_0,State,Police,Civil
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count
0,AK,11,73.0
1,AL,66,326.0
2,AR,33,70.0
3,AS,1,
4,AZ,62,585.0


In [113]:
# Filling NAs in Civil column
final_state['Civil'] = final_state['Civil'].fillna(0)
final_state.head()

Unnamed: 0_level_0,State,Police,Civil
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count
0,AK,11,73.0
1,AL,66,326.0
2,AR,33,70.0
3,AS,1,0.0
4,AZ,62,585.0


In [116]:
# Delete decimals
final_state['Civil'] = (final_state['Civil']).astype(int)
final_state.head()

Unnamed: 0_level_0,State,Police,Civil
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count
0,AK,11,73
1,AL,66,326
2,AR,33,70
3,AS,1,0
4,AZ,62,585
