In [168]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import stats

In [169]:
leso = pd.read_csv('../data/1033-program-foia-may-2014.csv')

In [170]:
#LESO Dataframe EDA
leso.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243492 entries, 0 to 243491
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   State             243492 non-null  object 
 1   County            243422 non-null  object 
 2   NSN               243309 non-null  object 
 3   Item Name         239658 non-null  object 
 4   Quantity          243492 non-null  int64  
 5   UI                243491 non-null  object 
 6   Acquisition Cost  243492 non-null  float64
 7   Ship Date         243492 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 14.9+ MB


In [171]:
#Clean Data

#Capitalize case County names
leso['County'] = leso['County'].str.upper()

#Change Ship Date to Datetime
leso['Ship Date'] = pd.to_datetime(leso['Ship Date'], format='mixed')

print(leso.head())
print(leso.info())


  State     County               NSN              Item Name  Quantity    UI  \
0    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
1    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
2    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
3    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
4    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   

   Acquisition Cost  Ship Date  
0             499.0 2012-08-30  
1             499.0 2012-08-30  
2             499.0 2012-08-30  
3             499.0 2012-08-30  
4             499.0 2012-08-30  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243492 entries, 0 to 243491
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   State             243492 non-null  object        
 1   County            243422 non-null  object      

In [172]:
leso = leso.dropna(subset=['Item Name'])

In [173]:
# #Duplicate Rows with Quantities greater than 1

# #leso['Quantity'].unique()

# def duplicate_rows(row):
#     if row['Quantity'] > 1:
#         return pd.concat([row] * row['Quantity'], ignore_index=True)
#     else:
#         return row
    
# foia = leso.apply(duplicate_rows, axis=1)


In [174]:
#Create DataFrame for Total sums of equipment and the cost
foia = leso.groupby(['State', 'County']).sum(numeric_only=True)

foia = foia.rename(columns = {'Quantity':'Total Equip Quantity' , 'Acquisition Cost': 'Total Cost'})

In [175]:
foia.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Equip Quantity,Total Cost
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,ANCHORAGE,1063,636550.09
AK,BETHEL,2,0.0
AK,FAIRBANKS NORTH STAR,18,26624.67
AK,JUNEAU,14,5542.0
AK,KETCHIKAN GATEWAY,4,1996.0


In [176]:
totals = foia.reset_index()
totals.head()

Unnamed: 0,State,County,Total Equip Quantity,Total Cost
0,AK,ANCHORAGE,1063,636550.09
1,AK,BETHEL,2,0.0
2,AK,FAIRBANKS NORTH STAR,18,26624.67
3,AK,JUNEAU,14,5542.0
4,AK,KETCHIKAN GATEWAY,4,1996.0


In [177]:
test_df = leso[leso['Item Name'].str.contains('GRENADE')]
test_df['Item Name'].value_counts()

Item Name
LAUNCHER,GRENADE                                        205
POUCH,FLASH BANG GRENADE                                 30
POUCH,GRENADE,HAND                                       19
POUCH,HAND GRENADE                                       18
POUCH,HAND GRENADE                                       16
POUCH,FRAG GRENADE,SINGLE                                11
POUCH,GRENADE,HAND                                        6
POUCH,GRENADE                                             5
HAND,GRENADE POUCH                                        5
POUCH,FRAG GRENADE,                                       4
BOX,GRENADE                                               4
BARREL,GRENADE LAUNCHER                                   3
POUCH,SF,GRENADE,KH                                       3
POUCH,GRENADE                                             3
POUCH,SMOKE GRENADE                                       2
CPCVX HOLSTER GRENADE POUCH                               2
GRENADE POUCH,MAINTENANCE OF O

In [178]:
#Filter for militarized equipment

#Equipment Bools
weapon_bool = (leso['Item Name'] == 'RIFLE,7.62 MILLIMETER') | (leso['Item Name'] == 'RIFLE,5.56 MILLIMETER') | (leso['Item Name'] == 'SHOTGUN,12 GAGE,RIOT TYPE') | (leso['Item Name'] == 'LAUNCHER,GRENADE')
rifle_df = leso[weapon_bool]
rifle_df['Item Name'].value_counts()

vehicle_bool = (leso['Item Name'] == 'MINE RESISTANT VEHICLE') | (leso['Item Name'] == 'LIGHT ARMORED VEHICLE') | (leso['Item Name'] == 'ONLY COMPLETE COMBAT/ASSAULT/TACTICAL WHEELED VEHICLES') |(leso['Item Name'] == 'UTILITY VEHICLE,4WD') | (leso['Item Name'] == 'HELICOPTER,OBSERVATION') | (leso['Item Name'] == 'HELICOPTER,UTILITY') | (leso['Item Name'] == 'TRUCK,ARMORED')
vic_df = leso[vehicle_bool]
vic_df['Item Name'].value_counts()

helmet_bool = (leso['Item Name'] == 'HELMET,ADVANCED COMBAT') | (leso['Item Name'] == 'ARMOR, HELMET') | (leso['Item Name'] == "HELMET,GROUND TROOPS'") | (leso['Item Name'] == 'HELMET,GROUND TROOPS') | (leso['Item Name'] == 'HELMET,GROUND TROOPS-PARACHUTISTS')
helmet_df = leso[helmet_bool]

night_vis_bool = (leso['Item Name'] == 'NIGHT VISION GOGGLE ') | (leso['Item Name'] == 'VIEWER,NIGHT VISION') | (leso['Item Name'] == 'IMAGE INTENSIFIER,NIGHT VISION') | (leso['Item Name'] == 'NIGHT VISION EQUIP, EMIT, REFLECTED RAD') | (leso['Item Name'] == 'NIGHT VISION SIGHT') | (leso['Item Name'] == 'NIGHT VISION SIGHT INDIVIDUAL SERVED WEAPONS') | (leso['Item Name'] == 'NIGHT VISION DEVICE') | (leso['Item Name'] == 'GOGGLES,NIGHT VISION') | (leso['Item Name'] == 'BINOCULAR,NIGHT VISION') | (leso['Item Name'] == 'SIGHT,NIGHT VISION') | (leso['Item Name'] == 'NIGHT VISION SIGHT,') | (leso['Item Name'] == 'VIEWER KIT,NIGHT VISION')
night_vis_df = leso[night_vis_bool]

#specialty equipment
spec_equip_bool = (leso['Item Name'] == 'SIGHT,NIGHT VISION SNIPERSCOPE') | (leso['Item Name'] == 'TELESCOPE,STRAIGHT') | (leso['Item Name'] == 'TELESCOPE,NON-INVERTING INFRARED') | (leso['Item Name'] == 'SCOPE,NIGHT VISION')
spec_df = leso[spec_equip_bool]

#armor
armor_bool = (leso['Item Name'] == 'ARMOR, PERSONAL') | (leso['Item Name'] == 'ARMOR,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE') | (leso['Item Name'] == 'ARMOR PLATE') | (leso['Item Name'] == 'ARMOR SET,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE') | (leso['Item Name'] == 'BODY ARMOR,FRAGMENTATION PROTECTIVE') | (leso['Item Name'] == 'ARMOR, PLATE') 
armor_df = leso[armor_bool]
# ARMOR, PERSONAL                                                            47
# ARMOR,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE                     25
# ARMOR PLATE
# ARMOR SET,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE                 10
# BODY ARMOR,FRAGMENTATION PROTECTIVE 
# INSERT,SMALL ARMS PROTECTIVE BODY ARMOR
# ARMOR, PLATE

frames = [vic_df, rifle_df, helmet_df, night_vis_df, spec_df, armor_df]

mil_equip = pd.concat(frames)
mil_equip.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86925 entries, 146 to 241137
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   State             86925 non-null  object        
 1   County            86925 non-null  object        
 2   NSN               86882 non-null  object        
 3   Item Name         86925 non-null  object        
 4   Quantity          86925 non-null  int64         
 5   UI                86925 non-null  object        
 6   Acquisition Cost  86925 non-null  float64       
 7   Ship Date         86925 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(5)
memory usage: 6.0+ MB


In [179]:
#Create DataFrame for Total sums of Militarized equipment and the cost
militarized = mil_equip.groupby(['State', 'County']).sum(numeric_only=True)

militarized = militarized.rename(columns = {'Quantity':'Mil Equip Quantity' , 'Acquisition Cost': 'Total Mil Cost'})

In [180]:
militarized.sample(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Mil Equip Quantity,Total Mil Cost
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1
VA,SURRY,15,1800.0
VA,WASHINGTON,19,5871.0
ID,BINGHAM,4,1996.0
IN,JENNINGS,17,4873.0
OH,LAKE,105,43713.0
NV,LINCOLN,10,7669.86
CO,LARIMER,58,64135.0
SC,GREENVILLE,16,194187.0
KY,WOODFORD,34,12634.0
SC,MCCORMICK,2,258.0


In [181]:
militarized = militarized.reset_index()
militarized.head()

Unnamed: 0,State,County,Mil Equip Quantity,Total Mil Cost
0,AK,ANCHORAGE,130,521602.23
1,AK,FAIRBANKS NORTH STAR,15,7485.0
2,AK,JUNEAU,14,5542.0
3,AK,KETCHIKAN GATEWAY,4,1996.0
4,AK,KODIAK ISLAND,5,690.0


In [182]:
militarized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2304 entries, 0 to 2303
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   State               2304 non-null   object 
 1   County              2304 non-null   object 
 2   Mil Equip Quantity  2304 non-null   int64  
 3   Total Mil Cost      2304 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 72.1+ KB


In [183]:
#Create a merged DataFrame that accounts for both total amounts of equipment and militarized equipment
merged_equipment = pd.merge(totals, militarized, on=['State', 'County'], how='outer')
merged_equipment.fillna(0, inplace=True)

In [184]:
merged_equipment.sample(10)

Unnamed: 0,State,County,Total Equip Quantity,Total Cost,Mil Equip Quantity,Total Mil Cost
1653,NY,STEUBEN,34,739328.0,7.0,658828.0
734,IN,PORTER,128,343985.18,57.0,149684.0
2490,WI,OCONTO,495,878170.89,0.0,0.0
2020,TN,CANNON,135,88339.84,0.0,0.0
1490,ND,MORTON,31,10271.0,31.0,10271.0
1897,PR,JAYUYA,1,74834.0,0.0,0.0
165,CA,MADERA,64,1941.31,14.0,1932.0
1346,MT,LINCOLN,36,102129.81,12.0,76019.0
184,CA,SAN LUIS OBISPO,4551,898384.43,97.0,99110.26
607,IL,EDWARDS,3,542.3,3.0,542.3


In [185]:
police = pd.read_csv('../data/police_fatalities 2.csv')

In [186]:
police.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28335 entries, 0 to 28334
Data columns (total 29 columns):
 #   Column                                                          Non-Null Count  Dtype  
---  ------                                                          --------------  -----  
 0   Unique ID                                                       28334 non-null  object 
 1   Subject's name                                                  28335 non-null  object 
 2   Subject's age                                                   27354 non-null  object 
 3   Subject's gender                                                28237 non-null  object 
 4   Subject's race                                                  28334 non-null  object 
 5   Subject's race with imputations                                 27905 non-null  object 
 6   Imputation probability                                          27897 non-null  object 
 7   URL of image of deceased                         

In [187]:
police['Cause of death'].unique()

array(['Vehicle', 'Gunshot', 'Beaten/Bludgeoned with instrument',
       'Stabbed', 'Asphyxiated/Restrained', 'Drowned', 'Drug overdose',
       'Fell from a height', 'Undetermined',
       'Chemical agent/Pepper spray', 'Medical emergency', 'Other',
       'Burned/Smoke inhalation', 'Tasered', 'Unknown', 'Pursuit', nan],
      dtype=object)

In [188]:
police_df = police.drop(columns = ["Full Address", "Subject's name", "Unique identifier (redundant)", "Dispositions/Exclusions INTERNAL USE, NOT FOR ANALYSIS", \
                       "Link to news article or photo of official document", "Symptoms of mental illness? INTERNAL USE, NOT FOR ANALYSIS", \
                        "Video", "Unique ID formula", "Date&Description", "URL of image of deceased", "URL of image of deceased", "Subject's age", \
                            "Subject's gender", "Subject's race", "Subject's race with imputations", "Imputation probability", "A brief description of the circumstances surrounding the death", \
                                'Location of injury (address)'])

In [189]:
police_df.rename(columns = {"Location of death (county)": "County", "Location of death (city)": "City", \
                            "Location of death (state)": "State", "Location of death (zip code)": "Zip Code"}, inplace=True)

In [190]:
police_df.head()

Unnamed: 0,Unique ID,Date of injury resulting in death (month/day/year),City,State,Zip Code,County,Latitude,Longitude,Agency responsible for death,Cause of death,Intentional Use of Force (Developing),Date (Year)
0,25746,01/01/2000,Willits,CA,95490.0,Mendocino,39.470883,-123.361751,Mendocino County Sheriff's Office,Vehicle,Vehicle/Pursuit,2000.0
1,25747,01/01/2000,Detroit,MI,48203.0,Wayne,42.404526,-83.092274,,Vehicle,Vehicle/Pursuit,2000.0
2,25748,01/01/2000,Detroit,MI,48203.0,Wayne,42.404526,-83.092274,,Vehicle,Vehicle/Pursuit,2000.0
3,25749,01/01/2000,Carlsbad,NM,88220.0,Eddy,32.45008,-104.237643,Eddy County Sheriff's Office,Vehicle,Vehicle/Pursuit,2000.0
4,2,01/02/2000,Ellenwood,GA,30294.0,DeKalb,33.645164,-84.229413,DeKalb County Sheriff's Office,Gunshot,"Intentional Use of Force, Deadly",2000.0


In [191]:
fatal_counties = police_df.groupby(['County', 'State']).size()
#fatal_counties.head()
fatal_counties = pd.DataFrame(fatal_counties)


In [192]:
fatalities = fatal_counties.reset_index()
police_victims = fatalities.rename(columns = {0: 'Fatalities'})
police_victims.head(10)

Unnamed: 0,County,State,Fatalities
0,Acadia,LA,7
1,Accomack,VA,2
2,Ada,ID,31
3,Adair,OK,6
4,Adams,CO,78
5,Adams,ID,2
6,Adams,IL,5
7,Adams,MS,6
8,Adams,NE,2
9,Adams,OH,1


In [193]:
police_victims['County'] = police_victims['County'].str.upper()
police_victims['County'] = police_victims['County'].str.replace("'", "")

In [194]:
police_victims.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2405 entries, 0 to 2404
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   County      2405 non-null   object
 1   State       2405 non-null   object
 2   Fatalities  2405 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 56.5+ KB


In [195]:
#Create a merged DataFrame that accounts for both total amounts of equipment and militarized equipment
analysis = pd.merge(merged_equipment, police_victims, on=['State', 'County'], how='outer')
analysis.fillna(0, inplace=True)

In [198]:
#Fix Individual Counties
analysis['County'] = analysis['County'].str.replace('ST. MARYS', 'SAINT MARYS')

In [201]:
alabama = analysis[analysis['State'] == 'AL']
alabama

Unnamed: 0,State,County,Total Equip Quantity,Total Cost,Mil Equip Quantity,Total Mil Cost,Fatalities
7,AL,AUTAUGA,111.0,177423.83,4.0,1996.00,7.0
8,AL,BALDWIN,738.0,453213.45,76.0,70724.86,24.0
9,AL,BARBOUR,48.0,111230.00,10.0,4990.00,9.0
10,AL,BIBB,30.0,39047.72,24.0,17579.72,1.0
11,AL,BLOUNT,7850.0,2472033.03,19.0,459044.00,4.0
...,...,...,...,...,...,...,...
2721,AL,GREENE,0.0,0.00,0.0,0.00,3.0
2847,AL,MONROE,0.0,0.00,0.0,0.00,1.0
2940,AL,ST CLAIR,0.0,0.00,0.0,0.00,2.0
2947,AL,ST. CLAIR,0.0,0.00,0.0,0.00,4.0
