In [545]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import stats

In [546]:
# Load the raw 1033-program FOIA extract; the path is relative to the notebook.
leso_csv_path = '../data/1033-program-foia-may-2014.csv'
leso = pd.read_csv(leso_csv_path)

In [547]:
# LESO DataFrame EDA: print each column's name, non-null count and dtype, plus memory usage.
leso.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243492 entries, 0 to 243491
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   State             243492 non-null  object 
 1   County            243422 non-null  object 
 2   NSN               243309 non-null  object 
 3   Item Name         239658 non-null  object 
 4   Quantity          243492 non-null  int64  
 5   UI                243491 non-null  object 
 6   Acquisition Cost  243492 non-null  float64
 7   Ship Date         243492 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 14.9+ MB


In [548]:
# Clean data

# Upper-case the county names so they compare equal to the upper-cased county
# names used by the other tables in this notebook.
leso['County'] = leso['County'].str.upper()

# Parse Ship Date into datetime64; format='mixed' lets pandas infer the format
# of each element individually.
leso['Ship Date'] = pd.to_datetime(leso['Ship Date'], format='mixed')

print(leso.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly: wrapping it in print() appended a stray "None" line.
leso.info()


  State     County               NSN              Item Name  Quantity    UI  \
0    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
1    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
2    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
3    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   
4    AK  ANCHORAGE  1005-00-073-9421  RIFLE,5.56 MILLIMETER         1  Each   

   Acquisition Cost  Ship Date  
0             499.0 2012-08-30  
1             499.0 2012-08-30  
2             499.0 2012-08-30  
3             499.0 2012-08-30  
4             499.0 2012-08-30  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243492 entries, 0 to 243491
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   State             243492 non-null  object        
 1   County            243422 non-null  object      

In [549]:
# Discard records without an item name; the item-name filters further down
# operate on this column.
required_columns = ['Item Name']
leso = leso.dropna(subset=required_columns)

In [550]:
# #Duplicate Rows with Quantities greater than 1

# #leso['Quantity'].unique()

# def duplicate_rows(row):
#     if row['Quantity'] > 1:
#         return pd.concat([row] * row['Quantity'], ignore_index=True)
#     else:
#         return row
    
# foia = leso.apply(duplicate_rows, axis=1)


In [551]:
#Create DataFrame for Total sums of equipment and the cost
foia = leso.groupby(['State', 'County']).sum(numeric_only=True)

foia = foia.rename(columns = {'Quantity':'Total Equip Quantity' , 'Acquisition Cost': 'Total Cost'})

In [552]:
# Peek at the first five (State, County) groups of the aggregated totals.
foia.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Equip Quantity,Total Cost
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,ANCHORAGE,1063,636550.09
AK,BETHEL,2,0.0
AK,FAIRBANKS NORTH STAR,18,26624.67
AK,JUNEAU,14,5542.0
AK,KETCHIKAN GATEWAY,4,1996.0


In [553]:
# Turn the (State, County) index levels back into ordinary columns so the table
# can be merged on them.
totals = foia.reset_index(drop=False)
totals.head(n=5)

Unnamed: 0,State,County,Total Equip Quantity,Total Cost
0,AK,ANCHORAGE,1063,636550.09
1,AK,BETHEL,2,0.0
2,AK,FAIRBANKS NORTH STAR,18,26624.67
3,AK,JUNEAU,14,5542.0
4,AK,KETCHIKAN GATEWAY,4,1996.0


In [554]:
# Exploratory check: which item names mention 'GRENADE', and how often does
# each one occur?
grenade_mask = leso['Item Name'].str.contains('GRENADE')
test_df = leso[grenade_mask]
test_df['Item Name'].value_counts()

Item Name
LAUNCHER,GRENADE                                        205
POUCH,FLASH BANG GRENADE                                 30
POUCH,GRENADE,HAND                                       19
POUCH,HAND GRENADE                                       18
POUCH,HAND GRENADE                                       16
POUCH,FRAG GRENADE,SINGLE                                11
POUCH,GRENADE,HAND                                        6
POUCH,GRENADE                                             5
HAND,GRENADE POUCH                                        5
POUCH,FRAG GRENADE,                                       4
BOX,GRENADE                                               4
BARREL,GRENADE LAUNCHER                                   3
POUCH,SF,GRENADE,KH                                       3
POUCH,GRENADE                                             3
POUCH,SMOKE GRENADE                                       2
CPCVX HOLSTER GRENADE POUCH                               2
GRENADE POUCH,MAINTENANCE OF O

In [555]:
# Filter for militarized equipment.
#
# Every category is an exact-match whitelist of 'Item Name' values. The strings
# are compared verbatim (including trailing spaces, trailing commas and the
# stray quote in one helmet entry), so they must not be tidied up here.
# Series.isin() replaces the long chains of `== ... | == ...`; it yields the
# same boolean mask (missing names compare as False).

# Weapons
weapon_items = [
    'RIFLE,7.62 MILLIMETER',
    'RIFLE,5.56 MILLIMETER',
    'SHOTGUN,12 GAGE,RIOT TYPE',
    'LAUNCHER,GRENADE',
]
weapon_bool = leso['Item Name'].isin(weapon_items)
rifle_df = leso[weapon_bool]

# Vehicles and aircraft
vehicle_items = [
    'MINE RESISTANT VEHICLE',
    'LIGHT ARMORED VEHICLE',
    'ONLY COMPLETE COMBAT/ASSAULT/TACTICAL WHEELED VEHICLES',
    'UTILITY VEHICLE,4WD',
    'HELICOPTER,OBSERVATION',
    'HELICOPTER,UTILITY',
    'TRUCK,ARMORED',
]
vehicle_bool = leso['Item Name'].isin(vehicle_items)
vic_df = leso[vehicle_bool]

# Helmets
helmet_items = [
    'HELMET,ADVANCED COMBAT',
    'ARMOR, HELMET',
    "HELMET,GROUND TROOPS'",
    'HELMET,GROUND TROOPS',
    'HELMET,GROUND TROOPS-PARACHUTISTS',
]
helmet_bool = leso['Item Name'].isin(helmet_items)
helmet_df = leso[helmet_bool]

# Night vision
night_vis_items = [
    'NIGHT VISION GOGGLE ',
    'VIEWER,NIGHT VISION',
    'IMAGE INTENSIFIER,NIGHT VISION',
    'NIGHT VISION EQUIP, EMIT, REFLECTED RAD',
    'NIGHT VISION SIGHT',
    'NIGHT VISION SIGHT INDIVIDUAL SERVED WEAPONS',
    'NIGHT VISION DEVICE',
    'GOGGLES,NIGHT VISION',
    'BINOCULAR,NIGHT VISION',
    'SIGHT,NIGHT VISION',
    'NIGHT VISION SIGHT,',
    'VIEWER KIT,NIGHT VISION',
]
night_vis_bool = leso['Item Name'].isin(night_vis_items)
night_vis_df = leso[night_vis_bool]

#specialty equipment
spec_equip_items = [
    'SIGHT,NIGHT VISION SNIPERSCOPE',
    'TELESCOPE,STRAIGHT',
    'TELESCOPE,NON-INVERTING INFRARED',
    'SCOPE,NIGHT VISION',
]
spec_equip_bool = leso['Item Name'].isin(spec_equip_items)
spec_df = leso[spec_equip_bool]

#armor
armor_items = [
    'ARMOR, PERSONAL',
    'ARMOR,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE',
    'ARMOR PLATE',
    'ARMOR SET,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE',
    'BODY ARMOR,FRAGMENTATION PROTECTIVE',
    'ARMOR, PLATE',
]
armor_bool = leso['Item Name'].isin(armor_items)
armor_df = leso[armor_bool]
# Author's notes on candidate armor item names (with counts where recorded):
# ARMOR, PERSONAL                                                            47
# ARMOR,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE                     25
# ARMOR PLATE
# ARMOR SET,SUPPLEMENTAL,SMALL ARMS-FRAGMENTATION PROTECTIVE                 10
# BODY ARMOR,FRAGMENTATION PROTECTIVE
# INSERT,SMALL ARMS PROTECTIVE BODY ARMOR
# ARMOR, PLATE
# NOTE(review): 'INSERT,SMALL ARMS PROTECTIVE BODY ARMOR' appears in the notes
# above but not in armor_items — confirm whether the omission is intentional.

# Stack the per-category selections (same order as before) into one table.
frames = [vic_df, rifle_df, helmet_df, night_vis_df, spec_df, armor_df]

mil_equip = pd.concat(frames)
mil_equip.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86925 entries, 146 to 241137
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   State             86925 non-null  object        
 1   County            86925 non-null  object        
 2   NSN               86882 non-null  object        
 3   Item Name         86925 non-null  object        
 4   Quantity          86925 non-null  int64         
 5   UI                86925 non-null  object        
 6   Acquisition Cost  86925 non-null  float64       
 7   Ship Date         86925 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(5)
memory usage: 6.0+ MB


In [556]:
# Per-(State, County) totals of militarized equipment only: sum the numeric
# columns of mil_equip and rename them so they do not clash with the overall
# totals when the two tables are merged.
militarized = (
    mil_equip
    .groupby(['State', 'County'])
    .sum(numeric_only=True)
    .rename(columns={'Quantity': 'Mil Equip Quantity', 'Acquisition Cost': 'Total Mil Cost'})
)

In [557]:
# Spot-check 20 random counties. A fixed random_state makes the displayed rows
# identical on every run (the unseeded call showed different rows each time).
militarized.sample(20, random_state=42)

Unnamed: 0_level_0,Unnamed: 1_level_0,Mil Equip Quantity,Total Mil Cost
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1
WI,VILAS,12,4869.0
MN,DODGE,12,5266.0
ND,STARK,49,9850.0
IA,SCOTT,62,473173.0
GA,DOUGLAS,63,19956.0
NY,MADISON,3,414.0
OK,ELLIS,9,1134.0
GA,COLQUITT,24,8714.95
IL,WINNEBAGO,14,769913.0
TX,PALO PINTO,5,70086.62


In [558]:
# Move the (State, County) index levels into ordinary columns for merging.
# NOTE: this rebinds `militarized` to its own reset version, so running the cell
# a second time would add a spurious 'index' column.
militarized = militarized.reset_index(drop=False)
militarized.head(n=5)

Unnamed: 0,State,County,Mil Equip Quantity,Total Mil Cost
0,AK,ANCHORAGE,130,521602.23
1,AK,FAIRBANKS NORTH STAR,15,7485.0
2,AK,JUNEAU,14,5542.0
3,AK,KETCHIKAN GATEWAY,4,1996.0
4,AK,KODIAK ISLAND,5,690.0


In [559]:
# Print the column names, non-null counts, dtypes and memory usage of the
# militarized-equipment totals.
militarized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2304 entries, 0 to 2303
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   State               2304 non-null   object 
 1   County              2304 non-null   object 
 2   Mil Equip Quantity  2304 non-null   int64  
 3   Total Mil Cost      2304 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 72.1+ KB


In [560]:
# Combine overall equipment totals with militarized-equipment totals, one row
# per (State, County). Counties missing from one side get NaN from the outer
# join; those are replaced by 0.
merged_equipment = pd.merge(
    left=totals,
    right=militarized,
    how='outer',
    on=['State', 'County'],
).fillna(0)

In [561]:
# Normalise county spellings in the equipment table ("ST" for Saint, spaces
# instead of hyphens, split DE/DU prefixes, no "CITY" suffix on Virginia Beach).
# The replacements are literal substring replacements applied in this order to
# every row; regex=False is explicit so the behaviour does not depend on the
# pandas-version-specific default of Series.str.replace.
_county_fixes = [
    ("SAINT", "ST"),
    ('-', ' '),
    ('DEKALB', 'DE KALB'),
    ('DEWITT', 'DE WITT'),
    ('DUPAGE', 'DU PAGE'),
    ('DESOTO', 'DE SOTO'),
    ('VIRGINIA BEACH CITY', 'VIRGINIA BEACH'),
]
_county_names = merged_equipment['County']
for _old, _new in _county_fixes:
    _county_names = _county_names.str.replace(_old, _new, regex=False)
merged_equipment['County'] = _county_names

# The totals were aggregated before the names were normalised, so two spellings
# of the same county (e.g. 'DEKALB' and 'DE KALB' in one state) would now be
# separate rows with the same key, and a later merge on (State, County) would
# duplicate the other table's rows. Sum such rows back into one; this leaves
# the values unchanged when no keys collide.
merged_equipment = (
    merged_equipment
    .groupby(['State', 'County'], as_index=False, dropna=False)
    .sum()
)


In [562]:
# Spot-check 10 random counties. A fixed random_state makes the displayed rows
# identical on every run (the unseeded call showed different rows each time).
merged_equipment.sample(10, random_state=42)

Unnamed: 0,State,County,Total Equip Quantity,Total Cost,Mil Equip Quantity,Total Mil Cost
1640,NY,ONONDAGA,937,806156.27,1.0,658000.0
1037,MI,CHEBOYGAN,2,138.0,1.0,138.0
1516,NE,CHASE,4,480.0,4.0,480.0
348,GA,BULLOCH,1,54313.7,0.0,0.0
319,FL,PALM BEACH,76,135175.42,72.0,15714.0
2316,VA,CHESAPEAKE CITY,59,13222.68,20.0,9980.0
2317,VA,CHESTERFIELD,54,744683.69,25.0,3000.0
1594,NM,SANDOVAL,52,812655.46,1.0,733000.0
888,KY,MAGOFFIN,10,893.55,5.0,600.0
2337,VA,GRAYSON,7,1327.0,7.0,1327.0


In [563]:
# Load the raw police-fatalities table; the path is relative to the notebook.
police_csv_path = '../data/police_fatalities 2.csv'
police = pd.read_csv(police_csv_path)

In [564]:
# Print the column names, non-null counts, dtypes and memory usage of the raw
# police-fatalities table.
police.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28335 entries, 0 to 28334
Data columns (total 29 columns):
 #   Column                                                          Non-Null Count  Dtype  
---  ------                                                          --------------  -----  
 0   Unique ID                                                       28334 non-null  object 
 1   Subject's name                                                  28335 non-null  object 
 2   Subject's age                                                   27354 non-null  object 
 3   Subject's gender                                                28237 non-null  object 
 4   Subject's race                                                  28334 non-null  object 
 5   Subject's race with imputations                                 27905 non-null  object 
 6   Imputation probability                                          27897 non-null  object 
 7   URL of image of deceased                         

In [565]:
# List the distinct values recorded in 'Cause of death' (NaN included).
pd.unique(police['Cause of death'])

array(['Vehicle', 'Gunshot', 'Beaten/Bludgeoned with instrument',
       'Stabbed', 'Asphyxiated/Restrained', 'Drowned', 'Drug overdose',
       'Fell from a height', 'Undetermined',
       'Chemical agent/Pepper spray', 'Medical emergency', 'Other',
       'Burned/Smoke inhalation', 'Tasered', 'Unknown', 'Pursuit', nan],
      dtype=object)

In [566]:
# Columns that are not used in the county-level analysis (free text, links,
# internal-use fields and subject demographics). Each label is listed once; the
# original list repeated "URL of image of deceased" and used backslash
# continuations that are unnecessary inside brackets.
columns_to_drop = [
    "Full Address",
    "Subject's name",
    "Unique identifier (redundant)",
    "Dispositions/Exclusions INTERNAL USE, NOT FOR ANALYSIS",
    "Link to news article or photo of official document",
    "Symptoms of mental illness? INTERNAL USE, NOT FOR ANALYSIS",
    "Video",
    "Unique ID formula",
    "Date&Description",
    "URL of image of deceased",
    "Subject's age",
    "Subject's gender",
    "Subject's race",
    "Subject's race with imputations",
    "Imputation probability",
    "A brief description of the circumstances surrounding the death",
    'Location of injury (address)',
]
police_df = police.drop(columns=columns_to_drop)

In [567]:
# Shorten the "Location of death (...)" headers to the column names used by the
# equipment tables, so the two data sets can be joined on State and County.
location_column_names = {
    "Location of death (county)": "County",
    "Location of death (city)": "City",
    "Location of death (state)": "State",
    "Location of death (zip code)": "Zip Code",
}
police_df = police_df.rename(columns=location_column_names)

In [568]:
# Peek at the first five rows of the trimmed, renamed fatalities table.
police_df.head(n=5)

Unnamed: 0,Unique ID,Date of injury resulting in death (month/day/year),City,State,Zip Code,County,Latitude,Longitude,Agency responsible for death,Cause of death,Intentional Use of Force (Developing),Date (Year)
0,25746,01/01/2000,Willits,CA,95490.0,Mendocino,39.470883,-123.361751,Mendocino County Sheriff's Office,Vehicle,Vehicle/Pursuit,2000.0
1,25747,01/01/2000,Detroit,MI,48203.0,Wayne,42.404526,-83.092274,,Vehicle,Vehicle/Pursuit,2000.0
2,25748,01/01/2000,Detroit,MI,48203.0,Wayne,42.404526,-83.092274,,Vehicle,Vehicle/Pursuit,2000.0
3,25749,01/01/2000,Carlsbad,NM,88220.0,Eddy,32.45008,-104.237643,Eddy County Sheriff's Office,Vehicle,Vehicle/Pursuit,2000.0
4,2,01/02/2000,Ellenwood,GA,30294.0,DeKalb,33.645164,-84.229413,DeKalb County Sheriff's Office,Gunshot,"Intentional Use of Force, Deadly",2000.0


In [569]:
# Normalise county names in the fatalities table: upper case, no apostrophes,
# "ST" for Saint, spaces instead of hyphens, one-off fixes for typos and for
# city names recorded in place of counties, and no " CENSUS AREA" / " COUNTY"
# suffix.
#
# Every entry is a literal substring replacement, applied in list order to all
# rows regardless of state. regex=False is explicit so that patterns such as
# "ST." are never treated as regular expressions (the default of
# Series.str.replace differs between pandas versions).
_county_replacements = [
    ("'", ""),
    ("ST.", "ST"),
    ("STE.", "STE"),
    ("SAINT", "ST"),
    ('-', ' '),
    #Fix Individual Counties
    ('DEKALB', 'DE KALB'),
    ('PARK HILL', 'DENVER'),
    ('DESOTO', 'DE SOTO'),
    ('OSKALOOSA', 'OKALOOSA'),
    ('JOHNSON ', 'JOHNSON'),
    ('CHICAGO', 'COOK'),
    ('LASALLE', 'LA SALLE'),
    ('DUPAGE', 'DU PAGE'),
    ('VERMILLION', 'VERMILION'),
    ('MCCLEAN', 'MCLEAN'),
    ('LAPORTE', 'LA PORTE'),
    ('INDIANAPOLIS', 'MARION'),
    ('WASHENTAW', 'WASHTENAW'),
    #NY
    ('MANHATTAN', 'NEW YORK'),
    # ('KINGS', 'NEW YORK'),      # disabled by the author
    # ('RICHMOND', 'NEW YORK'),   # disabled by the author
    ('BRONX', 'NEW YORK'),
    ('BROOKLYN', 'NEW YORK'),
    # NOTE(review): the original repeated BROOKLYN -> NEW YORK under an "OH"
    # heading; the repeat was a no-op and is dropped. A different target for a
    # Brooklyn in Ohio may have been intended — confirm.
    #OK
    ('LAFLORE', 'LA FLORE'),
    #TN
    ('FENTRESS ', 'FENTRESS'),
    ('BEFORD', 'BEDFORD'),
    #TX
    ('BRAZIRIA', 'BRAZORIA'),
    ('DEWITT', 'DE WITT'),
    # (DIMMIT is handled separately below.)
    #UT
    ('WEBBER', 'WEBER'),
    #VA
    ('BEDFORD CITY', 'BEDFORD'),  # listed twice in the original; once suffices
    ('CITY OF CHESAPEAKE', 'CHESAPEAKE CITY'),
    # ('NORFOLK', 'NORFOLK CITY'),    # disabled by the author
    ('CITY OF NEWPORT NEWS', 'NEWPORT NEWS CITY'),
    ('LYNCHBURG CITY', 'LYNCHBURG'),
    # ('RICHMOND', 'RICHMOND CITY'),  # disabled by the author
    ('ROCKBRDGE', 'ROCKBRIDGE'),
    ('VIRGINIA BEACH CITY', 'VIRGINIA BEACH'),
    #WI (headed "WA" in the original; Racine and Dane are Wisconsin counties)
    # ('MADISON', 'DANE'),            # disabled by the author
    ('RACNE', 'RACINE'),
]

_county_names = police_df['County'].str.upper()
for _old, _new in _county_replacements:
    _county_names = _county_names.str.replace(_old, _new, regex=False)

# 'DIMMIT' -> 'DIMMITT', but only where it is not already followed by a 'T'.
# The plain substring replacement turned an existing 'DIMMITT' into 'DIMMITTT'
# and lengthened the name again on every re-run of the cell.
_county_names = _county_names.str.replace(r'DIMMIT(?!T)', 'DIMMITT', regex=True)

# Strip administrative suffixes last, after the name-specific fixes.
for _suffix in (' CENSUS AREA', ' COUNTY'):
    _county_names = _county_names.str.replace(_suffix, '', regex=False)

police_df['County'] = _county_names
# police_df['County'] = police_df['County'].str.replace('WESAINT', 'WEST')
# police_df['County'] = police_df['County'].str.replace('EASAINT


In [570]:
# Count the fatality records per (County, State) pair. The counts are kept as a
# one-column DataFrame whose column label is the default 0.
fatal_counties = (
    police_df
    .groupby(['County', 'State'])
    .size()
    .to_frame()
)


In [571]:
# Move County/State out of the index, then give the count column (labelled 0)
# a meaningful name.
fatalities = fatal_counties.reset_index(drop=False)
police_victims = fatalities.rename({0: 'Fatalities'}, axis='columns')
police_victims.head(n=10)

Unnamed: 0,County,State,Fatalities
0,ACADIA,LA,7
1,ACCOMACK,VA,2
2,ADA,ID,31
3,ADAIR,OK,6
4,ADAMS,CO,78
5,ADAMS,ID,2
6,ADAMS,IL,5
7,ADAMS,MS,6
8,ADAMS,NE,2
9,ADAMS,OH,1


In [572]:
# Print the column names, non-null counts, dtypes and memory usage of the
# per-county fatality counts.
police_victims.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2359 entries, 0 to 2358
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   County      2359 non-null   object
 1   State       2359 non-null   object
 2   Fatalities  2359 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 55.4+ KB


In [573]:
# Combine the per-county equipment totals with the per-county fatality counts.
# The outer join keeps counties that appear in only one of the two tables; the
# NaN values that produces are replaced by 0.
analysis = pd.merge(
    left=merged_equipment,
    right=police_victims,
    how='outer',
    on=['State', 'County'],
).fillna(0)

In [574]:
# Let pandas render up to 250 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 250)

In [576]:
# Inspect every county row of a single state (here 'WY') in the combined table.
is_selected_state = analysis['State'].eq('WY')
state = analysis[is_selected_state]
state

Unnamed: 0,State,County,Total Equip Quantity,Total Cost,Mil Equip Quantity,Total Mil Cost,Fatalities
2555,WY,ALBANY,28.0,70601.0,28.0,70601.0,3.0
2556,WY,BIG HORN,162.0,1562749.11,14.0,660119.0,1.0
2557,WY,CAMPBELL,65.0,364740.31,18.0,67416.0,5.0
2558,WY,CARBON,38.0,13334.97,31.0,12924.0,2.0
2559,WY,CONVERSE,30.0,106905.0,23.0,4546.0,3.0
2560,WY,CROOK,7.0,1291.0,7.0,1291.0,0.0
2561,WY,FREMONT,511.0,156168.48,29.0,6529.0,6.0
2562,WY,GOSHEN,15.0,2412.0,15.0,2412.0,1.0
2563,WY,HOT SPRINGS,6.0,828.0,6.0,828.0,0.0
2564,WY,JOHNSON,16.0,4374.0,16.0,4374.0,0.0
