In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from scipy import stats

In [2]:
police = pd.read_csv('police_use_of_force.csv')
police.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,X,Y,PoliceUseOfForceID,CaseNumber,ResponseDate,Problem,Is911Call,PrimaryOffense,SubjectInjury,ForceReportNumber,...,TotalCityCallsForYear,TotalPrecinctCallsForYear,TotalNeighborhoodCallsForYear,CenterGBSID,CenterLatitude,CenterLongitude,CenterX,CenterY,DateAdded,OBJECTID
0,-93.273141,44.980808,15928382,08-000149,2008/01/01 01:29:47+00,Code 3,No,MISC,,1,...,322402,46998.0,23458,17121,44.980808,-93.273141,-10383120.0,5618501.0,2020/11/02 08:18:49+00,1
1,-93.251092,44.961813,15928383,08-012774,2008/01/13 03:21:52+00,Suspicious Vehicle,No,FLEE,No,2,...,322402,84018.0,10316,17023,44.961813,-93.251092,-10380660.0,5615512.0,2020/11/02 08:18:49+00,2
2,-93.266112,44.974295,15928384,08-019237,2008/01/20 03:47:57+00,Unwanted Person,Yes,OBSTRU,No,3,...,322402,46998.0,23458,21739,44.974295,-93.266112,-10382340.0,5617476.0,2020/11/02 08:18:49+00,3
3,-93.295639,45.000883,15928385,08-030704,2008/02/01 06:15:20+00,Attempt Pick-Up,No,CHLDAB,,2,...,322402,80434.0,15344,22445,45.000883,-93.295639,-10385620.0,5621661.0,2020/11/02 08:18:49+00,4
4,-93.290726,45.013029,15928386,08-038956,2008/02/09 03:49:09+00,Neighbor Trouble,Yes,OBSTRU,Yes,2,...,322402,80434.0,13679,25902,45.013029,-93.290726,-10385080.0,5623573.0,2020/11/02 08:18:49+00,5


In [3]:
police.shape

(33257, 30)

In [4]:
# Filtrando o dataset para remover entradas indesejdas cujos resultados não são proveitosos para o modelo:
police2 = police.drop(columns=['X', 'Y', 'PoliceUseOfForceID', 'CaseNumber', 'ResponseDate', 'ForceReportNumber', 'SubjectRole', 'SubjectRoleNumber', 'CenterGBSID', 'OBJECTID', 'DateAdded', 'CenterX', 'CenterY', 'CenterLatitude', 'CenterLongitude'])

* Dados relacionados a latitude e longitude foram desconsiderados, uma vez que para a análise os dados de localização baseado em distrito policial e bairro são mais proveitosos para que a análise não se torne muito complexa, uma vez que o intuito do modelo não é ver atituldes policiais em pontos muito específicos.
* As features PoliceUseOfForceID, CaseNumber, ForceReportNumber, SubjectRoleNumber, CenterGBSID, OBJECTID, DateAdded foram desconsideradas por se tratarem de códigos policiais que não são precisos para a análise.

In [5]:
police2.head()

Unnamed: 0,Problem,Is911Call,PrimaryOffense,SubjectInjury,ForceType,ForceTypeAction,Race,Sex,EventAge,TypeOfResistance,Precinct,Neighborhood,TotalCityCallsForYear,TotalPrecinctCallsForYear,TotalNeighborhoodCallsForYear
0,Code 3,No,MISC,,Bodily Force,Push Away,White,Male,39.0,Commission of Crime,1,Downtown West,322402,46998.0,23458
1,Suspicious Vehicle,No,FLEE,No,Bodily Force,Kicks,Black,Male,30.0,Fled in Vehicle,3,Ventura Village,322402,84018.0,10316
2,Unwanted Person,Yes,OBSTRU,No,Bodily Force,Push Away,Black,Male,40.0,Commission of Crime,1,Downtown West,322402,46998.0,23458
3,Attempt Pick-Up,No,CHLDAB,,Bodily Force,Joint Lock,Black,Female,35.0,Commission of Crime,4,Jordan,322402,80434.0,15344
4,Neighbor Trouble,Yes,OBSTRU,Yes,Bodily Force,Slaps,Black,Male,46.0,Tensed,4,Hawthorne,322402,80434.0,13679


## Tabelas comparativas entre a target e as demais features considerando todos os valores

In [6]:
# Tabela para Raça:
pd.crosstab(police2['ForceType'], police2['Race'], normalize = 'index').round(3)

Race,Asian,Black,Native American,Other / Mixed Race,Pacific Islander,Unknown,White,not recorded
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Baton,0.034,0.603,0.052,0.069,0.0,0.052,0.19,0.0
Bodily Force,0.013,0.615,0.054,0.036,0.0,0.017,0.258,0.007
Chemical Irritant,0.022,0.651,0.029,0.043,0.0,0.054,0.166,0.035
Firearm,0.023,0.523,0.023,0.0,0.0,0.0,0.205,0.227
Gun Point Display,0.018,0.699,0.064,0.057,0.0,0.0,0.155,0.007
Improvised Weapon,0.003,0.669,0.057,0.037,0.0,0.014,0.22,0.0
Less Lethal,0.0,0.104,0.104,0.0,0.0,0.438,0.354,0.0
Less Lethal Projectile,0.0,0.438,0.062,0.125,0.0,0.0,0.375,0.0
Maximal Restraint Technique,0.0,0.618,0.056,0.0,0.014,0.049,0.264,0.0
Police K9 Bite,0.007,0.635,0.072,0.041,0.0,0.007,0.232,0.007


In [7]:
# Tabela para Sexo:
pd.crosstab(police2['ForceType'], police2['Sex'], normalize = 'index').round(3)

Sex,Female,Male,Unknown,not recorded
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Baton,0.069,0.897,0.034,0.0
Bodily Force,0.114,0.88,0.003,0.003
Chemical Irritant,0.256,0.684,0.027,0.032
Firearm,0.045,0.864,0.0,0.091
Gun Point Display,0.014,0.986,0.0,0.0
Improvised Weapon,0.089,0.911,0.0,0.0
Less Lethal,0.042,0.688,0.271,0.0
Less Lethal Projectile,0.0,1.0,0.0,0.0
Maximal Restraint Technique,0.208,0.792,0.0,0.0
Police K9 Bite,0.017,0.976,0.003,0.003


In [8]:
# Tabela para tipo de resistência:
pd.crosstab(police2['ForceType'], police2['TypeOfResistance'], normalize = 'index').round(3)

TypeOfResistance,Assaulted Officer,Assaulted Officer,Assaulted Police Horse,Assaulted Police K9,Assaulting Police Horse,Assaulting Police K9,COMMISSION OF CRIME,Commission of Crime,Commission of a Crime,Fled in Vehicle,...,Other,TENSED,Tensed,Tensed,Unspecified,Verbal Non-Compliance,Verbal Non-Compliance,commission of crime,tensed,verbal non-compliance
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baton,0.103,0.017,0.0,0.0,0.0,0.0,0.0,0.345,0.0,0.017,...,0.0,0.0,0.155,0.0,0.103,0.069,0.0,0.0,0.0,0.0
Bodily Force,0.104,0.021,0.0,0.0,0.002,0.0,0.0,0.148,0.018,0.026,...,0.032,0.0,0.275,0.049,0.068,0.07,0.017,0.0,0.0,0.0
Chemical Irritant,0.034,0.008,0.0,0.0,0.002,0.0,0.0,0.393,0.042,0.003,...,0.03,0.0,0.104,0.007,0.104,0.199,0.028,0.004,0.0,0.0
Firearm,0.209,0.0,0.0,0.0,0.0,0.0,0.0,0.372,0.0,0.023,...,0.0,0.0,0.14,0.0,0.163,0.047,0.0,0.0,0.0,0.0
Gun Point Display,0.055,0.0,0.0,0.0,0.0,0.0,0.0,0.269,0.0,0.077,...,0.0,0.002,0.169,0.0,0.091,0.148,0.0,0.0,0.0,0.0
Improvised Weapon,0.134,0.02,0.0,0.0,0.003,0.0,0.0,0.178,0.015,0.087,...,0.02,0.0,0.155,0.0,0.05,0.076,0.006,0.0,0.0,0.0
Less Lethal,0.0,0.578,0.022,0.0,0.0,0.0,0.0,0.0,0.089,0.0,...,0.244,0.0,0.0,0.0,0.0,0.0,0.022,0.0,0.0,0.0
Less Lethal Projectile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.438,0.0,0.0,...,0.0,0.0,0.0,0.0,0.188,0.188,0.0,0.0,0.0,0.0
Maximal Restraint Technique,0.0,0.191,0.0,0.0,0.0,0.0,0.0,0.0,0.061,0.0,...,0.351,0.0,0.0,0.214,0.0,0.0,0.107,0.0,0.0,0.0
Police K9 Bite,0.014,0.0,0.0,0.0,0.0,0.024,0.0,0.14,0.01,0.16,...,0.007,0.0,0.01,0.0,0.126,0.048,0.003,0.0,0.0,0.0


In [9]:
# Tabela para Idade:
pd.crosstab(police2['ForceType'], police2['EventAge'], normalize = 'index').round(3)

EventAge,0.0,6.0,7.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,...,66.0,67.0,69.0,70.0,71.0,72.0,73.0,74.0,77.0,82.0
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baton,0.018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bodily Force,0.004,0.0,0.0,0.0,0.0,0.0,0.001,0.005,0.008,0.016,...,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Chemical Irritant,0.096,0.001,0.0,0.0,0.0,0.0,0.001,0.003,0.012,0.015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Firearm,0.163,0.0,0.0,0.0,0.0,0.0,0.0,0.023,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gun Point Display,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005,0.011,0.016,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0
Improvised Weapon,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.003,0.012,0.012,...,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Less Lethal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Less Lethal Projectile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Maximal Restraint Technique,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Police K9 Bite,0.003,0.0,0.0,0.0,0.0,0.0,0.007,0.0,0.02,0.027,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Fazendo limpeza para tirar os resultados indeterminados (Nan)
police2.dropna(inplace=True)
police2.shape

(19262, 15)

In [11]:
police2.head()

Unnamed: 0,Problem,Is911Call,PrimaryOffense,SubjectInjury,ForceType,ForceTypeAction,Race,Sex,EventAge,TypeOfResistance,Precinct,Neighborhood,TotalCityCallsForYear,TotalPrecinctCallsForYear,TotalNeighborhoodCallsForYear
1,Suspicious Vehicle,No,FLEE,No,Bodily Force,Kicks,Black,Male,30.0,Fled in Vehicle,3,Ventura Village,322402,84018.0,10316
2,Unwanted Person,Yes,OBSTRU,No,Bodily Force,Push Away,Black,Male,40.0,Commission of Crime,1,Downtown West,322402,46998.0,23458
4,Neighbor Trouble,Yes,OBSTRU,Yes,Bodily Force,Slaps,Black,Male,46.0,Tensed,4,Hawthorne,322402,80434.0,13679
5,Domestic Abuse-In Progress,Yes,DASLT5,No,Bodily Force,Punches,Black,Male,36.0,Tensed,4,Cleveland,322402,80434.0,2992
6,Suspicious Person,No,DISCON,No,Bodily Force,Knees,Black,Male,34.0,Tensed,1,Downtown West,322402,46998.0,23458


## Tabelas comparativas entre a target e as demais features depois da limpeza

In [12]:
# Tabela para Raça:
pd.crosstab(police2['ForceType'], police2['Race'], normalize = 'index').round(3)

Race,Asian,Black,Native American,Other / Mixed Race,Pacific Islander,Unknown,White,not recorded
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Baton,0.036,0.6,0.055,0.073,0.0,0.036,0.2,0.0
Bodily Force,0.013,0.626,0.055,0.042,0.0,0.01,0.248,0.006
Chemical Irritant,0.041,0.646,0.039,0.041,0.0,0.018,0.189,0.027
Firearm,0.024,0.548,0.024,0.0,0.0,0.0,0.19,0.214
Gun Point Display,0.016,0.699,0.066,0.058,0.0,0.0,0.153,0.008
Improvised Weapon,0.003,0.688,0.057,0.041,0.0,0.009,0.202,0.0
Less Lethal,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0
Less Lethal Projectile,0.0,0.438,0.062,0.125,0.0,0.0,0.375,0.0
Maximal Restraint Technique,0.0,0.643,0.071,0.0,0.0,0.0,0.286,0.0
Police K9 Bite,0.007,0.628,0.067,0.045,0.0,0.007,0.238,0.007


In [13]:
# Tabela para Sexo:
pd.crosstab(police2['ForceType'], police2['Sex'], normalize = 'index').round(3)

Sex,Female,Male,Unknown,not recorded
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Baton,0.073,0.891,0.036,0.0
Bodily Force,0.088,0.908,0.002,0.002
Chemical Irritant,0.198,0.771,0.007,0.024
Firearm,0.048,0.881,0.0,0.071
Gun Point Display,0.016,0.984,0.0,0.0
Improvised Weapon,0.079,0.921,0.0,0.0
Less Lethal,0.0,1.0,0.0,0.0
Less Lethal Projectile,0.0,1.0,0.0,0.0
Maximal Restraint Technique,0.179,0.821,0.0,0.0
Police K9 Bite,0.011,0.981,0.004,0.004


In [14]:
# Tabela para tipo de resistência:
pd.crosstab(police2['ForceType'], police2['TypeOfResistance'], normalize = 'index').round(3)

TypeOfResistance,Assaulted Officer,Assaulted Officer,Assaulting Police Horse,Assaulting Police K9,Commission of Crime,Commission of a Crime,Fled in Vehicle,Fled in a Vehicle,Fled on Foot,Fled on Foot,Other,TENSED,Tensed,Tensed,Unspecified,Verbal Non-Compliance,Verbal Non-Compliance,commission of crime,tensed
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Baton,0.109,0.0,0.0,0.0,0.345,0.0,0.018,0.0,0.182,0.0,0.0,0.0,0.164,0.0,0.109,0.073,0.0,0.0,0.0
Bodily Force,0.136,0.012,0.002,0.0,0.168,0.007,0.035,0.002,0.175,0.009,0.01,0.0,0.278,0.018,0.07,0.071,0.004,0.0,0.0
Chemical Irritant,0.098,0.005,0.006,0.0,0.362,0.005,0.007,0.0,0.099,0.0,0.002,0.001,0.14,0.003,0.104,0.159,0.003,0.007,0.0
Firearm,0.214,0.0,0.0,0.0,0.381,0.0,0.024,0.0,0.048,0.0,0.0,0.0,0.143,0.0,0.143,0.048,0.0,0.0,0.0
Gun Point Display,0.06,0.0,0.0,0.0,0.285,0.0,0.077,0.0,0.192,0.0,0.0,0.003,0.175,0.0,0.079,0.129,0.0,0.0,0.0
Improvised Weapon,0.145,0.009,0.003,0.0,0.186,0.0,0.091,0.003,0.259,0.0,0.0,0.0,0.167,0.0,0.054,0.079,0.003,0.0,0.0
Less Lethal,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Less Lethal Projectile,0.0,0.0,0.0,0.0,0.438,0.0,0.0,0.0,0.188,0.0,0.0,0.0,0.0,0.0,0.188,0.188,0.0,0.0,0.0
Maximal Restraint Technique,0.0,0.321,0.0,0.0,0.0,0.071,0.0,0.0,0.0,0.036,0.393,0.0,0.0,0.143,0.0,0.0,0.036,0.0,0.0
Police K9 Bite,0.015,0.0,0.0,0.026,0.149,0.004,0.171,0.007,0.416,0.007,0.0,0.0,0.011,0.0,0.138,0.052,0.004,0.0,0.0


In [15]:
# Tabela para Idade:
pd.crosstab(police2['ForceType'], police2['EventAge'], normalize = 'index').round(3)

EventAge,0.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,...,64.0,65.0,66.0,67.0,70.0,71.0,72.0,73.0,74.0,82.0
ForceType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baton,0.018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055,0.036,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bodily Force,0.004,0.0,0.0,0.0,0.0,0.004,0.007,0.014,0.021,0.025,...,0.001,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Chemical Irritant,0.054,0.0,0.0,0.0,0.0,0.004,0.008,0.017,0.013,0.014,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Firearm,0.167,0.0,0.0,0.0,0.0,0.024,0.0,0.0,0.071,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gun Point Display,0.0,0.0,0.0,0.0,0.0,0.005,0.008,0.011,0.019,0.027,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Improvised Weapon,0.003,0.0,0.0,0.0,0.0,0.0,0.013,0.013,0.032,0.016,...,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Less Lethal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Less Lethal Projectile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Maximal Restraint Technique,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Police K9 Bite,0.004,0.0,0.0,0.0,0.007,0.0,0.019,0.026,0.048,0.048,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Depois de analisar os dados, percebe-se que os valores não mudam significativamente depois da limpeza dos valores Nan. Logo, a limpeza mostra-se proveitosa.**

In [16]:
police2['ForceType'].value_counts()

Bodily Force                   14648
Taser                           2466
Chemical Irritant               1054
Gun Point Display                365
Improvised Weapon                317
Police K9 Bite                   269
Baton                             55
Firearm                           42
Maximal Restraint Technique       28
Less Lethal Projectile            16
Less Lethal                        2
Name: ForceType, dtype: int64

In [17]:
police2['Problem'].value_counts()

Suspicious Person              3306
Fight                          1525
Domestic Abuse-In Progress     1479
Disturbance                    1189
Suspicious Vehicle              952
                               ... 
Parking Problem                   1
Down Outide-One w/Fire            1
Shooting Report Only              1
Aircraft Crash in City            1
Crank 9-1-1 Call                  1
Name: Problem, Length: 114, dtype: int64