In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_rows', 1000)

In [3]:
pd.options.mode.chained_assignment = None 

In [4]:
discipline = pd.read_csv("/Users/josemartinez/Desktop/Code/Discipline_Data/cleaned_discipline_final.csv")

In [5]:
discipline.columns

Index(['CONTROL #', 'FINAL DISP DATE', 'FINAL DISPOSITION', 'FINAL # DAYS',
       'EMPLOYEE LAST NAME', 'EMPLOYEE FIRST NAME', 'ALLEGATION', 'COMP SEX',
       'COMP RACE', 'full_name', 'clean_name_x', 'roster_name_match',
       'clean_name_y', 'gender', 'clean_race_ethnicity'],
      dtype='object')

In [6]:
#Changed column names for clarity purposes.
discipline.columns = [
    'control_number',
    'final_disposition_date',
    'final_disposition',
    'final_number_of_days',
    'last_name',
    'first_name',
    'allegation',
    'complaintant_gender',
    'complaintant_race',
    'dirty_full_name',
    'clean_discipline_name',
    'clean_roster_name',
    'artifact_roster_name',
    'officer_gender',
    'officer_race',
]

In [7]:
discipline.columns

Index(['control_number', 'final_disposition_date', 'final_disposition',
       'final_number_of_days', 'last_name', 'first_name', 'allegation',
       'complaintant_gender', 'complaintant_race', 'dirty_full_name',
       'clean_discipline_name', 'clean_roster_name', 'artifact_roster_name',
       'officer_gender', 'officer_race'],
      dtype='object')

In [8]:
discipline = discipline[
    (discipline.officer_race!='unknown') & (discipline.clean_roster_name!='no match') & ~(discipline.officer_race.isna())
].copy()

In [9]:
#filtered data by race and found number of instances that each race was involved in a disciplinary action.
total = discipline['officer_race'].value_counts().to_frame()
total

Unnamed: 0,officer_race
white,9997
black,1710
multiracial,374
hispanic,236
asian/pacific islander,177
native american,8


In [10]:
total['officer_race'].sum()
total['Percentage'] = (total['officer_race']/12502) * 100
total

Unnamed: 0,officer_race,Percentage
white,9997,79.963206
black,1710,13.677812
multiracial,374,2.991521
hispanic,236,1.887698
asian/pacific islander,177,1.415773
native american,8,0.06399


In [11]:
#Same as above, but tried a new technique, 'groupby'
discipline.groupby('officer_race')['clean_roster_name'].count()

officer_race
asian/pacific islander     177
black                     1710
hispanic                   236
multiracial                374
native american              8
white                     9997
Name: clean_roster_name, dtype: int64

In [12]:
#created a dataframe that found the number of different types of dispositions by race.
dispositions = discipline.groupby(['officer_race', 'final_disposition'])['final_disposition'].count().to_frame(name='amount').reset_index()

In [13]:
#Uncomment next cell to see dataframe. It's commented out because it's very long and didn't let me see the code while I was working.

In [14]:
#dispositions

In [15]:
#Filtered dataset above to find every instance where an officer was terminated.
terminated = dispositions.loc[dispositions['final_disposition'].str.contains('TERMINAT')]
terminated

Unnamed: 0,officer_race,final_disposition,amount
36,black,TERMINATED,4
37,black,TERMINATION,7
38,black,"TERMINATION, DEMOTION, SUSPENDED",1
78,multiracial,TERMINATION,1
133,white,TERMINATED,4
134,white,TERMINATION,16


In [16]:
#Filtered dataset above to find every instance where an officer was demoted.
demotion = dispositions.loc[dispositions['final_disposition'].str.contains('DEMOT')]
demotion

Unnamed: 0,officer_race,final_disposition,amount
15,black,DEMOTION,1
16,black,"DEMOTION, SUSPENDED",1
33,black,"SUSPENDED, DEMOTION",1
38,black,"TERMINATION, DEMOTION, SUSPENDED",1
90,white,DEMOTION,3
91,white,"DEMOTION, SUSPENDED",3
128,white,"SUSPENDED, DEMOTION",2


In [17]:
#Filtered dataset above to find every instance where an officer was suspended.
suspended = dispositions.loc[dispositions['final_disposition'] == 'SUSPENDED']

In [18]:
#Filtered dataset above to find every instance where an officer resigned.
resigned = dispositions.loc[dispositions['final_disposition'].str.contains('RESIGN')]
resigned

Unnamed: 0,officer_race,final_disposition,amount
31,black,RESIGNED,36
56,hispanic,RESIGNED,1
75,multiracial,RESIGNED,1
125,white,RESIGNED,55
126,white,RESIGNED/SETTLEMENT,1


In [19]:
#some suspensions come written as 'suspended' or 'suspension', so I filtered dataset to include every value that included 'suspen' so that includes all values
all_suspensions = dispositions.loc[dispositions['final_disposition'].str.contains('SUSPEN')]

In [20]:
#Uncomment next cell to see dataframe. It's commented out because it's very long and didn't let me see the code while I was working.

In [21]:
all_suspensions

Unnamed: 0,officer_race,final_disposition,amount
9,asian/pacific islander,SUSPENDED,36
16,black,"DEMOTION, SUSPENDED",1
32,black,SUSPENDED,504
33,black,"SUSPENDED, DEMOTION",1
34,black,"SUSPENDED, FORMAL COUNSELING",1
35,black,"SUSPENDED, WRITTEN",2
38,black,"TERMINATION, DEMOTION, SUSPENDED",1
57,hispanic,SUSPENDED,58
76,multiracial,SUSPENDED,76
77,multiracial,"SUSPENDED, NOT SUSTAINED",1


In [22]:
#Filtered out all suspensions so that I only see Black officers.
black_suspensions = all_suspensions.loc[all_suspensions['officer_race'] == 'black']

In [23]:
black_suspensions

Unnamed: 0,officer_race,final_disposition,amount
16,black,"DEMOTION, SUSPENDED",1
32,black,SUSPENDED,504
33,black,"SUSPENDED, DEMOTION",1
34,black,"SUSPENDED, FORMAL COUNSELING",1
35,black,"SUSPENDED, WRITTEN",2
38,black,"TERMINATION, DEMOTION, SUSPENDED",1


## Give analysis above, of all 1,710 dispositions involving Black officers, 29.8% were suspensions. - (510/1710) *100 = 29.82

In [24]:
#Filtered out all suspensions so that I only see white officers.
white_suspensions = all_suspensions.loc[all_suspensions['officer_race'] == 'white']

In [25]:
white_suspensions

Unnamed: 0,officer_race,final_disposition,amount
91,white,"DEMOTION, SUSPENDED",3
127,white,SUSPENDED,1964
128,white,"SUSPENDED, DEMOTION",2
129,white,"SUSPENDED, NOT SUSTAINED",2
130,white,"SUSPENDED, ORAL",2
131,white,"SUSPENDED, WRITTEN",10


## Give analysis above, of all 9,997 dispositions involving White officers, 19.8% were suspensions. - (1983/9997) *100 = 19.83

# Thus, dispositions resulting in suspensions were 10% higher for Black officers than white officers.
#
#
#

## **Below, I'm trying to find the percentage of unique officers suspended and categorized by race**

In [26]:
#Created new dataframe that classified the amount of dispositions classified by type of disposition, by officer, and by their race
df = discipline.groupby(['officer_race','clean_roster_name','final_disposition'])['final_disposition'].count().to_frame(name='amount').reset_index()

In [27]:
#Important thing about this dataframe, is that it includes the amount of time that an officer had a certain disposition.
#For example, a jacob paul was exonerated 20 times. In the original dataset, it would show 20 different rows for his exoneration.
df

Unnamed: 0,officer_race,clean_roster_name,final_disposition,amount
0,asian/pacific islander,a jacob paul,EXONERATED,20
1,asian/pacific islander,a jacob paul,MATTER OF RECORD,1
2,asian/pacific islander,a jacob paul,NOT SUSTAINED,5
3,asian/pacific islander,a jacob paul,"NOT SUSTAINED, NOT SUSTAINED, EXONERATED",1
4,asian/pacific islander,a jacob paul,"NOT SUSTAINED, UNFOUNDED",1
...,...,...,...,...
6350,white,vanbergen william zachary,EXONERATED,2
6351,white,waters william z,COUNSELED,1
6352,white,waters william z,EXONERATED,13
6353,white,waters william z,NOT SUSTAINED,4


In [28]:
#Then, I filter out all unique dispositions to only include suspensions.
suspensions_only = df.loc[df['final_disposition'].str.contains('SUSPEN')]

In [29]:
suspensions_only

Unnamed: 0,officer_race,clean_roster_name,final_disposition,amount
5,asian/pacific islander,a jacob paul,SUSPENDED,4
9,asian/pacific islander,aida m manalac,SUSPENDED,1
12,asian/pacific islander,akhom kethsada,SUSPENDED,2
15,asian/pacific islander,alejandro iii ramos t,SUSPENDED,2
18,asian/pacific islander,allen christian downen,SUSPENDED,2
...,...,...,...,...
6328,white,seth t vaughn,"SUSPENDED, WRITTEN",1
6331,white,skopek timothy,SUSPENDED,1
6337,white,steve underwood,SUSPENDED,2
6344,white,todd w watson,SUSPENDED,1


In [30]:
# Here, I add up the amount of suspensions that each officer had.
unique_suspensions = suspensions_only.groupby(['clean_roster_name','officer_race'])['amount'].sum().to_frame()

In [31]:
unique_suspensions

Unnamed: 0_level_0,Unnamed: 1_level_0,amount
clean_roster_name,officer_race,Unnamed: 2_level_1
a aldea lopez luis,hispanic,1
a almose thompson,black,2
a andrew mrosla,white,1
a anthony bourk,white,2
a archuleta vincent,white,3
...,...,...
skopek timothy,white,1
steve underwood,white,2
threadgill tonetta,black,1
todd w watson,white,1


In [32]:
#The code below is important becaues it counted the amount of times each race showed up. Now, there won't be various instances for each officer.
#This will show us the number of unique officers that were involved in suspensions.
total_unique = unique_suspensions.groupby('officer_race').count()
total_unique

Unnamed: 0_level_0,amount
officer_race,Unnamed: 1_level_1
asian/pacific islander,17
black,200
hispanic,27
multiracial,32
native american,1
white,966


In [33]:
total_unique['amount'].sum()
total_unique['Percentage'] = (total_unique['amount']/1243)*100
total_unique

Unnamed: 0_level_0,amount,Percentage
officer_race,Unnamed: 1_level_1,Unnamed: 2_level_1
asian/pacific islander,17,1.367659
black,200,16.090105
hispanic,27,2.172164
multiracial,32,2.574417
native american,1,0.080451
white,966,77.715205


In [34]:
#Found the number of unique officers by race that were involved in disciplinary action
discipline.drop_duplicates(subset='clean_roster_name',keep='first').officer_race.value_counts()

white                     1722
black                      306
multiracial                 59
hispanic                    47
asian/pacific islander      24
native american              3
Name: officer_race, dtype: int64

# 65% of all Black officers who faced some sort of discipline, were suspended.
# 56% of all white officers who faced some sort of discipline were suspended.

# Thus, Black officers facing discipline were supended 10% more than white officers.


#
#
#
# Now, I will see how discipline changed by year

In [35]:
#Created new 'Year' column 
discipline['Year'] = pd.DatetimeIndex(discipline['final_disposition_date']).year

In [36]:
yearly_dis = discipline.groupby('Year')['clean_roster_name'].count().to_frame().reset_index()
yearly_dis

Unnamed: 0,Year,clean_roster_name
0,2010,1628
1,2011,1390
2,2012,1401
3,2013,1359
4,2014,1215
5,2015,1243
6,2016,1091
7,2017,917
8,2018,826
9,2019,813


In [37]:
#Categorized number of discipines by year and by race
yearly_discipline = discipline.groupby(['Year','officer_race'])['officer_race'].count().to_frame(name='amount').reset_index()

In [38]:
yearly_discipline

Unnamed: 0,Year,officer_race,amount
0,2010,asian/pacific islander,27
1,2010,black,234
2,2010,hispanic,21
3,2010,multiracial,27
4,2010,native american,1
5,2010,white,1318
6,2011,asian/pacific islander,27
7,2011,black,207
8,2011,hispanic,27
9,2011,multiracial,23


In [39]:
#Filtered out data to show me only number of disciplines by year for Black officers.
#However, this includes instances where an officer may have had multiple incidents.
yearly_discipline.loc[yearly_discipline['officer_race']=='black']

Unnamed: 0,Year,officer_race,amount
1,2010,black,234
7,2011,black,207
13,2012,black,200
19,2013,black,198
24,2014,black,166
29,2015,black,152
35,2016,black,123
41,2017,black,126
46,2018,black,122
51,2019,black,108


In [40]:
yearly_discipline.loc[yearly_discipline['officer_race']=='white']

Unnamed: 0,Year,officer_race,amount
5,2010,white,1318
11,2011,white,1104
17,2012,white,1124
22,2013,white,1090
27,2014,white,968
33,2015,white,1009
39,2016,white,880
44,2017,white,709
49,2018,white,633
54,2019,white,667


### Now, I find number of disciplines per unique officer by year and race

In [41]:
#Categorize data so that each officer is one row and their number of incidents is a new column
unique_officers = discipline.groupby(['Year','officer_race','clean_roster_name'])['clean_roster_name'].count().to_frame(name='amount').reset_index()

In [42]:
unique_officers

Unnamed: 0,Year,officer_race,clean_roster_name,amount
0,2010,asian/pacific islander,a jacob paul,2
1,2010,asian/pacific islander,aida m manalac,1
2,2010,asian/pacific islander,akhom kethsada,3
3,2010,asian/pacific islander,burgess e michael,1
4,2010,asian/pacific islander,c christopher lo,2
...,...,...,...,...
6978,2020,white,r thomas white,1
6979,2020,white,ryan sherry thomas,1
6980,2020,white,sean weaver wesley,1
6981,2020,white,skopek timothy,2


In [43]:
#Filtered data so that we can see how the number of disciplines towards unique officers changed by year
unique_race_discipline = unique_officers.groupby(['Year','officer_race'])['officer_race'].count().to_frame(name='amount').reset_index()

In [44]:
unique_race_discipline

Unnamed: 0,Year,officer_race,amount
0,2010,asian/pacific islander,10
1,2010,black,106
2,2010,hispanic,9
3,2010,multiracial,12
4,2010,native american,1
5,2010,white,583
6,2011,asian/pacific islander,10
7,2011,black,107
8,2011,hispanic,10
9,2011,multiracial,11


In [45]:
#Eventually, I wanted to calculate percentage, so I had to find the total number of unique discipilnes by year, which is what I did here.
unique_officers_dis_yearly = unique_race_discipline.groupby('Year')['amount'].sum().to_frame().reset_index()
#changed to float for division purposes later
unique_officers_dis_yearly = unique_officers_dis_yearly.astype(int)
unique_officers_dis_yearly

Unnamed: 0,Year,amount
0,2010,721
1,2011,698
2,2012,720
3,2013,684
4,2014,701
5,2015,712
6,2016,654
7,2017,573
8,2018,535
9,2019,551


In [46]:
#Filtered data to find amount of unique black officers disciplined by year
unique_black_discipline = unique_race_discipline.loc[unique_race_discipline['officer_race']=='black']

In [47]:
#Didn't need race column anymore since this is only for Black officers
del unique_black_discipline['officer_race']
#Changed all objects to integers for eventual percentage calculation
unique_black_discipline = unique_black_discipline.astype(int).reset_index()
#Took off index column to ignore order from original dataset
del unique_black_discipline['index']
unique_black_discipline

Unnamed: 0,Year,amount
0,2010,106
1,2011,107
2,2012,96
3,2013,94
4,2014,88
5,2015,89
6,2016,73
7,2017,74
8,2018,69
9,2019,75


In [48]:
#Filtered data to find amount of unique white officers disciplined by year
unique_white_discipline = unique_race_discipline.loc[unique_race_discipline['officer_race']=='white']

In [49]:
#Didn't need race column anymore since this is only for white officers
del unique_white_discipline['officer_race']
#Changed all objects to integers for eventual percentage calculation
unique_white_discipline = unique_white_discipline.astype(int).reset_index()
#Took off index column to ignore order from original dataset
del unique_white_discipline['index']
unique_white_discipline

Unnamed: 0,Year,amount
0,2010,583
1,2011,558
2,2012,583
3,2013,557
4,2014,567
5,2015,580
6,2016,534
7,2017,459
8,2018,428
9,2019,444


In [50]:
#Added a new column that had percentages to Black officer data
unique_black_discipline['Percentage'] = ((unique_black_discipline['amount']/unique_officers_dis_yearly['amount'])*100).round(1)

In [51]:
unique_black_discipline

Unnamed: 0,Year,amount,Percentage
0,2010,106,14.7
1,2011,107,15.3
2,2012,96,13.3
3,2013,94,13.7
4,2014,88,12.6
5,2015,89,12.5
6,2016,73,11.2
7,2017,74,12.9
8,2018,69,12.9
9,2019,75,13.6


In [52]:
#Added Percentage column to white officer data
unique_white_discipline['Percentage'] = ((unique_white_discipline['amount']/unique_officers_dis_yearly['amount'])*100).round(1)

In [53]:
unique_white_discipline

Unnamed: 0,Year,amount,Percentage
0,2010,583,80.9
1,2011,558,79.9
2,2012,583,81.0
3,2013,557,81.4
4,2014,567,80.9
5,2015,580,81.5
6,2016,534,81.7
7,2017,459,80.1
8,2018,428,80.0
9,2019,444,80.6


## The percentage of unique white officers being disicplined hovered around 80% while for Blacks it was around 13%.

In [54]:
discipline.columns

Index(['control_number', 'final_disposition_date', 'final_disposition',
       'final_number_of_days', 'last_name', 'first_name', 'allegation',
       'complaintant_gender', 'complaintant_race', 'dirty_full_name',
       'clean_discipline_name', 'clean_roster_name', 'artifact_roster_name',
       'officer_gender', 'officer_race', 'Year'],
      dtype='object')

## Below, I'm seeing how disciplinary actions are distributed by gender

In [55]:
gender_dis = discipline.groupby(['officer_gender','final_disposition'])['final_disposition'].count().to_frame(name='amount').reset_index()

In [56]:
gender_dis

Unnamed: 0,officer_gender,final_disposition,amount
0,F,CHARGES DISMISSED,2
1,F,COUNSELED,15
2,F,DEMOTION,1
3,F,"DEMOTION, SUSPENDED",1
4,F,EXONERATED,328
5,F,"EXONERATED, EXONERATED",2
6,F,"EXONERATED, EXONERATED, EXONERATED",1
7,F,"EXONERATED, NOT SUSTAINED",5
8,F,"EXONERATED, UNFOUNDED",1
9,F,FORMAL COUNSELING,14


In [57]:
gender_dis.groupby('officer_gender')['amount'].sum()

officer_gender
F     1099
M    11403
Name: amount, dtype: int64

In [58]:
gender_suspen = gender_dis.loc[gender_dis['final_disposition'].str.contains('SUSPEN')]

In [59]:
gender_suspen

Unnamed: 0,officer_gender,final_disposition,amount
3,F,"DEMOTION, SUSPENDED",1
21,F,SUSPENDED,264
22,F,"SUSPENDED, WRITTEN",1
35,M,"DEMOTION, SUSPENDED",3
73,M,SUSPENDED,2375
74,M,"SUSPENDED, DEMOTION",3
75,M,"SUSPENDED, FORMAL COUNSELING",1
76,M,"SUSPENDED, NOT SUSTAINED",3
77,M,"SUSPENDED, ORAL",2
78,M,"SUSPENDED, WRITTEN",11


In [60]:
#Total number of times a female or male has been suspended. NOT unique.
gender_suspen.groupby('officer_gender')['amount'].sum().to_frame().reset_index()

Unnamed: 0,officer_gender,amount
0,F,266
1,M,2399


In [61]:
#Total number of times a female or male has been involved in disciplinary action. NOT unique.
discipline.groupby('officer_gender')['officer_gender'].count().to_frame()

Unnamed: 0_level_0,officer_gender
officer_gender,Unnamed: 1_level_1
F,1099
M,11403


## Here, I want to examine exonerations by race

In [73]:
discipline.columns

Index(['control_number', 'final_disposition_date', 'final_disposition',
       'final_number_of_days', 'last_name', 'first_name', 'allegation',
       'complaintant_gender', 'complaintant_race', 'dirty_full_name',
       'clean_discipline_name', 'clean_roster_name', 'artifact_roster_name',
       'officer_gender', 'officer_race', 'Year'],
      dtype='object')

In [78]:
dis_by_race = discipline.groupby(['officer_race','clean_roster_name','final_disposition'])['final_disposition'].count().to_frame(name='amount').reset_index().groupby(['officer_race','final_disposition'])['amount'].sum().to_frame().reset_index()

In [81]:
exonerated = dis_by_race.loc[dis_by_race['final_disposition'].str.contains('EXONERATED')]
exonerated

Unnamed: 0,officer_race,final_disposition,amount
1,asian/pacific islander,EXONERATED,62
2,asian/pacific islander,"EXONERATED, NOT SUSTAINED",1
6,asian/pacific islander,"NOT SUSTAINED, NOT SUSTAINED, EXONERATED",1
17,black,EXONERATED,437
18,black,"EXONERATED, EXONERATED",2
19,black,"EXONERATED, NOT SUSTAINED",9
20,black,"EXONERATED, UNFOUNDED",3
25,black,"NOT SUSTAINED, EXONERATED",3
46,hispanic,EXONERATED,66
47,hispanic,"EXONERATED, NOT SUSTAINED",1


In [86]:
ex = exonerated.groupby(['officer_race'])['amount'].sum().to_frame().reset_index()
ex

Unnamed: 0,officer_race,amount
0,asian/pacific islander,64
1,black,454
2,hispanic,68
3,multiracial,135
4,native american,2
5,white,3745


In [87]:
ex.amount.sum()

4468

In [90]:
ex['% of all exonerations'] = (ex['amount']/4468) *100

In [96]:
discipline_by_race = discipline.groupby(['officer_race'])['final_disposition'].count().to_frame().reset_index()
discipline_by_race

Unnamed: 0,officer_race,final_disposition
0,asian/pacific islander,177
1,black,1710
2,hispanic,236
3,multiracial,374
4,native american,8
5,white,9997


In [98]:
ex['% of all discipline'] = (ex['amount']/discipline_by_race['final_disposition'])*100
ex

Unnamed: 0,officer_race,amount,%,% of all exonerations,% of all discipline
0,asian/pacific islander,64,1.432408,1.432408,36.158192
1,black,454,10.161146,10.161146,26.549708
2,hispanic,68,1.521934,1.521934,28.813559
3,multiracial,135,3.021486,3.021486,36.096257
4,native american,2,0.044763,0.044763,25.0
5,white,3745,83.818263,83.818263,37.461238


## Of all 9997 allegations involving white officers, 37.5% were exonerated.
## Of all 1710 allegations involving Black officers, 26.5% were exonerated.

## Below, we're examining severe allegations

In [99]:
discipline['allegation'].value_counts()

CARE OF GOVERNMENT PROPERTY                          1284
COURTESY                                             1073
DEFICIENT OR INEFFICIENT PERFORMANCE OF DUTIES        382
OBSTRUCTION OF RIGHTS                                 257
DEFICIENT PERFORMANCE OF DUTIES                       231
                                                     ... 
APPEARING IN COURT - CALLED IN 2/26/14                  1
INABILITY TO APPEAR - CALLED IN 10/7/2010               1
ADHERENCE TO LAW - RAN STOP SIGN                        1
FTA CALL-IN 4/18/18                                     1
FIREARMS POLICY & PROCEDURE - SAFETY AND HANDLING       1
Name: allegation, Length: 4770, dtype: int64

In [100]:
discipline.groupby('officer_race')['allegation'].count()

officer_race
asian/pacific islander     177
black                     1709
hispanic                   236
multiracial                374
native american              8
white                     9994
Name: allegation, dtype: int64

In [101]:
allegations = discipline.groupby(['officer_race','allegation'])['allegation'].count().to_frame(name='amount').reset_index()
allegations

Unnamed: 0,officer_race,allegation,amount
0,asian/pacific islander,ABUSIVE TREATMENT,4
1,asian/pacific islander,ABUSIVE TREATMENT (13-0270934),1
2,asian/pacific islander,ABUSIVE TREATMENT/SELF IDENTIFICATION,1
3,asian/pacific islander,ACTING IN A CIVIL MATTER,1
4,asian/pacific islander,ADHERENCE TO LAW,1
...,...,...,...
5468,white,"WARRANTLESS SEARCHES, CRIME SCENE SEARCHES",1
5469,white,"WARRANTLESS SEARCHES, DEF. PERF. OF DUTIES",1
5470,white,"WARRANTLESS SEARCHES, LEGAL BASIS FOR STOPS",1
5471,white,WEAPON FOUND IN VEHICLE,1


In [133]:
biased = allegations.loc[allegations['allegation'].str.contains('BIASED BASED POLICING')]
biased = biased.groupby(['officer_race'])['amount'].sum().to_frame().reset_index()
biased

Unnamed: 0,officer_race,amount
0,black,11
1,hispanic,7
2,multiracial,8
3,white,188


In [135]:
discipline_by_race

Unnamed: 0,officer_race,final_disposition
0,asian/pacific islander,177
1,black,1710
2,hispanic,236
3,multiracial,374
4,native american,8
5,white,9997


In [144]:
print('% of discipline against white officers that involved biased based policing'), 188/9997 *100

% of discipline against white officers that involved biased based policing


(None, 1.8805641692507753)

In [103]:
discrimination = allegations.loc[allegations['allegation'].str.contains('DISCRIMINATION')]
discrimination.groupby(['officer_race'])['amount'].sum().to_frame().reset_index()

Unnamed: 0,officer_race,amount
0,asian/pacific islander,3
1,black,24
2,hispanic,3
3,multiracial,16
4,native american,1
5,white,191


In [146]:
print('% of discipline against white officers that involved discrimination'), 191/9997 *100

% of discipline against white officers that involved discrimination


(None, 1.9105731719515855)

In [104]:
abuse = allegations.loc[allegations['allegation'].str.contains('ABUSIVE TREATMENT')]
abuse.groupby(['officer_race'])['amount'].sum().to_frame().reset_index()

Unnamed: 0,officer_race,amount
0,asian/pacific islander,9
1,black,46
2,hispanic,3
3,multiracial,10
4,white,301


In [147]:
print('% of discipline against white officers that involved abuse'), 301/9997 *100

% of discipline against white officers that involved abuse


(None, 3.0109032709812946)

In [148]:
1.8805641692507753 + 1.9105731719515855 + 3.0109032709812946

6.802040612183655

# 6.8% of all allegations against white officers involved either abusive treatment, discrimination, and biased based policing

## Looking at allegations of courtesy

In [155]:
alleg_with_dis = discipline.groupby(['officer_race','allegation','final_disposition'])['allegation'].count().to_frame(name='amount').reset_index()

In [171]:
courtesy_with_dis = alleg_with_dis.loc[alleg_with_dis['allegation'].str.contains('COURTESY')]

In [172]:
courtesy_with_dis_total = courtesy_with_dis.groupby(['officer_race','final_disposition'])['amount'].sum().to_frame(name='amount').reset_index()

In [176]:
courtesy_with_dis_total.loc[courtesy_with_dis_total['final_disposition'].str.contains('SUSPEN')]

Unnamed: 0,officer_race,final_disposition,amount
19,black,SUSPENDED,14
28,hispanic,SUSPENDED,4
39,multiracial,SUSPENDED,1
43,native american,SUSPENDED,1
71,white,SUSPENDED,25
72,white,"SUSPENDED, ORAL",1


In [178]:
courtesy_with_dis_total.loc[courtesy_with_dis_total['final_disposition'].str.contains('TERMIN')]

Unnamed: 0,officer_race,final_disposition,amount


In [106]:
courtesy = allegations.loc[allegations['allegation'] == 'COURTESY']
courtesy

Unnamed: 0,officer_race,allegation,amount
64,asian/pacific islander,COURTESY,16
540,black,COURTESY,149
1130,hispanic,COURTESY,18
1345,multiracial,COURTESY,24
3666,white,COURTESY,866


In [150]:
(866/9997)*100

8.662598779633889

In [151]:
(149/1710)*100

8.71345029239766