In [24]:
# import os
# os.chdir("../")

In [25]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [26]:
df = fuse_allegation()

In [27]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [28]:
"""
Drop rows missing a disposition value
"""
# Treat NaN as an empty string, then keep only rows whose disposition is non-empty.
df = df[df.disposition.fillna("") != ""]

In [29]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

40

In [30]:
"""
Number of dispositions in total
"""
df.disposition.count()

12255

In [31]:
"""
Normalized disposition counts, e.g., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True).head(20)

sustained                                        0.313178
unfounded                                        0.214525
not sustained                                    0.164259
exonerated                                       0.116687
pending investigation                            0.039331
founded                                          0.021134
withdrawn; mediation                             0.019910
negotiated settlement                            0.013301
di-2                                             0.013219
no further investigation merited                 0.009139
non-sustained                                    0.009058
cancelled                                        0.005141
unsustained                                      0.005059
resigned                                         0.004978
sustained; resigned while under investigation    0.004162
withdrawn                                        0.003427
inconclusive                                     0.003101
duplicate alle

In [32]:
"""
Analyze disciplinary actions for sustained allegations
"""
# Keep only rows whose disposition is exactly "sustained". The explicit copy
# makes the result an independent frame, so assigning into its columns in
# later cells cannot raise SettingWithCopyWarning or touch the unfiltered data.
df = df[df.disposition.isin(["sustained"])].copy()

In [33]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [34]:
"""
Fill na values
"""
def fill_nas(df):
    """Return a new frame whose ``action`` column is normalized and NaN-free.

    Each ``action`` value is lower-cased and stripped of surrounding
    whitespace; missing values are replaced with the placeholder ``"n/a"``.

    Args:
        df: DataFrame with an ``action`` column of strings (may contain NaN).

    Returns:
        A new DataFrame with the cleaned ``action`` column. The input frame
        is not modified, so the function is safe to use on a filtered slice
        and as a ``.pipe`` step.
    """
    # Build the cleaned column first, then attach it with ``assign`` so the
    # caller's frame is never written to in place.
    cleaned_action = df.action.str.lower().str.strip().fillna("n/a")
    return df.assign(action=cleaned_action)

In [35]:
df = df.pipe(fill_nas)

In [36]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [37]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                                                    0.548202
letter of caution                                                                      0.097707
letter of reprimand                                                                    0.041167
termination                                                                            0.020844
                                                                                         ...   
resigned before disposition                                                            0.000261
2-day suspension;30-day loss of unit                                                   0.000261
letter of reprimand/8-hour driving school/10-day loss of unit                          0.000261
2-day suspension; verbal judo training; attaining respect class; early intervention    0.000261
6-day suspension; 26-hours loss of pay; 6-day loss of seniority                        0.000261
Name: action, Length: 189, dtype: float6

In [38]:
def drop_na_action_values(df):
    """Return only the rows that carry a real disciplinary action.

    A row is dropped when its ``action`` is missing (NaN), an empty string,
    or exactly the placeholder ``"n/a"``. Values that merely contain
    ``"n/a"`` as part of a longer string are kept.

    Args:
        df: DataFrame with a string ``action`` column.

    Returns:
        A new DataFrame containing the surviving rows. The input frame is
        not modified.
    """
    # Blank out the exact "n/a" placeholder on a derived Series rather than
    # writing into the caller's frame.
    action = df.action.str.replace(r"^n/a$", "", regex=True)
    has_action = action.fillna("") != ""
    # Attach the column before filtering: ``action`` and ``has_action`` share
    # df's index, so no index realignment is needed.
    return df.assign(action=action)[has_action]

In [39]:
df = df.pipe(drop_na_action_values)

In [40]:
"""
Normalized disciplinary action counts after dropping rows with no recorded action
"""
df.action.value_counts(normalize=True)

letter of caution                                                                                                                        0.216388
letter of reprimand                                                                                                                      0.091171
termination                                                                                                                              0.046163
reprimand                                                                                                                                0.031737
                                                                                                                                           ...   
verbal counseling/30-day loss of unit                                                                                                    0.000577
downgraded to dm-1                                                                                                          

In [41]:
"""
Group similar disciplinary actions
"""
# Ordered (regex, label) rules used by group_actions. Order matters: rules are
# applied one after another, so an action mentioning several outcomes (e.g.
# "2-day suspension; training") takes the label of the first matching rule.
_ACTION_GROUPS = (
    (r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended'),
    (r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand"),
    (r"^termination$", "terminated"),
    (r"(.+)?train(.+)?", "training"),
    (r"(.+)?demotion(.+)?", "demoted"),
    (r"(.+)?loss of unit(.+)?", "lost unit privileges"),
    (r"(.+)?counsel(ing)?(.+)?", "counseled"),
    (r"(.+)?warning(.+)?", "warned"),
    # The trailing group is optional, like every rule above, so a bare
    # "resignation" or "...; resigned" is grouped as well.
    (r"(.+)?(resignation|resigned)(.+)?", "resigned"),
)


def group_actions(df):
    """Collapse free-text disciplinary actions into a small set of labels.

    The ``action`` column is lower-cased, stripped, and NaN-filled with
    ``"n/a"``. Each rule in ``_ACTION_GROUPS`` is then applied in order,
    replacing any value that matches the rule's regex with its label.
    Values that match no rule are kept as-is.

    Args:
        df: DataFrame with a string ``action`` column.

    Returns:
        A new DataFrame with the grouped ``action`` column. The input frame
        is not modified.
    """
    action = df.action.str.lower().str.strip().fillna("n/a")
    for pattern, label in _ACTION_GROUPS:
        action = action.str.replace(pattern, label, regex=True)
    return df.assign(action=action)

In [42]:
df = df.pipe(group_actions)

In [43]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand            0.455857
suspended                              0.278130
warned                                 0.080208
terminated                             0.054241
counseled                              0.050202
resigned                               0.027121
training                               0.009810
discharged                             0.005770
demoted                                0.004039
lod                                    0.002885
conference worksheet                   0.002308
hold in abeyance                       0.002308
unknown                                0.001731
performance log                        0.001731
disciplined                            0.001731
green sheet                            0.001731
lost unit privileges                   0.001731
dmvr                                   0.001154
deferred/handled upon rehire           0.001154
1-day driving school                   0.001154
retired                                0