In [57]:
# import os
# os.chdir("../")

In [58]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [59]:
df = fuse_allegation()

In [60]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [61]:
"""
Drop rows missing a disposition value
"""
# Treat NaN and the empty string alike: keep only rows with a non-blank disposition.
df = df.loc[df["disposition"].fillna("").ne("")]

In [62]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

40

In [63]:
"""
Number of dispositions in total
"""
df.disposition.count()

12255

In [64]:
"""
Normalized disposition counts, i.e., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True)

sustained                 0.313178
unfounded                 0.214525
not sustained             0.164259
exonerated                0.116687
pending investigation     0.039331
                            ...   
ragas                     0.000082
dismissed                 0.000082
other                     0.000082
adm rem; not sustained    0.000082
retired                   0.000082
Name: disposition, Length: 136, dtype: float64

In [65]:
"""
Analyze disciplinary actions for sustained allegations
"""
# Keep only rows whose disposition is exactly "sustained" (verified by the next cell).
df = df.loc[df["disposition"].isin(["sustained"])]

In [66]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [67]:
"""
Fill na values
"""
def fill_nas(df):
    """Normalize the ``action`` column and label missing values as "n/a".

    Each action string is lower-cased and stripped of surrounding
    whitespace; missing values are then replaced with the literal "n/a".

    Args:
        df: DataFrame with an ``action`` column of strings (may contain NaN).

    Returns:
        A new DataFrame with the normalized ``action`` column. The input
        frame is left unmodified, so this is safe to call on a filtered
        slice and safe to re-run.
    """
    # Build the column separately and attach it with assign() instead of
    # writing through df.loc, which would mutate the caller's frame.
    action = df["action"].str.lower().str.strip().fillna("n/a")
    return df.assign(action=action)

In [68]:
df = df.pipe(fill_nas)

In [69]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [70]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                                                    0.548202
letter of caution                                                                      0.097707
letter of reprimand                                                                    0.041167
termination                                                                            0.020844
                                                                                         ...   
suspension 3 days                                                                      0.000261
5 day suspension|loss of seniority                                                     0.000261
suspension|eap                                                                         0.000261
2-day suspension; verbal judo training; attaining respect class; early intervention    0.000261
                                                                                       0.000261
Name: action, Length: 189, dtype: float64

In [71]:
def drop_na_action_values(df):
    """Drop rows that carry no disciplinary action.

    A row is dropped when its ``action`` is missing, the empty string, or
    exactly "n/a".

    Args:
        df: DataFrame with an ``action`` column of strings (may contain NaN).

    Returns:
        A new DataFrame containing only the rows with a real action value.
        The input frame is left unmodified.
    """
    # Blank out the exact "n/a" placeholder so it is dropped together with
    # empty and missing values.
    action = df["action"].str.replace(r"^n/a$", "", regex=True)
    has_action = action.fillna("") != ""
    return df.assign(action=action)[has_action]

In [72]:
df = df.pipe(drop_na_action_values)

In [73]:
"""
Disciplinary action data 
"""
df.action.value_counts(normalize=True)

letter of caution                                0.216388
letter of reprimand                              0.091171
termination                                      0.046163
1-day suspension                                 0.031737
                                                   ...   
8-hour transfer                                  0.000577
56-day suspension                                0.000577
no discipline                                    0.000577
other                                            0.000577
5 day suspension|loss of senority|loss of pay    0.000577
Name: action, Length: 187, dtype: float64

In [74]:
"""
Group similar disciplinary actions
"""
def group_actions(df):
    """Collapse free-text disciplinary actions into a small set of categories.

    The ``action`` column is lower-cased, stripped, and missing values are
    labelled "n/a". Each rule below is then applied in order; a value that
    matches a rule's pattern is replaced in full by that rule's label.
    Values matching no rule are kept as they are.

    Args:
        df: DataFrame with an ``action`` column of strings (may contain NaN).

    Returns:
        A new DataFrame with the grouped ``action`` column. The input frame
        is left unmodified.
    """
    # Ordered (pattern, label) rules. None of the labels matches a later
    # pattern, so for multi-action entries the first matching rule wins.
    rules = [
        (r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended'),
        (r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand"),
        (r"^termination$", "terminated"),
        (r"(.+)?train(.+)?", "training"),
        (r"(.+)?demotion(.+)?", "demoted"),
        (r"(.+)?loss of unit(.+)?", "lost unit privileges"),
        (r"(.+)?counsel(ing)?(.+)?", "counseled"),
        (r"(.+)?warning(.+)?", "warned"),
        # The trailing group must be optional; otherwise a bare "resignation"
        # (nothing after the keyword) is never mapped to "resigned".
        (r"(.+)?(resignation|resigned)(.+)?", "resigned"),
    ]

    action = df["action"].str.lower().str.strip().fillna("n/a")
    for pattern, label in rules:
        action = action.str.replace(pattern, label, regex=True)
    return df.assign(action=action)

In [75]:
df = df.pipe(group_actions)

In [76]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand            0.455857
suspended                              0.278130
warned                                 0.080208
terminated                             0.054241
counseled                              0.050202
resigned                               0.027121
training                               0.009810
discharged                             0.005770
demoted                                0.004039
lod                                    0.002885
conference worksheet                   0.002308
hold in abeyance                       0.002308
performance log                        0.001731
green sheet                            0.001731
disciplined                            0.001731
unknown                                0.001731
lost unit privileges                   0.001731
retired                                0.001154
deferred/handled upon rehire           0.001154
remedial                               0.001154
dmvr                                   0