In [1]:
import os
os.chdir("../")

In [2]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [3]:
df = fuse_allegation()

In [4]:
df.agency.nunique()

51

In [5]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [6]:
"""
Drop rows missing a disposition value
"""
df = df[~((df.disposition.fillna("") == ""))]

In [7]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

43

In [8]:
"""
Number of dispositions in total
"""
df.disposition.count()

12344

In [9]:
"""
Normalized disposition counts, i.e., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True).head(20)

sustained                                        0.318454
unfounded                                        0.213707
not sustained                                    0.162751
exonerated                                       0.115846
pending investigation                            0.039047
founded                                          0.020982
withdrawn; mediation                             0.019767
negotiated settlement                            0.013205
di-2                                             0.013124
no further investigation merited                 0.009073
non-sustained                                    0.008992
cancelled                                        0.005104
unsustained                                      0.005023
resigned                                         0.004618
sustained; resigned while under investigation    0.004132
withdrawn                                        0.003402
inconclusive                                     0.002997
duplicate alle

In [10]:
"""
Analyze disciplinary actions for sustained allegations
"""
df = df[df.disposition.isin(["sustained"])]

In [11]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [12]:
"""
Fill na values
"""
def fill_nas(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")
    return df

In [13]:
df = df.pipe(fill_nas)

In [14]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [15]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                                0.537522
letter of caution                                                  0.095396
letter of reprimand                                                0.040448
termination                                                        0.021114
                                                                     ...   
quit                                                               0.000254
6-day suspension; 26-hours loss of pay; 6-day loss of seniority    0.000254
1-day suspension; 12-hours loss of pay                             0.000254
loss of vehicle privileges (2 weeks)                               0.000254
28-day suspension;loss ed 6-months;evaluation                      0.000254
Name: action, Length: 193, dtype: float64

In [16]:
def drop_na_action_values(df):
    df.loc[:, "action"] = df.action.str.replace(r"^n/a$", "", regex=True)
    return df[~((df.action.fillna("") == ""))]

In [17]:
df = df.pipe(drop_na_action_values)

In [18]:
"""
Disciplinary action data 
"""
df.action.value_counts(normalize=True)

letter of caution                                                                                                                        0.206384
letter of reprimand                                                                                                                      0.087507
termination                                                                                                                              0.045680
1-day suspension                                                                                                                         0.033021
                                                                                                                                           ...   
demotion to lieutenant                                                                                                                   0.000550
letter of reprimand/8-hour driving school/45-day loss of unit                                                               

In [19]:
"""
Group similar disicplinary actions
"""
def group_actions(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")\
        .str.replace(r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended', regex=True)\
        .str.replace(r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand", regex=True)\
        .str.replace(r"^termination$", "terminated", regex=True)\
        .str.replace(r"(.+)?train(.+)?", "training", regex=True)\
        .str.replace(r"(.+)?demotion(.+)?", "demoted", regex=True)\
        .str.replace(r"(.+)?loss of unit(.+)?", "lost unit privileges", regex=True)\
        .str.replace(r"(.+)?counsel(ing)?(.+)?", "counseled", regex=True)\
        .str.replace(r"(.+)?warning(.+)?", "warned", regex=True)\
        .str.replace(r"(.+)?(resignation|resigned)(.+)", "resigned", regex=True)
    return df

In [20]:
df = df.pipe(group_actions)

In [21]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand             0.444139
suspended                               0.280682
warned                                  0.076500
terminated                              0.061640
counseled                               0.053385
resigned                                0.027518
training                                0.009356
discharged                              0.005504
demoted                                 0.004403
lod                                     0.002752
conference worksheet                    0.002201
arrested; resigned                      0.002201
arrested; terminated                    0.002201
hold in abeyance                        0.002201
unknown                                 0.001651
performance log                         0.001651
lost unit privileges                    0.001651
green sheet                             0.001651
disciplined                             0.001651
retired                                 0.001101
dmvr                