In [3]:
# import os
# os.chdir("../")

In [4]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [5]:
df = fuse_allegation()

In [6]:
df.agency.nunique()

49

In [7]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [8]:
"""
Drop rows missing a disposition value
"""
df = df[~((df.disposition.fillna("") == ""))]

In [9]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

41

In [10]:
"""
Number of dispositions in total
"""
df.disposition.count()

12339

In [11]:
"""
Normalized disposition counts, i.e., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True).head(20)

sustained                                        0.318502
unfounded                                        0.213875
not sustained                                    0.162817
exonerated                                       0.115893
pending investigation                            0.039063
founded                                          0.020990
withdrawn; mediation                             0.019775
negotiated settlement                            0.013210
di-2                                             0.013129
no further investigation merited                 0.009077
non-sustained                                    0.008996
cancelled                                        0.005106
unsustained                                      0.005025
resigned                                         0.004619
sustained; resigned while under investigation    0.004133
withdrawn                                        0.003404
inconclusive                                     0.002999
duplicate alle

In [12]:
"""
Analyze disciplinary actions for sustained allegations
"""
df = df[df.disposition.isin(["sustained"])]

In [13]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [14]:
"""
Fill na values
"""
def fill_nas(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")
    return df

In [15]:
df = df.pipe(fill_nas)

In [16]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [17]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                         0.537659
letter of caution                                           0.095420
letter of reprimand                                         0.040458
termination                                                 0.020356
                                                              ...   
letter of caution; firearm safety training; range master    0.000254
suspension 3 days                                           0.000254
11-day suspension                                           0.000254
5 day suspension without pay                                0.000254
56-day suspension                                           0.000254
Name: action, Length: 193, dtype: float64

In [18]:
def drop_na_action_values(df):
    df.loc[:, "action"] = df.action.str.replace(r"^n/a$", "", regex=True)
    return df[~((df.action.fillna("") == ""))]

In [19]:
df = df.pipe(drop_na_action_values)

In [20]:
"""
Disciplinary action data 
"""
df.action.value_counts(normalize=True)

letter of caution                                                  0.206498
letter of reprimand                                                0.087555
termination                                                        0.044053
1-day suspension                                                   0.033040
                                                                     ...   
8-hour driving school/45-day loss of unit                          0.000551
suspension 5 days; special evaluation                              0.000551
5-day suspension; 24-hours loss of pay; 5-day loss of seniority    0.000551
8-day suspension                                                   0.000551
56-day suspension                                                  0.000551
Name: action, Length: 191, dtype: float64

In [21]:
"""
Group similar disicplinary actions
"""
def group_actions(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")\
        .str.replace(r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended', regex=True)\
        .str.replace(r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand", regex=True)\
        .str.replace(r"^termination$", "terminated", regex=True)\
        .str.replace(r"(.+)?train(.+)?", "training", regex=True)\
        .str.replace(r"(.+)?demotion(.+)?", "demoted", regex=True)\
        .str.replace(r"(.+)?loss of unit(.+)?", "lost unit privileges", regex=True)\
        .str.replace(r"(.+)?counsel(ing)?(.+)?", "counseled", regex=True)\
        .str.replace(r"(.+)?warning(.+)?", "warned", regex=True)\
        .str.replace(r"(.+)?(resignation|resigned)(.+)", "resigned", regex=True)
    return df

In [22]:
df = df.pipe(group_actions)

In [23]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand             0.444383
suspended                               0.280837
warned                                  0.076542
terminated                              0.060022
counseled                               0.053414
resigned                                0.028634
training                                0.009361
discharged                              0.005507
demoted                                 0.004405
lod                                     0.002753
conference worksheet                    0.002203
arrested; terminated                    0.002203
arrested; resigned                      0.002203
hold in abeyance                        0.002203
disciplined                             0.001652
unknown                                 0.001652
lost unit privileges                    0.001652
performance log                         0.001652
green sheet                             0.001652
separated from employment               0.001101
1-day driving school