In [None]:
import os
# Step the working directory up one level before the project-local imports
# (`fuse`, `lib`) in the next cell -- presumably so they resolve from the
# repository root; confirm against the repo layout.
# NOTE: the path is relative, so this cell is not idempotent: every re-run
# climbs one more directory. Restart the kernel before running it again.
os.chdir("../")

In [70]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [71]:
# Load the allegation data via the project helper `fuse_allegation` (imported
# from fuse.all). The cells below treat the result as a pandas DataFrame and
# read its `disposition`, `agency` and `action` columns.
df = fuse_allegation()

In [72]:
# normalise the text formatting of the disposition column with the shared project helper
df = standardize_desc_cols(df, ["disposition"])

In [73]:
# keep only the rows that actually carry a disposition (neither NaN nor an empty string)
df = df.loc[df["disposition"].fillna("").ne("")]

In [74]:
# how many distinct agencies contribute disposition data (the data spans 2004 - 2021)
df["agency"].nunique()

39

In [75]:
# total number of non-null disposition values
df["disposition"].count()

12523

In [76]:
# share of each disposition value (fractions sum to 1)
# e.g. roughly 31% of investigation dispositions were sustained
df["disposition"].value_counts(normalize=True)

sustained                  0.307275
unfounded                  0.213447
not sustained              0.165376
exonerated                 0.115947
pending investigation      0.039208
                             ...   
unfounded; sustained       0.000080
retired                    0.000080
suspended investigation    0.000080
no conclusion given        0.000080
complaint withdrawn        0.000080
Name: disposition, Length: 140, dtype: float64

In [77]:
# narrow the frame to sustained allegations; the disciplinary-action analysis below uses only these rows
df = df.loc[df["disposition"].eq("sustained")]

In [78]:
# sanity check: 'sustained' should be the only disposition left after the filter
df["disposition"].unique()

array(['sustained'], dtype=object)

In [79]:
# group similar actions
def clean_action(df):
    """Collapse free-text disciplinary actions into a few canonical labels.

    The ``action`` column is lower-cased, stripped of surrounding whitespace,
    and missing values become the placeholder ``"n/a"``. The rewrite rules
    below are then applied in order; each pattern also swallows the text
    around its keyword, so the whole value is replaced by the label:

    1. contains ``suspen...``                        -> ``"suspended"``
    2. contains ``reprimand``/``written``/``letter`` -> ``"written or verbal reprimand"``
    3. exactly ``terminated``                        -> ``"termination"``
    4. contains ``train``                            -> ``"training"``

    Order matters: a value that mentions both a suspension and a letter ends
    up as ``"suspended"`` because rule 1 runs first.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string-valued ``action`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the cleaned ``action`` column. The input frame is
        not modified, so this is safe to call on a filtered slice (no
        SettingWithCopyWarning, no silent change to the caller's data).
    """
    # (pattern, replacement) pairs, applied top to bottom
    rules = (
        (r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended'),
        (r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand"),
        (r"^terminated$", "termination"),
        (r"(.+)?train(.+)?", "training"),
    )

    action = df["action"].str.lower().str.strip().fillna("n/a")
    for pattern, label in rules:
        action = action.str.replace(pattern, label, regex=True)

    # assign() builds a new frame instead of writing through df.loc
    return df.assign(action=action)

In [80]:
# run the action-grouping rules over the sustained allegations
df = clean_action(df)

In [81]:
# normalise the text formatting of the action column with the shared project helper
df = standardize_desc_cols(df, ["action"])

In [82]:
# share of each action label; "n/a" marks rows with no recorded action (~55%),
# so disciplinary action data is available for ~45% of sustained allegations
df["action"].value_counts(normalize=True)

n/a                                          0.549376
written or verbal reprimand                  0.205561
suspended                                    0.125260
termination                                  0.024428
counseled                                    0.011175
resigned                                     0.010655
verbal counseling                            0.009356
training                                     0.004418
discharged                                   0.002599
lod                                          0.001299
hold in abeyance                             0.001040
conference worksheet                         0.001040
demotion                                     0.001040
counseling                                   0.001040
disciplined                                  0.000780
green sheet                                  0.000780
unknown                                      0.000780
performance log                              0.000780
resigned in lieu of terminat

In [83]:
def drop_na_action_values(df):
    """Return only the rows that carry a real disciplinary action.

    Values equal to the ``"n/a"`` placeholder are blanked, then every row
    whose ``action`` is blank or missing is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string-valued ``action`` column.

    Returns
    -------
    pandas.DataFrame
        A new, filtered frame. The input frame is not modified, so this is
        safe to call on a filtered slice (no SettingWithCopyWarning, no
        silent change to the caller's data).
    """
    action = df["action"].str.replace(r"^n/a$", "", regex=True)
    has_action = action.fillna("") != ""
    # assign() builds a new frame instead of writing through df.loc
    return df.assign(action=action)[has_action]

In [84]:
# discard sustained allegations that have no recorded disciplinary action
df = drop_na_action_values(df)

In [85]:
# breakdown of disciplinary actions among the ~45% of sustained allegations that have one.
# per the output below: ~46% written or verbal reprimands; ~28% suspensions (of some form);
# ~5% terminations. An earlier note also cited 7% warnings, but no such category is visible
# in the output shown here -- TODO confirm.
df["action"].value_counts(normalize=True)

written or verbal reprimand                  0.456434
suspended                                    0.278130
termination                                  0.054241
counseled                                    0.024812
resigned                                     0.023658
verbal counseling                            0.020773
training                                     0.009810
discharged                                   0.005770
lod                                          0.002885
hold in abeyance                             0.002308
conference worksheet                         0.002308
counseling                                   0.002308
demotion                                     0.002308
resigned in lieu of termination              0.001731
unknown                                      0.001731
performance log                              0.001731
green sheet                                  0.001731
disciplined                                  0.001731
remedial                    