In [154]:
# import os
# os.chdir("../")

In [155]:
import pandas as pd

from fuse.all import fuse_allegation
from lib.clean import standardize_desc_cols

In [156]:
df = fuse_allegation()

In [157]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [158]:
"""
Drop rows missing a disposition value
"""
# Missing (NaN) and empty-string dispositions are treated alike: both are dropped.
df = df.loc[df.disposition.fillna("").ne("")]

In [159]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

39

In [160]:
"""
Number of dispositions in total
"""
df.disposition.count()

12523

In [161]:
"""
Normalized disposition counts, i.e., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True)

sustained                                                        0.307275
unfounded                                                        0.213447
not sustained                                                    0.165376
exonerated                                                       0.115947
pending investigation                                            0.039208
                                                                   ...   
board                                                            0.000080
failure to turn in reports in a timely manner                    0.000080
louisiana state report 08-2879 "no contest" plea / susp. sent    0.000080
no conclusion given                                              0.000080
not sustained; sustained                                         0.000080
Name: disposition, Length: 140, dtype: float64

In [162]:
"""
Analyze disciplinary actions for sustained allegations
"""
# Exact match only: compound values such as "not sustained; sustained" are excluded.
df = df.loc[df.disposition.eq("sustained")]

In [163]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [164]:
"""
Lower-case and trim the action column, then fill missing values with 'n/a'
"""
def fill_nas(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame whose ``action`` column is normalized.

    Action strings are lower-cased and stripped of surrounding whitespace;
    missing values are replaced with the placeholder ``"n/a"``.

    Args:
        df: Frame with an ``action`` column of strings (may contain NaN/None).

    Returns:
        A new DataFrame with the same rows, index and columns as ``df`` and
        the normalized ``action`` column. ``df`` itself is not modified.
    """
    normalized = df.action.str.lower().str.strip().fillna("n/a")
    # assign() builds a new frame instead of writing through df.loc, so the
    # frame passed in (typically a filtered slice of a larger frame) is left
    # untouched and no SettingWithCopyWarning is raised.
    return df.assign(action=normalized)

In [165]:
df = df.pipe(fill_nas)

In [166]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [167]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                              0.549376
letter of caution                                                0.097713
letter of reprimand                                              0.041060
termination                                                      0.020270
                                                                   ...   
72-hour suspension                                               0.000260
5-day loss of unit                                               0.000260
letter of reprimand/8-hour driving school/45-day loss of unit    0.000260
letter of reprimand;60-day loss of unit                          0.000260
verbal counseling/30-day loss of unit                            0.000260
Name: action, Length: 190, dtype: float64

In [168]:
def drop_na_action_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows of ``df`` that carry a recorded action.

    A row is dropped when its ``action`` is missing, an empty string, or
    exactly the placeholder ``"n/a"``.

    Args:
        df: Frame with an ``action`` column of strings (may contain NaN/None).

    Returns:
        A new DataFrame holding the surviving rows with their original index
        labels. ``df`` itself is not modified.
    """
    # Blank out the exact "n/a" placeholder so that it is caught by the same
    # emptiness test as missing values.
    cleaned = df.action.str.replace(r"^n/a$", "", regex=True)
    has_action = cleaned.fillna("") != ""
    # assign() returns a new frame, so the caller's frame is never written to.
    return df.assign(action=cleaned)[has_action]

In [169]:
df = df.pipe(drop_na_action_values)

In [170]:
"""
Normalized disciplinary action counts after dropping rows with no recorded action
"""
df.action.value_counts(normalize=True)

letter of caution                                                  0.216965
letter of reprimand                                                0.091171
termination                                                        0.045009
reprimand                                                          0.031737
                                                                     ...   
5-day suspension; 24-hours loss of pay; 5-day loss of seniority    0.000577
suspension 3 days                                                  0.000577
1 week detail suspension                                           0.000577
deficiency                                                         0.000577
28-day suspension;loss ed 6-months;evaluation                      0.000577
Name: action, Length: 188, dtype: float64

In [171]:
"""
Group similar disciplinary actions
"""
def group_actions(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse free-text disciplinary actions into a small set of categories.

    The ``action`` column is lower-cased, stripped, and missing values are
    filled with ``"n/a"``. Each rule below is then applied in order: when a
    rule's keyword occurs in a string, the whole string is replaced by the
    rule's label. Strings that match no rule are kept as they are.

    Args:
        df: Frame with an ``action`` column of strings (may contain NaN/None).

    Returns:
        A new DataFrame with the same rows, index and columns as ``df`` and
        the grouped ``action`` column. ``df`` itself is not modified.
    """
    # Ordered (pattern, label) pairs. Order matters for multi-action strings:
    # the earliest matching rule wins, because each pattern swallows the text
    # around its keyword and none of the later patterns match an earlier
    # label. E.g. "letter of reprimand;60-day loss of unit" is grouped as a
    # reprimand, not as a loss of unit.
    rules = (
        # any spelling that contains "suspen" (suspend, suspended, suspension)
        (r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended'),
        (r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand"),
        # exact match only; longer strings mentioning termination are kept
        (r"^termination$", "terminated"),
        (r"(.+)?train(.+)?", "training"),
        (r"(.+)?demotion(.+)?", "demoted"),
        (r"(.+)?loss of unit(.+)?", "lost unit privileges"),
        (r"(.+)?counsel(ing)?(.+)?", "counseled"),
        (r"(.+)?warning(.+)?", "warned"),
    )

    grouped = df.action.str.lower().str.strip().fillna("n/a")
    for pattern, label in rules:
        grouped = grouped.str.replace(pattern, label, regex=True)

    # assign() returns a new frame instead of writing through df.loc, so the
    # caller's frame (a filtered slice in this notebook) is left untouched.
    return df.assign(action=grouped)

In [172]:
df = df.pipe(group_actions)

In [173]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand            0.456434
suspended                              0.278130
warned                                 0.080208
terminated                             0.054241
counseled                              0.050202
resigned                               0.023658
training                               0.009810
discharged                             0.005770
demoted                                0.004039
lod                                    0.002885
conference worksheet                   0.002308
hold in abeyance                       0.002308
resigned in lieu of termination        0.001731
green sheet                            0.001731
lost unit privileges                   0.001731
unknown                                0.001731
disciplined                            0.001731
performance log                        0.001731
deferred/handled upon rehire           0.001154
1-day driving school                   0.001154
retired                                0