In [1]:
# import os
# os.chdir("../")

In [2]:
import pandas as pd
from lib.clean import standardize_desc_cols

In [3]:
df = pd.read_csv("data/fuse/allegation.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
"""
Number of agencies in LLEAD with allegation data
"""
df.agency.nunique()

51

In [5]:
"""
Standardize text formatting for disposition column
"""
df = df.pipe(standardize_desc_cols, ["disposition"])

In [6]:
"""
Drop rows missing a disposition value
"""
df = df[~((df.disposition.fillna("") == ""))]

In [7]:
"""
Number of agencies for which we have disposition data (data spans 2003 - 2021)
"""
df.agency.nunique()

43

In [8]:
"""
Number of dispositions in total
"""
df.disposition.count()

12343

In [9]:
"""
Normalized disposition counts, i.e., 31% of dispositions were sustained after an internal investigation
"""
df.disposition.value_counts(normalize=True).head(20)

sustained                                        0.318723
unfounded                                        0.213805
not sustained                                    0.162764
exonerated                                       0.115855
pending investigation                            0.039050
founded                                          0.020984
withdrawn; mediation                             0.019768
negotiated settlement                            0.013206
di-2                                             0.013125
no further investigation merited                 0.009074
non-sustained                                    0.008993
cancelled                                        0.005104
unsustained                                      0.005023
resigned                                         0.004618
sustained; resigned while under investigation    0.004132
withdrawn                                        0.003403
inconclusive                                     0.002998
duplicate alle

In [10]:
"""
Analyze disciplinary actions for sustained allegations
"""
df = df[df.disposition.isin(["sustained"])]

In [11]:
"""
df is filtered for sustained dispositions
"""
df.disposition.unique()

array(['sustained'], dtype=object)

In [12]:
"""
Fill na values
"""
def fill_nas(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")
    return df

In [13]:
df = df.pipe(fill_nas)

In [14]:
"""
Standardize text formatting for disciplinary action column
"""
df = df.pipe(standardize_desc_cols, ["action"])

In [15]:
"""
45% of sustained allegations have a corresponding disciplinary action
"""
df.action.value_counts(normalize=True)

n/a                                                           0.537112
letter of caution                                             0.095323
letter of reprimand                                           0.040417
termination                                                   0.021098
                                                                ...   
seperation                                                    0.000254
demoted to deputy                                             0.000254
letter of caution/8-hour driving school/5-day loss of unit    0.000254
letter of reprimand/8-hour driving school                     0.000254
72-hour suspension                                            0.000254
Name: action, Length: 193, dtype: float64

In [16]:
def drop_na_action_values(df):
    df.loc[:, "action"] = df.action.str.replace(r"^n/a$", "", regex=True)
    return df[~((df.action.fillna("") == ""))]

In [17]:
df = df.pipe(drop_na_action_values)

In [18]:
"""
Disciplinary action data 
"""
df.action.value_counts(normalize=True)

letter of caution                            0.206044
letter of reprimand                          0.087363
termination                                  0.045604
1-day suspension                             0.032967
                                               ...   
6-day suspension                             0.000549
demotion to lieutenant                       0.000549
8-hour driving school/45-day loss of unit    0.000549
seperation                                   0.000549
72-hour suspension                           0.000549
Name: action, Length: 191, dtype: float64

In [19]:
"""
Group similar disicplinary actions
"""
def group_actions(df):
    df.loc[:, 'action'] = df.action.str.lower().str.strip().fillna("n/a")\
        .str.replace(r'(.+)?suspend?e?d?s?i?o?n?(.+)?', 'suspended', regex=True)\
        .str.replace(r"(.+)?(reprimand|written|letter)(.+)?", "written or verbal reprimand", regex=True)\
        .str.replace(r"^termination$", "terminated", regex=True)\
        .str.replace(r"(.+)?train(.+)?", "training", regex=True)\
        .str.replace(r"(.+)?demotion(.+)?", "demoted", regex=True)\
        .str.replace(r"(.+)?loss of unit(.+)?", "lost unit privileges", regex=True)\
        .str.replace(r"(.+)?counsel(ing)?(.+)?", "counseled", regex=True)\
        .str.replace(r"(.+)?warning(.+)?", "warned", regex=True)\
        .str.replace(r"(.+)?(resignation|resigned)(.+)", "resigned", regex=True)
    return df

In [20]:
df = df.pipe(group_actions)

In [21]:
"""
Disciplinary action data after grouping similar actions
"""
df.action.value_counts(normalize=True)

written or verbal reprimand             0.443956
suspended                               0.280220
warned                                  0.076374
terminated                              0.061538
counseled                               0.053297
resigned                                0.028571
training                                0.009341
discharged                              0.005495
demoted                                 0.004396
lod                                     0.002747
arrested; terminated                    0.002198
conference worksheet                    0.002198
hold in abeyance                        0.002198
arrested; resigned                      0.002198
lost unit privileges                    0.001648
performance log                         0.001648
green sheet                             0.001648
disciplined                             0.001648
unknown                                 0.001648
deferred/handled upon rehire            0.001099
1-day driving school