In [1]:
# import os
# os.chdir("../")

In [2]:
import pandas as pd

from clean.post_officer_history import clean

In [3]:
# clean() is imported from clean.post_officer_history; the rest of the
# notebook treats its return value as a pandas DataFrame.
df = clean()

standardize_from_lookup_table: unmatched sequences:
  {'sex: ', "sex:'"}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, col] = (
  df.agency.str.strip()


In [4]:
"""
Preview
"""
# Show only the first 10 rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,history_id,agency,last_name,first_name,middle_name,left_reason,hire_date,left_date,employment_status,uid,switched_job,source_agency
1,2,west-monroe-pd,Wall,Dennis,Edward,,2/23/1999,,full-time,d548b5bb6c965c77b3d698c9dc4660de,True,post
2,2,monroe-pd,Wall,Dennis,Edward,,10/1/2004,,full-time,21ed5093c58402cf547423c4617f380d,True,post
10,4,new-orleans-pd,Moore,Matthewi,D,,12/26/2004,,full-time,7d810c7cb89e7f404f1a2b2ffa18b846,False,post
14,9,lsuhsc-new-orleans-university-pd,Jackson,Taskaras,,,11/25/2013,8/1/2016,full-time,5c2f8fb54e90e64189f2bd426083034f,True,post
15,9,orleans-levee-pd,Jackson,Taskaras,,,8/2/2016,,full-time,58b594b3ad3cc815b5db5af86c220c87,True,post
17,13,new-orleans-pd,Wheeler,Kevin,L,,12/10/2007,11/27/2012,full-time,82504d15567e036bf5cba6072f1d25e0,True,post
18,13,orleans-levee-pd,Wheeler,Kevin,L,,5/9/2016,,full-time,f3a807b5117fa0d88d7d6dc66a97e207,True,post
19,13,loyola-university-pd,Wheeler,Kevin,L,voluntary resignation,11/1/2019,1/21/2020,full-time,deeb30995ad06909936910a3339767a8,True,post
23,17,tulane-university-pd,Solorzano,Rodrigo,Miguel,,1/9/2015,,full-time,0a618e1f9a72d6e5bd6bf1cffff87a99,False,post
24,19,tulane-university-pd,Downs,Joseph,C,,10/1/2003,,full-time,9636468f281ab87e0588ba7e1232e282,True,post


In [5]:
"""
Filter out officers who haven't switched employers
"""
# Take an explicit copy of the filtered rows: later cells assign columns on
# this frame, and writing to a boolean-mask slice is what pandas reports as
# SettingWithCopyWarning.
df = df[df.switched_job == True].copy()

In [6]:
"""
Act 272 of 2017 requires all law enforcement agencies to report changes to an officer's employment status
---------------------------------------------------------------------------------------------------------
Keep only rows whose left_date year is 2018 through 2022
Filter out officers who retired or are deceased
Fill missing left_reason values with 'n/a'
"""
def filter_years(df, years=("2018", "2019", "2020", "2021", "2022")):
    """Keep only the rows whose ``left_date`` year is one of ``years``.

    A ``left_year`` column holding the four-digit year parsed from
    ``left_date`` is added to the returned frame. The frame passed in is not
    modified.

    Args:
        df: DataFrame with a string ``left_date`` column (the preview shows
            values such as "8/1/2016").
        years: Iterable of year strings to keep. Defaults to 2018-2022.

    Returns:
        A new DataFrame with the matching rows only. Rows whose ``left_date``
        is missing or contains no four-digit run are dropped.
    """
    # Match four consecutive digits rather than any four word characters, so
    # stray letters in the date field cannot be mistaken for a year.
    left_year = df.left_date.str.extract(r"(\d{4})", expand=False)

    # assign() builds a new frame, leaving the caller's frame untouched and
    # avoiding a write through .loc on what may be a slice.
    return df.assign(left_year=left_year)[left_year.isin(list(years))]

def filter_employment_status(df):
    """Drop retired and deceased officers and blank out "deceased" reasons.

    Rows whose ``employment_status`` is "retired" or "deceased" are removed.
    In the remaining rows every occurrence of the literal text "deceased" in
    ``left_reason`` is replaced with an empty string.

    Args:
        df: DataFrame with ``employment_status`` and string ``left_reason``
            columns.

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    # One isin() pass replaces the two consecutive equality filters.
    kept = df[~df.employment_status.isin(["retired", "deceased"])]

    # assign() returns a fresh frame instead of writing through .loc on the
    # filtered slice, which is what triggers SettingWithCopyWarning.
    return kept.assign(
        left_reason=kept.left_reason.str.replace("deceased", "", regex=False)
    )

def fill_na_values(df):
    """Replace missing ``left_reason`` entries with the placeholder "n/a".

    The column is overwritten on the frame that was passed in, and that same
    frame object is returned.
    """
    filled_reasons = df["left_reason"].fillna("n/a")
    df.loc[:, "left_reason"] = filled_reasons
    return df

In [7]:
# Apply the three filters in order: year window, employment status, then
# the missing-reason fill.
df = (
    df.pipe(filter_years)
    .pipe(filter_employment_status)
    .pipe(fill_na_values)
)

In [8]:
"""
Number of officers who have switched employers
"""
# nunique() counts distinct history_id values, so several rows sharing one
# history_id (as in the preview) are counted once.
df.history_id.nunique()

129

In [9]:
"""
Officers who switched employers in these years moved between the following number of agencies
"""
# Distinct agency values among the rows that survived the filters.
df.agency.nunique()

64

In [10]:
"""
A left reason is provided for about 55% of the rows (the blank entry below accounts for the other 45%)
"""
# normalize=True reports each left_reason as a share of rows, not a count.
df.left_reason.value_counts(normalize=True)

voluntary resignation      0.464516
                           0.451613
termination                0.058065
resignation                0.019355
involuntary resignation    0.006452
Name: left_reason, dtype: float64

In [11]:
def drop_na_rows(df):
    """Return the rows of ``df`` whose ``left_reason`` is not an empty string.

    Only exact empty strings are treated as "no reason given"; every other
    value, including missing values, is kept. The frame passed in is not
    modified.

    NOTE(review): rows holding the "n/a" placeholder written by
    fill_na_values are kept as well -- confirm that is intended.

    Args:
        df: DataFrame with a ``left_reason`` column.

    Returns:
        A new DataFrame without the empty-reason rows.
    """
    # The previous regex replace of r"^$" with "" swapped empty strings for
    # empty strings, so the comparison below is all that is needed.
    return df[df.left_reason != ""]

In [12]:
# Remove the rows that carry no left_reason before summarising again.
df = drop_na_rows(df)

In [13]:
"""
Among the rows with a left reason (about 55% of the rows), the most commonly reported left_reason is voluntary resignation, followed by termination and resignation.
"""
# normalize=True reports each left_reason as a share of rows, not a count.
df.left_reason.value_counts(normalize=True)

voluntary resignation      0.847059
termination                0.105882
resignation                0.035294
involuntary resignation    0.011765
Name: left_reason, dtype: float64