In [55]:
# import os
# os.chdir("../")

In [56]:
import pandas as pd

from clean.post_officer_history import clean

In [None]:
# Build the working frame with clean() from clean.post_officer_history;
# every later cell filters or summarises this `df`.
df = clean()

In [58]:
"""
Preview the first 10 rows returned by clean()
"""
df.head(10)

Unnamed: 0,history_id,agency,last_name,first_name,middle_name,left_reason,hire_date,left_date,employment_status,uid,switched_job,source_agency
0,0,jefferson-so,Alvarado,Julio,C,,2/28/2005,,full-time,cd9f1f562357f0f3307603cafda8d032,True,post
1,0,new-orleans-so,Alvarado,Julio,C,,5/17/2002,,full-time,1554036f19bab5d19a59a508d773984e,True,post
3,1,new-orleans-pd,Vaught,Ryan,Clinton,,10/7/2004,9/17/2016,full-time,3df20fb350cbf9b1e748c0f671dcf008,True,post
4,1,jefferson-so,Vaught,Ryan,Clinton,,9/19/2016,,reserve,5242a69e525917a838f93af4196ec656,True,post
5,2,harahan-pd,Mehrtens,Steven,Allen,,9/1/2011,,full-time,8c342854dcfcd41aad83ddcefe72f104,True,post
6,2,jefferson-so,Mehrtens,Steven,Allen,,10/3/2016,,full-time,74f6f774e80a9c7f3d7c3a6555cf8217,True,post
8,4,jefferson-so,Gaudet,Myron,A,,8/14/1992,,full-time,50e1899068d40e08d23b920a9321ea60,False,post
9,5,new-orleans-so,Dowling,Keith,Edward,,7/1/1993,4/1/1994,full-time,95e86285a377f1f40a0efad49209e74e,True,post
10,5,st-bernard-so,Dowling,Keith,Edward,,1/12/1998,2/1/2001,full-time,22b2cfbe116d149542e913d226a1192f,True,post
11,5,st-tammany-so,Dowling,Keith,Edward,,11/15/2002,,full-time,029bfb6d971f9054825a5c2ca848e716,True,post


In [59]:
"""
Keep only rows flagged switched_job == True, i.e. drop officers who haven't switched employers
"""
df = df.loc[df["switched_job"].eq(True)]

In [60]:
"""
Act 272 of 2017 requires all law enforcement agencies to report changes to an officer's employment status
---------------------------------------------------------------------------------------------------------
Drop rows whose left_date falls after 2017
Drop rows whose employment_status is retired or deceased
Fill missing left_reason values with "n/a"
"""
def filter_years(df, cutoff_year=2017):
    """Drop rows whose ``left_date`` year is later than ``cutoff_year``.

    The year is taken as the first run of four digits in ``left_date`` and is
    stored, as a string, in a new ``left_year`` column. Rows with no parsable
    year (for example a missing ``left_date``) are kept and their ``left_year``
    stays missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``left_date`` column.
    cutoff_year : int, default 2017
        Last year to keep; rows with a later ``left_year`` are removed.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``left_year`` added. The input ``df`` is not modified.
    """
    # \d{4} rather than \w{4}: only digits can form a year.
    left_year = df["left_date"].str.extract(r"(\d{4})", expand=False)

    # assign() returns a copy, so the caller's frame is left untouched and no
    # SettingWithCopyWarning is raised when `df` is itself a filtered slice.
    df = df.assign(left_year=left_year)

    # Compare numerically instead of against a hard-coded list of years, so the
    # filter keeps working for years beyond those originally enumerated.
    # Unparsable/missing years become NaN and must count as "not after cutoff".
    numeric_year = pd.to_numeric(left_year, errors="coerce")
    is_after_cutoff = numeric_year.gt(cutoff_year).fillna(False).astype(bool)
    return df[~is_after_cutoff]

def filter_employment_status(df):
    """Return the rows whose employment_status is neither "retired" nor "deceased".

    Rows with a missing employment_status are kept.
    """
    excluded_statuses = ["retired", "deceased"]
    is_excluded = df.employment_status.isin(excluded_statuses)
    return df[~is_excluded]

def fill_na_values(df):
    """Return a copy of ``df`` with missing ``left_reason`` values set to "n/a".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``left_reason`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input ``df`` is not modified.
    """
    # assign() builds a new frame instead of writing through df.loc, so the
    # caller's data is not changed as a side effect of the pipe step.
    return df.assign(left_reason=df["left_reason"].fillna("n/a"))

In [61]:
# Run the three cleaning steps defined above, in order.
df = (
    df.pipe(filter_years)
    .pipe(filter_employment_status)
    .pipe(fill_na_values)
)

In [62]:
"""
Number of officers who have switched employers (distinct history_id values)
"""
df["history_id"].nunique()

565

In [63]:
"""
Number of distinct agencies that the officers who switched employers in these years moved between
"""
df["agency"].nunique()

186

In [64]:
"""
A left reason is provided for about 7% of the employment records (rows); the remainder are "n/a"
"""
df["left_reason"].value_counts(normalize=True)

n/a                      0.931707
resignation              0.034959
voluntary resignation    0.017886
termination              0.015447
Name: left_reason, dtype: float64

In [65]:
def drop_na_rows(df):
    """Return the rows of ``df`` that carry a reported ``left_reason``.

    A row is dropped when its ``left_reason`` is exactly "n/a" or the empty
    string. Truly missing (NaN) values are not matched here and are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``left_reason`` column.

    Returns
    -------
    pandas.DataFrame
        The filtered rows; the input ``df`` is not modified.
    """
    # One membership mask replaces the earlier "rewrite n/a to '' in place, then
    # filter on ''" approach, which altered the caller's frame as a side effect.
    is_unreported = df["left_reason"].isin(["n/a", ""])
    return df[~is_unreported]

In [66]:
# Keep only the rows that have a reported left_reason.
df = drop_na_rows(df)

In [67]:
"""
Among the roughly 7% of records with a reported left_reason, resignation is the most common,
followed by voluntary resignation and termination.
"""
df["left_reason"].value_counts(normalize=True)

resignation              0.511905
voluntary resignation    0.261905
termination              0.226190
Name: left_reason, dtype: float64