In [1]:
# import os
# os.chdir("../")

In [2]:
import pandas as pd

from clean.post_officer_history import clean

In [3]:
# Build the working frame by running clean(), imported above from clean.post_officer_history.
df = clean()

standardize_from_lookup_table: unmatched sequences:
  {'mate', 'plaqu', 'reserve 4/1/2001 5/1/2002', '-oo of icer - basic holiday inn north 8/3/2018'}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
  df.agency.str.strip()
  df.agency.str.strip()


In [4]:
"""
Preview
"""
# Display the first 10 rows of df as this cell's output.
df.head(10)

Unnamed: 0,history_id,agency,last_name,first_name,middle_name,left_reason,hire_date,left_date,employment_status,uid,switched_job,source_agency
0,0,jefferson-so,Alvarado,Julio,C,,2/28/2005,,full-time,533c4b1488acdc53a407ded1a3aead5a,True,post
1,0,new-orleans-so,Alvarado,Julio,C,,5/17/2002,,full-time,cb96d0f672d1d8326bd5179e4b096008,True,post
3,1,new-orleans-pd,Vaught,Ryan,Clinton,,10/7/2004,9/17/2016,full-time,25476f4abd0349a7642be58e7884a089,True,post
4,1,jefferson-so,Vaught,Ryan,Clinton,,9/19/2016,,,87b74f5bcb15ada873c385eeae1a7a82,True,post
5,2,harahan-pd,Mehrtens,Steven,Allen,,9/1/2011,,full-time,95133fd49fc34420fc33fb2b4b93a15a,True,post
6,2,jefferson-so,Mehrtens,Steven,Allen,,10/3/2016,,full-time,8d32027409ab418474a016d5eb6327d7,True,post
7,3,jefferson-so,Estrada,Manuel,E,,6/2/2003,,,9bc1cc3c98b08a83dffaa020de7fbb2c,False,post
8,4,jefferson-so,Gaudet,Myron,A,,11/18/1994,,full-time,19a938673d27ca9d02ae75f058d18d40,False,post
11,5,st-tammany-so,Dowling,Keith,Edward,,11/15/2002,8/1/2009,full-time,d8771bbcf4768c937b62c1886e20642f,True,post
12,5,causeway-pd,Dowling,Keith,Edward,,7/29/2012,,full-time,eb2328ff27c678f8979d7771b563f38e,True,post


In [5]:
"""
Filter out officers who haven't switched employers
"""
# Keep only rows flagged as switched_job. The explicit .copy() makes the result
# an independent frame, so the later column assignments on it do not raise
# SettingWithCopyWarning.
df = df.loc[df.switched_job.eq(True)].copy()

In [6]:
"""
Act 272 of 2017 requires all law enforcement agencies to report changes to an officer's employment status
---------------------------------------------------------------------------------------------------------
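Filter for the years 2017 - present (NOTE: filter_years below actually excludes rows whose left_date year is 2018 - 2022; confirm which is intended)
Filter out officers who retired or are deceased
Fill missing left_reason values with 'no left reason'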
"""
def filter_years(df, years=("2018", "2019", "2020", "2021", "2022")):
    """Add a ``left_year`` column and drop rows whose left year is in ``years``.

    The year is the first run of four word characters found in ``left_date``
    (e.g. "9/17/2016" -> "2016"). Rows where nothing is extracted get a
    missing ``left_year`` and are kept.

    NOTE(review): the cell narrative says "Filter for the years 2017 - present",
    but this function *excludes* 2018-2022 rather than keeping them. That
    behaviour is preserved here; confirm which one is intended.

    Args:
        df: frame with a string ``left_date`` column.
        years: year strings to exclude; defaults to 2018 through 2022.

    Returns:
        A new frame with ``left_year`` added and the matching rows removed.
        The frame passed in is not modified.
    """
    left_year = df.left_date.str.extract(r"(\w{4})")[0]
    # assign() builds a new frame instead of writing into the caller's frame,
    # which may be a slice of another frame (SettingWithCopyWarning).
    with_year = df.assign(left_year=left_year)
    return with_year[~with_year.left_year.isin(list(years))]

def filter_employment_status(df):
    """Return the rows of ``df`` whose ``employment_status`` is neither
    "retired" nor "deceased".
    """
    status = df.employment_status
    excluded = (status == "retired") | (status == "deceased")
    return df[~excluded]

def fill_na_values(df):
    """Replace missing ``left_reason`` values with the "no left reason" placeholder.

    Args:
        df: frame with a ``left_reason`` column.

    Returns:
        A new frame with the filled column. The frame passed in is not
        modified, which also avoids SettingWithCopyWarning when ``df`` is a
        filtered slice of another frame.
    """
    return df.assign(left_reason=df.left_reason.fillna("no left reason"))

In [7]:
# Run the three steps in order: year filter, employment-status filter, NA fill.
df = (
    df.pipe(filter_years)
    .pipe(filter_employment_status)
    .pipe(fill_na_values)
)

In [8]:
"""
Number of officers who have switched employers
"""
# Count distinct history_id values in the filtered frame.
df["history_id"].nunique()

642

In [9]:
"""
Officers who switched employers in these years moved between the following number of agencies
"""
# Count distinct agency values in the filtered frame.
df["agency"].nunique()

213

In [10]:
"""
A left reason is provided for only about 5% of the records (roughly 95% are 'no left reason')
"""
# Share of rows per left_reason value, including the 'no left reason' placeholder.
df["left_reason"].value_counts(normalize=True)

no left reason           0.946429
resignation              0.031429
voluntary resignation    0.015714
termination              0.006429
Name: left_reason, dtype: float64

In [11]:
def drop_na_rows(df):
    """Drop rows whose ``left_reason`` is the "no left reason" placeholder.

    Rows with an empty-string ``left_reason`` are dropped as well, matching
    the earlier implementation, which blanked the placeholder and then
    removed every empty value.

    Args:
        df: frame with a ``left_reason`` column.

    Returns:
        The remaining rows. The frame passed in is not modified; the earlier
        version overwrote the placeholder with "" in the caller's frame.
    """
    placeholders = ["no left reason", ""]
    return df[~df.left_reason.isin(placeholders)]

In [12]:
# Keep only the rows that carry an actual left reason.
df = drop_na_rows(df)

In [14]:
"""
Of the roughly 5% of records with a left reason, the most commonly reported left_reason is resignation, followed by voluntary resignation and termination.
"""
# Share of each reported left_reason among the remaining rows.
# NOTE(review): the saved output for this cell shows raw counts (dtype int64),
# so it was produced without normalize=True; re-run the cell to refresh it.
df["left_reason"].value_counts(normalize=True)

resignation              44
voluntary resignation    22
termination               9
Name: left_reason, dtype: int64