In [91]:
# import os
# os.chdir("../")

In [92]:
import pandas as pd

from clean.post_officer_history import clean

In [93]:
# Load the officer employment history through the project's `clean()` helper
# (imported above from clean.post_officer_history). The result is used as a
# pandas DataFrame by every cell below.
df = clean()

standardize_from_lookup_table: unmatched sequences:
  {"lacommission onlaw'], ['9/17/2021']],", 'sex: ', "[['stt tammany parishs sor range'], ['116 05/31/2013'], ['tallant, joel']],", "[['lafayetteparish: so'], ['agency name'], ['full-time 7/1/1992'], ['status hire date separation date reason for separation']],", "[['orleans parishs so'], ['full-time 3/19/2019']],", "sex:'"}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, col] = (
  .str.replace(r"univ pd (.+)", r"\1-university-pd")


In [94]:
"""
Preview
"""
df.head(n=10)

Unnamed: 0,history_id,agency,last_name,first_name,middle_name,left_reason,hire_date,left_date,employment_status,uid,switched_job
6,9,lsuhsc-new-orleans-university-pd,Jackson,Taskaras,,,11/25/2013,8/1/2016,full-time,5c2f8fb54e90e64189f2bd426083034f,True
7,9,orleans-levee-pd,Jackson,Taskaras,,,8/2/2016,,full-time,58b594b3ad3cc815b5db5af86c220c87,True
9,13,new-orleans-pd,Wheeler,Kevin,L,,12/10/2007,11/27/2012,full-time,82504d15567e036bf5cba6072f1d25e0,True
10,13,orleans-levee-pd,Wheeler,Kevin,L,,5/9/2016,,full-time,f3a807b5117fa0d88d7d6dc66a97e207,True
11,13,loyola-university-pd,Wheeler,Kevin,L,voluntary resignation,11/1/2019,1/21/2020,full-time,deeb30995ad06909936910a3339767a8,True
16,19,tulane-university-pd,Downs,Joseph,C,,10/1/2003,,full-time,9636468f281ab87e0588ba7e1232e282,True
17,19,orleans-civil-so,Downs,Joseph,C,,11/24/2000,,full-time,f5c08c176c9683844efc8074a578b033,True
19,23,orleans-so,Llopis,Shelbytheresa,,,6/15/2017,3/8/2018,full-time,830d5dc96d1871c3db978088b634bf82,True
20,23,xavier-university-pd,Llopis,Shelbytheresa,,voluntary resignation,8/1/2018,9/11/2020,full-time,2c94792ecb5ecf077f278e23a5d78087,True
21,23,tulane-university-pd,Llopis,Shelbytheresa,,,9/11/2020,,full-time,9af764ae0cf48233080f5b273d7b75a8,True


In [95]:
"""
Filter out officers who haven't switched employers
"""
# The explicit comparison to True is kept: the dtype of `switched_job` is not
# visible from this notebook, so a bare boolean mask might not be equivalent.
df = df[df["switched_job"] == True]

In [96]:
"""
Act 272 of 2017 requires all law enforcement agencies to report changes to an officer's employment status
---------------------------------------------------------------------------------------------------------
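Keep only records whose left_date year is 2018-2022 (drops 2017 and earlier, and rows with no left_date)
Filter out records whose employment_status is retired or deceased
Fill missing left_reason values with n/a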
"""
def filter_years(df, years=("2018", "2019", "2020", "2021", "2022")):
    """Keep only rows whose ``left_date`` falls in one of ``years``.

    The year is read as the first run of four digits in ``left_date`` and is
    stored in a ``left_year`` column on the returned frame. Rows with a missing
    ``left_date``, or one that contains no four-digit year, are dropped.

    Args:
        df: DataFrame with a string-valued ``left_date`` column (the ``.str``
            accessor is used on it).
        years: Years to keep. Values are compared as strings, so ints are
            accepted too. Defaults to 2018 through 2022.

    Returns:
        A new DataFrame with the extra ``left_year`` column. The frame passed
        in is not modified.
    """
    # expand=False returns a Series directly instead of a one-column frame.
    left_year = df.left_date.str.extract(r"(\d{4})", expand=False)
    wanted = [str(year) for year in years]
    # assign() builds a new frame rather than writing into a possibly sliced
    # input, which avoids both SettingWithCopyWarning and caller-side mutation.
    return df.assign(left_year=left_year)[left_year.isin(wanted)]

def filter_employment_status(df):
    """Drop retired/deceased records and scrub "deceased" from ``left_reason``.

    Rows whose ``employment_status`` is exactly "retired" or "deceased" are
    removed. In the remaining rows, every literal occurrence of "deceased"
    inside ``left_reason`` is replaced with an empty string; those rows are
    kept, only the text is cleared.

    Args:
        df: DataFrame with ``employment_status`` and string-valued
            ``left_reason`` columns.

    Returns:
        A new DataFrame. The frame passed in is not modified.
    """
    excluded = df.employment_status.isin(["retired", "deceased"])
    kept = df[~excluded]
    # assign() replaces the column on a new frame. Writing through .loc on the
    # filtered slice is what raises SettingWithCopyWarning on older pandas.
    return kept.assign(
        left_reason=kept.left_reason.str.replace("deceased", "", regex=False)
    )

def fill_na_values(df):
    """Replace missing ``left_reason`` values with the string "n/a".

    Args:
        df: DataFrame with a ``left_reason`` column.

    Returns:
        A new DataFrame with the filled column. The frame passed in is not
        modified, so the function is safe to use inside a ``.pipe`` chain on
        a filtered slice.
    """
    return df.assign(left_reason=df.left_reason.fillna("n/a"))

In [97]:
df = (
    df.pipe(filter_years)
    .pipe(filter_employment_status)
    .pipe(fill_na_values)
)

In [98]:
"""
Number of officers who have switched employers
"""
df["history_id"].nunique()

148

In [99]:
"""
Officers who switched employers in these years moved between the following number of agencies
"""
df["agency"].nunique()

71

In [100]:
"""
A left reason is provided for about 49% of the remaining employment records (rows, not distinct officers)
"""
df["left_reason"].value_counts(normalize=True)

                         0.511236
voluntary resignation    0.387640
termination              0.084270
resignation              0.016854
Name: left_reason, dtype: float64

In [101]:
def drop_na_rows(df):
    """Drop rows that carry no usable ``left_reason``.

    A row is dropped when its ``left_reason`` is missing (NaN/None), an empty
    string, or the "n/a" placeholder.

    Args:
        df: DataFrame with a ``left_reason`` column.

    Returns:
        A new DataFrame holding only rows with a reported reason. The frame
        passed in is not modified.
    """
    reason = df.left_reason
    # NaN never equals "", so missing values need their own check; otherwise
    # they would survive a function whose purpose is to remove them.
    missing = reason.isna() | reason.isin(["", "n/a"])
    return df[~missing]

In [102]:
df = drop_na_rows(df)

In [103]:
"""
Of the records that report a left_reason, the most common is voluntary resignation (~79%), followed by termination (~17%) and resignation (~3%).
"""
df["left_reason"].value_counts(normalize=True)

voluntary resignation    0.793103
termination              0.172414
resignation              0.034483
Name: left_reason, dtype: float64