In [109]:
import pandas as pd

In [110]:
def read_tbl(path="../data/input/006_PR_AllOfficers_w_AppointmentsAndFinalActions.csv"):
    """Load the officers CSV and return it with standardised column names.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV to read. Defaults to the input file this
        notebook was written against, so ``read_tbl()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        Only the eight renamed columns, in this order: ``agency_name``,
        ``person_nbr``, ``first_name``, ``middle_name``, ``last_name``,
        ``start_date``, ``end_date``, ``separation_reason``. Any other column
        present in the file is dropped.

    Raises
    ------
    KeyError
        If one of the expected source columns is missing from the file.
    """
    # Source column -> output column. The insertion order of this mapping is
    # also the column order of the returned frame.
    column_map = {
        "Agency": "agency_name",
        "PostId": "person_nbr",
        "First_Name": "first_name",
        "Middle_Name": "middle_name",
        "Last_Name": "last_name",
        "AppointedDate": "start_date",
        "TerminationDate": "end_date",
        "Term_Desc": "separation_reason",
    }

    raw = pd.read_csv(path)
    renamed = raw.rename(columns=column_map)
    return renamed[list(column_map.values())]


def clean_separation_reason(df):
    """Return a copy of ``df`` with a normalised ``separation_reason`` column.

    The column is lower-cased, stripped of surrounding whitespace, and missing
    values become the empty string. The substrings ``misconduct - no``,
    ``active``, ``medical`` and ``other/unknown`` are then removed, and
    ``misconduct - yes`` is collapsed to ``misconduct``.

    The input frame is left untouched; a new frame is returned so the function
    is safe to use in a ``.pipe`` chain and to re-run on the same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string-valued ``separation_reason`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns, ``separation_reason`` replaced.
    """
    cleaned = (
        df["separation_reason"]
        .str.lower()
        .str.strip()
        .fillna("")
        # Substring removal, not whole-value matching: any occurrence of
        # these tokens is deleted from the value.
        .str.replace(r"misconduct - no|active|medical|other\/unknown", "", regex=True)
        .str.replace(r"misconduct - yes", "misconduct", regex=True)
        # Second pass kept from the original pipeline; it only has an effect
        # when the removals above splice fragments into a new "active".
        .str.replace(r"active", "", regex=True)
    )
    # assign() builds a new frame instead of writing into the caller's one.
    return df.assign(separation_reason=cleaned)


def clean_agency(df):
    """Return a copy of ``df`` with a normalised ``agency_name`` column.

    Names are lower-cased and stripped, then a fixed sequence of replacements
    expands abbreviations (``az``, ``dept``, ``pd``, ``enf``, ``cty``, ``&``),
    completes endings that are cut off in the source (``departm``,
    ``departme``, ``contr``, ``animal se``), inserts a space after a trailing
    hyphen and after a comma between words, and drops a leading ``ret`` /
    ``ret,`` prefix.

    The input frame is left untouched; a new frame is returned so the function
    is safe to use in a ``.pipe`` chain and to re-run on the same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string-valued ``agency_name`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns, ``agency_name`` replaced.
    """
    cleaned = (
        df["agency_name"]
        .str.lower()
        .str.strip()
        .str.replace(r"^az ", "arizona ", regex=True)
        .str.replace(r" dept ", " department ", regex=False)
        .str.replace(r"dept$", "department", regex=True)
        .str.replace(r"departme$", "department", regex=True)
        .str.replace(r" enf ", " enforcement ", regex=False)
        .str.replace(r" az ", " arizona ", regex=False)
        .str.replace(r" & ", " and ", regex=False)
        .str.replace(r"contr$", "control", regex=True)
        .str.replace(r"pd$", "police department", regex=True)
        .str.replace(r"departm$", "department", regex=True)
        .str.replace(r" cty ", " county ", regex=False)
        # "-ecd" at the end of the name becomes "- ecd".
        .str.replace(r"-(\w+)$", r"- \1", regex=True)
        # "word,word" becomes "word, word".
        .str.replace(r"(\w+)\,(\w+)", r"\1, \2", regex=True)
        # The word boundary stops this from firing on names that already read
        # "animal services", which a plain substring replace would turn into
        # "animal servicesrvices".
        .str.replace(r"animal se\b", "animal services", regex=True)
        # Runs last so the comma-spacing rule above has already normalised
        # "ret,name" into "ret, name".
        .str.replace(r"^ret\,? ", "", regex=True)
    )
    # assign() builds a new frame instead of writing into the caller's one.
    return df.assign(agency_name=cleaned)

# Load the raw table and run both cleaning passes over it in one chain.
df = (
    read_tbl()
    .pipe(clean_separation_reason)
    .pipe(clean_agency)
)

# Display the cleaned frame as the cell output.
df

Unnamed: 0,agency_name,person_nbr,first_name,middle_name,last_name,start_date,end_date,separation_reason
0,maricopa county sheriffs office,1,Donald,J,Nathaniel,4/24/00,4/26/00,
1,maricopa county sheriffs office,2,J,R,Hanigsberg,8/25/79,3/11/81,
2,pima law enforcement services,2,J,R,Hanigsberg,6/10/84,5/15/85,
3,maricopa county parks and recreation,2,J,R,Hanigsberg,4/7/85,6/3/87,
4,phoenix police department,3,D,A,Brookes,4/13/70,5/15/81,
...,...,...,...,...,...,...,...,...
69870,arizona game and fish department,72810,Matthew,Keith,Waunch,3/19/25,,
69871,arizona game and fish department,72811,Zachary,,Lunow,3/19/25,,
69872,arizona department of public safety,72812,Matthew,Anthony,Ebuhardt,3/20/25,,
69873,arizona department of public safety,72813,Michael,George,Julye,3/20/25,,


In [111]:
# List the distinct cleaned agency names to spot-check the normalisation.
df["agency_name"].unique()

array(['maricopa county sheriffs office', 'pima law enforcement services',
       'maricopa county parks and recreation',
       'phoenix police department', 'pinal county sheriffs office',
       'oro valley police department', 'tucson police department',
       'goodyear police department',
       'arizona department of public safety', 'eagar police department',
       'chino valley police department', 'tolleson police department',
       'avondale police department', 'scottsdale police department',
       'arizona department of transportation - ecd',
       'navajo county sheriffs office', 'pima county sheriffs department',
       'university of arizona police department',
       'tempe police department', 'huachuca city police department',
       'cochise county sheriffs office', 'chandler police department',
       'ak chin tribal police', 'eloy police department',
       'gilbert police department', 'yuma police department',
       'yuma county sheriffs office', 'yavapai county s

In [112]:
# Write the cleaned index to disk, omitting the positional row index.
df.to_csv(path_or_buf="../data/output/arizona_index.csv", index=False)