In [1]:
import pandas as pd

In [2]:
### includes allegation


def read_allegations():

    df = pd.read_csv(
        "../data/input/fl-2023-original-complaint-offenses.csv",
        encoding="latin",
    )

    df = df.fillna("")

    df = df[~(df.offense_comments == "")]

    df = df[["complaint_nbr", "offense_comments"]]
    return df


dfa = read_allegations()


def read_actions():
    df = pd.read_csv(
        "../data/input/fl-2023-original-complaint-discipline.csv",
        encoding="latin",
    )
    df = df.fillna("")

    df = df[~(df.discipline_imposed == "")]

    df = df[["complaint_nbr", "discipline_imposed", "discipline_comments"]]
    return df


dfb = read_actions()


def read_complaint_status():
    df = pd.read_csv(
        "../data/input/fl-2023-original-complaints.csv", encoding="latin"
    )
    df.loc[:, "case_opened_date"] = df.case_opened_date.str.replace(
        r"^(.+) (\w+):(\w+):(\w+)", r"\1", regex=True
    )
    df.loc[:, "case_closed_date"] = df.case_opened_date.str.replace(
        r"^(.+) (\w+):(\w+):(\w+)", r"\1", regex=True
    )

    df = df.fillna("")

    df = df[~(df.case_opened_date == "")]

    df = df[
        ["complaint_nbr", "person_nbr", "case_opened_date", "case_closed_date"]
    ]
    return df


dfc = read_complaint_status()

df = pd.merge(dfa, dfb, on="complaint_nbr").merge(dfc, on="complaint_nbr")

df

Unnamed: 0,complaint_nbr,offense_comments,discipline_imposed,discipline_comments,person_nbr,case_opened_date,case_closed_date
0,1,MISDEAMEANOR,NC-Dismissed,,61793,11/11/1976,11/11/1976
1,4,FELONY,Rev,,70322,7/11/1980,7/11/1980
2,20,FELONY,Rev,,85601,10/24/1980,10/24/1980
3,23,MISDEAMEANOR,Rev,,6849,4/4/1980,4/4/1980
4,24,MISDEAMEANOR,Rev,,28478,1/1/1980,1/1/1980
...,...,...,...,...,...,...,...
12494,49944,Felony Battery - Cause Great Bodily Harm,Rev,,324096,12/14/2022,12/14/2022
12495,49967,"Worker's Compensation; More than $20,000 but l...",Rev,,315444,12/21/2022,12/21/2022
12496,50012,Trafficking Phenethylamines 10 grams or more,Rev,,505091,12/22/2022,12/22/2022
12497,50086,Benzodiazepines,Rev,,529107,2/1/2023,2/1/2023


In [3]:
def read_index():
    df = pd.read_csv(
        "../data/input/fl-2023-index-enhanced.csv", encoding="latin"
    )
    return df


def read_officers():
    df = pd.read_csv(
        "../data/input/fl-2023-original-officers.csv", encoding="latin"
    )
    return df


index = read_index()

officers = read_officers()


def clean_demo_data(df):
    df.loc[:, "sex"] = (
        df.sex_code.str.lower()
        .str.strip()
        .fillna("")
        .str.replace(r"^f$", "Female", regex=True)
        .str.replace(r"^m$", "Male", regex=True)
        .str.replace(r"(u|o)", "", regex=True)
    )

    df.loc[:, "race"] = (
        df.race_code.str.lower()
        .str.strip()
        .fillna("")
        .str.replace(r"^(his|h)$", "Hispanic", regex=True)
        .str.replace(r"^(wh|whi)$", "White", regex=True)
        .str.replace(r"^blk$", "Black", regex=True)
        .str.replace(r"^as$", "Asian", regex=True)
        .str.replace(r"(oth|na)", "", regex=True)
    )

    df = df[["person_nbr", "sex", "race"]]

    return df


officers = officers.pipe(clean_demo_data)

index = pd.merge(index, officers, on="person_nbr")
index = index[
    [
        "person_nbr",
        "first_name",
        "middle_name",
        "last_name",
        "suffix",
        "year_of_birth",
        "agency",
        "type",
        "start_date",
        "end_date",
        "separation_reason",
        "race",
        "sex",
    ]
]

index

  index = read_index()


Unnamed: 0,person_nbr,first_name,middle_name,last_name,suffix,year_of_birth,agency,type,start_date,end_date,separation_reason,race,sex
0,99996,Seborn,E,Blackburn,,,Pasco-Hernando State College,Instructor,1994-02-15,1994-02-15,Voluntary Separation (Not involving misconduct),,Male
1,99995,David,A,Thomas,,,Orange County Sheriff's Office,Instructor,2013-03-14,2021-01-08,Retired (Not involving misconduct),,Male
2,99995,David,A,Thomas,,,"Valencia College, Criminal Justice Institute",Instructor,2004-05-10,2013-03-14,Instructor Request for Change of Affiliation,,Male
3,99995,David,A,Thomas,,,"Valencia College, Criminal Justice Institute",Instructor,1999-10-29,2003-10-01,Failure to Meet Mandatory Retraining Requirement,,Male
4,99994,Eric,C,Herb,,1960.0,Sarasota County Sheriff's Office,Correctional,1992-06-11,1994-08-29,Voluntary Separation (Not involving misconduct),White,Male
...,...,...,...,...,...,...,...,...,...,...,...,...,...
646417,100,Barry,J,Smith,,1946.0,Winter Garden Police Department,Law Enforcement,1974-03-28,1978-02-13,Resigned/Retired (Historical Use Only),White,Male
646418,1,Lex,Lane,Chance,,1972.0,Florida Department Of Law Enforcement,Instructor,2008-12-04,,,White,Male
646419,1,Lex,Lane,Chance,,1972.0,Florida Department Of Law Enforcement,Law Enforcement,2008-02-07,,,White,Male
646420,1,Lex,Lane,Chance,,1972.0,Jefferson County Sheriff's Office,Law Enforcement,2007-12-17,2008-02-07,Voluntary Separation (Not involving misconduct),White,Male


In [4]:
merged_df = pd.merge(df, index, on="person_nbr", how="left")

# Convert date columns to datetime
date_columns = [
    "case_opened_date",
    "case_closed_date",
    "start_date",
    "end_date",
]
for col in date_columns:
    merged_df[col] = pd.to_datetime(merged_df[col])

# Filter to keep only records where case_opened_date is between start_date and end_date
merged_df = merged_df[
    (merged_df["case_opened_date"] >= merged_df["start_date"])
    & (
        merged_df["case_opened_date"]
        <= merged_df["end_date"].fillna(pd.Timestamp.now())
    )
]

merged_df.loc[:, "year_of_birth"] = (
    merged_df["year_of_birth"].astype(str).str.replace(r"\.0", "", regex=True)
)

merged_df = merged_df.rename(
    columns={"agency": "agency_name", "offense_comments": "offense"}
)


def proper_case(df):
    df.loc[:, "offense"] = df.offense.str.title()
    df.loc[:, "discipline_imposed"] = df.discipline_imposed.str.title()
    df.loc[:, "discipline_comments"] = df.discipline_comments.str.capitalize()
    return df


merged_df = merged_df.pipe(proper_case)

merged_df.to_csv("../data/output/florida-discipline_index.csv", index=False)