In [768]:
### to-do ###
# Explore the number of arrests: X Black males were arrested -- how many were/are innocent, given the dubious effectiveness of the technology?

In [769]:
import pandas as pd

def standardize_item_no(df):
    """Lower-case and whitespace-strip the ``item_number`` column.

    The column is overwritten on the frame that was passed in, and that same
    frame object is returned so the function can be used with
    ``DataFrame.pipe``.
    """
    normalized = df["item_number"].str.lower().str.strip()
    df.loc[:, "item_number"] = normalized
    return df

def clean():
    """Load the yearly police-report CSVs and join them to the rtcc CSV.

    The five yearly report files (2018-2022) are stacked with an outer column
    join, then merged with ``rtcc.csv`` on the normalized ``item_number`` key.
    ``pd.merge`` is called without ``how``, so only item numbers present in
    both sources are kept (pandas' default inner join).

    Returns:
        The merged DataFrame.
    """
    report_paths = [
        "../data/electronic_police_report_2018.csv",
        "../data/electronic_police_report_2019.csv",
        "../data/electronic_police_report_2020.csv",
        "../data/electronic_police_report_2021.csv",
        "../data/electronic_police_report_2022.csv",
    ]
    reports = [pd.read_csv(path) for path in report_paths]
    # Only the 2022 file has its year set here; presumably the other files
    # already carry a ``year`` column -- TODO confirm against the raw data.
    reports[-1]["year"] = 2022

    police_reports = standardize_item_no(pd.concat(reports, join="outer"))

    rtcc = pd.read_csv("../data/rtcc.csv", encoding="cp1252")
    rtcc = rtcc.rename(columns={"Item_number": "item_number"})
    rtcc = standardize_item_no(rtcc)

    return pd.merge(rtcc, police_reports, on="item_number")

In [770]:
def drop_rows_missing_race(df):
    """Normalize ``offender_race`` and drop rows whose race ends up empty.

    Values are null-filled, lower-cased and stripped; the substrings
    "unknown", "hispanic", "asian" and "amer. ind." are then blanked out, so
    rows carrying only those labels are removed together with rows that had
    no race recorded. The normalized column is written back onto ``df``.

    Returns:
        The rows of ``df`` whose normalized race is a non-empty string.
    """
    race = df["offender_race"].fillna("").str.lower().str.strip()

    # (pattern, is_regex) pairs, blanked out in this order.
    blanked_labels = (
        ("unknown", False),
        ("hispanic", False),
        ("asian", False),
        (r"amer\. ind\.", True),
    )
    for pattern, is_regex in blanked_labels:
        race = race.str.replace(pattern, "", regex=is_regex)

    df.loc[:, "offender_race"] = race
    has_race = df["offender_race"] != ""
    return df[has_race]


def filter_gender(df):
    """Normalize ``offender_gender`` and keep only the rows equal to "male".

    Values are lower-cased, stripped and null-filled, and the substring
    "unknown" is blanked out. The normalized column is written back onto
    ``df``.

    Returns:
        The rows of ``df`` whose normalized gender is exactly "male".
    """
    gender = df["offender_gender"].str.lower().str.strip()
    gender = gender.fillna("").str.replace("unknown", "", regex=False)
    df.loc[:, "offender_gender"] = gender

    males = df.loc[df["offender_gender"].isin(["male"])]
    # After the "male" filter no empty strings can remain; the check is kept
    # so the function behaves exactly as before.
    return males.loc[males["offender_gender"] != ""]


def filter_arrested(df):
    """Normalize ``offenderstatus`` and keep only the rows equal to "arrested".

    Values are lower-cased, stripped and null-filled; the normalized column
    is written back onto ``df``.

    Returns:
        The rows of ``df`` whose normalized status is exactly "arrested".
    """
    status = df["offenderstatus"].str.lower().str.strip().fillna("")
    df.loc[:, "offenderstatus"] = status

    arrested = df.loc[df["offenderstatus"].isin(["arrested"])]
    # No empty strings can survive the filter above; kept for identical behaviour.
    return arrested.loc[arrested["offenderstatus"] != ""]


def filter_year(df):
    """Normalize ``year`` to a plain 4-digit string and keep 2018-2022 only.

    The column is converted to text, surrounding whitespace is removed, and a
    trailing "." or ".0" is stripped. The ".0" case matters because a numeric
    year column that pandas holds as float turns into "2018.0" under
    ``astype(str)``; without stripping it, no row would match the year list
    and everything would be dropped silently.

    The normalized column replaces ``df["year"]`` on the frame passed in. It
    is assigned by key rather than through ``.loc`` because writing strings
    into an existing numeric column in place is a deprecated upcast.

    Returns:
        The rows of ``df`` whose normalized year is one of 2018-2022.
    """
    wanted_years = ["2018", "2019", "2020", "2021", "2022"]
    year_text = (
        df["year"]
        .astype(str)
        .str.strip()
        .str.replace(r"\.0*$", "", regex=True)
    )
    df["year"] = year_text
    return df[year_text.isin(wanted_years)]
  
  
def drop_rows_missing_charge_desc(df):
    """Normalize ``charge_description`` and drop rows where it is empty.

    Values are lower-cased, stripped and null-filled; the normalized column
    is written back onto ``df``.

    Returns:
        The rows of ``df`` with a non-empty charge description.
    """
    description = df["charge_description"].str.lower().str.strip().fillna("")
    df.loc[:, "charge_description"] = description
    has_description = df["charge_description"] != ""
    return df[has_description]

In [771]:
def filter_data():
    """Build the analysis frame: load, filter, then de-duplicate.

    Steps, in order: ``clean()`` loads and merges the source files, rows
    outside the accepted years are removed, rows without a usable race are
    removed, rows without a charge description are removed, and finally exact
    duplicate rows are dropped.

    Returns:
        The filtered, de-duplicated DataFrame.
    """
    merged = clean()
    in_year_range = filter_year(merged)
    with_race = drop_rows_missing_race(in_year_range)
    with_charge = drop_rows_missing_charge_desc(with_race)
    return with_charge.drop_duplicates()

In [772]:
# Load, merge and filter the source files; `df` is the working frame used by the cells below.
df = filter_data()

  exec(code_obj, self.user_global_ns, self.user_ns)


In [773]:
# Constant 1 on every row, so counting or summing this column per group yields a row tally.
df["rtcc_footage_request_tally"] = 1

In [774]:
# Number of rows per offender race (the tally column only holds 1, so each race shows a single count).
df.groupby("offender_race").rtcc_footage_request_tally.value_counts()

offender_race  rtcc_footage_request_tally
black          1                             6848
white          1                              583
Name: rtcc_footage_request_tally, dtype: int64

In [775]:
# List the distinct race values present in the frame.
df.offender_race.unique()

array(['black', 'white'], dtype=object)

In [776]:
# Collapse to one row per (race, charge) pair; the tally column becomes the
# number of rows in that pair.
# Note: this rebinds `df`, so the row-level frame is no longer available afterwards.
df = (
    df[["offender_race", "rtcc_footage_request_tally", "charge_description"]]
    .groupby(["offender_race", "charge_description"])
    .rtcc_footage_request_tally.count()
    .to_frame()
    .reset_index()
)

In [777]:
# Population of each race group, stored as digit strings (calc_rate converts
# them with int()). Races missing from the mapping get NaN.
# NOTE(review): the source and reference year of these figures are not
# recorded in this notebook -- confirm and cite them.
df["population"] = df.offender_race.map({"black": "105498", "white": "65756"})

# Total requests per race, derived from the per-charge tallies instead of
# being copied by hand from an earlier cell's output, so the figure cannot go
# stale when the data or the filters change. Kept as digit strings to match
# the previous column type (calc_ratio converts them with int()).
df["total_requests"] = (
    df.groupby("offender_race")
    .rtcc_footage_request_tally.transform("sum")
    .astype(str)
)

# Populations for groups that drop_rows_missing_race currently removes
# (kept for reference in case those groups are analysed later):
# df.loc[(df.offender_race == "hispanic"), "population"] = "11205"
# df.loc[(df.offender_race == "amer. ind."), "population"] = "360"
# df.loc[(df.offender_race == "asian"), "population"] = "5499"

In [778]:
def calc_rate(dfb_total, population=""):
    """Return ``dfb_total`` expressed per 1,000 people.

    ``population`` is converted with ``int()``, so it may be an integer or a
    digit string. The default empty string makes ``int()`` raise
    ``ValueError``, so a value has to be supplied in practice.
    """
    per_person = dfb_total / int(population)
    return per_person * 1000

def calc_ratio(charges, total=""):
    """Return ``charges`` as a fraction of ``total``.

    ``total`` is converted with ``int()``, so it may be an integer or a digit
    string. The default empty string makes ``int()`` raise ``ValueError``, so
    a value has to be supplied in practice.
    """
    return charges / int(total)

In [779]:
# Row-wise metrics for every (race, charge) row:
#   rate_per_1000_persons -- tally scaled by the race group's population
#   ratio                 -- tally as a share of the race group's total requests
df["rate_per_1000_persons"] = df.apply(
    lambda row: calc_rate(row["rtcc_footage_request_tally"], row["population"]),
    axis=1,
)
df["ratio"] = df.apply(
    lambda row: calc_ratio(row["rtcc_footage_request_tally"], row["total_requests"]),
    axis=1,
)
# Label for the period covered by the aggregated figures.
df["year"] = "2018-2022"

In [780]:
# Display the results ordered by charge; sort_values returns a new frame, so `df` itself keeps its order.
df.sort_values("charge_description")

Unnamed: 0,offender_race,charge_description,rtcc_footage_request_tally,population,total_requests,rate_per_1000_persons,ratio,year
0,black,2nd degree cruelty to juveniles,1,105498,6848,0.009479,0.000146,2018-2022
1,black,access device fraud,20,105498,6848,0.189577,0.002921,2018-2022
235,white,access device fraud,6,65756,583,0.091246,0.010292,2018-2022
2,black,accessory - 2nd degree murder,1,105498,6848,0.009479,0.000146,2018-2022
3,black,accessory - agg. assault,4,105498,6848,0.037915,0.000584,2018-2022
...,...,...,...,...,...,...,...,...
351,white,violations of protective orders,2,65756,583,0.030415,0.003431,2018-2022
233,black,violations of protective orders,11,105498,6848,0.104267,0.001606,2018-2022
352,white,violations of registration provisions,1,65756,583,0.015208,0.001715,2018-2022
234,black,warrant issued by,7,105498,6848,0.066352,0.001022,2018-2022
