In [212]:
import pandas as pd
from lib import standardize_item_numbers
from scipy.stats import chi2_contingency

In [213]:
def clean():
    """Load police reports and RTCC requests and join them on ``item_number``.

    Both CSVs are read from paths relative to the notebook, and the
    ``item_number`` column of each is passed through the project helper
    ``standardize_item_numbers`` before the join (its exact normalisation is
    defined in ``lib`` and is not visible here).

    Every RTCC row is tagged with ``rtcc_requested == "1"``. Because the join
    is an inner join, only police reports with a matching RTCC row survive, so
    ``rtcc_requested`` is ``"1"`` on every returned row.

    Returns:
        pandas.DataFrame: the inner join of the two sources on ``item_number``.
    """
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk, which avoids mixed-type columns (and the
    # warning pandas emits for them) on this large CSV.
    dfa = pd.read_csv(
        "../data/police_reports/electronic_police_report_2018_2022.csv",
        low_memory=False,
    )
    dfa = dfa.pipe(standardize_item_numbers, ["item_number"])

    dfb = pd.read_csv("../data/real_time_crime_center/rtcc.csv")
    dfb["rtcc_requested"] = "1"
    dfb = dfb.pipe(standardize_item_numbers, ["item_number"])

    # how="inner" is the pandas default; spelled out because the rest of the
    # analysis depends on only matched reports being kept.
    df = pd.merge(dfa, dfb, on="item_number", how="inner")
    return df

In [214]:
# Build the working frame: police reports joined to RTCC requests on item_number.
df = clean()

  dfa = pd.read_csv("../data/police_reports/electronic_police_report_2018_2022.csv")


In [215]:
def clean_offender_race(df):
    """Return a copy of ``df`` with ``offender_race`` lower-cased and stripped.

    The input frame is left untouched, so the function is safe to use with
    ``DataFrame.pipe`` and the cell that calls it can be re-run without
    compounding changes on the original data.

    Args:
        df: DataFrame containing a string-valued ``offender_race`` column.

    Returns:
        pandas.DataFrame: a new frame with the same columns in the same order;
        missing values in ``offender_race`` stay missing.
    """
    # assign() builds a new frame rather than writing into the caller's one.
    return df.assign(offender_race=df["offender_race"].str.lower().str.strip())

In [216]:
# Normalise the offender_race text (lower-case, surrounding whitespace removed).
df = clean_offender_race(df)

In [217]:
# Count rows per (charge_description, rtcc_requested) pair and keep the ten
# largest counts.
top_10_charge_descs_sr = (
    df.groupby("charge_description")["rtcc_requested"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
)

# Pivot rtcc_requested out of the index so charge_description becomes an
# ordinary column after reset_index().
top_10_charge_descs_df = pd.DataFrame(top_10_charge_descs_sr).unstack().reset_index()

# Plain list of the charge descriptions that made the top ten.
top_10_charge_desc_list = list(top_10_charge_descs_df["charge_description"])

In [218]:
# Restrict the analysis to rows whose charge is one of the top-ten charges.
df = df[df["charge_description"].isin(top_10_charge_desc_list)]

In [219]:
# Drop rows whose charge description is missing or an empty string.
df = df[df["charge_description"].fillna("") != ""]

# Missing or empty offender_race values are rewritten to "black".
# NOTE(review): this imputation adds every row with no recorded race to the
# black group used by the later cells — confirm it is intended, since it
# directly affects the counts behind the chi-square tests.
df.loc[:, "offender_race"] = (
    df["offender_race"].fillna("").str.replace(r"^$", "black", regex=True)
)

In [220]:
# Keep only the two categorical columns the test needs.
df = df.loc[:, ["offender_race", "charge_description"]]

# One indicator column per charge and one per race value.
charges = pd.get_dummies(df.charge_description)
races = pd.get_dummies(df.offender_race)

# Append both indicator blocks to the frame, charges first, then races.
df = pd.concat([df, charges, races], axis=1)

In [221]:
# Drop the two source text columns and the listed race indicator columns.
# "black" is not in the list, so it stays alongside the charge indicators.
# (The original list named "white" twice; the duplicate is removed.)
df = df.drop(
    columns=[
        "charge_description",
        "offender_race",
        "white",
        "amer. ind.",
        "hispanic",
        "unknown",
        "asian",
    ]
)

In [222]:
# Chi-square test of independence between the "black" indicator and each
# numeric indicator column of df, collecting one p-value per column.
# NOTE(review): one test is run per column with no multiple-comparison
# correction — keep that in mind when reading the p-values.
p_values = {}
for col in df:
    # Testing "black" against itself is meaningless, so skip it.
    if col == "black":
        continue
    if not pd.api.types.is_numeric_dtype(df[col]):
        continue
    contingency = pd.crosstab(index=df["black"], columns=df[col])
    # chi2_contingency returns (statistic, p-value, dof, expected); keep the p-value.
    p_values[col] = chi2_contingency(contingency)[1]

# Build the result table with explicit column names instead of renaming by
# position; the layout is charge_description, p-value, race.
results_black = pd.DataFrame(
    [(charge, p_value, "black") for charge, p_value in p_values.items()],
    columns=["charge_description", "p-value", "race"],
)
results_black

Unnamed: 0,charge_description,p-value,race
0,AGG. BATTERY,1.830304e-13,black
1,AGG. CRIMINAL DAMAGE,7.589706e-25,black
2,AGGRAVATED ASSAULT WITH A FIREARM,1.709513e-09,black
3,ARMED ROBBERY,1.177811e-56,black
4,SIMPLE BURGLARY,1.816258e-14,black
5,SIMPLE CRIMINAL DAMAGE TO PROPERTY,4.3048230000000003e-33,black
6,SIMPLE ROBBERY,2.594596e-24,black
7,THEFT,0.009234163,black
8,THEFT OF A MOTOR VEHICLE,4.946784e-10,black
9,USE OF FIREARM IN ROBBERY,1.2762950000000001e-39,black
