In [430]:
import pandas as pd
from lib import standardize_item_numbers
from scipy.stats import chi2_contingency
import statsmodels.api as sm
import numpy as np
import statsmodels.stats.multitest as smm

In [431]:
def clean():
    """Load the police reports and the RTCC requests and join them on item number.

    Both CSVs are passed through the project helper ``standardize_item_numbers``
    for the ``item_number`` column (its exact normalisation is not visible in
    this notebook). Every RTCC row is tagged with ``rtcc_requested = "1"``
    before the join.

    Returns:
        The two frames merged on ``item_number`` using ``pd.merge``'s default
        join type (inner), so only item numbers present in both files are kept.
    """
    reports = pd.read_csv("../data/police_reports/electronic_police_report_2018_2022.csv")
    reports = standardize_item_numbers(reports, ["item_number"])

    requests = pd.read_csv("../data/real_time_crime_center/rtcc.csv")
    # The flag is stored as the string "1", not as an integer or boolean.
    requests["rtcc_requested"] = "1"
    requests = standardize_item_numbers(requests, ["item_number"])

    return reports.merge(requests, on="item_number")

In [432]:
# Build the working frame: police reports merged with RTCC requests on item_number.
df = clean()

  dfa = pd.read_csv("../data/police_reports/electronic_police_report_2018_2022.csv")


In [433]:
def clean_offender_race(df):
    """Lower-case and trim the values of the ``offender_race`` column.

    The column is overwritten on the frame that was passed in, and that same
    frame object is returned so the function can be used with ``DataFrame.pipe``.

    Args:
        df: Frame containing a string-valued ``offender_race`` column.

    Returns:
        The input frame with ``offender_race`` normalised.
    """
    normalized = df["offender_race"].str.lower().str.strip()
    df.loc[:, "offender_race"] = normalized
    return df

In [434]:
# Normalise the offender_race labels (lower-cased, surrounding whitespace removed).
df = clean_offender_race(df)

In [435]:
# Count (charge_description, rtcc_requested) pairs and keep the ten largest counts.
top_10_charge_descs_sr = pd.Series(
    df.groupby("charge_description")["rtcc_requested"]
    .value_counts()
    .sort_values(ascending=False)
    .head(10)
)

# Move the rtcc_requested level into the columns so that charge_description
# becomes an ordinary column after the index is reset.
top_10_charge_descs_df = top_10_charge_descs_sr.to_frame().unstack().reset_index()

# Plain Python list of the retained charge descriptions.
top_10_charge_desc_list = list(top_10_charge_descs_df["charge_description"])

In [436]:
# Keep only the rows whose charge is in the top-10 list built in the previous cell.
df = df.loc[df["charge_description"].isin(top_10_charge_desc_list)]

In [437]:
# Discard rows whose charge description is missing or an empty string.
df = df[df["charge_description"].fillna("").ne("")]

# Missing or empty offender_race values are replaced with "black".
# NOTE(review): this imputation feeds directly into the race comparisons run
# later in the notebook -- confirm it is intended rather than, e.g., "unknown".
df.loc[:, "offender_race"] = (
    df["offender_race"]
    .fillna("")
    .str.replace(r"^$", "black", regex=True)
)

In [438]:
# Reduce the frame to the two categorical columns that get indicator-encoded.
df = df[["offender_race", "charge_description"]]

# One indicator column per distinct charge description and per distinct race.
charges = pd.get_dummies(df["charge_description"])
races = pd.get_dummies(df["offender_race"])

# Append the charge indicators, then the race indicators, to the right of the frame.
df = pd.concat([df, charges, races], axis=1)

In [439]:
# Drop the two raw categorical columns together with the "amer. ind.",
# "hispanic" and "asian" indicator columns; the remaining columns are indicators only.
df = df.drop(
    ["charge_description", "amer. ind.", "hispanic", "asian", "offender_race"],
    axis=1,
)

In [441]:
# Race indicator columns that are each compared against every numeric column of df.
races = ['black', 'white', 'unknown']

# results_c2[race][column] holds the p-value of a chi-squared test of independence
# on the race-by-column contingency table.
# NOTE(review): the inner loop does not skip the race indicator columns, so each
# race is also tested against itself and against the other race indicators.
results_c2 = {race: {} for race in races}
for race in races:
    for col in df.columns:
        if not pd.api.types.is_numeric_dtype(df[col]):
            continue
        contingency = pd.crosstab(index=df[race], columns=df[col])
        # Element 1 of chi2_contingency's result is the p-value.
        results_c2[race][col] = chi2_contingency(contingency)[1]

# One row per race, one column per tested indicator.
results = pd.DataFrame.from_dict(results_c2, orient="index")
results

Unnamed: 0,AGG. BATTERY,AGG. CRIMINAL DAMAGE,AGGRAVATED ASSAULT WITH A FIREARM,ARMED ROBBERY,SIMPLE BURGLARY,SIMPLE CRIMINAL DAMAGE TO PROPERTY,SIMPLE ROBBERY,THEFT,THEFT OF A MOTOR VEHICLE,USE OF FIREARM IN ROBBERY,black,unknown,white
black,1.830304e-13,7.589706e-25,1.709513e-09,1.177811e-56,1.816258e-14,4.3048230000000003e-33,2.594596e-24,0.009234,4.946784e-10,1.2762950000000001e-39,0.0,0.0,2.855272e-132
white,0.0004231211,0.0004905849,0.03286511,0.03283886,0.2076675,0.4380683,0.2133689,0.000327,0.3615904,0.0001520485,2.855272e-132,6.804999e-17,0.0
unknown,4.341797e-11,1.184539e-34,5.873861e-13,6.568339e-54,1.0939900000000001e-17,4.60772e-34,1.698802e-29,1e-06,7.117821e-12,4.350754e-34,0.0,0.0,6.804999e-17


In [442]:
# Sum every indicator column within each value of the 'black' indicator, then
# turn the group key back into a regular column so it can be used as the regressor.
count_data_black = df.groupby(['black']).sum().reset_index()

results_dict = {}

# Fit one Poisson GLM per column: that column's group totals regressed on 'black'.
# NOTE(review): the design matrix contains only the 'black' column and statsmodels
# does not add an intercept automatically, so these fits have no constant term.
# Confirm that is intended (otherwise see sm.add_constant) before interpreting
# the p-values below.
for col in count_data_black.columns:
    model = sm.GLM(count_data_black[col], count_data_black[['black']], family=sm.families.Poisson())
    result = model.fit()
    # Look the p-value up by its regressor label; integer keys on a label-indexed
    # Series rely on a positional fallback that pandas has deprecated.
    results_dict[col] = result.pvalues['black']

results_df = pd.DataFrame.from_dict(results_dict, orient='index', columns=['p_value'])

# perform multiple comparison correction using FDR method
results_df['p_adjusted'] = smm.multipletests(results_df['p_value'], method='fdr_bh')[1]

# filter for statistically significant results
significant_results_df = results_df[results_df['p_adjusted'] < 0.05]
significant_results_df

Unnamed: 0,p_value,p_adjusted
AGG. BATTERY,0.0,0.0
AGG. CRIMINAL DAMAGE,0.0,0.0
AGGRAVATED ASSAULT WITH A FIREARM,0.0,0.0
ARMED ROBBERY,0.0,0.0
SIMPLE BURGLARY,0.0,0.0
SIMPLE CRIMINAL DAMAGE TO PROPERTY,0.0,0.0
SIMPLE ROBBERY,0.0,0.0
THEFT,0.0,0.0
THEFT OF A MOTOR VEHICLE,0.0,0.0
USE OF FIREARM IN ROBBERY,0.0,0.0
