# Setup

In [1]:
import figure_utilities
import constants
from stats_utilities import select_controls, test_balance
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
plt.rcParams['savefig.dpi'] = 300
import os
from differences import ATTgt
import pandas as pd

In [2]:
# Relative filesystem locations used by the cells below.
# Directories that receive the figures and tables written by this notebook.
OUTPUT_FIGURES = "../output/final_paper/figures"
OUTPUT_TABLES = "../output/final_paper/tables"
# CSV panel that the analysis cells load with pd.read_csv.
INPUT_DATA_PANEL = "../data/03_cleaned/crime_analysis_monthly.csv"

# Main Results

In [3]:
# Name of the column that identifies each unit's treatment date (used as the cohort
# variable in every ATTgt call below).
treatment_date_variable = 'latest_docket_month'

# Load a fresh, unrestricted copy of the panel from disk.
df = pd.read_csv(INPUT_DATA_PANEL)


In [4]:
# Outcome for the main results: group-0 crimes within the main-results radius.
analysis = f"group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"

# get_value_variable_names returns three objects: the list of value-variable names for
# this outcome plus the month -> int and int -> month lookup dictionaries.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

In [5]:
# Run the project's balance test for the current outcome. test_balance is given the
# tables output directory and returns two objects: the balance table displayed below and
# the covariate list reused as `pre_treatment_covariates` by the DiD cells.
balance_results = test_balance(df, analysis, OUTPUT_TABLES)
balance_table, pre_treatment_covariates = balance_results

# Bare last expression so the notebook renders the table.
balance_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant
Unnamed: 0_level_1,Unnamed: 1_level_1,Cases Won by Plaintiff,Unweighted,\emph{p},Weighted,\emph{p}.1
Panel A,"Total Crime Incidents, 2017",338.17801,-10.485477,0.4045004,-11.462231,0.3642144
Panel A,"Change in Crime Incidents, 2017-2019",-50.052356,-15.587778,0.006822542,3.09768,0.5550044
Panel A,relative_pre_treatment_change_in_group_0_crimes_250m,-4.662827,-1.68735,0.1962394,0.341471,0.7697571
Panel B,"Bachelor's degree, 2010",0.316407,0.002512,0.8133641,-0.010712,0.3146804
Panel B,"Job density, 2013",16161.032515,-1272.661768,0.5501121,-1326.51337,0.528986
Panel B,"Median household income, 2016",47553.063874,2997.870414,0.01427126,-1834.451383,0.1479477
Panel B,"Poverty rate, 2010",0.279638,-0.02103,0.004904136,-0.006456,0.3899724
Panel B,"Population density, 2010",23320.185868,-297.766413,0.6737331,-652.082941,0.3550874
Panel B,"Share white, 2010",0.319421,0.012446,0.3450948,-0.01017,0.4441247
Panel C,Filing for cause,0.132984,0.043066,0.005801068,-0.001316,0.9367176


## Outcome Regression DiD Using Treatment Date Dummies

In [6]:
# Reload the panel and drop the units whose treatment month is April or May 2019.
df = pd.read_csv(INPUT_DATA_PANEL)
excluded_treatment_months = ['2019-04', '2019-05']
df = df.loc[~df[treatment_date_variable].isin(excluded_treatment_months), :]

# One-hot encode treatment month and neighborhood. Hyphens and spaces are stripped from
# the category values so the resulting column names can be joined into the formula below;
# drop_first omits one reference category from each set.
treatment_month_labels = df[treatment_date_variable].str.replace("-", "_", regex=False)
treatment_date_variable_dummies = pd.get_dummies(treatment_month_labels,
                                                 prefix=f"{treatment_date_variable}",
                                                 prefix_sep="_",
                                                 drop_first=True)
neighborhood_labels = df['neighborhood'].str.replace(" ", "", regex=False)
neighborhood_variable_dummies = pd.get_dummies(neighborhood_labels,
                                               prefix="neighborhood",
                                               prefix_sep="_",
                                               drop_first=True)
df = pd.concat([df, neighborhood_variable_dummies, treatment_date_variable_dummies], axis=1)

# Covariates for the outcome regression: treatment-month dummies first, then neighborhoods.
pre_treatment_covariates_or = [*treatment_date_variable_dummies.columns,
                               *neighborhood_variable_dummies.columns]

# Put the frame into the layout ATTgt consumes (project helper).
df = prepare_df_for_DiD(df=df,
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates_or,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Estimate ATT(g, t) by outcome regression (est_method='reg'), conditioning on the dummies
# and using never-treated units as the control group.
att_gt_all_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
covariate_terms = '+'.join(pre_treatment_covariates_or)
formula = f'{analysis} ~ ' + covariate_terms
result = att_gt_all_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True, est_method='reg')

# Plot the outcome-regression estimates aggregated by event time over the long horizon.
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(att_gt_all_crimes,
                                                  start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                                                  end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
                                                  title="", ax=ax)

output_path = os.path.join(OUTPUT_FIGURES, "att_gt_or_all_crimes.png")
figure_utilities.save_figure_and_close(fig, output_path)

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:09<00:00, 100.38it/s]


## Doubly Robust DiD Conditional on Covariates

In [13]:
# Reload the panel and drop the units whose treatment month is April or May 2019.
df = pd.read_csv(INPUT_DATA_PANEL)
excluded_treatment_months = ['2019-04', '2019-05']
df = df.loc[~df[treatment_date_variable].isin(excluded_treatment_months), :]

# Put the frame into the layout ATTgt consumes, carrying the covariates returned by the
# balance test.
df = prepare_df_for_DiD(df=df,
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Estimate ATT(g, t) conditional on covariates against never-treated units. est_method is
# left at the library default, which this section treats as the doubly robust estimator.
att_gt_all_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
covariate_terms = '+'.join(pre_treatment_covariates)
formula = f'{analysis} ~ ' + covariate_terms
att_gt_all_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot the doubly robust estimates aggregated by event time over the long horizon.
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(att_gt_all_crimes,
                                                  start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                                                  end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
                                                  title="", ax=ax)

output_path = os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes.png")
figure_utilities.save_figure_and_close(fig, output_path)

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:02<00:00, 331.66it/s]


In [None]:
# Check for heterogeneous treatment effects by re-estimating the model separately on the
# below-median and above-median halves of each variable listed here.
subsample_variables = ['poor_share2010', 'share_white2010', 'popdensity2010']
# Read fresh copy of unrestricted dataset into memory.
# NOTE(review): unlike the other DiD cells, this one does not drop the '2019-04' and
# '2019-05' treatment months -- confirm that is intentional.
df = pd.read_csv(INPUT_DATA_PANEL)
treatment_date_variable = 'latest_docket_month'  # Store treatment date variable.


def _average_post_treatment_att(subsample, analysis, treatment_date_variable, pre_treatment_covariates):
    """Estimate the overall event-aggregated ATT on one subsample.

    Runs the same pipeline as the main doubly robust cell (prepare the frame, fit ATTgt
    against never-treated units, aggregate by event time with ``overall=True``) and
    returns the three numbers the heterogeneity plot needs.

    Parameters
    ----------
    subsample : pandas.DataFrame
        Rows of the panel belonging to the subsample; it is not modified.
    analysis : str
        Name of the outcome, used as the left-hand side of the formula.
    treatment_date_variable : str
        Column used as the cohort variable.
    pre_treatment_covariates : list of str
        Covariates joined with '+' to form the right-hand side of the formula.

    Returns
    -------
    tuple
        ``(point_estimate, ci_lower, ci_upper)``, each rounded to two decimals.
    """
    value_vars, period_to_int, _ = get_value_variable_names(subsample, analysis)
    prepared = prepare_df_for_DiD(df=subsample.reset_index(),
                                  analysis=analysis,
                                  treatment_date_variable=treatment_date_variable,
                                  pre_treatment_covariates=pre_treatment_covariates,
                                  value_vars=value_vars,
                                  period_to_int_dictionary=period_to_int)
    # Run DiD conditional on covariates.
    att_gt = ATTgt(data=prepared, cohort_name=treatment_date_variable, base_period='universal')
    formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
    att_gt.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)
    overall = att_gt.aggregate('event', overall=True)['EventAggregationOverall']
    # Positional columns of the single result row: 0 is read as the point estimate,
    # 2 as the lower confidence bound and 3 as the upper confidence bound.
    point_estimate = round(overall.iloc[0, 0], 2)
    ci_lower = round(overall.iloc[0, 2], 2)
    ci_upper = round(overall.iloc[0, 3], 2)
    return point_estimate, ci_lower, ci_upper


# Results are appended in the order: below median, then above median, for each variable
# in turn. The plotting code that consumes these lists relies on that order.
point_estimates = []
ci_uppers = []
ci_lowers = []

for subsample_variable in subsample_variables:
    # Compute the median once per variable. Both comparisons are strict, so rows exactly
    # at the median fall into neither subsample.
    median_value = df[subsample_variable].median()
    below_median_subsample = df[df[subsample_variable] < median_value].copy()
    above_median_subsample = df[df[subsample_variable] > median_value].copy()

    for subsample in (below_median_subsample, above_median_subsample):
        point_estimate, ci_lower, ci_upper = _average_post_treatment_att(
            subsample, analysis, treatment_date_variable, pre_treatment_covariates)
        point_estimates.append(point_estimate)
        ci_uppers.append(ci_upper)
        ci_lowers.append(ci_lower)

# Plot each subsample's average post-treatment ATT as a dot with a horizontal confidence
# interval, one row per subsample, with a vertical reference line at zero.
# Labels follow the order in which the estimates were appended: below median then above
# median, for poverty rate, share white and population density.
subsample_labels = ["Below median poverty rate, 2010", "Above median poverty rate, 2010",
                    "Below median share white, 2010", "Above median share white, 2010",
                    "Below median population density, 2010", "Above median population density, 2010"]
row_positions = range(len(point_estimates))

fig, ax = plt.subplots()
figure_utilities.plot_labeled_vline(ax, x=0, text="", color='black', linestyle='-')
for row, ci_lower, ci_upper in zip(row_positions, ci_lowers, ci_uppers):
    ax.hlines(y=row, xmin=ci_lower, xmax=ci_upper, color='black')
ax.scatter(point_estimates, row_positions, color='black', s=7)
ax.set_yticks(ticks=row_positions, labels=subsample_labels)
ax.set_ylabel("Sample Restriction")
ax.set_xlabel("Average Post-Treatment ATT")

output_path = os.path.join(OUTPUT_FIGURES, "heterogenous_effects.png")
figure_utilities.save_figure_and_close(fig, output_path)

In [15]:
# Re-run the doubly robust specification with group-1 crimes as the outcome and plot it
# beside the all-crimes estimates held in `att_gt_all_crimes`.
df = pd.read_csv(INPUT_DATA_PANEL)

# NOTE(review): this rebinds the notebook-level `analysis`, so any earlier cell re-run
# after this one will use the group-1 outcome.
analysis = f"group_1_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"
# Regenerate the value-variable list and month <-> integer dictionaries for the new outcome.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

# Drop the units whose treatment month is April or May 2019, then prepare the frame.
excluded_treatment_months = ['2019-04', '2019-05']
df = df.loc[~df[treatment_date_variable].isin(excluded_treatment_months), :]
df = prepare_df_for_DiD(df=df,
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Run DiD conditional on covariates.
att_gt_placebo_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
covariate_terms = '+'.join(pre_treatment_covariates)
formula = f'{analysis} ~ ' + covariate_terms
result = att_gt_placebo_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Event-time window shared by both panels.
# NOTE(review): the earlier plots take their window from
# constants.Analysis.MINIMUM_PRE_PERIOD / MAXIMUM_POST_PERIOD; confirm the literals here
# are meant to differ from (or match) those constants.
comparison_start_period = -5
comparison_end_period = 36

# Plot the all-crimes estimates (left) next to the group-1 estimates (right) on a shared y-axis.
fig, (ax1, ax2) = plt.subplots(1, 2, layout='constrained', sharey=True)
panels = [(att_gt_all_crimes, "All Crime Incidents as Outcome", ax1),
          (att_gt_placebo_crimes, "Subset of Crime Incidents as Outcome", ax2)]
for fitted_model, panel_title, panel_ax in panels:
    figure_utilities.aggregate_by_event_time_and_plot(fitted_model, start_period=comparison_start_period,
                                                      end_period=comparison_end_period,
                                                      title=panel_title, ax=panel_ax)

output_path = os.path.join(OUTPUT_FIGURES, "att_gt_dr_group_1_crimes.png")
figure_utilities.save_figure_and_close(fig, output_path)

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 325.43it/s]
