# Setup

In [1]:
import figure_utilities
import constants
from stats_utilities import select_controls, test_balance
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
plt.rcParams['savefig.dpi'] = 300
import statsmodels.formula.api as smf
import os
from differences import ATTgt
import pandas as pd

In [2]:
# Locations of the input panel and of the output directories; all are relative paths.
_OUTPUT_ROOT = "../output/final_paper"
INPUT_DATA_PANEL = "../data/03_cleaned/crime_analysis_monthly.csv"
OUTPUT_TABLES = _OUTPUT_ROOT + "/tables"
OUTPUT_FIGURES = _OUTPUT_ROOT + "/figures"

# Main Results

In [3]:
# Column that identifies each case's treatment month; later cells pass it as the cohort name.
treatment_date_variable = 'latest_docket_month'

# Load the full, unrestricted panel from disk.
df = pd.read_csv(INPUT_DATA_PANEL)


In [4]:
# Outcome for the main results: group-0 crime counts at the main-results radius.
analysis = "group_0_crimes_{}m".format(constants.Analysis.MAIN_RESULTS_RADIUS)

# The helper returns the list of value (outcome) columns for this analysis together with
# the dictionaries mapping months to integers and integers back to months.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

In [5]:
# Run the covariate balance test for the current outcome. The call returns the balance
# table (displayed as this cell's output) and the list of pre-treatment covariate names
# reused by the estimation cells.
# NOTE(review): OUTPUT_TABLES is presumably the directory the table is written to — confirm
# against stats_utilities.test_balance.
(balance_table,
 pre_treatment_covariates) = test_balance(df, analysis, OUTPUT_TABLES)
balance_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant
Unnamed: 0_level_1,Unnamed: 1_level_1,Cases Won by Plaintiff,Unweighted,\emph{p},Weighted,\emph{p}.1
Panel A,"Total Crime Incidents, 2017",464.804188,-12.605893,0.4321898,-14.086691,0.3807374
Panel A,"Change in Crime Incidents, 2017-2019",-70.682723,-17.087355,0.01000304,2.399454,0.6974106
Panel A,relative_pre_treatment_change_in_group_0_crimes_300m,-7.002094,-2.02798,0.1544305,0.23015,0.8576563
Panel B,"Bachelor's degree, 2010",0.316407,0.002512,0.8133641,-0.010234,0.336423
Panel B,"Job density, 2013",16161.032515,-1272.661768,0.5501121,-1130.339282,0.5902723
Panel B,"Median household income, 2016",47553.063874,2997.870414,0.01427126,-1812.420094,0.1528495
Panel B,"Poverty rate, 2010",0.279638,-0.02103,0.004904136,-0.005748,0.4438978
Panel B,"Population density, 2010",23320.185868,-297.766413,0.6737331,-608.550119,0.386691
Panel B,"Share white, 2010",0.319421,0.012446,0.3450948,-0.010033,0.4504327
Panel C,Filing for cause,0.132984,0.043066,0.005801068,-0.001937,0.9070577


## Unconditional DiD Using Neighborhood, Latest Docket Date Dummies

In [6]:
# Reload the panel and drop the cases whose treatment month is '2019-04' or '2019-05'.
df = pd.read_csv(INPUT_DATA_PANEL)
df = df[~df[treatment_date_variable].isin(['2019-04', '2019-05'])]

# Unconditional specification: no pre-treatment covariates are handed to the preparation step.
df = prepare_df_for_DiD(
    df=df,
    analysis=analysis,
    treatment_date_variable=treatment_date_variable,
    pre_treatment_covariates=[],
    value_vars=weekly_value_vars_crime,
    period_to_int_dictionary=month_to_int_dictionary,
)

# Estimate the ATT(g, t)s with the outcome alone as the formula (no covariates on the
# right-hand side), comparing against never-treated units.
att_gt_all_crimes = ATTgt(
    data=df,
    cohort_name=treatment_date_variable,
    base_period='universal',
)
formula = analysis
result = att_gt_all_crimes.fit(
    formula=formula,
    control_group='never_treated',
    n_jobs=-1,
    progress_bar=True,
)

# Plot the unconditional estimates by event time over the long horizon and save the figure.
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(
    att_gt_all_crimes,
    start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
    end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
    title="",
    ax=ax,
)
figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_or_all_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:02<00:00, 384.56it/s] 


## Doubly Robust DiD Conditional on Covariates

In [7]:
# Reload the panel and drop the cases whose treatment month is '2019-04' or '2019-05'.
df = pd.read_csv(INPUT_DATA_PANEL)
df = df[~df[treatment_date_variable].isin(['2019-04', '2019-05'])]

# Prepare the panel, this time carrying the pre-treatment covariates from the balance test.
df = prepare_df_for_DiD(
    df=df,
    analysis=analysis,
    treatment_date_variable=treatment_date_variable,
    pre_treatment_covariates=pre_treatment_covariates,
    value_vars=weekly_value_vars_crime,
    period_to_int_dictionary=month_to_int_dictionary,
)

# Estimate the ATT(g, t)s conditional on the covariates ("outcome ~ cov1+cov2+..."),
# comparing against never-treated units.
att_gt_all_crimes = ATTgt(
    data=df,
    cohort_name=treatment_date_variable,
    base_period='universal',
)
formula = "{} ~ {}".format(analysis, '+'.join(pre_treatment_covariates))
att_gt_all_crimes.fit(
    formula=formula,
    control_group='never_treated',
    n_jobs=-1,
    progress_bar=True,
)

# Plot the covariate-adjusted estimates by event time over the long horizon and save the figure.
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(
    att_gt_all_crimes,
    start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
    end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
    title="",
    ax=ax,
)
figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:02<00:00, 358.94it/s]


## Heterogeneous Treatment Effects

In [8]:
# Heterogeneous treatment effects: for each characteristic below, split the sample at the
# characteristic's median, estimate the overall event-aggregated ATT in each half, and draw
# the point estimates with their pointwise confidence bands in a single figure.
#
# NOTE(review): unlike the other estimation cells in this notebook, this cell does not drop
# the '2019-04' / '2019-05' treatment cohorts before estimating — confirm this is intended.

# Characteristic column -> text used in its two y-axis labels. Keeping the label next to the
# column name means the tick labels are generated in the same order as the estimates.
heterogeneity_variable_labels = {
    'popdensity2010': "population density, 2010",
    'share_white2010': "share white, 2010",
    'poor_share2010': "share below poverty line, 2010",
}

point_estimates = []
ci_uppers = []
ci_lowers = []
tick_labels = []
pretrend_p_values = []  # Never populated in this cell; kept so the name still exists afterwards.
for variable, label in heterogeneity_variable_labels.items():
    # Read fresh copy of unrestricted dataset into memory.
    df = pd.read_csv(INPUT_DATA_PANEL)

    # Indicator equal to 1 where the characteristic is strictly above its median in the
    # loaded panel, 0 otherwise (missing values compare False and therefore get 0).
    median = df[variable].median()
    above_median_indicator_name = f'above_median_{variable}'
    df[above_median_indicator_name] = (df[variable] > median).astype(int)

    # Swap the characteristic itself for its above-median indicator in the covariate list.
    # `.remove` raises ValueError if the characteristic is not among the covariates.
    pre_treatment_covariates_minus_current_var = pre_treatment_covariates.copy()
    pre_treatment_covariates_minus_current_var.remove(variable)
    df = prepare_df_for_DiD(df=df,
                            analysis=analysis,
                            treatment_date_variable=treatment_date_variable,
                            pre_treatment_covariates=pre_treatment_covariates_minus_current_var + [above_median_indicator_name],
                            value_vars=weekly_value_vars_crime,
                            period_to_int_dictionary=month_to_int_dictionary)

    # Run DiD separately for the two values of the indicator.
    att_gt_by_sample = ATTgt(data=df,
                             cohort_name=treatment_date_variable,
                             base_period='universal')
    att_gt_by_sample.fit(formula=f'{analysis} ~ relative_pre_treatment_change_in_{analysis}',
                         control_group='never_treated',
                         split_sample_by=above_median_indicator_name,
                         n_jobs=-1,
                         progress_bar=True)

    # Keep the aggregated table under its own name instead of rebinding the estimator.
    overall_att_by_sample = att_gt_by_sample.aggregate('event', overall=True)

    # Collect point estimate and confidence bounds, below-median sample first.
    for indicator_value, side in ((0, "Below"), (1, "Above")):
        sample_row = f'{above_median_indicator_name} = {indicator_value}'
        point_estimates.append(
            overall_att_by_sample.loc[sample_row, ("EventAggregationOverall", slice(None), "ATT")])
        ci_lowers.append(
            overall_att_by_sample.loc[sample_row, ("EventAggregationOverall", "pointwise conf. band", "lower")])
        ci_uppers.append(
            overall_att_by_sample.loc[sample_row, ("EventAggregationOverall", "pointwise conf. band", "upper")])
        tick_labels.append(f"{side} median {label}")

# One row per subsample: a horizontal segment for the confidence band and a dot for the
# point estimate, with a vertical reference line at zero.
fig, ax = plt.subplots()
figure_utilities.plot_labeled_vline(ax, x=0, text="", color='black', linestyle='-')
for i, (ci_lower, ci_upper) in enumerate(zip(ci_lowers, ci_uppers)):
    ax.hlines(y=i, xmin=ci_lower, xmax=ci_upper, color='black')
ax.scatter(point_estimates, range(len(point_estimates)), color='black', s=7)
ax.set_yticks(ticks=range(len(point_estimates)), labels=tick_labels)
ax.set_ylabel("Sample Restriction")
ax.set_xlabel("Average Post-Treatment ATT")

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "heterogenous_effects.png"))

Computing ATTgt for above_median_popdensity2010 = 0 [workers=12]100%|████████████████████| 1176/1176 [00:02<00:00, 548.63it/s] 
Computing ATTgt for above_median_popdensity2010 = 1 [workers=12]100%|████████████████████| 1176/1176 [00:01<00:00, 1081.93it/s]
Computing ATTgt for above_median_share_white2010 = 0 [workers=12]100%|████████████████████| 1176/1176 [00:02<00:00, 420.57it/s] 
Computing ATTgt for above_median_share_white2010 = 1 [workers=12]100%|████████████████████| 1176/1176 [00:01<00:00, 1170.19it/s]
Computing ATTgt for above_median_poor_share2010 = 0 [workers=12]100%|████████████████████| 1176/1176 [00:02<00:00, 513.25it/s] 
Computing ATTgt for above_median_poor_share2010 = 1 [workers=12]100%|████████████████████| 1176/1176 [00:01<00:00, 1143.08it/s]


## Calculating Treatment Effects Using Subset of Crimes as Outcome

In [None]:
# Switch the outcome to the group-1 crime counts at the main-results radius.
# NOTE: this rebinds the notebook-level `analysis`, `weekly_value_vars_crime` and the
# month/integer dictionaries, so any cell run afterwards sees the group-1 versions.
df = pd.read_csv(INPUT_DATA_PANEL)
analysis = "group_1_crimes_{}m".format(constants.Analysis.MAIN_RESULTS_RADIUS)
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

# Drop the cases whose treatment month is '2019-04' or '2019-05', then prepare the panel.
df = df[~df[treatment_date_variable].isin(['2019-04', '2019-05'])]
df = prepare_df_for_DiD(
    df=df,
    analysis=analysis,
    treatment_date_variable=treatment_date_variable,
    pre_treatment_covariates=pre_treatment_covariates,
    value_vars=weekly_value_vars_crime,
    period_to_int_dictionary=month_to_int_dictionary,
)

# Estimate the ATT(g, t)s for the group-1 outcome conditional on the covariates.
att_gt_group_1_crimes = ATTgt(
    data=df,
    cohort_name=treatment_date_variable,
    base_period='universal',
)
formula = "{} ~ {}".format(analysis, '+'.join(pre_treatment_covariates))
result = att_gt_group_1_crimes.fit(
    formula=formula,
    control_group='never_treated',
    n_jobs=-1,
    progress_bar=True,
)

# Two panels sharing a y-axis: all crime incidents on the left, the crime subset on the right.
# `att_gt_all_crimes` must already hold a fitted estimator from an earlier cell.
# NOTE(review): the -5..36 event window is hard-coded here, whereas other cells use
# constants.Analysis.MINIMUM_PRE_PERIOD / MAXIMUM_POST_PERIOD — confirm this is intentional.
fig, (ax1, ax2) = plt.subplots(1, 2, layout='constrained', sharey=True)
figure_utilities.aggregate_by_event_time_and_plot(
    att_gt_all_crimes,
    start_period=-5,
    end_period=36,
    title="All Crime Incidents as Outcome",
    ax=ax1,
)
figure_utilities.aggregate_by_event_time_and_plot(
    att_gt_group_1_crimes,
    start_period=-5,
    end_period=36,
    title="Subset of Crime Incidents as Outcome",
    ax=ax2,
)
figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_group_1_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:02<00:00, 344.10it/s]


## D.R. DiD Using File Date as Treatment Date Variable

In [None]:
# Robustness check: repeat the covariate-adjusted DiD for all crime incidents, using the
# 'file_month' column instead of the latest docket month as the treatment date.
df = pd.read_csv(INPUT_DATA_PANEL)

# Pin the outcome and its value variables explicitly. The preceding cell rebinds `analysis`
# and `weekly_value_vars_crime` to the group-1 subset, which would otherwise be picked up
# here even though this cell's figure is the all-crimes one.
analysis = f"group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"
weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)

# Same sample restriction as the main results: drop cases whose latest docket month is
# '2019-04' or '2019-05'. The column is spelled out so the filter does not depend on what
# `treatment_date_variable` happens to hold when the cell is run or re-run.
# NOTE(review): if the restriction was meant to apply to 'file_month' instead, change it here.
df = df.loc[~df['latest_docket_month'].isin(['2019-04', '2019-05']), :]

# Cell-specific name, so the notebook-level `treatment_date_variable` is left unchanged
# for any cell run afterwards.
file_date_variable = 'file_month'
df = prepare_df_for_DiD(df=df,
                        analysis=analysis,
                        treatment_date_variable=file_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Run DiD conditional on covariates. The estimator gets its own name so the main-results
# estimator `att_gt_all_crimes` is not overwritten.
att_gt_file_date = ATTgt(data=df, cohort_name=file_date_variable, base_period='universal')
formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
att_gt_file_date.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot the estimates by event time over the long horizon and save the figure.
fig, ax = plt.subplots(layout='constrained')

figure_utilities.aggregate_by_event_time_and_plot(att_gt_file_date,
                                                  start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                                                  end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
                                                  title="", ax=ax)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes_file_date.png"))

## Alternative Radii

In [None]:
# Robustness check: re-run the covariate-adjusted DiD at the main-results radius and at
# 50 meters either side of it, one panel per radius on a shared y-axis.

# Pin the treatment date explicitly. The preceding cell rebinds `treatment_date_variable`
# to 'file_month'; without this line the radius comparison would silently use the file
# date for both the sample restriction and the cohort definition.
# NOTE(review): assumes this check is meant to vary only the radius — confirm.
treatment_date_variable = 'latest_docket_month'

fig, axes = plt.subplots(1, 3, layout='constrained', sharey=True)
main_radius = constants.Analysis.MAIN_RESULTS_RADIUS
radii = [main_radius - 50, main_radius, main_radius + 50]
for ax, radius in zip(axes, radii):
    # Fresh copy of the panel, minus the cases treated in '2019-04' or '2019-05'.
    df = pd.read_csv(INPUT_DATA_PANEL)
    df = df.loc[~df[treatment_date_variable].isin(['2019-04', '2019-05']), :]

    # Outcome, value variables and covariates are regenerated for the current radius.
    # NOTE: this rebinds the notebook-level `analysis` and `pre_treatment_covariates`.
    analysis = f"group_0_crimes_{radius}m"
    weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)
    _, pre_treatment_covariates = test_balance(df, analysis)

    df = prepare_df_for_DiD(df=df,
                            analysis=analysis,
                            treatment_date_variable=treatment_date_variable,
                            pre_treatment_covariates=pre_treatment_covariates,
                            value_vars=weekly_value_vars_crime,
                            period_to_int_dictionary=month_to_int_dictionary)

    # Run DiD conditional on covariates and draw this radius's panel.
    current_att_gt = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
    formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
    current_att_gt.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)
    figure_utilities.aggregate_by_event_time_and_plot(current_att_gt,
                                                      start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                                                      end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
                                                      title=f"{radius} Meters", ax=ax)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_alternative_radii.png"))