# Setup

In [1]:
import figure_utilities
import constants
from stats_utilities import test_balance, run_event_study
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
plt.rcParams['savefig.dpi'] = 300
import statsmodels.api as sm
import os
from differences import ATTgt
import pandas as pd
import numpy as np

In [2]:
# Relative paths to the cleaned monthly panel (input) and to the directories
# that receive the paper's tables and figures (outputs).
INPUT_DATA_PANEL = "../data/03_cleaned/crime_analysis_monthly.csv"
OUTPUT_TABLES, OUTPUT_FIGURES = (
    "../output/final_paper/tables",
    "../output/final_paper/figures",
)

# Main Results

In [3]:
# Load the panel stored at INPUT_DATA_PANEL into a DataFrame.
df = pd.read_csv(INPUT_DATA_PANEL)
# Name of the column holding the treatment date. Later cells pass this name to
# prepare_df_for_DiD and to ATTgt (as cohort_name), so it must keep this value.
treatment_date_variable = 'latest_docket_month'


In [4]:
# Outcome name for the main results: group-0 crimes within the main-results radius.
analysis = f"group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"
# Generate the list of value variables for this outcome and the two dictionaries
# mapping between months and integers (one per direction).
# NOTE(review): the list is called "weekly" although the input file is the
# monthly panel — confirm whether the name is just a leftover.
weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)

In [5]:
# Build the covariate balance table for the main outcome. test_balance also
# returns the list of pre-treatment covariates that the DiD cells below reuse.
# OUTPUT_TABLES is passed as the third argument (presumably the directory the
# table is written to — confirm against stats_utilities.test_balance).
balance_table, pre_treatment_covariates = test_balance(df, analysis, OUTPUT_TABLES)
# Last expression of the cell: display the table.
balance_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant
Unnamed: 0_level_1,Unnamed: 1_level_1,Cases Won by Plaintiff (1),Unweighted (2),\emph{p} (3),Weighted (4),\emph{p} (5)
Panel A,"Total Incidents, 2017",295.810606,-21.320862,0.08620097,-1.68308,0.887314
Panel A,"$\Delta$ Incidents, 2017-2019",-51.580808,-16.887102,0.006803477,1.688375,0.766036
Panel A,"$\Delta$ Incidents, 2 Years Pre-Treatment",-4.542929,-1.50237,0.2709252,0.443884,0.715688
Panel B,"Bachelor's degree, 2010",0.330414,0.016408,0.1460136,-0.001922,0.865269
Panel B,"Job density, 2013",18339.304431,575.202823,0.8065373,-1282.720521,0.598268
Panel B,"Median household income, 2016",48690.489899,4225.390598,0.001153473,44.952095,0.972919
Panel B,"Poverty rate, 2010",0.27148,-0.031377,4.010476e-05,-0.00073,0.920416
Panel B,"Population density, 2010",22820.682895,-924.94939,0.2062529,155.113066,0.825744
Panel B,"Share white, 2010",0.32006,0.012198,0.3844655,0.001699,0.905084
Panel C,Filing for cause,0.17803,0.07873,1.083017e-05,0.002319,0.906184


## Crime Trends Around Filing Date, Latest Docket Date

In [6]:
# Re-read the panel and run one event study per candidate treatment date
# ('file_month' and 'latest_docket_month'); the results are plotted and tested
# in the next cells.
df = pd.read_csv(INPUT_DATA_PANEL)
result_file_month, omitted_period_control_mean = run_event_study(df, 'file_month')
# NOTE: omitted_period_control_mean is rebound by this second call, so only the
# value from the 'latest_docket_month' run survives this cell.
result_latest_docket_month, omitted_period_control_mean = run_event_study(df, 'latest_docket_month')

In [7]:
# Plot the event-study coefficient paths for both treatment-date definitions.
#
# The loop variable is deliberately named `treatment_date_label` rather than
# `treatment_date_variable`: the latter is the notebook-level column name
# ('latest_docket_month') that the DiD cells below hand to prepare_df_for_DiD
# and ATTgt, and reusing it here would silently replace it with the display
# string 'Latest Docket Date' once this loop finishes.
for result, treatment_date_label in zip([result_file_month, result_latest_docket_month],
                                        ['File Date', 'Latest Docket Date']):
    # Months relative to treatment covered by the plot.
    month = list(range(-12, 37))
    # Control series: the relative-month coefficients on their own.
    control = [result.params[f'month_{i}'] for i in month]
    # Treatment series: plaintiff-judgment main effect + month-by-treatment
    # interaction + the relative-month coefficient.
    treatment = [(result.params['judgment_for_plaintiff'] +
                  result.params[f'month_{i}_X_treatment_indicator'] +
                  result.params[f'month_{i}'])
                 for i in month]

    fig, ax = plt.subplots()
    ax.plot(month,
            control,
            color='grey',
            linestyle='--',
            marker='o',
            label="Properties where Tenant Won Eviction Case")
    ax.plot(month,
            treatment,
            color='black',
            linestyle='--',
            marker='o',
            label="Properties where Plaintiff Won Eviction Case")

    # Put each word of the label on its own line next to the vertical marker at month 0.
    label = treatment_date_label.replace(" ", "\n")
    figure_utilities.plot_labeled_vline(ax, x=0, text=f"{label}", color='black', linestyle='-',
                                        text_y_location_normalized=0.1)
    ax.set_xlabel(f"Month Relative to {treatment_date_label}")
    ax.set_ylabel(f"Crime Incidents within {constants.Analysis.MAIN_RESULTS_RADIUS} Meters")
    ax.legend()
    # The file name keeps the display label (spaces included), exactly as before.
    figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, f"crime_trends_around_{treatment_date_label}.png"))

In [8]:
# For each event study, collect the pre-treatment combined coefficients
# (month-by-treatment interaction + plaintiff-judgment main effect) together
# with the p-value of the test that each combination equals zero, followed by
# one extra entry for the joint test across all pre-treatment months.
file_month_point_estimates, file_month_p_values = [], []
latest_docket_month_point_estimates, latest_docket_month_p_values = [], []
pre_treatment_months = range(-12, 0)

collectors = [
    (result_file_month, file_month_point_estimates, file_month_p_values),
    (result_latest_docket_month, latest_docket_month_point_estimates, latest_docket_month_p_values),
]
for result, point_estimates, p_values in collectors:
    # One linear-combination expression per pre-treatment month.
    combined_terms = [f"month_{i}_X_treatment_indicator + judgment_for_plaintiff"
                      for i in pre_treatment_months]

    for i, term in zip(pre_treatment_months, combined_terms):
        hypothesis = f"{term} = 0"
        f_test = result.f_test(hypothesis)
        point_estimates.append(result.params[f'month_{i}_X_treatment_indicator'] + result.params['judgment_for_plaintiff'])
        p_values.append(f_test.pvalue)

    # The joint hypothesis chains the twelve expressions with "=" and carries
    # no trailing "= 0"; it has no single point estimate, hence the NaN.
    # NOTE(review): presumably parsed as a chain of equalities between the
    # pre-period terms rather than "all equal zero" — confirm this is intended.
    joint_significance_hypothesis = "=".join(combined_terms)
    p_values.append(result.f_test(joint_significance_hypothesis).pvalue)
    point_estimates.append(np.nan)


In [9]:
# Assemble the pre-trends table: one two-column panel per treatment-date
# definition, separated by an empty spacer column.
#
# All three pieces share ONE index. Previously the joint-test row carried a
# different label in each panel ("... = \gamma_{-1}$" vs "... = \gamma_{-1} = 0$"),
# so the column-wise concat treated them as two distinct rows and produced a
# 14-row table with the joint-test p-values split across two half-empty rows.
# The label kept here is the one that matches the hypothesis string actually
# tested upstream (a chain of "=" with no trailing "= 0").
joint_test_label = "$\\gamma_{-12} = ... = \\gamma_{-1}$"
pretrends_index = pd.Index(list(range(-12, 0)) + [joint_test_label], name="$r$")

file_month_pretrends_test = pd.DataFrame({'$\\gamma_r + \\beta_0$ (1)': file_month_point_estimates,
                                          '\\textit{p} (2)': file_month_p_values},
                                         index=pretrends_index)
# Wrapping a single frame in concat(..., keys=[...]) adds the top-level column header.
file_month_pretrends_test = pd.concat([file_month_pretrends_test], axis=1, keys=["\\textit{File Date}"])

latest_docket_month_pretrends_test = pd.DataFrame({'$\\gamma_r + \\beta_0$ (3)': latest_docket_month_point_estimates,
                                                   '\\textit{p} (4)': latest_docket_month_p_values},
                                                  index=pretrends_index)
latest_docket_month_pretrends_test = pd.concat([latest_docket_month_pretrends_test], axis=1, keys=["\\textit{Latest Docket Date}"])

# Empty (all-NaN) column that visually separates the two panels in the LaTeX table.
spacer = pd.concat([pd.DataFrame(np.nan, index=pretrends_index, columns=[" "])], axis=1, keys=[""])

pretrends_test = pd.concat([file_month_pretrends_test, spacer, latest_docket_month_pretrends_test], axis=1)

In [10]:
# Render the pre-trends table to LaTeX and write it to OUTPUT_TABLES.
pretrends_styler = pretrends_test.style.format("{:,.2f}", na_rep=" ")
latex_rows = pretrends_styler.to_latex(None,
                                       column_format="lccccc",
                                       multicol_align='c',
                                       hrules=True,
                                       clines="skip-last;data").split("\\\\\n")

# Insert partial rules right after the first row terminator so that each
# panel header is underlined separately (columns 2-3 and 5-6).
latex_rows.insert(1, "\\cline{2-3}\\cline{5-6}\n")
latex = "\\\\\n".join(latex_rows)

filename = os.path.join(OUTPUT_TABLES, "pretrends_test.tex")
with open(filename, 'w') as file:
    file.write(latex)

## Unconditional DiD

In [63]:
# Reload the panel and prepare it for DiD. No pre-treatment covariates are
# requested because this specification is unconditional.
df = prepare_df_for_DiD(df=pd.read_csv(INPUT_DATA_PANEL),
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=[],
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Estimate ATT(g,t) with an outcome-only formula (no covariates), using
# never-treated units as the control group.
# NOTE: the result is stored in att_gt_all_crimes, the same name the
# covariate-adjusted cell below assigns to.
formula = f'{analysis}'
att_gt_all_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
att_gt_all_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot the estimates by event time over the configured pre/post window and save the figure.
fig, ax = plt.subplots(layout='constrained')
plot_window = dict(start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                   end_period=constants.Analysis.MAXIMUM_POST_PERIOD)
figure_utilities.aggregate_by_event_time_and_plot(att_gt_all_crimes, title="", ax=ax, **plot_window)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_unconditional_all_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:02<00:00, 404.45it/s] 


## Doubly Robust DiD Conditional on Covariates

In [65]:
# Reload the panel and prepare it for DiD, this time carrying the
# pre-treatment covariates returned by test_balance.
df = prepare_df_for_DiD(df=pd.read_csv(INPUT_DATA_PANEL),
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Estimate ATT(g,t) conditional on the covariates, with never-treated units as controls.
covariate_terms = '+'.join(pre_treatment_covariates)
formula = f'{analysis} ~ ' + covariate_terms
att_gt_all_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
att_gt_all_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot the estimates by event time over the configured pre/post window and save the figure.
fig, ax = plt.subplots(layout='constrained')
plot_window = dict(start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                   end_period=constants.Analysis.MAXIMUM_POST_PERIOD)
figure_utilities.aggregate_by_event_time_and_plot(att_gt_all_crimes, title="", ax=ax, **plot_window)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 305.22it/s]


## Heterogeneous Treatment Effects

In [66]:
# Split-sample estimates: for each neighbourhood characteristic, estimate the
# overall post-treatment ATT separately for properties below and above the
# sample median, then draw all six estimates with their pointwise bands.
point_estimates = []
ci_uppers = []
ci_lowers = []
pretrend_p_values = []
for variable in ['popdensity2010', 'share_white2010', 'poor_share2010']:
    # Start every iteration from an untouched copy of the panel.
    df = pd.read_csv(INPUT_DATA_PANEL)

    # Indicator equal to 1 for rows strictly above the median, 0 otherwise.
    above_median_indicator_name = f'above_median_{variable}'
    median = df[variable].median()
    df.loc[:, above_median_indicator_name] = 0
    df.loc[df[variable] > median, above_median_indicator_name] = 1

    # Replace the raw characteristic with its above-median indicator in the
    # covariate list handed to prepare_df_for_DiD.
    pre_treatment_covariates_minus_current_var = pre_treatment_covariates.copy()
    pre_treatment_covariates_minus_current_var.remove(variable)
    df = prepare_df_for_DiD(df=df,
                            analysis=analysis,
                            treatment_date_variable=treatment_date_variable,
                            pre_treatment_covariates=pre_treatment_covariates_minus_current_var + [above_median_indicator_name],
                            value_vars=weekly_value_vars_crime,
                            period_to_int_dictionary=month_to_int_dictionary)

    # Fit separately on each side of the median and aggregate to one overall
    # event-time estimate per sub-sample.
    att_gt_by_sample = ATTgt(data=df,
                             cohort_name=treatment_date_variable,
                             base_period='universal')
    att_gt_by_sample.fit(formula=f'{analysis} ~ relative_pre_treatment_change_in_{analysis}',
                         control_group='never_treated',
                         split_sample_by=above_median_indicator_name,
                         n_jobs=-1,
                         progress_bar=True)
    att_gt_by_sample = att_gt_by_sample.aggregate('event', overall=True)

    # Collect estimate and band bounds, below-median sample first, then above.
    for above_median in (0, 1):
        sample_row = f'above_median_{variable} = {above_median}'
        point_estimates.append(
            att_gt_by_sample.loc[sample_row, ("EventAggregationOverall", slice(None), "ATT")])
        ci_lowers.append(
            att_gt_by_sample.loc[sample_row, ("EventAggregationOverall", "pointwise conf. band", "lower")])
        ci_uppers.append(
            att_gt_by_sample.loc[sample_row, ("EventAggregationOverall", "pointwise conf. band", "upper")])


# One horizontal interval plus a dot per sub-sample, with a reference line at zero.
fig, ax = plt.subplots()
figure_utilities.plot_labeled_vline(ax, x=0, text="", color='black', linestyle='-')
for row_position, (band_lower, band_upper) in enumerate(zip(ci_lowers, ci_uppers)):
    ax.hlines(y=row_position, xmin=band_lower, xmax=band_upper, color='black')
row_positions = range(len(point_estimates))
ax.scatter(point_estimates, row_positions, color='black', s=7)

# Tick labels follow the order in which the estimates were appended.
sample_labels = [f"{side} median {characteristic}, 2010"
                 for characteristic in ("population density", "share white", "share below poverty line")
                 for side in ("Below", "Above")]
ax.set_yticks(ticks=row_positions, labels=sample_labels)
ax.set_ylabel("Sample Restriction")
ax.set_xlabel("Average Post-Treatment ATT")

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "heterogenous_effects.png"))

Computing ATTgt for above_median_popdensity2010 = 1 [workers=12]100%|████████████████████| 980/980 [00:01<00:00, 507.64it/s] 
Computing ATTgt for above_median_popdensity2010 = 0 [workers=12]100%|████████████████████| 980/980 [00:00<00:00, 1229.64it/s]
Computing ATTgt for above_median_share_white2010 = 0 [workers=12]100%|████████████████████| 980/980 [00:01<00:00, 498.83it/s] 
Computing ATTgt for above_median_share_white2010 = 1 [workers=12]100%|████████████████████| 980/980 [00:00<00:00, 1148.76it/s]
Computing ATTgt for above_median_poor_share2010 = 1 [workers=12]100%|████████████████████| 980/980 [00:01<00:00, 492.84it/s]
Computing ATTgt for above_median_poor_share2010 = 0 [workers=12]100%|████████████████████| 980/980 [00:00<00:00, 1145.43it/s]


## Calculating Treatment Effects Using Subset of Crimes as Outcome

In [67]:
# Switch the outcome to the group-1 subset of crimes at the main-results
# radius. NOTE: this rebinds the notebook-level `analysis`, value-variable
# list and month dictionaries for every cell that runs afterwards.
df = pd.read_csv(INPUT_DATA_PANEL)
analysis = f"group_1_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"
weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)
df = prepare_df_for_DiD(df=df,
                        analysis=analysis,
                        treatment_date_variable=treatment_date_variable,
                        pre_treatment_covariates=pre_treatment_covariates,
                        value_vars=weekly_value_vars_crime,
                        period_to_int_dictionary=month_to_int_dictionary)

# Estimate ATT(g,t) for the subset outcome, conditional on the covariates.
formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
att_gt_group_1_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
result = att_gt_group_1_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Side-by-side panels on a shared y-axis: all crimes (estimated in an earlier
# cell) on the left, the subset outcome on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, layout='constrained', sharey=True)
panels = [
    (att_gt_all_crimes, "All Crime Incidents as Outcome", ax1),
    (att_gt_group_1_crimes, "Subset of Crime Incidents as Outcome", ax2),
]
for estimates, panel_title, panel_ax in panels:
    figure_utilities.aggregate_by_event_time_and_plot(estimates, start_period=-5,
                                                      end_period=36,
                                                      title=panel_title, ax=panel_ax)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_group_1_crimes.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 309.43it/s]


## Alternative Radii

In [68]:
# Re-estimate the covariate-adjusted DiD at the main radius and at two wider
# radii, plot the three event-time paths side by side, and record for each
# radius the overall ATT, its analytic standard error and the mean 2017
# incident total. The lists created here are extended by the next cell and
# turned into a table afterwards.
fig, axes = plt.subplots(1, 3, layout='constrained', sharey=True)
radii = [constants.Analysis.MAIN_RESULTS_RADIUS, constants.Analysis.MAIN_RESULTS_RADIUS + 50, constants.Analysis.MAIN_RESULTS_RADIUS + 100]
atts = []
att_ses = []
twenty_seventeen_totals = []
att_labels = ['250m away', '300m away', '350m away', '250m and 350m away', '250m and 400m away']
for ax, radius in zip(axes, radii):
    df = pd.read_csv(INPUT_DATA_PANEL)
    # Use the 2017 total for every radius. The main-radius branch previously
    # read the 2019 column while the other radii read 2017, although the value
    # ends up in a table column labelled "Total Incidents, 2017".
    twenty_seventeen_totals.append(df[f'total_twenty_seventeen_group_0_crimes_{radius}m'].mean())

    if radius == constants.Analysis.MAIN_RESULTS_RADIUS:
        # Main radius: reuse the estimates fitted in the earlier DiD cell.
        current_att_gt = att_gt_all_crimes
    else:
        # Other radii: rebuild outcome names and covariates, then re-estimate.
        analysis = f"group_0_crimes_{radius}m"
        weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)
        _, pre_treatment_covariates = test_balance(df, analysis)

        df = prepare_df_for_DiD(df=df,
                                analysis=analysis,
                                treatment_date_variable=treatment_date_variable,
                                pre_treatment_covariates=pre_treatment_covariates,
                                value_vars=weekly_value_vars_crime,
                                period_to_int_dictionary=month_to_int_dictionary)
        # Run DiD conditional on covariates.
        current_att_gt = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
        formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
        current_att_gt.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

    figure_utilities.aggregate_by_event_time_and_plot(current_att_gt,
                                                      start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
                                                      end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
                                                      title=f"{radius} Meters", ax=ax)

    # Aggregate once and read both the point estimate and its standard error
    # from the same result instead of aggregating twice.
    overall_estimate = current_att_gt.aggregate('event', overall=True)['EventAggregationOverall']
    atts.append(overall_estimate['']['ATT'].iloc[0])
    att_ses.append(overall_estimate['analytic']['std_error'].iloc[0])


figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_alternative_radii.png"))

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 310.99it/s]
Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 312.48it/s]


In [69]:
# Append one estimate per robustness radius to the lists started in the
# alternative-radii cell (atts, att_ses, twenty_seventeen_totals).

# Keep the cell idempotent: drop anything beyond the entries contributed by
# `radii`, so re-running this cell replaces its own results instead of
# stacking duplicates (which would later break the assignment of att_labels).
del atts[len(radii):]
del att_ses[len(radii):]
del twenty_seventeen_totals[len(radii):]

for robustness_radius in constants.Analysis.ROBUSTNESS_RADII:
    df = pd.read_csv(INPUT_DATA_PANEL)
    # 2017 total, consistent with the other radii and with the table column
    # labelled "Total Incidents, 2017" (this cell previously read the 2019 column).
    twenty_seventeen_totals.append(df[f'total_twenty_seventeen_group_0_crimes_{robustness_radius}m'].mean())

    analysis = f"group_0_crimes_{robustness_radius}m"
    weekly_value_vars_crime, month_to_int_dictionary, int_to_month_dictionary = get_value_variable_names(df, analysis)
    _, pre_treatment_covariates = test_balance(df, analysis)

    df = prepare_df_for_DiD(df=df,
                            analysis=analysis,
                            treatment_date_variable=treatment_date_variable,
                            pre_treatment_covariates=pre_treatment_covariates,
                            value_vars=weekly_value_vars_crime,
                            period_to_int_dictionary=month_to_int_dictionary)

    # Run DiD conditional on covariates.
    att_gt_all_crimes_donut = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
    formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
    att_gt_all_crimes_donut.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

    # Aggregate once; read the point estimate and its analytic standard error
    # from the same result.
    overall_estimate = att_gt_all_crimes_donut.aggregate('event', overall=True)['EventAggregationOverall']
    atts.append(overall_estimate['']['ATT'].iloc[0])
    att_ses.append(overall_estimate['analytic']['std_error'].iloc[0])

Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 309.32it/s]
Computing ATTgt [workers=12]  100%|████████████████████| 980/980 [00:03<00:00, 310.89it/s]


In [70]:
# Build the magnitudes table: one row per radius label while assembling,
# transposed at the end so that radii become columns.
att_values = pd.Series(atts)
att_std_errors = pd.Series(att_ses)
# Mean 2017 totals rounded to two decimals; the percentage below is computed
# from these rounded values.
mean_totals = pd.Series(twenty_seventeen_totals).round(2)

# All columns are created as strings in a single step. This avoids writing
# strings into an existing float column through `.loc`, a dtype-changing
# assignment that recent pandas versions warn about.
atts_aggregated = pd.DataFrame({
    'Treatment Effect (S.E.)': att_values.round(2).astype(str) + " " + "(" + att_std_errors.round(2).astype(str) + ")",
    'Total Incidents, 2017 (Mean Property)': mean_totals.astype(str),
    # "\\%" spells the same runtime text as before (backslash + percent)
    # without relying on the invalid escape sequence "\%".
    # The leading minus sign reports the effect with its sign flipped.
    'Treatment Effect as \\% of Mean': (-100 * (att_values / mean_totals)).round(2).astype(str),
})
atts_aggregated.index = att_labels
atts_aggregated = atts_aggregated.T

In [71]:
# Split the transposed table into its two column groups and give each a
# top-level header: wrapping a single frame in pd.concat(..., keys=[...])
# adds the key as an outer column level.
main_results_columns = ['250m away', '300m away', '350m away']
robustness_columns = ['250m and 350m away', '250m and 400m away']
main_estimates = pd.concat([atts_aggregated[main_results_columns]], axis=1, keys=["Crimes Less Than"])
robustness_estimates = pd.concat([atts_aggregated[robustness_columns]], axis=1, keys=["Crimes Between"])


In [72]:
# Blank single-space column (three rows, same index as the estimates) placed
# between the two column groups, then reassemble the full table.
spacer = pd.concat([pd.DataFrame([[" "], [" "], [" "]], index=main_estimates.index, columns=[" "])], axis=1, keys=[""])
atts_aggregated = pd.concat([main_estimates, spacer, robustness_estimates], axis=1)

In [73]:
# Render the magnitudes table to LaTeX and write it to OUTPUT_TABLES.
# Top-level column headers are italicised; "{*}" in the rendered text is
# replaced by a fixed ".75cm" width.
magnitudes_styler = atts_aggregated.style.format_index("\\textit{{{}}}", escape="latex", axis=1, level=0)
rendered_table = magnitudes_styler.to_latex(None,
                                            column_format="lcccccc",
                                            hrules=True,
                                            multicol_align='c',
                                            clines="skip-last;data")
latex_rows = rendered_table.replace("{*}", "{.75cm}").split("\\\\\n")

# Insert partial rules after the first row terminator so each column group
# header is underlined separately (columns 2-4 and 6-7).
latex_rows.insert(1, "\\cline{2-4}\\cline{6-7}\n")
latex = "\\\\\n".join(latex_rows)

with open(os.path.join(OUTPUT_TABLES, "magnitudes_summary.tex"), 'w') as file:
    file.write(latex)