# Setup

In [71]:
import contextily as cx
import figure_utilities
import constants
from constants.Analysis import MINIMUM_PRE_PERIOD, MINIMUM_POST_PERIOD
from stats_utilities import produce_summary_statistics, select_controls, test_balance
import geopandas as gpd
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
import numpy as np
plt.rcParams["figure.dpi"] = 300
plt.rcParams['savefig.dpi'] = 300
import os
from differences import ATTgt
import pandas as pd

In [72]:
# Store paths.
# All paths are relative, so they resolve against the notebook's working directory.
# Input: cleaned monthly crime panel; every analysis section below reads a fresh copy of it.
INPUT_DATA_PANEL = "../../data/03_cleaned/crime_analysis_monthly.parquet"
# Inputs: tract-level CSV and (per the constant name) the Census 2010 tracts shapefile.
# NOTE(review): neither of these two constants is referenced in the visible cells — confirm
# whether they are still needed.
INPUT_DATA_TRACTS = "../../data/02_intermediate/tracts.csv"
INPUT_DATA_BOSTON_TRACTS_SHAPEFILE = "../../data/01_raw/Census_2010_Tracts"
# Output directories handed to the table helpers and used for saved figures.
OUTPUT_TABLES = "../../output/final_paper/tables"
OUTPUT_FIGURES = "../../output/final_paper/figures"

# Main Results

In [None]:
# Settings for the main analysis: the outcome that appears on the left-hand side of
# the DiD formula, and the column that holds each unit's treatment date.
analysis = 'group_0_crimes_500m'
treatment_date_variable = 'latest_docket_month'

# Load a fresh, unrestricted copy of the panel from disk.
df = pd.read_parquet(path=INPUT_DATA_PANEL)

In [None]:
# Build the list of value variables for this analysis together with the
# month -> integer and integer -> month lookup dictionaries.
# NOTE(review): the list is named "weekly" although the panel file is the monthly one;
# the name is kept because later cells refer to it.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

In [None]:
# Test for confounders: explore candidate control variables for the main analysis.
# The tables directory is passed along, presumably so the helper can save its output there.
select_controls_arguments = {
    'df': df,
    'analysis': analysis,
    'treatment_date_variable': treatment_date_variable,
    'output_directory': OUTPUT_TABLES,
}
covariates_exploration_df = select_controls(**select_controls_arguments)
covariates_exploration_df


In [None]:
# Re-balance on controls: run the balance test using the covariate exploration results.
# The call yields the balance table and the list of pre-treatment covariates that the
# DiD formula is built from in the next cell.
balance_results = test_balance(df, analysis, covariates_exploration_df, OUTPUT_TABLES)
balance_table, pre_treatment_covariates = balance_results
balance_table


In [None]:
# Prepare the panel for use with the differences package.
# The prepared frame is bound to its own name instead of being written back to `df`.
# `df` was loaded in an earlier cell, so overwriting it here made this cell
# non-idempotent: a second run (or a re-run of the balance cells above) would have
# operated on the already reset/prepared frame instead of the raw panel.
did_df_all_crimes = prepare_df_for_DiD(df=df.reset_index(),
                                       analysis=analysis,
                                       treatment_date_variable=treatment_date_variable,
                                       pre_treatment_covariates=pre_treatment_covariates,
                                       missing_indicators=[],
                                       value_vars=weekly_value_vars_crime,
                                       period_to_int_dictionary=month_to_int_dictionary)

# Run DiD conditional on covariates, using never-treated units as the control group.
# `att_gt_all_crimes` is kept at notebook scope because the Mechanism Test section
# plots it again next to the placebo estimates.
att_gt_all_crimes = ATTgt(data=did_df_all_crimes, cohort_name=treatment_date_variable, base_period='universal')
formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
result = att_gt_all_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot D.R. ATT(t-g)s on a long horizon (event times MINIMUM_PRE_PERIOD through 36).
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(att_gt_all_crimes, start_period=MINIMUM_PRE_PERIOD,
                                                  end_period=36,
                                                  title="", ax=ax)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes.png"))

# Treatment Effects Heterogeneity

In [None]:
# Check for heterogeneous treatment effects.
# Each splitting covariate is stored next to the phrase used in its y-axis tick labels,
# so the labels are generated from the same source as the list of variables and cannot
# drift out of sync with it.
subsample_label_phrases = {
    'poor_share2010': "poverty rate, 2010",
    'share_white2010': "share white, 2010",
    'popdensity2010': "population density, 2010",
}
subsample_variables = list(subsample_label_phrases)
# Read fresh copy of unrestricted dataset into memory.
df = pd.read_parquet(INPUT_DATA_PANEL)
treatment_date_variable = 'latest_docket_month'  # Store treatment date variable.
analysis = 'group_0_crimes_500m'
point_estimates = []
ci_uppers = []
ci_lowers = []
subsample_tick_labels = []


def _estimate_overall_att(subsample, analysis, treatment_date_variable):
    """Run the covariate-conditional DiD pipeline on one subsample.

    The steps mirror the main analysis: build the value-variable list, select controls,
    test balance, prepare the frame for the differences package, fit ATTgt against
    never-treated units, and aggregate by event time with ``overall=True``.

    Parameters
    ----------
    subsample : pandas.DataFrame
        Restricted copy of the panel. It is not written back to the caller's frame.
    analysis : str
        Name of the outcome used on the left-hand side of the formula.
    treatment_date_variable : str
        Column holding the treatment date; passed to ATTgt as ``cohort_name``.

    Returns
    -------
    tuple
        ``(point_estimate, ci_lower, ci_upper)``, each rounded to two decimals, read from
        positions 0, 2 and 3 of the first row of the 'EventAggregationOverall' table.
    """
    value_vars, period_to_int_dictionary, _ = get_value_variable_names(subsample, analysis)
    covariates_exploration = select_controls(df=subsample, analysis=analysis,
                                             treatment_date_variable=treatment_date_variable)
    _, covariates = test_balance(subsample, analysis, covariates_exploration)
    did_df = prepare_df_for_DiD(df=subsample.reset_index(),
                                analysis=analysis,
                                treatment_date_variable=treatment_date_variable,
                                pre_treatment_covariates=covariates,
                                missing_indicators=[],
                                value_vars=value_vars,
                                period_to_int_dictionary=period_to_int_dictionary)
    # Run DiD conditional on covariates.
    att_gt = ATTgt(data=did_df, cohort_name=treatment_date_variable, base_period='universal')
    att_gt.fit(formula=f'{analysis} ~ ' + '+'.join(covariates),
               control_group='never_treated', n_jobs=-1, progress_bar=True)
    overall = att_gt.aggregate('event', overall=True)['EventAggregationOverall']
    return (round(overall.iloc[0, 0], 2),
            round(overall.iloc[0, 2], 2),
            round(overall.iloc[0, 3], 2))


for subsample_variable in subsample_variables:
    # Compute the median once per variable. The inequalities are strict, so rows exactly
    # at the median fall in neither subsample.
    median = df[subsample_variable].median()
    sides = (("Below", df[subsample_variable] < median),
             ("Above", df[subsample_variable] > median))
    for side, in_subsample in sides:
        point_estimate, ci_lower, ci_upper = _estimate_overall_att(df[in_subsample].copy(),
                                                                   analysis,
                                                                   treatment_date_variable)
        point_estimates.append(point_estimate)
        ci_lowers.append(ci_lower)
        ci_uppers.append(ci_upper)
        subsample_tick_labels.append(f"{side} median {subsample_label_phrases[subsample_variable]}")

# One row per subsample: a horizontal confidence interval with the point estimate marked,
# and a vertical reference line at zero.
fig, ax = plt.subplots()
figure_utilities.plot_labeled_vline(ax, x=0, text="", color='black', linestyle='-')

for i, (ci_lower, ci_upper) in enumerate(zip(ci_lowers, ci_uppers)):
    ax.hlines(y=i, xmin=ci_lower, xmax=ci_upper, color='black')
ax.scatter(point_estimates, range(len(point_estimates)), color='black', s=7)
ax.set_yticks(ticks=range(len(point_estimates)), labels=subsample_tick_labels)
ax.set_ylabel("Sample Restriction")
ax.set_xlabel("Average Post-Treatment ATT")

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "heterogenous_effects.png"))


# Mechanism Test

In [None]:

# Mechanism test: repeat the main pipeline with 'group_1_crimes_500m' as the outcome and
# plot its event-time estimates beside those for all crimes.
# NOTE: this cell reuses `att_gt_all_crimes`, which is fitted in the Main Results section,
# so that section has to be run before this one.
analysis = 'group_1_crimes_500m'
treatment_date_variable = 'latest_docket_month'  # Column holding the treatment date.

# Load a fresh, unrestricted copy of the panel.
df = pd.read_parquet(INPUT_DATA_PANEL)

# Value-variable list plus the month <-> integer lookup dictionaries.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

# Explore candidate controls. No output directory is passed for this test.
covariates_exploration_df = select_controls(df=df,
                                            analysis=analysis,
                                            treatment_date_variable=treatment_date_variable)
# Mid-cell bare expression: with IPython's default settings only a cell's last
# expression is displayed, so this line shows nothing.
covariates_exploration_df

balance_results = test_balance(df, analysis, covariates_exploration_df)
balance_table, pre_treatment_covariates = balance_results
balance_table  # Same as above: not displayed under default settings.

# Get the panel into the form expected by the differences package.
did_preparation_arguments = {
    'analysis': analysis,
    'treatment_date_variable': treatment_date_variable,
    'pre_treatment_covariates': pre_treatment_covariates,
    'missing_indicators': [],
    'value_vars': weekly_value_vars_crime,
    'period_to_int_dictionary': month_to_int_dictionary,
}
df = prepare_df_for_DiD(df=df.reset_index(), **did_preparation_arguments)

# Fit the covariate-conditional DiD with never-treated units as controls.
formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
att_gt_placebo_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
result = att_gt_placebo_crimes.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Two panels sharing a y-axis: all crimes on the left, the placebo outcome on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, layout='constrained', sharey=True)
event_study_panels = (
    (att_gt_all_crimes, "All Crime Incidents as Outcome", ax1),
    (att_gt_placebo_crimes, "Subset of Crime Incidents as Outcome", ax2),
)
for fitted_att_gt, panel_title, panel_ax in event_study_panels:
    figure_utilities.aggregate_by_event_time_and_plot(fitted_att_gt,
                                                      start_period=MINIMUM_PRE_PERIOD,
                                                      end_period=36,
                                                      title=panel_title,
                                                      ax=panel_ax)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_placebo_crimes.png"))


# Warm vs. Cold Months Test

In [None]:
# Estimation on Cases Concluding During Warm vs. Cold Months
#
# The same covariate-conditional DiD pipeline is run twice, once on cases whose
# treatment month falls in the warm-month list and once on those in the cold-month list.
# The two sets of event-time estimates are then plotted side by side.

treatment_date_variable = 'latest_docket_month'  # Store treatment date variable.
analysis = 'group_0_crimes_500m'

warm_months = ['2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10']
# NOTE(review): this list contains '2019-04', while the cold-months panel title below reads
# "November 2019-April 2020". Confirm which is intended (title vs. '2020-04').
cold_months = ['2019-04', '2019-11', '2019-12', '2020-01', '2020-02', '2020-03']

# Read fresh copy of unrestricted dataset into memory; each run below works on its own
# filtered copy, so this frame is left unmodified.
df = pd.read_parquet(INPUT_DATA_PANEL)


def _fit_att_gt_for_months(panel, months, analysis, treatment_date_variable):
    """Fit the covariate-conditional DiD on cases whose treatment month is in ``months``.

    Parameters
    ----------
    panel : pandas.DataFrame
        Unrestricted panel. It is filtered into a copy and never modified.
    months : list of str
        Values of ``treatment_date_variable`` to keep.
    analysis : str
        Name of the outcome used on the left-hand side of the formula.
    treatment_date_variable : str
        Column holding the treatment date; also passed to ATTgt as ``cohort_name``.

    Returns
    -------
    ATTgt
        The fitted estimator, ready for aggregation and plotting.
    """
    # Copy the filtered slice so downstream helpers never operate on a view of `panel`.
    subsample = panel.loc[panel[treatment_date_variable].isin(months), :].copy()

    # Generate value variables list and dictionaries mapping between months and integers.
    value_vars, period_to_int_dictionary, _ = get_value_variable_names(subsample, analysis)

    covariates_exploration = select_controls(df=subsample, analysis=analysis,
                                             treatment_date_variable=treatment_date_variable)
    _, covariates = test_balance(subsample, analysis, covariates_exploration)

    # Prepare df for use with the differences package.
    did_df = prepare_df_for_DiD(df=subsample.reset_index(),
                                analysis=analysis,
                                treatment_date_variable=treatment_date_variable,
                                pre_treatment_covariates=covariates,
                                missing_indicators=[],
                                value_vars=value_vars,
                                period_to_int_dictionary=period_to_int_dictionary)

    # Run DiD conditional on covariates.
    att_gt = ATTgt(data=did_df, cohort_name=treatment_date_variable, base_period='universal')
    att_gt.fit(formula=f'{analysis} ~ ' + '+'.join(covariates),
               control_group='never_treated', n_jobs=-1, progress_bar=True)
    return att_gt


att_gt_warm = _fit_att_gt_for_months(df, warm_months, analysis, treatment_date_variable)
att_gt_cold = _fit_att_gt_for_months(df, cold_months, analysis, treatment_date_variable)

# Plot D.R. ATT(t-g)s for cases concluding in warm months next to those concluding in cold months.
fig, (ax1, ax2) = plt.subplots(1, 2, layout='constrained', sharey=True)

figure_utilities.aggregate_by_event_time_and_plot(att_gt_warm, start_period=MINIMUM_PRE_PERIOD,
                                                  end_period=36,
                                                  title="Cases Concluding in\nMay 2019-October 2019", ax=ax1)
figure_utilities.aggregate_by_event_time_and_plot(att_gt_cold, start_period=MINIMUM_PRE_PERIOD,
                                                  end_period=36,
                                                  title="Cases Concluding in\nNovember 2019-April 2020", ax=ax2)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_temperature.png"))


In [None]:
# Robustness
#
# Repeat the main pipeline with 'group_0_crimes_250m' as the outcome.
# NOTE(review): `OUTPUT_TABLES` is passed to the table helpers here exactly as in the
# Main Results section. If the helpers use fixed file names, this run replaces the
# main-analysis tables — confirm.
analysis = 'group_0_crimes_250m'
treatment_date_variable = 'latest_docket_month'  # Column holding the treatment date.

# Load a fresh, unrestricted copy of the panel.
df = pd.read_parquet(INPUT_DATA_PANEL)

# Value-variable list plus the month <-> integer lookup dictionaries.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

select_controls_arguments = {
    'df': df,
    'analysis': analysis,
    'treatment_date_variable': treatment_date_variable,
    'output_directory': OUTPUT_TABLES,
}
covariates_exploration_df = select_controls(**select_controls_arguments)
# Mid-cell bare expression: with IPython's default settings only a cell's last
# expression is displayed, so this line shows nothing.
covariates_exploration_df

balance_results = test_balance(df, analysis, covariates_exploration_df, OUTPUT_TABLES)
balance_table, pre_treatment_covariates = balance_results
balance_table  # Same as above: not displayed under default settings.

# Get the panel into the form expected by the differences package.
did_preparation_arguments = {
    'analysis': analysis,
    'treatment_date_variable': treatment_date_variable,
    'pre_treatment_covariates': pre_treatment_covariates,
    'missing_indicators': [],
    'value_vars': weekly_value_vars_crime,
    'period_to_int_dictionary': month_to_int_dictionary,
}
df = prepare_df_for_DiD(df=df.reset_index(), **did_preparation_arguments)

# Fit the covariate-conditional DiD with never-treated units as controls.
# NOTE(review): the estimator keeps its original name `att_gt_non_payment`, although this
# cell varies the outcome (250m) rather than a non-payment restriction.
formula = f'{analysis} ~ ' + '+'.join(pre_treatment_covariates)
att_gt_non_payment = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
result = att_gt_non_payment.fit(formula=formula, control_group='never_treated', n_jobs=-1, progress_bar=True)

# Plot D.R. ATT(t-g)s over event times MINIMUM_PRE_PERIOD through 36.
fig, ax = plt.subplots(layout='constrained')
event_study_plot_arguments = {
    'start_period': MINIMUM_PRE_PERIOD,
    'end_period': 36,
    'title': "",
    'ax': ax,
}
figure_utilities.aggregate_by_event_time_and_plot(att_gt_non_payment, **event_study_plot_arguments)

figure_utilities.save_figure_and_close(fig, os.path.join(OUTPUT_FIGURES, "att_gt_dr_250m.png"))
