# Setup

In [1]:
# Imports used by the notebook. The original import order is preserved on purpose.
# NOTE(review): `cx`, `gpd`, `np` and `produce_summary_statistics` are not referenced in the
# cells visible here — confirm they are used further down before pruning anything.
import contextily as cx
import figure_utilities
import constants
from stats_utilities import produce_summary_statistics, select_controls, test_balance
import geopandas as gpd
import matplotlib.pyplot as plt
from panel_utilities import get_value_variable_names, prepare_df_for_DiD
import numpy as np
# Default resolution (dots per inch) applied when figures are saved to disk.
plt.rcParams['savefig.dpi'] = 300
import os
from differences import ATTgt

import pandas as pd


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Relative file-system locations shared by the cells below.
OUTPUT_FIGURES = "../output/final_paper/figures"  # directory that saved figures are written into
OUTPUT_TABLES = "../output/final_paper/tables"  # directory handed to the table-producing helpers
INPUT_DATA_PANEL = "../data/03_cleaned/crime_analysis_monthly.parquet"  # cleaned monthly panel

# Main Results

In [3]:
# Name of the treatment-date variable; the cells below pass it to the helper functions and to ATTgt.
treatment_date_variable = 'latest_docket_month'
# Load the unrestricted panel from disk so this section starts from an unmodified copy.
df = pd.read_parquet(INPUT_DATA_PANEL)


In [4]:
# Name of the analysis (it is also the left-hand side of the DiD formula further down),
# parameterised by the main-results radius constant.
analysis = f"group_0_crimes_{constants.Analysis.MAIN_RESULTS_RADIUS}m"
# Retrieve the value-variable names together with the two month <-> integer lookup dictionaries.
# NOTE(review): `weekly_value_vars_crime` says "weekly", yet the panel file and the lookup
# dictionaries are monthly; the name is kept because a later cell references it.
(weekly_value_vars_crime,
 month_to_int_dictionary,
 int_to_month_dictionary) = get_value_variable_names(df, analysis)

In [5]:
# Screen candidate covariates for confounding; OUTPUT_TABLES is passed as the helper's output directory.
covariates_exploration_df = select_controls(
    df=df,
    analysis=analysis,
    treatment_date_variable=treatment_date_variable,
    output_directory=OUTPUT_TABLES,
)
covariates_exploration_df  # bare last expression: rendered as the cell's output


Unnamed: 0_level_0,Unnamed: 1_level_0,Dependent Variable,Dependent Variable
Unnamed: 0_level_1,Unnamed: 1_level_1,"Change in Crime Incidents, April 2019-March 2020",Treated Property
Unnamed: 0_level_2,\emph{Independent Variable},Unnamed: 2_level_2,Unnamed: 3_level_2
Panel A: Pre-treatment Outcomes,total_twenty_seventeen_group_0_crimes_250m,0.4156413,0.4017521
Panel A: Pre-treatment Outcomes,pre_treatment_change_in_group_0_crimes_250m,5.568113e-149,0.006851633
Panel A: Pre-treatment Outcomes,relative_pre_treatment_change_in_group_0_crimes_250m,7.324643000000001e-255,0.195962
Panel B: Census Tract Characteristics,frac_coll_plus2010,0.9648084,0.8133641
Panel B: Census Tract Characteristics,job_density_2013,8.721894e-08,0.5501121
Panel B: Census Tract Characteristics,med_hhinc2016,0.3649106,0.01427126
Panel B: Census Tract Characteristics,poor_share2010,0.04202086,0.004904136
Panel B: Census Tract Characteristics,popdensity2010,7.874144e-05,0.6737331
Panel B: Census Tract Characteristics,share_white2010,0.04545321,0.3450948
Panel C: Case Initiation,for_cause,0.9352558,0.005801068


In [6]:
# Re-check balance on the explored covariates. The helper hands back the balance table and the
# covariate names that are later joined into the DiD formula.
(balance_table,
 pre_treatment_covariates) = test_balance(df, analysis, covariates_exploration_df, OUTPUT_TABLES)
balance_table  # bare last expression: rendered as the cell's output


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant,Difference in Cases Won by Defendant
Unnamed: 0_level_1,Unnamed: 1_level_1,Cases Won by Plaintiff,Unweighted,\emph{p},Weighted,\emph{p}.1
Panel A,"Total Crime Incidents, 2017",338.181152,-10.547731,0.4017521,-15.417696,0.233555
Panel A,"Change in Crime Incidents, 2017-2019",-50.048168,-15.579503,0.006851633,1.208509,0.831152
Panel A,relative_pre_treatment_change_in_group_0_crimes_250m,-4.663874,-1.688398,0.195962,0.097806,0.941432
Panel B,"Bachelor's degree, 2010",0.316407,0.002512,0.8133641,-0.01365,0.20754
Panel B,"Job density, 2013",16161.032515,-1272.661768,0.5501121,-784.905782,0.720271
Panel B,"Median household income, 2016",47553.063874,2997.870414,0.01427126,-1887.383158,0.101952
Panel B,"Poverty rate, 2010",0.279638,-0.02103,0.004904136,-0.013519,0.077249
Panel B,"Population density, 2010",23320.185868,-297.766413,0.6737331,-1036.271859,0.152531
Panel B,"Share white, 2010",0.319421,0.012446,0.3450948,-0.013159,0.322919
Panel C,Filing for cause,0.132984,0.043066,0.005801068,-0.003107,0.839442


In [7]:
# Move the index back into ordinary columns, then reshape the panel for the `differences` package.
# NOTE(review): `df` is rebound here, so re-running this cell without first re-running the
# load cell applies reset_index()/prepare_df_for_DiD a second time.
df = df.reset_index()
df = prepare_df_for_DiD(
    df=df,
    analysis=analysis,
    treatment_date_variable=treatment_date_variable,
    pre_treatment_covariates=pre_treatment_covariates,
    missing_indicators=[],
    value_vars=weekly_value_vars_crime,
    period_to_int_dictionary=month_to_int_dictionary,
)

# Estimate the DiD conditional on covariates: outcome on the left, covariates joined by '+'.
formula = f"{analysis} ~ {'+'.join(pre_treatment_covariates)}"
att_gt_all_crimes = ATTgt(data=df, cohort_name=treatment_date_variable, base_period='universal')
result = att_gt_all_crimes.fit(
    formula=formula,
    control_group='never_treated',
    n_jobs=-1,
    progress_bar=True,
)

# Plot D.R. ATT(t-g)s aggregated by event time, from the configured minimum pre-period
# through the configured maximum post-period.
fig, ax = plt.subplots(layout='constrained')
figure_utilities.aggregate_by_event_time_and_plot(
    att_gt_all_crimes,
    start_period=constants.Analysis.MINIMUM_PRE_PERIOD,
    end_period=constants.Analysis.MAXIMUM_POST_PERIOD,
    title="",
    ax=ax,
)

# Hand the figure and its destination path under OUTPUT_FIGURES to the save-and-close helper.
figure_utilities.save_figure_and_close(
    fig,
    os.path.join(OUTPUT_FIGURES, "att_gt_dr_all_crimes.png"),
)

Computing ATTgt [workers=50]  100%|████████████████████| 1176/1176 [00:21<00:00, 55.56it/
