In [14]:
import statsmodels.formula.api as smf
from differences import ATTgt
from differences.did.pscore_cal import pscore_mle
from matplotlib import pyplot as plt

import figure_and_table_constants
import figure_utilities
from analysis_utilities import produce_summary_statistics, aggregate_by_time_and_plot, aggregate_by_event_time_and_plot

# Render inline figures and saved figures at the same resolution (300 DPI).
plt.rcParams.update({"figure.dpi": 300, 'savefig.dpi': 300})
import numpy as np
import os
import pandas as pd

# Store paths.
# Every path is derived from a single project root so the notebook can run on another machine:
# set the SENIORTHESIS_ROOT environment variable to point at a different checkout. When the
# variable is unset, the original location is used.
PROJECT_ROOT = os.environ.get("SENIORTHESIS_ROOT",
                              "/Users/arjunshanmugam/Documents/GitHub/seniorthesis")
INPUT_DATA = os.path.join(PROJECT_ROOT, "data", "03_cleaned", "crime_analysis.csv")
OUTPUT_FIGURES = os.path.join(PROJECT_ROOT, "output", "DiD_crime", "figures")
OUTPUT_TABLES = os.path.join(PROJECT_ROOT, "output", "DiD_crime", "tables")

# Read restricted sample panel dataset into memory.
df = pd.read_csv(INPUT_DATA)

In [15]:
# Build the "YYYY-MM" month labels for the panel, rename the monthly crime columns to those labels,
# and create dictionaries which map month labels to 1-based integers and back.
years = list(map(str, range(2015, 2023)))
months = [f"{month:02d}" for month in range(1, 13)]
# Skip the first five months of 2015 (the labels start at 2015-06) and extend through 2023-01.
value_vars = [f"{year}-{month}" for year in years for month in months][5:] + ['2023-01']
value_vars_crime = [f"{value_var}_crimes" for value_var in value_vars]
# Drop the "_crimes" suffix so each monthly column is named by its month label alone.
name_change_dictionary = dict(zip(value_vars_crime, value_vars))
df = df.rename(columns=name_change_dictionary)
month_to_int_dictionary = {label: position for position, label in enumerate(value_vars, start=1)}
int_to_month_dictionary = dict(enumerate(value_vars, start=1))

In [None]:
# Choose covariates to include in D.R. model.
# For each candidate pre-treatment covariate, regress (a) the outcome and (b) the treatment on that
# covariate alone and record the covariate's p-value from each regression. The resulting two-column
# table of p-values is written to a LaTeX file and displayed.
# Column names of the outcome and the treatment used in the screening regressions below.
outcome_variable = 'final_month_of_panel_zestimate'
independent_variable = 'judgment_for_plaintiff'
# Run produce summary statistics on the DataFrame to add pre-treatment covariate columns.
summary_statistics, variable_display_names_dict = produce_summary_statistics(df, 'file_date')
variable_display_names_dict['twenty_seventeen'] = 'Zestimate, Jan. 2017'
variable_display_names_dict['twenty_eighteen'] = 'Zestimate, Jan. 2018'
pre_treatment_panels = ["Panel A: Pre-treatment Zestimates",
                        "Panel B: Census Tract Characteristics",
                        "Panel C: Case Initiation",
                        "Panel D: Defendant and Plaintiff Characteristics"]
# Keep only the rows belonging to the pre-treatment panels; the second index level holds the
# variable names that are used as regressors below.
summary_statistics = summary_statistics.loc[pre_treatment_panels, :]
potential_covariates = summary_statistics.index.get_level_values(1)
p_values = []
# Create alias column for patsy: the formula parser cannot take a bare column name such as
# '2022-12', which it would read as arithmetic.
# NOTE(review): the alias and the table header below say "Zestimate", but in this notebook
# '2022-12' appears to be the renamed '2022-12_crimes' column -- confirm the intended labels.
df.loc[:, outcome_variable] = df.loc[:, '2022-12']
for potential_covariate in potential_covariates:
    # Get p-value from regression of outcome on covariates.
    p_y = (smf.ols(formula=f"{outcome_variable} ~ {potential_covariate}",
                   data=df,
                   missing='drop')
           .fit().pvalues.loc[potential_covariate])
    # Get p-value from regression of treatment on covariates.
    p_x = (smf.ols(formula=f"{independent_variable} ~ {potential_covariate}",
                   data=df,
                   missing='drop')
           .fit().pvalues.loc[potential_covariate])
    p_values.append((p_y, p_x))
# One row per candidate covariate (same order as summary_statistics), one column per regression.
covariate_exploration_df = (pd.DataFrame(p_values,
                                         columns=["Zestimate, Dec. 2022", "Plaintiff victory"],
                                         index=summary_statistics.index))
# Add a top-level column header spanning both p-value columns.
covariate_exploration_df = pd.concat([covariate_exploration_df], axis=1, keys=['Dependent Variable'])
covariate_exploration_df.index = covariate_exploration_df.index.set_names(['',
                                                                           '\\emph{Independent Variable}'])
# Export to LaTeX.
filename = os.path.join(OUTPUT_TABLES, "pre_treatment_covariate_tests.tex")
latex = (covariate_exploration_df
         .rename(index=variable_display_names_dict)
         .style
         .format(formatter="{:0.2f}")
         .format_index("\\textit{{{}}}", escape="latex", axis=0, level=0)
         .format_index("\\textit{{{}}}", escape="latex", axis=1, level=0)
         .to_latex(None,
                   column_format="llcc",
                   hrules=True,
                   multicol_align='c',
                   clines="skip-last;data")
         .replace("{*}", "{3cm}"))  # Swap the "{*}" width placeholder in the generated LaTeX for 3cm.
# Insert a partial rule under columns 3-4 right after the first table row (the spanning header).
latex = latex.split("\\\\\n")
latex.insert(1, "\\cline{3-4}\n")
latex = "\\\\\n".join(latex)
with open(filename, 'w') as file:
    file.write(latex)
covariate_exploration_df