In [1]:
import numpy as np
import pandas as pd
from scipy import stats

from src.boot import boot_sample, compute_treatment_effects, aggregate_treatment_effects

In [None]:
def bootstrap_ATT(B, g, delta, your_data, unit_id, time, alpha=0.05):
    """
    Bootstrap confidence band and parameter standard errors for the ATT.

    B: Number of bootstrap samples (must be >= 1)
    g: Parameter g (accepted for interface compatibility; not used here)
    delta: Parameter delta (accepted for interface compatibility; not used here)
    your_data: The data you are working with; must support your_data[unit_id]
    unit_id: Column name for unit IDs
    time: Column name for time
    alpha: Significance level of the band (default 0.05, i.e. the 95th
        percentile of the bootstrap t-statistics is used as critical value)

    Returns (confidence_band, param_std_errors) where confidence_band is
    [upper, lower] (upper bound first) and param_std_errors is the square
    root of the diagonal of the covariance matrices averaged over the
    bootstrap draws.

    Raises ValueError if B < 1, alpha is outside (0, 1), or the data has no units.

    NOTE(review): `compute_ATT` is not defined or imported in the visible
    notebook cells; it must be in scope before this function is called. It is
    assumed to take one data argument and return (scalar_estimate, cov_matrix)
    -- confirm against its real signature.
    """
    if B < 1:
        raise ValueError("B must be a positive number of bootstrap samples")
    if not 0 < alpha < 1:
        raise ValueError("alpha must lie strictly between 0 and 1")

    # n for the sqrt(n) scaling is taken as the number of distinct units.
    n = np.unique(np.asarray(your_data[unit_id])).size
    if n == 0:
        raise ValueError("your_data contains no units")
    root_n = np.sqrt(n)

    # Point estimate on the original sample; it centres both the bootstrap
    # statistics and the final band.
    ATT_hat, _ = compute_ATT(your_data)

    R_bootstrap_results = []
    param_cov_matrices = []  # covariance matrix from each bootstrap draw
    for _ in range(B):
        # Step 1: draw the bootstrap sample
        bs_data = boot_sample(your_data, unit_id, time)

        # Step 2: re-estimate on the bootstrap sample
        ATT_hat_star, cov_matrix = compute_ATT(bs_data)
        param_cov_matrices.append(cov_matrix)

        # Step 3: centred and scaled bootstrap statistic R_hat_star
        R_bootstrap_results.append(root_n * (ATT_hat_star - ATT_hat))

    # Step 4: bootstrap estimator of Sigma^(1/2) -- interquartile range of the
    # bootstrap statistics divided by the interquartile range of N(0, 1)
    q_25, q_75 = np.percentile(R_bootstrap_results, [25, 75])
    z_25, z_75 = stats.norm.ppf([0.25, 0.75])
    sigma_half_hat = (q_75 - q_25) / (z_75 - z_25)

    # Steps 5 and 6: t-statistics and their empirical (1 - alpha) quantile.
    # A zero scale means the bootstrap draws have no interquartile spread, so
    # the band collapses onto the point estimate instead of dividing by zero.
    if sigma_half_hat == 0:
        half_width = 0.0
    else:
        t_tests = np.abs(R_bootstrap_results) / sigma_half_hat
        c_1_alpha = np.percentile(t_tests, 100 * (1 - alpha))
        half_width = c_1_alpha * sigma_half_hat / root_n

    # Step 7: confidence band, upper bound first (order kept for callers)
    confidence_band = [ATT_hat + half_width, ATT_hat - half_width]

    # Standard errors from the diagonal of the bootstrap-averaged covariance
    avg_param_cov_matrix = np.mean(param_cov_matrices, axis=0)
    param_std_errors = np.sqrt(np.diag(avg_param_cov_matrix))

    return confidence_band, param_std_errors

In [None]:
# Panel dimensions for the simulated data set
n_units = 10
n_time = 10
n_cohorts = 3  # NOTE(review): not referenced below; the cohort labels are hard-coded
n_obs = n_units * n_time  # one row per (unit, period) pair

# Seed the global NumPy RNG so every draw below is reproducible
np.random.seed(42)

# Balanced panel skeleton plus the random columns.
# NOTE(review): `treatment` and `cohort` are drawn independently for every
# row, so both can vary over time within a unit -- confirm this is intended.
df = pd.DataFrame({
    'unit_id': np.repeat(np.arange(1, n_units + 1), n_time),
    'time_id': [period for _ in range(n_units) for period in range(1, n_time + 1)],
    'treatment': np.random.choice([0, 1], n_obs),
    'covariate1': np.random.normal(0, 1, n_obs),
    'covariate2': np.random.normal(0, 1, n_obs),
    'cohort': np.random.choice([2010, 2011, 2012], n_obs)
})

# Units 1 and 2 are forced to be never treated
never_treated_rows = df['unit_id'].isin([1, 2])
df.loc[never_treated_rows, 'treatment'] = 0

# Idiosyncratic noise for the untreated potential outcome
y0_noise = np.random.normal(0, 1, n_obs)

df = df.assign(
    # Potential outcome under control (Y0)
    Y0=lambda d: 5 + 0.5 * d['covariate1'] + 0.3 * d['covariate2'] + 0.2 * d['unit_id'] + 0.1 * d['time_id'] + y0_noise,
    # Constant treatment effect of 2 for every row
    treatment_effect=2,
    # Potential outcome under treatment (Y1)
    Y1=lambda d: d['Y0'] + d['treatment_effect'],
    # Observed outcome: Y1 on treated rows, Y0 otherwise
    outcome=lambda d: np.where(d['treatment'] == 1, d['Y1'], d['Y0']),
    # Period label: time_id below 5 is 'pre-treatment', the rest 'post-treatment'.
    # The label depends only on time_id, not on the `treatment` column.
    period_type=lambda d: np.where(d['time_id'] < 5, 'pre-treatment', 'post-treatment'),
)

In [None]:
# Run the project estimator on the simulated panel, passing the outcome,
# treatment, unit and time column names plus the two covariates.
result_df = compute_treatment_effects(
    df,
    'outcome',
    'treatment',
    'unit_id',
    'time_id',
    covariates=['covariate1', 'covariate2'],
)


In [None]:
# Copy the estimator output and keep only the rows flagged as treated
data = result_df.copy().loc[lambda frame: frame['treatment'] == 1]
data.head()

In [None]:
# Test the aggregate effects function
print(aggregate_treatment_effects(data, 't_effects', estimand='overall'))
print(aggregate_treatment_effects(data, 't_effects', estimand='cohort', groupby_column='cohort', time_column='time_id'))
print(aggregate_treatment_effects(data, 't_effects', estimand='event', groupby_column='cohort' ,time_column='time_id'))