In [1]:
!pip install git+https://github.com/StochasticTree/stochtree.git

Collecting git+https://github.com/StochasticTree/stochtree.git
  Cloning https://github.com/StochasticTree/stochtree.git to /tmp/pip-req-build-n52g6t7d
  Running command git clone --filter=blob:none --quiet https://github.com/StochasticTree/stochtree.git /tmp/pip-req-build-n52g6t7d
  Resolved https://github.com/StochasticTree/stochtree.git to commit f55bbb47b57ef6160964084650ab81f557c9559c
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: stochtree
  Building wheel for stochtree (pyproject.toml) ... [?25l[?25hdone
  Created wheel for stochtree: filename=stochtree-0.1.0-cp311-cp311-linux_x86_64.whl size=871294 sha256=d0109bc2a85e428a87126697ce947e3039542abd26a21d45b95cd767d1a4dc11
  Stored in directory: /tmp/pip-ephem-wheel-cache-0fbjbxcz/wheels/6b/16/bb/b09e1d07fb9c

In [2]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import random
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [3]:
def generate_staggered_did_data_fixed_X(
    n_units=200,
    num_pre_periods=5,
    num_post_periods=5,
    linearity_degree=1,
    pre_trend_bias_delta=0.2,
    propensity_noise_scale=0.5, # Scale of noise added to utility for assignment randomness
    epsilon_scale=1,
    seed=42
):
    """
    Generates panel data for DiD with staggered adoption based on propensity scores,
    using a fixed set of 8 covariates with mixed static/dynamic properties.

    Covariates (8 total):
    - X1: Bernoulli(p=0.66) - STATIC, influences propensity
    - X2: Bernoulli(p=0.45) - Time-varying
    - X3: Categorical({1,2,3,4} p={0.3,0.1,0.2,0.4}) - Time-varying
    - X4-X7: Numerical (Normal(0,1)) - Time-varying
    - X8: Numerical (Normal(0,1)) - STATIC, influences propensity

    Assigns units to 4 groups based on STATIC covariates (X1, X8):
    - Group 0: Never treated (Control) - Baseline
    - Group 1: Treated starting at num_pre_periods (T0)
    - Group 2: Treated starting at num_pre_periods + 1 (T1)
    - Group 3: Treated starting at num_pre_periods + 2 (T2)

    Args:
        n_units (int): Total number of units.
        num_pre_periods (int): Periods before the *earliest* treatment.
        num_post_periods (int): Periods after the *earliest* treatment.
        linearity_degree (int): Degree of linearity in the DGP (1-4).
        pre_trend_bias_delta (float): Bias for pre-trends in eventually treated groups.
        propensity_noise_scale (float): Std deviation of noise added to group utility
                                        before assignment. Higher -> more random assignment.
        epsilon_scale (float): Std deviation of the outcome error term.
        seed (int): Random seed for reproducibility.

    Returns:
        pd.DataFrame: Generated panel data including propensity-based group assignment
                      and mixed static/time-varying covariates.
    """
    np.random.seed(seed)
    total_covariates = 8 # Fixed number of covariates

    # --- 1. Generate STATIC Unit-Level Covariates (for Propensity Score) ---
    unit_ids = np.arange(n_units)
    # X1: Static Bernoulli(p=0.66)
    unit_X1_bern = np.random.binomial(n=1, p=0.66, size=n_units)
    # X8: Static Numerical (Normal(0,1))
    unit_X8_num = np.random.normal(0, 1, size=n_units)

    # --- 2. Calculate Group Utilities and Assign Groups based on STATIC X1, X8 ---
    # Define coefficients linking ONLY static covariates (X1, X8) to utility
    coeffs = {
        # Group 1 (T0) - Example: Higher utility if X1=1 and high X8
        'g1': {'intercept': 0.1, 'x1_bern': 0.8, 'x8_num': 0.6},
        # Group 2 (T1) - Example: Higher utility if X1=0 and low X8
        'g2': {'intercept': 0.0, 'x1_bern': -0.5, 'x8_num': -0.7},
        # Group 3 (T2) - Example: Mild preference for X1=1, sensitive to X8
        'g3': {'intercept': -0.1, 'x1_bern': 0.3, 'x8_num': 0.4}
    }

    # Calculate systematic utility part (V = X*beta) for each group
    V0 = np.zeros(n_units) # Utility for Group 0 (Control) is baseline 0
    V1 = coeffs['g1']['intercept'] + coeffs['g1']['x1_bern'] * unit_X1_bern + coeffs['g1']['x8_num'] * unit_X8_num
    V2 = coeffs['g2']['intercept'] + coeffs['g2']['x1_bern'] * unit_X1_bern + coeffs['g2']['x8_num'] * unit_X8_num
    V3 = coeffs['g3']['intercept'] + coeffs['g3']['x1_bern'] * unit_X1_bern + coeffs['g3']['x8_num'] * unit_X8_num

    # Add random noise
    noise = np.random.normal(0, propensity_noise_scale, size=(n_units, 4))
    U = np.column_stack((V0, V1, V2, V3)) + noise

    # Assign unit to group with highest utility
    unit_treatment_group = np.argmax(U, axis=1)

    # --- 3. Create Panel DataFrame and Merge STATIC Unit-Level Info ---
    periods = num_pre_periods + num_post_periods
    time_periods = np.arange(periods)

    data = pd.DataFrame({
        'unit_id': np.repeat(unit_ids, periods),
        'time': np.tile(time_periods, n_units)
    })

    # Create a temporary DataFrame for unit-level data
    df_unit_static = pd.DataFrame({
        'unit_id': unit_ids,
        'treatment_group': unit_treatment_group,
        'X1': unit_X1_bern, # Static Bernoulli
        'X8': unit_X8_num   # Static Numerical
    })

    # Merge static unit-level data into the main panel DataFrame
    data = pd.merge(data, df_unit_static, on='unit_id', how='left')

    # --- 4. Generate Time-Varying Covariates ---
    n_observations = len(data)
    # X2: Time-varying Bernoulli(p=0.45)
    data['X2'] = np.random.binomial(n=1, p=0.45, size=n_observations)
    # X3: Time-varying Categorical
    cat_choices = [1, 2, 3, 4]
    cat_probs = [0.3, 0.1, 0.2, 0.4]
    data['X3'] = np.random.choice(cat_choices, size=n_observations, p=cat_probs)
    # X4-X7: Time-varying Numerical (Normal(0,1)) - 4 covariates
    X_num_time_varying = np.random.normal(0, 1, size=(n_observations, 4))
    data['X4'] = X_num_time_varying[:, 0]
    data['X5'] = X_num_time_varying[:, 1]
    data['X6'] = X_num_time_varying[:, 2]
    data['X7'] = X_num_time_varying[:, 3]

    # --- 5. Define Treatment Timing and Indicators ---
    earliest_treatment_period = num_pre_periods
    conditions = [
        data['treatment_group'] == 0, data['treatment_group'] == 1,
        data['treatment_group'] == 2, data['treatment_group'] == 3
    ]
    choices = [ np.inf, earliest_treatment_period, earliest_treatment_period + 1, earliest_treatment_period + 2 ]
    data['first_treat_period'] = np.select(conditions, choices, default=np.nan)
    data['post_treatment'] = (data['time'] >= num_pre_periods).astype(int)
    data['eventually_treated'] = (data['treatment_group'] > 0).astype(int)
    data['D'] = (data['time'] >= data['first_treat_period']).astype(int)
    data['time_trend'] = data['time']

    # --- 6. Generate Outcome Variable (Y) using FIXED beta_x ---
    if linearity_degree == 1 or linearity_degree == 2: treatment_effect_beta = 0
    elif linearity_degree == 3: treatment_effect_beta = 0
    else: treatment_effect_beta = np.nan

    data['epsilon'] = np.random.normal(scale=epsilon_scale, size=len(data))

    # DGP parameters
    beta_0 = -0.5 # Intercept
    beta_group_effect = 0.75 # Main effect of treated group (alpha_i)
    beta_time = 0.2 # Main effect of time trend (gamma_t)
    beta_interaction = treatment_effect_beta # Treatment effect magnitude

    # FIXED coefficients for the 8 covariates
    beta_x = np.array([-0.75, 0.5, -0.5, -1.30, 1.8, 2.5, -1.0, 0.3])
    if len(beta_x) != total_covariates:
        raise ValueError(f"Length of fixed beta_x ({len(beta_x)}) does not match total_covariates ({total_covariates})")


    # Prepare covariate matrix X from DataFrame columns in order X1 to X8
    X_cols = [f'X{i}' for i in range(1, total_covariates + 1)]
    X = data[X_cols].values # Shape (n_observations, 8)

    # --- Calculate Y based on linearity_degree ---
    Y_base = (beta_0 + beta_group_effect * data['eventually_treated'] + beta_time * data['time_trend'])
    half = total_covariates // 2 # half = 4

    if linearity_degree == 1: # Fully Linear
        Y_covariates = np.sum(beta_x * X, axis=1)
        Y_treatment = beta_interaction * data['D']
        data['CATE'] = beta_interaction * data['D']

    elif linearity_degree == 2: # Half X non-linear
        # beta_x indices: [0,1] [2,3] [4,5,6,7]
        # X columns    :  0,1   2,3   4,5,6,7
        cov_effect = (np.sum(beta_x[:2] * (X[:, :2] ** 2), axis=1) +        # First 2 X's squared
                      np.sum(beta_x[2:4] * np.exp(X[:, 2:4]), axis=1) +     # Next 2 X's exp
                      np.sum(beta_x[4:] * X[:, 4:], axis=1))               # Last 4 X's linear
        Y_covariates = cov_effect
        Y_treatment = beta_interaction * data['D']
        data['CATE'] = beta_interaction * data['D']

    elif linearity_degree == 3: 
        Y_base = (beta_0 + beta_group_effect * data['eventually_treated'] + beta_time * data['time_trend']**2) # Non-linear time
        # beta_x indices: [0,1] [2,3] [4,5] [6,7]
        # X columns    :  0,1   2,3   4,5   6,7
        cov_effect = (np.sum(beta_x[:2] * (X[:, :2] ** 2), axis=1) +         # First 2 X's squared
                      np.sum(beta_x[2:4] * np.exp(X[:, 2:4]), axis=1) +      # Next 2 X's exp
                      np.sum(beta_x[4:6] * np.abs(X[:, 4:6]), axis=1) +      # Next 2 X's abs
                      np.sum(beta_x[6:] * np.sqrt(np.abs(X[:, 6:])), axis=1))# Last 2 X's sqrt(abs)
        Y_covariates = cov_effect
        Y_treatment = beta_interaction * data['D'] # Linear treatment
        data['CATE'] = beta_interaction * data['D']
    else:
         Y_covariates = 0
         Y_treatment = 0
         data['CATE'] = 0

    data['Y'] = Y_base + Y_covariates + Y_treatment

    # --- Add pre-trend bias ---
    if pre_trend_bias_delta != 0:
        pre_period_mask = data['time'] < earliest_treatment_period
        bias_mask = pre_period_mask & (data['eventually_treated'] == 1)
        if linearity_degree == 3: 
            seasonal_amplitude = 1.0
            seasonal_period = 4
            seasonal_effect = seasonal_amplitude * np.sin(2 * np.pi * data['time'] / seasonal_period)
            data.loc[bias_mask, 'Y'] += pre_trend_bias_delta * seasonal_effect[bias_mask]
        else:
            time_diff = data['time'] - earliest_treatment_period
            data.loc[bias_mask, 'Y'] += pre_trend_bias_delta * time_diff[bias_mask]

    # Add final error term
    data['Y'] += data['epsilon']

    # --- 7. Finalize DataFrame ---
    # Reorder columns for clarity (optional)
    final_cols = (['unit_id', 'time', 'treatment_group', 'first_treat_period', 'eventually_treated', 'D','post_treatment'] +
                   X_cols + ['Y', 'CATE', 'time_trend', 'epsilon'])
    # Ensure all columns exist before selecting
    final_cols = [col for col in final_cols if col in data.columns]
    data = data[final_cols]

    return data

In [4]:
def find_first_treatment_indexes_array(df, min_time=4, eventually_treated=1):
    """
    Finds the indexes of the first row for each treatment group (0, 1, 2, 3)
    after filtering the DataFrame by time and eventually_treated, and returns them as a NumPy array.

    Args:
        df: The pandas DataFrame.
        min_time: The minimum time value.
        eventually_treated: The desired eventually_treated value.

    Returns:
        A NumPy array containing the first row indexes for each treatment group (0, 1, 2, 3),
        or None if no rows meet the criteria. Returns -1 if a treatment group does not appear in the filtered data.
    """

    filtered_df = df[(df['time'] >= min_time) & (df['eventually_treated'] == eventually_treated)]

    if filtered_df.empty:
        return None  # Return None if no rows match the time and eventually_treated criteria.

    indexes = []
    for group in [1, 2, 3]:
        group_df = filtered_df[filtered_df['treatment_group'] == group]
        if not group_df.empty:
            indexes.append(group_df.index[0])  # Get the first index
        else:
            indexes.append(-1) #Return -1 if the treatment group does not appear in the filtered data.

    return np.array(indexes)


In [5]:
from stochtree import BCFModel
from tqdm import tqdm  # Import tqdm for the progress bar

In [6]:
def calculate_error_metrics_grouped_hybrid( # Renamed slightly for clarity
    true_ATE,
    estimated_ATE,
    accumulated_p_values,
    suffix=""
    ):
    """
    Calculates both per-iteration and summary metrics (RMSE, MAE, MAPE).

    1. Saves per-iteration results to an Excel file named
       "BCF_GATE_and_PValues{suffix}.xlsx". The file has multiple sheets,
       with each row representing a simulation iteration:
       - 'Overall_Metrics': Contains overall RMSE, MAE, MAPE per iteration.
       - 'Group_X': One sheet per group, containing the group's RMSE, MAE, MAPE
                    per iteration, alongside the raw p-values for that group.

    2. Returns dictionaries containing summary statistics (mean and standard
       deviation of metrics aggregated across all iterations).

    Args:
        true_ATE: A numpy array of shape (num_iterations, num_post_periods, number_of_groups)
                  containing the true ATE values.
        estimated_ATE: A numpy array of shape (num_iterations, num_post_periods, number_of_groups)
                       containing the estimated ATE values.
        accumulated_p_values: A numpy array of shape (num_iterations, num_post_periods, number_of_groups)
                              containing the p-values.
        suffix (str): An optional suffix to append to the base filename
                      "BCF_GATE_and_PValues". Defaults to "".

    Returns:
        A tuple containing:
            - overall_metrics: Dictionary with overall summary statistics
              (mean RMSE, mean MAE, mean MAPE, std RMSE, std MAE, std MAPE).
            - per_group_metrics: Dictionary where keys are group indices and
              values are dictionaries with summary statistics for that group.
    """

    # Input validation
    if not (true_ATE.shape == estimated_ATE.shape == accumulated_p_values.shape):
        raise ValueError("Shapes of true_ATE, estimated_ATE, and accumulated_p_values must match.")
    if true_ATE.ndim != 3:
         raise ValueError("Input arrays must have 3 dimensions: (iterations, time, groups).")

    num_iterations, num_post_periods, number_of_groups = true_ATE.shape
    iteration_index = pd.RangeIndex(num_iterations, name='Iteration')

    # Construct filename using suffix
    filename = f"detrend_unbiased_BCF_GATE_PS_and_PValues{suffix}.xlsx"

    # Calculate element-wise errors
    errors = estimated_ATE - true_ATE # Shape: (iterations, time, groups)
    abs_errors = np.abs(errors)       # Shape: (iterations, time, groups)

    # --- Calculate PER-ITERATION Metrics (used for both Excel and summaries) ---

    # Overall per-iteration metrics
    overall_rmse_per_iteration = np.sqrt(np.mean(errors**2, axis=(1, 2)))
    overall_mae_per_iteration = np.mean(abs_errors, axis=(1, 2))
    overall_mape_per_iteration = np.zeros(num_iterations) * np.nan
    for i in range(num_iterations):
         true_ate_i = true_ATE[i, :, :]
         errors_i = errors[i, :, :]
         valid_mask_i = true_ate_i != 0
         if np.any(valid_mask_i):
             abs_perc_errors_i = np.abs(errors_i[valid_mask_i] / true_ate_i[valid_mask_i])
             overall_mape_per_iteration[i] = np.mean(abs_perc_errors_i) * 100

    # Create Overall DataFrame for Excel
    df_overall = pd.DataFrame({
        'Overall_RMSE': overall_rmse_per_iteration,
        'Overall_MAE': overall_mae_per_iteration,
        'Overall_MAPE': overall_mape_per_iteration
    }, index=iteration_index)

    # --- Calculate SUMMARY Overall Metrics (for return value) ---
    overall_rmse = np.sqrt(np.mean(errors**2))
    overall_mae = np.mean(abs_errors)
    valid_mape_mask = true_ATE != 0
    abs_perc_errors = np.full_like(errors, fill_value=np.nan)
    abs_perc_errors[valid_mape_mask] = np.abs(errors[valid_mape_mask] / true_ATE[valid_mape_mask])
    overall_mape = np.nanmean(abs_perc_errors) * 100

    summary_overall_std_rmse = np.nanstd(overall_rmse_per_iteration)
    summary_overall_std_mae = np.nanstd(overall_mae_per_iteration)
    summary_overall_std_mape = np.nanstd(overall_mape_per_iteration)

    overall_metrics = { # Dictionary for return value
        "Overall_RMSE": overall_rmse,
        "Overall_MAE": overall_mae,
        "Overall_MAPE": overall_mape,
        "Overall_Std_RMSE": summary_overall_std_rmse,
        "Overall_Std_MAE": summary_overall_std_mae,
        "Overall_Std_MAPE": summary_overall_std_mape,
    }

    # --- Process Per-Group Data (for both Excel and summaries) ---
    group_combined_dfs = {} # For Excel sheets
    per_group_metrics = {}  # For return value summaries

    for g in range(number_of_groups):
        # Slice data for the current group
        true_ATE_g = true_ATE[:, :, g]  # Shape: (iterations, time)
        errors_g = errors[:, :, g]      # Shape: (iterations, time)
        abs_errors_g = abs_errors[:, :, g]# Shape: (iterations, time)
        p_values_g = accumulated_p_values[:, :, g] # Shape: (iterations, time)

        # Calculate per-iteration metrics for group g
        group_rmse_per_iter = np.sqrt(np.mean(errors_g**2, axis=1))
        group_mae_per_iter = np.mean(abs_errors_g, axis=1)
        group_mape_per_iter = np.zeros(num_iterations) * np.nan
        for i in range(num_iterations):
            true_ate_gi = true_ATE_g[i, :]
            errors_gi = errors_g[i, :]
            valid_mask_gi = true_ate_gi != 0
            if np.any(valid_mask_gi):
                abs_perc_errors_gi = np.abs(errors_gi[valid_mask_gi] / true_ate_gi[valid_mask_gi])
                group_mape_per_iter[i] = np.mean(abs_perc_errors_gi) * 100

        # --- Calculate SUMMARY Stats for Group g (for return dict) ---
        group_rmse = np.sqrt(np.mean(errors_g**2))
        group_mae = np.mean(abs_errors_g)
        valid_mape_mask_g = true_ATE_g != 0
        abs_perc_errors_g = np.full_like(errors_g, fill_value=np.nan)
        abs_perc_errors_g[valid_mape_mask_g] = np.abs(errors_g[valid_mape_mask_g] / true_ATE_g[valid_mape_mask_g])
        group_mape = np.nanmean(abs_perc_errors_g) * 100

        summary_group_std_rmse = np.nanstd(group_rmse_per_iter)
        summary_group_std_mae = np.nanstd(group_mae_per_iter)
        summary_group_std_mape = np.nanstd(group_mape_per_iter)

        per_group_metrics[g] = { # Populate return dictionary for group g
             f"Group_{g}_RMSE": group_rmse,
             f"Group_{g}_MAE": group_mae,
             f"Group_{g}_MAPE": group_mape,
             f"Group_{g}_Std_RMSE": summary_group_std_rmse,
             f"Group_{g}_Std_MAE": summary_group_std_mae,
             f"Group_{g}_Std_MAPE": summary_group_std_mape,
        }

        # --- Create DataFrames for Excel Sheet for Group g ---
        df_metrics_g = pd.DataFrame({
            f'Group_{g}_RMSE': group_rmse_per_iter, # Use the per-iter arrays
            f'Group_{g}_MAE': group_mae_per_iter,
            f'Group_{g}_MAPE': group_mape_per_iter
        }, index=iteration_index)

        p_value_columns = [f'PValue_Time_{t}' for t in range(num_post_periods)]
        df_pvals_g = pd.DataFrame(p_values_g,
                                  index=iteration_index,
                                  columns=p_value_columns)

        # Combine metrics and p-values for the group's Excel sheet
        group_combined_dfs[g] = pd.concat([df_metrics_g, df_pvals_g], axis=1)


    # --- Write Per-Iteration Data to Excel ---
    try:
        with pd.ExcelWriter(filename) as writer:
            # Write Overall Metrics Sheet
            df_overall.to_excel(writer, sheet_name='Overall_Metrics', header=True, index=True)
            # Write Per-Group Sheets
            for g in range(number_of_groups):
                sheet_name_g = f'Group_{g}'
                group_combined_dfs[g].to_excel(writer, sheet_name=sheet_name_g, header=True, index=True)

        print(f"Per-iteration metrics and p-values successfully saved to '{filename}'")

    except Exception as e:
        print(f"Error saving per-iteration metrics to Excel file '{filename}': {e}")

    # --- Return Summary Dictionaries ---
    return overall_metrics, per_group_metrics

# Experiments

In [7]:
# Set the number of covariates as specified.
num_x_covariates = 6
linearity_degree=2

# Set the number of iterations and initialize the counter.
num_iterations = 100
count_at_least_two_non_significant = 0

num_pre_periods=4

num_post_periods=4

number_of_groups=3
true_ATE=np.zeros([num_iterations,num_post_periods,number_of_groups])
estimated_ATE_subset=np.zeros([num_iterations,num_post_periods,number_of_groups])
accumulated_p_values=np.zeros([num_iterations,num_post_periods,number_of_groups])

epsilon_scale=1

# Run the loop 100 times.
for i in tqdm(range(num_iterations), desc="Progress", unit="iteration"):
    # Generate a random seed for each iteration.
    seed_val = i

    # Generate data with specified hyperparameters.
    data_linear = generate_staggered_did_data_fixed_X(
        n_units=200,
        linearity_degree=linearity_degree,
        num_pre_periods=num_pre_periods,
        num_post_periods=num_post_periods,
        pre_trend_bias_delta=0,
        epsilon_scale=epsilon_scale,
        seed=seed_val
    )
    indexes = find_first_treatment_indexes_array(data_linear)

    x_columns = [f"X{i}" for i in range(1, num_x_covariates + 1+2)]
    X = np.array(data_linear[["eventually_treated"] + x_columns +["post_treatment"]+["time"]+["treatment_group"]])
    Z=np.array(data_linear["D"])
    y=np.array(data_linear["Y"])

    bcf_model = BCFModel()
    general_params = {"keep_every": 5, "num_chains": 3}
    prognostic_forest_params = {"keep_vars": np.array([0, 1] + list(range(2, num_x_covariates + 3))+[num_x_covariates + 4])}
    treatment_effect_forest_params = {"keep_vars": np.array([num_x_covariates + 3,num_x_covariates + 4,num_x_covariates + 5])}
    bcf_model.sample(X_train=X, Z_train=Z, y_train=y, num_gfr=50, num_mcmc=500, general_params=general_params, prognostic_forest_params=prognostic_forest_params,
                treatment_effect_forest_params=treatment_effect_forest_params)

    for j in range(len(indexes)):
      true_ATE[i,:,j]=np.array(data_linear[(data_linear['time'] >= num_pre_periods) & data_linear['eventually_treated'] == 1]["CATE"].loc[indexes[j]:indexes[j]+num_post_periods])
      estimated_ATE_subset[i,:,j]=bcf_model.tau_hat_train.mean(axis=1)[indexes[j]:indexes[j]+num_post_periods]*Z[indexes[j]:indexes[j]+num_post_periods]-np.mean(bcf_model.tau_hat_train.mean(axis=1)[indexes[j]-4:indexes[j]]*Z[indexes[j]-4:indexes[j]])

    for h in range(num_pre_periods,num_post_periods+num_pre_periods):
      for j in range(len(indexes)):
        mean_values=(bcf_model.tau_hat_train[indexes[j]:indexes[j]+num_post_periods,:]*Z[indexes[j]:indexes[j]+num_post_periods].reshape(-1, 1)-0*bcf_model.tau_hat_train[indexes[j]-4:indexes[j],:].mean(axis=0))[h-num_pre_periods,:]
        above_zero = np.sum(mean_values > 0)
        below_zero = np.sum(mean_values < 0)
        total_points = mean_values.size
        percentage_above_zero = (above_zero / total_points)
        percentage_below_zero = (below_zero / total_points)
        accumulated_p_values[i,h-num_pre_periods,j]=min(percentage_above_zero, percentage_below_zero)




simulation_suffix = "_linearity=2" # Example suffix
overall_metrics, per_group_metrics = calculate_error_metrics_grouped_hybrid(
    true_ATE,
    estimated_ATE_subset,
    accumulated_p_values,
    suffix=simulation_suffix # Pass the suffix here
)

print("\n--- Overall Metrics (Dictionary) ---")
print(overall_metrics)

print("\n--- Per Group Metrics (Dictionary) ---")
for group_idx, metrics in per_group_metrics.items():
    print(f"Group {group_idx}:")
    print(metrics)



Progress: 100%|██████████| 100/100 [2:22:00<00:00, 85.21s/iteration]
  overall_mape = np.nanmean(abs_perc_errors) * 100
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  group_mape = np.nanmean(abs_perc_errors_g) * 100
  group_mape = np.nanmean(abs_perc_errors_g) * 100
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  group_mape = np.nanmean(abs_perc_errors_g) * 100
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Per-iteration metrics and p-values successfully saved to 'detrend_unbiased_BCF_GATE_PS_and_PValues_linearity=2.xlsx'

--- Overall Metrics (Dictionary) ---
{'Overall_RMSE': np.float64(0.1983609319231804), 'Overall_MAE': np.float64(0.13200967103521336), 'Overall_MAPE': np.float64(nan), 'Overall_Std_RMSE': np.float64(0.11889876900243643), 'Overall_Std_MAE': np.float64(0.10275309789020365), 'Overall_Std_MAPE': np.float64(nan)}

--- Per Group Metrics (Dictionary) ---
Group 0:
{'Group_0_RMSE': np.float64(0.23311440718349086), 'Group_0_MAE': np.float64(0.1755016649684547), 'Group_0_MAPE': np.float64(nan), 'Group_0_Std_RMSE': np.float64(0.14644076349391233), 'Group_0_Std_MAE': np.float64(0.14704093995045917), 'Group_0_Std_MAPE': np.float64(nan)}
Group 1:
{'Group_1_RMSE': np.float64(0.1994532911811355), 'Group_1_MAE': np.float64(0.13486382006127434), 'Group_1_MAPE': np.float64(nan), 'Group_1_Std_RMSE': np.float64(0.11988809473614313), 'Group_1_Std_MAE': np.float64(0.10389729858600937), 'Group_1