In [2]:

#' Compare Multiple Metrics Between Two Models Using Robust Location Tests
#'
#' Iterates through pairs of corresponding metrics for two models, performs a robust
#' location test (user choice: Welch or Fligner-Policello) for each pair, and
#' applies multiple comparison correction across all location tests performed.
#' Scale comparison is NOT performed.
#'
#' @param data A data frame or list containing the simulation results.
#' @param model1_cols Character vector of column names in `data` for the first model's metrics.
#' @param model2_cols Character vector of column names in `data` for the second model's metrics.
#'        Must be the same length as `model1_cols` and correspond element-wise.
#' @param location_test_method Character string specifying the location test.
#'        Allowed values: "welch" (default) or "fligner-policello".
#' @param alpha Numeric significance level (default: 0.05).
#' @param correction_methods Character vector of p-value adjustment methods
#'        to apply (passed to `p.adjust.methods`). Default includes common ones.
#'
#' @return A list containing the results:
#'         - `parameters`: Input parameters used for the analysis.
#'         - `results_by_metric`: A list where each element corresponds to a metric pair,
#'           containing the raw location test object and original p-value for that metric.
#'         - `all_location_p_values_original`: Named vector of all original location p-values
#'           collected across metrics.
#'         - `p_values_adjusted`: A matrix of adjusted p-values (rows = location tests, cols = methods).
#'         - `significant_results`: A matrix/data frame indicating significance (TRUE/FALSE) based on adjusted p-values and alpha.
#'         - `warnings_list`: A list of warnings generated during execution for specific metrics.
#'
#' @examples
#' \dontrun{
#' # --- Example Data Setup ---
#' set.seed(123)
#' n_sim <- 50
#' simulation_results <- data.frame(
#'   mvbcf_pehe1 = rnorm(n_sim, 1.0, 0.2),
#'   mvbcf_ate1 = rnorm(n_sim, 0.5, 0.1),
#'   mvbcf_cover1 = rnorm(n_sim, 0.95, 0.05),
#'   bcf_pehe1 = rnorm(n_sim, 1.2, 0.25),  # Worse PEHE
#'   bcf_ate1 = rnorm(n_sim, 0.5, 0.12),   # Similar ATE, slightly more variance
#'   bcf_cover1 = rnorm(n_sim, 0.90, 0.05),  # Worse coverage
#'   mvbcf_runtime = rexp(n_sim, rate = 1/20), # seconds
#'   bcf_runtime = rexp(n_sim, rate = 1/15)   # Faster runtime
#' )
#' # Add some NAs
#' simulation_results$mvbcf_pehe1[sample(n_sim, 3)] <- NA
#' simulation_results$bcf_runtime[sample(n_sim, 2)] <- NA
#'
#' # Define corresponding columns
#' model1_cols <- c("mvbcf_pehe1", "mvbcf_ate1", "mvbcf_cover1", "mvbcf_runtime")
#' model2_cols <- c("bcf_pehe1", "bcf_ate1", "bcf_cover1", "bcf_runtime")
#'
#' # --- Run the Comparison ---
#'
#' # Using Welch's t-test
#' comparison_results_welch <- compare_multiple_locations(
#'   data = simulation_results,
#'   model1_cols = model1_cols,
#'   model2_cols = model2_cols,
#'   location_test_method = "welch"
#' )
#'
#' print(comparison_results_welch$p_values_adjusted)
#' print(comparison_results_welch$significant_results)
#' print(comparison_results_welch$warnings_list)
#'
#' # Using Fligner-Policello test
#' comparison_results_fp <- compare_multiple_locations(
#'   data = simulation_results,
#'   model1_cols = model1_cols,
#'   model2_cols = model2_cols,
#'   location_test_method = "fligner-policello"
#' )
#'
#' print(comparison_results_fp$p_values_adjusted)
#' print(comparison_results_fp$warnings_list)
#' }
#' 


compare_multiple_locations <- function(data,
                                       model1_cols,
                                       model2_cols,
                                       location_test_method = "welch",
                                       alpha = 0.05,
                                       correction_methods = c("bonferroni", "holm", "BH")) {

  # --- Input Validation ---
  if (!is.data.frame(data) && !is.list(data)) {
    stop("'data' must be a data frame or a list.", call. = FALSE)
  }
  if (length(model1_cols) != length(model2_cols)) {
    stop("'model1_cols' and 'model2_cols' must have the same length.", call. = FALSE)
  }
  if (length(model1_cols) == 0) {
    stop("Column lists cannot be empty.", call. = FALSE)
  }
  if (!all(sapply(model1_cols, function(cn) cn %in% names(data)))) {
      missing_cols <- model1_cols[!model1_cols %in% names(data)]
      stop("Missing columns for model 1 in data: ", paste(missing_cols, collapse=", "), call. = FALSE)
  }
  if (!all(sapply(model2_cols, function(cn) cn %in% names(data)))) {
      missing_cols <- model2_cols[!model2_cols %in% names(data)]
      stop("Missing columns for model 2 in data: ", paste(missing_cols, collapse=", "), call. = FALSE)
  }
  if (!location_test_method %in% c("welch", "fligner-policello")) {
    stop("location_test_method must be 'welch' or 'fligner-policello'.", call. = FALSE)
  }
  if (location_test_method == "fligner-policello" && !requireNamespace("kSamples", quietly = TRUE)) {
     stop("Package 'kSamples' needed for Fligner-Policello test. Please install it.", call. = FALSE)
  }
  # No longer need 'car' package
  # if (!requireNamespace("car", quietly = TRUE)) {
  #  stop("Package 'car' needed for Levene's test. Please install it.", call. = FALSE)
  #}
  if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) {
    stop("alpha must be a numeric value between 0 and 1.", call. = FALSE)
  }

  # --- Initialization ---
  all_location_p_values_list <- list() # Collect all location p-values
  results_by_metric <- list()          # Store detailed results per metric
  warnings_list <- list()              # Collect warnings

  num_metrics <- length(model1_cols)
  message(paste("Starting location comparison for", num_metrics, "metric(s)..."))

  # --- Loop Through Metrics ---
  for (i in 1:num_metrics) {
    col1 <- model1_cols[i]
    col2 <- model2_cols[i]
    # Generate a more robust metric name base
    split1 <- strsplit(col1, "_")[[1]]
    split2 <- strsplit(col2, "_")[[1]]
    prefix_len <- 0
    if (length(split1) > 1 && length(split2) > 1 && split1[1] == split2[1]){
        prefix_len <- nchar(split1[1]) + 1 # length of prefix + underscore
    }
    metric_name <- substr(col1, prefix_len + 1, nchar(col1))
    if (nchar(metric_name) == 0) metric_name <- paste0("metric_", i) # Fallback

    message(paste("Processing metric:", metric_name, "(", col1, "vs", col2, ")"))

    metric_results <- list(metric_name = metric_name, model1_col = col1, model2_col = col2)
    metric_warnings <- list()

    # Extract and clean data for the current metric
    group1_raw <- data[[col1]]
    group2_raw <- data[[col2]]

    if(is.list(group1_raw)) group1_raw <- unlist(group1_raw)
    if(is.list(group2_raw)) group2_raw <- unlist(group2_raw)

    group1 <- tryCatch(as.numeric(group1_raw), warning = function(w) {
        metric_warnings <<- c(metric_warnings, paste("Coercion warning for", col1, ":", w$message))
        suppressWarnings(as.numeric(group1_raw))
    })
    group2 <- tryCatch(as.numeric(group2_raw), warning = function(w) {
        metric_warnings <<- c(metric_warnings, paste("Coercion warning for", col2, ":", w$message))
        suppressWarnings(as.numeric(group2_raw))
    })

    # Handle NAs
    valid_idx1 <- !is.na(group1)
    valid_idx2 <- !is.na(group2)
    group1_complete <- group1[valid_idx1]
    group2_complete <- group2[valid_idx2]

    n1 <- length(group1_complete)
    n2 <- length(group2_complete)

    if (sum(!valid_idx1) > 0) metric_warnings <- c(metric_warnings, paste(sum(!valid_idx1), "NA(s) removed from", col1))
    if (sum(!valid_idx2) > 0) metric_warnings <- c(metric_warnings, paste(sum(!valid_idx2), "NA(s) removed from", col2))

    # Initialize p-value for this metric as NA
    p_loc <- NA
    loc_test_name_suffix <- ""
    location_test_result <- NULL

    # --- Perform Location Test if data sufficient ---
    if (n1 < 2 || n2 < 2) { # t.test needs at least 2, FP often needs more but min check is 2
      metric_warnings <- c(metric_warnings, "Insufficient non-NA data (< 2 in at least one group) for location testing.")
      message("  Skipping location test due to insufficient data.")
    } else {
      # Location Test
      loc_test_failed <- FALSE
      if (location_test_method == "welch") {
        loc_test_name_suffix <- "_welch"
        location_test_result <- tryCatch({
          t.test(group1_complete, group2_complete, var.equal = FALSE)
        }, error = function(e) {
          metric_warnings <<- c(metric_warnings, paste("Welch t-test failed:", e$message))
          loc_test_failed <<- TRUE
          NULL
        })
        if(!is.null(location_test_result)) p_loc <- location_test_result$p.value

      } else if (location_test_method == "fligner-policello") {
        loc_test_name_suffix <- "_fp"
        min_n_fp <- 10 # Recommended minimum sample size per group
        if (n1 < min_n_fp || n2 < min_n_fp) {
             warn_msg <- paste0("Sample size(s) (", n1, ", ", n2,
                             ") may be small for reliable Fligner-Policello results (recommend >=", min_n_fp, ").")
             metric_warnings <- c(metric_warnings, warn_msg)
             message(paste("  Warning:", warn_msg))
         }
         location_test_result <- tryCatch({
            # Check if data is constant, fp.test might fail
            if (length(unique(group1_complete)) <= 1 || length(unique(group2_complete)) <= 1) {
                stop("Data within at least one group is constant.")
            }
            fp.test(group1_complete, group2_complete)
         }, error = function(e) {
            metric_warnings <<- c(metric_warnings, paste("Fligner-Policello test failed:", e$message))
            loc_test_failed <<- TRUE
            NULL
         })
        if(!is.null(location_test_result)) p_loc <- location_test_result$p.value
      }
       metric_results$location_test_result <- location_test_result

    } # End if sufficient data

    # Store location p-value with unique name
    p_loc_name <- paste0(metric_name, "_location", loc_test_name_suffix)
    all_location_p_values_list[[p_loc_name]] <- p_loc

    # Store metric-specific results
    metric_results$p_value_location_original <- p_loc
    metric_results$warnings <- metric_warnings
    results_by_metric[[metric_name]] <- metric_results
    if(length(metric_warnings) > 0) warnings_list[[metric_name]] <- metric_warnings

  } # End loop through metrics

  message("Finished individual location tests. Applying multiple comparison corrections...")

  # --- Apply Multiple Comparison Correction ---
  p_values_vec <- unlist(all_location_p_values_list)
  valid_p_indices <- !is.na(p_values_vec)
  p_values_to_adjust <- p_values_vec[valid_p_indices]

  p_adj_matrix <- NULL
  sig_matrix <- NULL

  if (length(p_values_to_adjust) > 0) {
      # Ensure only valid methods are requested
      valid_methods <- p.adjust.methods
      correction_methods <- intersect(correction_methods, valid_methods)
       if(length(correction_methods) == 0) {
          warning("No valid correction methods specified or available. Using 'bonferroni'.", call.=FALSE)
          correction_methods <- "bonferroni"
      }

      p_adj_list <- lapply(correction_methods, function(method) {
          p.adjust(p_values_to_adjust, method = method)
      })
      p_adj_matrix_valid <- do.call(cbind, p_adj_list)
      colnames(p_adj_matrix_valid) <- correction_methods
      rownames(p_adj_matrix_valid) <- names(p_values_to_adjust)

      # Create full matrix including NAs for alignment
      p_adj_matrix <- matrix(NA_real_, nrow = length(p_values_vec), ncol = length(correction_methods),
                            dimnames = list(names(p_values_vec), correction_methods))
      p_adj_matrix[valid_p_indices, ] <- p_adj_matrix_valid

      # Determine significance based on adjusted values
      sig_matrix <- p_adj_matrix < alpha
      # Ensure NAs remain NA in significance matrix
      sig_matrix[is.na(p_adj_matrix)] <- NA

      message(paste("Corrections applied to", length(p_values_to_adjust), "valid p-values."))

  } else {
      message("No valid p-values obtained across all metrics to apply corrections.")
  }


  # --- Consolidate Results ---
  final_results <- list(
      parameters = list(
          location_test_method = location_test_method,
          alpha = alpha,
          correction_methods = correction_methods,
          num_metrics = num_metrics,
          num_tests_corrected = length(p_values_to_adjust)
      ),
      results_by_metric = results_by_metric,
      all_location_p_values_original = p_values_vec, # Vector with NAs included
      p_values_adjusted = p_adj_matrix,
      significant_results = sig_matrix,
      warnings_list = warnings_list
  )

  message("Comparison complete.")
  return(final_results)
}

In [3]:
  # Helper function to estimate power for Welch's t-test
estimate_welch_power <- function(n1, n2, sd1, sd2, delta, alpha) {
    # Calculate degrees of freedom using Welch-Satterthwaite equation
    df <- ((sd1^2/n1 + sd2^2/n2)^2) / 
          ((sd1^2/n1)^2/(n1-1) + (sd2^2/n2)^2/(n2-1))
    
    # Calculate non-centrality parameter
    ncp <- delta / sqrt(sd1^2/n1 + sd2^2/n2)
    
    # Calculate critical value
    crit <- qt(1-alpha/2, df)
    
    # Calculate power
    power <- 1 - pt(crit, df, ncp) + pt(-crit, df, ncp)
    
    return(power)
}

In [15]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Initialize variables to store the overall smallest relative difference and corresponding info
overall_smallest_relative_diff <- Inf
overall_best_column <- ""
overall_best_model <- ""
overall_best_linearity_degree <- NA
overall_best_did_bcf_sd <- NA
overall_best_other_model_sd <- NA
overall_best_num_rows <- NA
overall_best_did_bcf_mean <- NA

# Loop through different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n--- Processing for linearity degree =", linearity_degree, "---\n")

  # Define file names dynamically based on linearity_degree
  file_names <- c(
    sprintf("DiD_BCF_CATE_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_CATE_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    data_list[[file]] <- df[, 1:3] # Select only the first three columns
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Calculate DiD_BCF means, standard deviations, and number of rows
  did_bcf_means <- colMeans(did_bcf_df)
  did_bcf_sds <- apply(did_bcf_df, 2, sd) # Calculate standard deviation for each column
  num_rows <- nrow(did_bcf_df)

  # Initialize variables for the current linearity degree's best result
  current_linearity_smallest_relative_diff <- Inf
  current_linearity_best_column <- ""
  current_linearity_best_model <- ""
  current_linearity_best_did_bcf_sd <- NA
  current_linearity_best_other_model_sd <- NA
  current_linearity_best_did_bcf_mean <- NA # Store the mean of the DiD_BCF column

  # Compare DiD_BCF with other models for the current linearity degree
  for (i in 2:length(file_names)) {
    # Extract clean model name
    model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                       gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                            gsub("did_dr_CATE_GATE_PS_and_PValues", "did_dr", 
                                 gsub("DoubleML_did_CATE_GATE_PS_and_PValues", "DoubleML_did",
                                      gsub("OLS_CATE_GATE_PS_and_PValues", "OLS",
                                      gsub("synthdid_CATE_GATE_PS_and_PValues", "SDiD",
                                           gsub("DiD_BCF_CATE_GATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
    
    current_df <- data_list[[file_names[i]]]
    current_means <- colMeans(current_df)
    current_sds <- apply(current_df, 2, sd) # Calculate standard deviation for current model's columns

    # Calculate relative difference for each column
    for (j in 1:length(did_bcf_means)) {
      col_name <- names(did_bcf_means)[j]
      
      if (did_bcf_means[j] == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_means[j] - did_bcf_means[j]) / did_bcf_means[j])
      }

      # Check if this is the smallest relative difference for the current linearity degree
      if (!is.na(relative_diff) && relative_diff < current_linearity_smallest_relative_diff) {
        current_linearity_smallest_relative_diff <- relative_diff
        current_linearity_best_column <- col_name
        current_linearity_best_model <- model_name
        current_linearity_best_did_bcf_sd <- did_bcf_sds[j]
        current_linearity_best_other_model_sd <- current_sds[j]
        current_linearity_best_did_bcf_mean <- did_bcf_means[j]
      }
      
      # Check if this is the overall smallest relative difference
      if (!is.na(relative_diff) && relative_diff < overall_smallest_relative_diff) {
        overall_smallest_relative_diff <- relative_diff
        overall_best_column <- col_name
        overall_best_model <- model_name
        overall_best_linearity_degree <- linearity_degree
        overall_best_did_bcf_sd <- did_bcf_sds[j]
        overall_best_other_model_sd <- current_sds[j]
        overall_best_num_rows <- num_rows # The number of rows is consistent across models for a given linearity
        overall_best_did_bcf_mean <- did_bcf_means[j]
      }
    }
  }
  
  # Print result for the current linearity degree
  if (current_linearity_smallest_relative_diff == Inf) {
    cat("  Could not find a meaningful smallest relative difference for linearity degree", linearity_degree, "(possibly due to DiD_BCF values being zero).\n")
  } else {
    cat("  For linearity degree ", linearity_degree, ":\n",
        "    Number of rows: ", num_rows, "\n",
        "    Mean of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_mean), "\n",
        "    SD of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_sd), "\n",
        "    The column with the smallest relative difference from DiD_BCF is '", current_linearity_best_column, "' ",
        "in the '", current_linearity_best_model, "' model.\n",
        "    Relative difference: ", sprintf("%.4f", current_linearity_smallest_relative_diff), "\n",
        "    SD of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_sd), "\n",
        "Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model:" , estimate_welch_power(n1=num_rows, n2=num_rows, sd1=current_linearity_best_did_bcf_sd, sd2=current_linearity_best_other_model_sd, delta=current_linearity_best_did_bcf_mean*0.25, alpha=0.05), "\n", 
        "Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model:" , estimate_welch_power(n1=num_rows, n2=num_rows, sd1=current_linearity_best_did_bcf_sd, sd2=current_linearity_best_other_model_sd, delta=current_linearity_best_did_bcf_mean*0.10, alpha=0.05), "\n", sep="")
  }
}


--- Processing for linearity degree = 1 ---
  For linearity degree 1:
    Number of rows: 100
    Mean of DiD_BCF for 'MAPE_overall': 0.0982
    SD of DiD_BCF for 'MAPE_overall': 0.0258
    The column with the smallest relative difference from DiD_BCF is 'MAPE_overall' in the 'SDiD' model.
    Relative difference: 1.1619
    SD of 'SDiD' for 'MAPE_overall': 0.0112
Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model:1
Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model:0.9344364

--- Processing for linearity degree = 2 ---
  For linearity degree 2:
    Number of rows: 100
    Mean of DiD_BCF for 'MAPE_overall': 0.3402
    SD of DiD_BCF for 'MAPE_overall': 0.3081
    The column with the smallest relative difference from DiD_BCF is 'MAPE_overall' in the 'SDiD' model.
    Relative difference: 0.1710
    SD of 'SDiD' for 'MAPE_overall': 0.1075
Estimated Power For the Simulations for Detecting at lea

In [10]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Placeholder for estimate_welch_power function.
# In a real scenario, you'd use a robust power analysis function from a package like 'pwr'.
estimate_welch_power <- function(n1, n2, sd1, sd2, delta, alpha) {
  if (is.na(sd1) || is.na(sd2) || (sd1 == 0 && sd2 == 0)) return(NA) 
  
  if (is.na(delta)) return(NA)

  se_delta <- sqrt(sd1^2/n1 + sd2^2/n2)
  if (se_delta == 0) return(NA) 
  
  z_alpha <- qnorm(1 - alpha/2) 
  
  d_prime <- delta / se_delta
  
  power_val <- pnorm(d_prime - z_alpha) + pnorm(-d_prime - z_alpha)
  
  power_val <- max(0, min(1, power_val))
  
  return(power_val)
}

# Outer loop for different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n", rep("-", 50), "\n", sep="") # Separator for clarity
  cat("--- Processing for linearity degree =", linearity_degree, "---\n")
  cat(rep("-", 50), "\n\n", sep="")

  # Define file names dynamically based on linearity_degree (for Excel files)
  file_names <- c(
    sprintf("DiD_BCF_CATE_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_CATE_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_CATE_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    data_list[[file]] <- df[, 1:3] # Select only the first three columns
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  num_rows <- nrow(did_bcf_df) # Number of rows is consistent across models for a given linearity

  # Loop through each of the first three columns
  # Iterate from 1 to ncol(did_bcf_df) which will be 3 based on your selection
  for (col_idx in 1:ncol(did_bcf_df)) {
    col_name <- names(did_bcf_df)[col_idx]
    
    cat("\n--- Analyzing Column '", col_name, "' for linearity degree =", linearity_degree, "---\n", sep="")

    # Get DiD_BCF stats for the current column
    did_bcf_mean_col <- colMeans(did_bcf_df, na.rm = TRUE)[col_idx]
    did_bcf_sd_col <- apply(did_bcf_df, 2, sd, na.rm = TRUE)[col_idx]

    # Initialize variables for the best relative difference FOR THIS SPECIFIC COLUMN
    smallest_relative_diff_for_col <- Inf
    best_model_for_col <- ""
    best_other_model_sd_for_col <- NA
    best_other_model_mean_for_col <- NA

    # Compare DiD_BCF (for current column) against other models (for current column)
    # Start from the second file (index 2) as the first is DiD_BCF
    for (i in 2:length(file_names)) {
      # Extract clean model name
      model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                         gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                              gsub("did_dr_CATE_GATE_PS_and_PValues", "did_dr", 
                                   gsub("DoubleML_did_CATE_GATE_PS_and_PValues", "DoubleML_did",
                                        gsub("OLS_CATE_GATE_PS_and_PValues", "OLS",
                                        gsub("synthdid_CATE_GATE_PS_and_PValues", "SDiD",
                                             gsub("DiD_BCF_CATE_GATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
      
      current_df <- data_list[[file_names[i]]]
      
      # Ensure the column exists in the current model's dataframe
      if (!(col_name %in% names(current_df))) {
        warning(paste("Column '", col_name, "' not found in '", model_name, "' dataframe. Skipping comparison for this model/column.", sep=""))
        next
      }

      current_mean_col <- colMeans(current_df, na.rm = TRUE)[col_idx]
      current_sd_col <- apply(current_df, 2, sd, na.rm = TRUE)[col_idx]

      if (did_bcf_mean_col == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_mean_col - did_bcf_mean_col) / did_bcf_mean_col)
      }

      # Check if this is the smallest relative difference for THIS COLUMN
      if (!is.na(relative_diff) && relative_diff < smallest_relative_diff_for_col) {
        smallest_relative_diff_for_col <- relative_diff
        best_model_for_col <- model_name
        best_other_model_sd_for_col <- current_sd_col
        best_other_model_mean_for_col <- current_mean_col
      }
    } # End of inner loop (comparing models for a specific column)

    # Print results for the current column
    if (smallest_relative_diff_for_col == Inf) {
      cat("  Could not find a meaningful smallest relative difference for column '", col_name, "' ",
          "(possibly due to DiD_BCF values being zero or missing).\n", sep="")
    } else {
      # Calculate power for 25% and 10% superiority based on current column's best result
      power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.25, 
                                               alpha=0.05)
      
      power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.10, 
                                               alpha=0.05)

      cat("  Number of rows: ", num_rows, "\n",
          "  Mean of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_mean_col), "\n",
          "  SD of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_sd_col), "\n",
          "  Smallest relative difference for '", col_name, "' is with the '", best_model_for_col, "' model.\n",
          "  Relative difference: ", sprintf("%.4f", smallest_relative_diff_for_col), "\n",
          "  Mean of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_mean_for_col), "\n",
          "  SD of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_sd_for_col), "\n",
          "  Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
          "  Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
    }
  } # End of loop for each column
} # End of loop for linearity degrees

cat("\n", rep("=", 70), "\n", sep="")
cat("Analysis Complete for all linearity degrees and columns.\n")
cat(rep("=", 70), "\n", sep="")


--------------------------------------------------
--- Processing for linearity degree = 1 ---
--------------------------------------------------


--- Analyzing Column 'RMSE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'RMSE_overall': 0.3831
  SD of DiD_BCF for 'RMSE_overall': 0.0820
  Smallest relative difference for 'RMSE_overall' is with the 'did_dr' model.
  Relative difference: 1.9581
  Mean of 'did_dr' for 'RMSE_overall': 1.1332
  SD of 'did_dr' for 'RMSE_overall': 0.2645
  Estimated Power (25% Superiority of DiD-BCF): 0.9330
  Estimated Power (10% Superiority of DiD-BCF): 0.2825

--- Analyzing Column 'MAE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'MAE_overall': 0.2866
  SD of DiD_BCF for 'MAE_overall': 0.0725
  Smallest relative difference for 'MAE_overall' is with the 'did_dr' model.
  Relative difference: 2.2237
  Mean of 'did_dr' for 'MAE_overall': 0.9241
  SD of 'did_dr' for 'MAE_overall': 0.2159
  Es

In [16]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Initialize variables to store the overall smallest relative difference and corresponding info
overall_smallest_relative_diff <- Inf
overall_best_column <- ""
overall_best_model <- ""
overall_best_linearity_degree <- NA
overall_best_did_bcf_sd <- NA
overall_best_other_model_sd <- NA
overall_best_num_rows <- NA
overall_best_did_bcf_mean <- NA

# Loop through different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n--- Processing for linearity degree =", linearity_degree, "---\n")

  # Define file names dynamically based on linearity_degree
  file_names <- c(
    sprintf("DiD_BCF_CATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_CATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    data_list[[file]] <- df[, 1:3] # Select only the first three columns
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Calculate DiD_BCF means, standard deviations, and number of rows
  did_bcf_means <- colMeans(did_bcf_df)
  did_bcf_sds <- apply(did_bcf_df, 2, sd) # Calculate standard deviation for each column
  num_rows <- nrow(did_bcf_df)

  # Initialize variables for the current linearity degree's best result
  current_linearity_smallest_relative_diff <- Inf
  current_linearity_best_column <- ""
  current_linearity_best_model <- ""
  current_linearity_best_did_bcf_sd <- NA
  current_linearity_best_other_model_sd <- NA
  current_linearity_best_did_bcf_mean <- NA # Store the mean of the DiD_BCF column

  # Compare DiD_BCF with other models for the current linearity degree
  for (i in 2:length(file_names)) {
    # Extract clean model name
    model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                       gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                            gsub("did_dr_CATE_PS_and_PValues", "did_dr", 
                                 gsub("DoubleML_did_CATE_PS_and_PValues", "DoubleML_did",
                                      gsub("OLS_CATE_PS_and_PValues", "OLS",
                                      gsub("synthdid_CATE_PS_and_PValues", "SDiD",
                                           gsub("DiD_BCF_CATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
    
    current_df <- data_list[[file_names[i]]]
    current_means <- colMeans(current_df)
    current_sds <- apply(current_df, 2, sd) # Calculate standard deviation for current model's columns

    # Calculate relative difference for each column
    for (j in 1:length(did_bcf_means)) {
      col_name <- names(did_bcf_means)[j]
      
      if (did_bcf_means[j] == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_means[j] - did_bcf_means[j]) / did_bcf_means[j])
      }

      # Check if this is the smallest relative difference for the current linearity degree
      if (!is.na(relative_diff) && relative_diff < current_linearity_smallest_relative_diff) {
        current_linearity_smallest_relative_diff <- relative_diff
        current_linearity_best_column <- col_name
        current_linearity_best_model <- model_name
        current_linearity_best_did_bcf_sd <- did_bcf_sds[j]
        current_linearity_best_other_model_sd <- current_sds[j]
        current_linearity_best_did_bcf_mean <- did_bcf_means[j]
      }
      
      # Check if this is the overall smallest relative difference
      if (!is.na(relative_diff) && relative_diff < overall_smallest_relative_diff) {
        overall_smallest_relative_diff <- relative_diff
        overall_best_column <- col_name
        overall_best_model <- model_name
        overall_best_linearity_degree <- linearity_degree
        overall_best_did_bcf_sd <- did_bcf_sds[j]
        overall_best_other_model_sd <- current_sds[j]
        overall_best_num_rows <- num_rows # The number of rows is consistent across models for a given linearity
        overall_best_did_bcf_mean <- did_bcf_means[j]
      }
    }
  }
  
  # Print result for the current linearity degree
  if (current_linearity_smallest_relative_diff == Inf) {
    cat("  Could not find a meaningful smallest relative difference for linearity degree", linearity_degree, "(possibly due to DiD_BCF values being zero).\n")
  } else {
    cat("  For linearity degree ", linearity_degree, ":\n",
        "    Number of rows: ", num_rows, "\n",
        "    Mean of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_mean), "\n",
        "    SD of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_sd), "\n",
        "    The column with the smallest relative difference from DiD_BCF is '", current_linearity_best_column, "' ",
        "in the '", current_linearity_best_model, "' model.\n",
        "    Relative difference: ", sprintf("%.4f", current_linearity_smallest_relative_diff), "\n",
        "    SD of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_sd), "\n",
        "Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model:" , estimate_welch_power(n1=num_rows, n2=num_rows, sd1=current_linearity_best_did_bcf_sd, sd2=current_linearity_best_other_model_sd, delta=current_linearity_best_did_bcf_mean*0.25, alpha=0.05), "\n", 
        "Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model:" , estimate_welch_power(n1=num_rows, n2=num_rows, sd1=current_linearity_best_did_bcf_sd, sd2=current_linearity_best_other_model_sd, delta=current_linearity_best_did_bcf_mean*0.10, alpha=0.05), "\n", sep="")
  }
}


--- Processing for linearity degree = 1 ---
  For linearity degree 1:
    Number of rows: 100
    Mean of DiD_BCF for 'RMSE_overall': 0.3624
    SD of DiD_BCF for 'RMSE_overall': 0.0449
    The column with the smallest relative difference from DiD_BCF is 'RMSE_overall' in the 'SDiD' model.
    Relative difference: 1.4438
    SD of 'SDiD' for 'RMSE_overall': 0.0219
Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model:1
Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model:0.9999999

--- Processing for linearity degree = 2 ---
  For linearity degree 2:
    Number of rows: 100
    Mean of DiD_BCF for 'RMSE_overall': 0.3655
    SD of DiD_BCF for 'RMSE_overall': 0.0443
    The column with the smallest relative difference from DiD_BCF is 'RMSE_overall' in the 'SDiD' model.
    Relative difference: 1.4311
    SD of 'SDiD' for 'RMSE_overall': 0.0235
Estimated Power For the Simulations for Detecting at lea

In [11]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Placeholder for estimate_welch_power function.
# In a real scenario, you'd use a robust power analysis function from a package like 'pwr'.
estimate_welch_power <- function(n1, n2, sd1, sd2, delta, alpha) {
  if (is.na(sd1) || is.na(sd2) || (sd1 == 0 && sd2 == 0)) return(NA) 
  
  if (is.na(delta)) return(NA)

  se_delta <- sqrt(sd1^2/n1 + sd2^2/n2)
  if (se_delta == 0) return(NA) 
  
  z_alpha <- qnorm(1 - alpha/2) 
  
  d_prime <- delta / se_delta
  
  power_val <- pnorm(d_prime - z_alpha) + pnorm(-d_prime - z_alpha)
  
  power_val <- max(0, min(1, power_val))
  
  return(power_val)
}

# Outer loop for different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n", rep("-", 50), "\n", sep="") # Separator for clarity
  cat("--- Processing for linearity degree =", linearity_degree, "---\n")
  cat(rep("-", 50), "\n\n", sep="")

  # Define file names dynamically based on linearity_degree (for Excel files)
  file_names <- c(
    sprintf("DiD_BCF_CATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_CATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_CATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    data_list[[file]] <- df[, 1:3] # Select only the first three columns
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  num_rows <- nrow(did_bcf_df) # Number of rows is consistent across models for a given linearity

  # Loop through each of the first three columns
  # Iterate from 1 to ncol(did_bcf_df) which will be 3 based on your selection
  for (col_idx in 1:ncol(did_bcf_df)) {
    col_name <- names(did_bcf_df)[col_idx]
    
    cat("\n--- Analyzing Column '", col_name, "' for linearity degree =", linearity_degree, "---\n", sep="")

    # Get DiD_BCF stats for the current column
    did_bcf_mean_col <- colMeans(did_bcf_df, na.rm = TRUE)[col_idx]
    did_bcf_sd_col <- apply(did_bcf_df, 2, sd, na.rm = TRUE)[col_idx]

    # Initialize variables for the best relative difference FOR THIS SPECIFIC COLUMN
    smallest_relative_diff_for_col <- Inf
    best_model_for_col <- ""
    best_other_model_sd_for_col <- NA
    best_other_model_mean_for_col <- NA

    # Compare DiD_BCF (for current column) against other models (for current column)
    # Start from the second file (index 2) as the first is DiD_BCF
    for (i in 2:length(file_names)) {
      # Extract clean model name
      model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                         gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                              gsub("did_dr_CATE_PS_and_PValues", "did_dr", 
                                   gsub("DoubleML_did_CATE_PS_and_PValues", "DoubleML_did",
                                        gsub("OLS_CATE_PS_and_PValues", "OLS",
                                        gsub("synthdid_CATE_PS_and_PValues", "SDiD",
                                             gsub("DiD_BCF_CATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
      
      current_df <- data_list[[file_names[i]]]
      
      # Ensure the column exists in the current model's dataframe
      if (!(col_name %in% names(current_df))) {
        warning(paste("Column '", col_name, "' not found in '", model_name, "' dataframe. Skipping comparison for this model/column.", sep=""))
        next
      }

      current_mean_col <- colMeans(current_df, na.rm = TRUE)[col_idx]
      current_sd_col <- apply(current_df, 2, sd, na.rm = TRUE)[col_idx]

      if (did_bcf_mean_col == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_mean_col - did_bcf_mean_col) / did_bcf_mean_col)
      }

      # Check if this is the smallest relative difference for THIS COLUMN
      if (!is.na(relative_diff) && relative_diff < smallest_relative_diff_for_col) {
        smallest_relative_diff_for_col <- relative_diff
        best_model_for_col <- model_name
        best_other_model_sd_for_col <- current_sd_col
        best_other_model_mean_for_col <- current_mean_col
      }
    } # End of inner loop (comparing models for a specific column)

    # Print results for the current column
    if (smallest_relative_diff_for_col == Inf) {
      cat("  Could not find a meaningful smallest relative difference for column '", col_name, "' ",
          "(possibly due to DiD_BCF values being zero or missing).\n", sep="")
    } else {
      # Calculate power for 25% and 10% superiority based on current column's best result
      power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.25, 
                                               alpha=0.05)
      
      power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.10, 
                                               alpha=0.05)

      cat("  Number of rows: ", num_rows, "\n",
          "  Mean of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_mean_col), "\n",
          "  SD of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_sd_col), "\n",
          "  Smallest relative difference for '", col_name, "' is with the '", best_model_for_col, "' model.\n",
          "  Relative difference: ", sprintf("%.4f", smallest_relative_diff_for_col), "\n",
          "  Mean of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_mean_for_col), "\n",
          "  SD of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_sd_for_col), "\n",
          "  Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
          "  Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
    }
  } # End of loop for each column
} # End of loop for linearity degrees

cat("\n", rep("=", 70), "\n", sep="")
cat("Analysis Complete for all linearity degrees and columns.\n")
cat(rep("=", 70), "\n", sep="")


--------------------------------------------------
--- Processing for linearity degree = 1 ---
--------------------------------------------------


--- Analyzing Column 'RMSE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'RMSE_overall': 0.3624
  SD of DiD_BCF for 'RMSE_overall': 0.0449
  Smallest relative difference for 'RMSE_overall' is with the 'SDiD' model.
  Relative difference: 1.4438
  Mean of 'SDiD' for 'RMSE_overall': 0.8856
  SD of 'SDiD' for 'RMSE_overall': 0.0219
  Estimated Power (25% Superiority of DiD-BCF): 1.0000
  Estimated Power (10% Superiority of DiD-BCF): 1.0000

--- Analyzing Column 'MAE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'MAE_overall': 0.2849
  SD of DiD_BCF for 'MAE_overall': 0.0392
  Smallest relative difference for 'MAE_overall' is with the 'SDiD' model.
  Relative difference: 1.7757
  Mean of 'SDiD' for 'MAE_overall': 0.7908
  SD of 'SDiD' for 'MAE_overall': 0.0212
  Estimated Powe

In [4]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Initialize variables to store the overall smallest relative difference and corresponding info
overall_smallest_relative_diff <- Inf
overall_best_column <- ""
overall_best_model <- ""
overall_best_linearity_degree <- NA
overall_best_did_bcf_sd <- NA
overall_best_other_model_sd <- NA
overall_best_num_rows <- NA
overall_best_did_bcf_mean <- NA
overall_best_other_model_mean <- NA # Storing mean of the other model as well

# Loop through different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n--- Processing for linearity degree =", linearity_degree, "---\n")

  # Define file names dynamically based on linearity_degree
  file_names <- c(
    sprintf("DiD_BCF_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    df_processed <- df[, 1:3] # Select only the first three columns
    
    # Adjust the third column for DiD_BCF and OLS files if it exists
    if (ncol(df_processed) >= 3) { # Ensure there are at least 3 columns
      if (grepl("DiD_BCF_GATE_PS_and_PValues", file) || grepl("OLS_GATE_PS_and_PValues", file)) {
        df_processed[[3]] <- df_processed[[3]] / 100
        cat(sprintf("  Adjusted third column of %s by dividing by 100.\n", basename(file)))
      }
    }
    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Calculate DiD_BCF means, standard deviations, and number of rows
  did_bcf_means <- colMeans(did_bcf_df, na.rm = TRUE) # Added na.rm=TRUE for robustness
  did_bcf_sds <- apply(did_bcf_df, 2, sd, na.rm = TRUE) # Calculate standard deviation for each column
  num_rows <- nrow(did_bcf_df)

  # Initialize variables for the current linearity degree's best result
  current_linearity_smallest_relative_diff <- Inf
  current_linearity_best_column <- ""
  current_linearity_best_model <- ""
  current_linearity_best_did_bcf_sd <- NA
  current_linearity_best_other_model_sd <- NA
  current_linearity_best_did_bcf_mean <- NA 
  current_linearity_best_other_model_mean <- NA

  # Compare DiD_BCF with other models for the current linearity degree
  # Start from the second file in file_names (index 2) as the first is DiD_BCF
  for (i in 2:length(file_names)) {
    # Extract clean model name
    model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                       gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                            gsub("did_dr_GATE_PS_and_PValues", "did_dr", 
                                 gsub("DoubleML_did_GATE_PS_and_PValues", "DoubleML_did",
                                      gsub("OLS_GATE_PS_and_PValues", "OLS",
                                      gsub("synthdid_GATE_PS_and_PValues", "SDiD",
                                           gsub("DiD_BCF_GATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
    
    current_df <- data_list[[file_names[i]]]
    current_means <- colMeans(current_df, na.rm = TRUE) # Added na.rm=TRUE
    current_sds <- apply(current_df, 2, sd, na.rm = TRUE) # Calculate standard deviation for current model's columns

    # Calculate relative difference for each column
    for (j in 1:length(did_bcf_means)) {
      col_name <- names(did_bcf_means)[j]
      
      # Ensure column exists in both dataframes before accessing means/sds
      if (!(col_name %in% names(current_means)) || !(col_name %in% names(did_bcf_means))) {
        warning(paste("Column '", col_name, "' not found in both dataframes for comparison. Skipping.", sep=""))
        next
      }

      if (did_bcf_means[j] == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_means[j] - did_bcf_means[j]) / did_bcf_means[j])
      }

      # Check if this is the smallest relative difference for the current linearity degree
      if (!is.na(relative_diff) && relative_diff < current_linearity_smallest_relative_diff) {
        current_linearity_smallest_relative_diff <- relative_diff
        current_linearity_best_column <- col_name
        current_linearity_best_model <- model_name
        current_linearity_best_did_bcf_sd <- did_bcf_sds[col_name] # Access by name for safety
        current_linearity_best_other_model_sd <- current_sds[col_name] # Access by name for safety
        current_linearity_best_did_bcf_mean <- did_bcf_means[col_name] # Access by name for safety
        current_linearity_best_other_model_mean <- current_means[col_name] # Store other model's mean
      }
      
      # Check if this is the overall smallest relative difference
      if (!is.na(relative_diff) && relative_diff < overall_smallest_relative_diff) {
        overall_smallest_relative_diff <- relative_diff
        overall_best_column <- col_name
        overall_best_model <- model_name
        overall_best_linearity_degree <- linearity_degree
        overall_best_did_bcf_sd <- did_bcf_sds[col_name]
        overall_best_other_model_sd <- current_sds[col_name]
        overall_best_num_rows <- num_rows 
        overall_best_did_bcf_mean <- did_bcf_means[col_name]
        overall_best_other_model_mean <- current_means[col_name]
      }
    }
  }
  
  # Print result for the current linearity degree
  if (current_linearity_smallest_relative_diff == Inf) {
    cat("  Could not find a meaningful smallest relative difference for linearity degree", linearity_degree, "(possibly due to DiD_BCF values being zero or missing).\n")
  } else {
    # Calculate power for 25% and 10% superiority based on current linearity's best result
    power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.25, 
                                             alpha=0.05)
    
    power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.10, 
                                             alpha=0.05)

    cat("  For linearity degree ", linearity_degree, ":\n",
        "    Number of rows: ", num_rows, "\n",
        "    Mean of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_mean), "\n",
        "    SD of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_sd), "\n",
        "    The column with the smallest relative difference from DiD_BCF is '", current_linearity_best_column, "' ",
        "in the '", current_linearity_best_model, "' model.\n",
        "    Relative difference: ", sprintf("%.4f", current_linearity_smallest_relative_diff), "\n",
        "    Mean of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_mean), "\n",
        "    SD of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_sd), "\n",
        "    Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model: ", sprintf("%.4f", power_25_percent), "\n", 
        "    Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model: ", sprintf("%.4f", power_10_percent), "\n", sep="")
  }
}


--- Processing for linearity degree = 1 ---
  Adjusted third column of DiD_BCF_GATE_PS_and_PValues_linearity=1.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_PS_and_PValues_linearity=1.xlsx by dividing by 100.
  For linearity degree 1:
    Number of rows: 100
    Mean of DiD_BCF for 'MAPE_overall': 0.0917
    SD of DiD_BCF for 'MAPE_overall': 0.0692
    The column with the smallest relative difference from DiD_BCF is 'MAPE_overall' in the 'SDiD' model.
    Relative difference: 0.4988
    Mean of 'SDiD' for 'MAPE_overall': 0.1374
    SD of 'SDiD' for 'MAPE_overall': 0.0621
    Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model: 0.6890
    Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model: 0.1655

--- Processing for linearity degree = 2 ---
  Adjusted third column of DiD_BCF_GATE_PS_and_PValues_linearity=2.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_PS_and_PValue

In [15]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Outer loop for different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n", rep("-", 50), "\n", sep="") # Separator for clarity
  cat("--- Processing for linearity degree =", linearity_degree, "---\n")
  cat(rep("-", 50), "\n\n", sep="")

  # Define file names dynamically based on linearity_degree (for Excel files)
  file_names <- c(
    sprintf("DiD_BCF_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_GATE_PS_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_GATE_PS_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    df_processed <- df[, 1:3] # Select only the first three columns
    
    # *** Removed the division by 100 for DiD_BCF and OLS files ***
    # The previous block for:
     if (ncol(df_processed) >= 3) {
       if (grepl("DiD_BCF_GATE_PS_and_PValues", file) || grepl("OLS_GATE_PS_and_PValues", file)) {
         df_processed[[3]] <- df_processed[[3]] / 100
         cat(sprintf("  Adjusted third column of %s by dividing by 100.\n", basename(file)))
       }
     }
    # is now removed.
    
    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  num_rows <- nrow(did_bcf_df) # Number of rows is consistent across models for a given linearity

  # Loop through each of the first three columns
  # Iterate from 1 to ncol(did_bcf_df) which will be 3 based on your selection
  for (col_idx in 1:ncol(did_bcf_df)) {
    col_name <- names(did_bcf_df)[col_idx]
    
    cat("\n--- Analyzing Column '", col_name, "' for linearity degree =", linearity_degree, "---\n", sep="")

    # Get DiD_BCF stats for the current column
    did_bcf_mean_col <- colMeans(did_bcf_df, na.rm = TRUE)[col_idx]
    did_bcf_sd_col <- apply(did_bcf_df, 2, sd, na.rm = TRUE)[col_idx]

    # Initialize variables for the best relative difference FOR THIS SPECIFIC COLUMN
    smallest_relative_diff_for_col <- Inf
    best_model_for_col <- ""
    best_other_model_sd_for_col <- NA
    best_other_model_mean_for_col <- NA

    # Compare DiD_BCF (for current column) against other models (for current column)
    # Start from the second file (index 2) as the first is DiD_BCF
    for (i in 2:length(file_names)) {
      # Extract clean model name
      model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                         gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                              gsub("did_dr_GATE_PS_and_PValues", "did_dr", 
                                   gsub("DoubleML_did_GATE_PS_and_PValues", "DoubleML_did",
                                        gsub("OLS_GATE_PS_and_PValues", "OLS",
                                        gsub("synthdid_GATE_PS_and_PValues", "SDiD",
                                             gsub("DiD_BCF_GATE_PS_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
      
      current_df <- data_list[[file_names[i]]]
      
      # Ensure the column exists in the current model's dataframe
      if (!(col_name %in% names(current_df))) {
        warning(paste("Column '", col_name, "' not found in '", model_name, "' dataframe. Skipping comparison for this model/column.", sep=""))
        next
      }

      current_mean_col <- colMeans(current_df, na.rm = TRUE)[col_idx]
      current_sd_col <- apply(current_df, 2, sd, na.rm = TRUE)[col_idx]

      if (did_bcf_mean_col == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_mean_col - did_bcf_mean_col) / did_bcf_mean_col)
      }

      # Check if this is the smallest relative difference for THIS COLUMN
      if (!is.na(relative_diff) && relative_diff < smallest_relative_diff_for_col) {
        smallest_relative_diff_for_col <- relative_diff
        best_model_for_col <- model_name
        best_other_model_sd_for_col <- current_sd_col
        best_other_model_mean_for_col <- current_mean_col
      }
    } # End of inner loop (comparing models for a specific column)

    # Print results for the current column
    if (smallest_relative_diff_for_col == Inf) {
      cat("  Could not find a meaningful smallest relative difference for column '", col_name, "' ",
          "(possibly due to DiD_BCF values being zero or missing).\n", sep="")
    } else {
      # Calculate power for 25% and 10% superiority based on current column's best result
      power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.25, 
                                               alpha=0.05)
      
      power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.10, 
                                               alpha=0.05)

      cat("  Number of rows: ", num_rows, "\n",
          "  Mean of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_mean_col), "\n",
          "  SD of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_sd_col), "\n",
          "  Smallest relative difference for '", col_name, "' is with the '", best_model_for_col, "' model.\n",
          "  Relative difference: ", sprintf("%.4f", smallest_relative_diff_for_col), "\n",
          "  Mean of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_mean_for_col), "\n",
          "  SD of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_sd_for_col), "\n",
          "  Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
          "  Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
    }
  } # End of loop for each column
} # End of loop for linearity degrees

cat("\n", rep("=", 70), "\n", sep="")
cat("Analysis Complete for all linearity degrees and columns.\n")
cat(rep("=", 70), "\n", sep="")


--------------------------------------------------
--- Processing for linearity degree = 1 ---
--------------------------------------------------

  Adjusted third column of DiD_BCF_GATE_PS_and_PValues_linearity=1.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_PS_and_PValues_linearity=1.xlsx by dividing by 100.

--- Analyzing Column 'RMSE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'RMSE_overall': 0.2536
  SD of DiD_BCF for 'RMSE_overall': 0.1776
  Smallest relative difference for 'RMSE_overall' is with the 'did_dr' model.
  Relative difference: 1.9790
  Mean of 'did_dr' for 'RMSE_overall': 0.7553
  SD of 'did_dr' for 'RMSE_overall': 0.3529
  Estimated Power (25% Superiority of DiD-BCF): 0.3614
  Estimated Power (10% Superiority of DiD-BCF): 0.0984

--- Analyzing Column 'MAE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'MAE_overall': 0.2063
  SD of DiD_BCF for 'MAE_overall': 0.1557
  Smallest relative

In [6]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Initialize variables to store the overall smallest relative difference and corresponding info
overall_smallest_relative_diff <- Inf
overall_best_column <- ""
overall_best_model <- ""
overall_best_linearity_degree <- NA
overall_best_did_bcf_sd <- NA
overall_best_other_model_sd <- NA
overall_best_num_rows <- NA
overall_best_did_bcf_mean <- NA
overall_best_other_model_mean <- NA # Storing mean of the other model as well

# Loop through different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n--- Processing for linearity degree =", linearity_degree, "---\n")

  # Define file names dynamically based on linearity_degree
  file_names <- c(
    sprintf("DiD_BCF_GATE_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("did2s_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_GATE_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    df_processed <- df[, 1:3] # Select only the first three columns
    
    # Adjust the third column for DiD_BCF and OLS files if it exists
    if (ncol(df_processed) >= 3) { # Ensure there are at least 3 columns
      if (grepl("DiD_BCF_GATE_and_PValues", file) || grepl("OLS_GATE_and_PValues", file)) {
        df_processed[[3]] <- df_processed[[3]] / 100
        cat(sprintf("  Adjusted third column of %s by dividing by 100.\n", basename(file)))
      }
    }
    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Calculate DiD_BCF means, standard deviations, and number of rows
  did_bcf_means <- colMeans(did_bcf_df, na.rm = TRUE) # Added na.rm=TRUE for robustness
  did_bcf_sds <- apply(did_bcf_df, 2, sd, na.rm = TRUE) # Calculate standard deviation for each column
  num_rows <- nrow(did_bcf_df)

  # Initialize variables for the current linearity degree's best result
  current_linearity_smallest_relative_diff <- Inf
  current_linearity_best_column <- ""
  current_linearity_best_model <- ""
  current_linearity_best_did_bcf_sd <- NA
  current_linearity_best_other_model_sd <- NA
  current_linearity_best_did_bcf_mean <- NA 
  current_linearity_best_other_model_mean <- NA

  # Compare DiD_BCF with other models for the current linearity degree
  # Start from the second file in file_names (index 2) as the first is DiD_BCF
  for (i in 2:length(file_names)) {
    # Extract clean model name
    model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                       gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                            gsub("did_dr_GATE_and_PValues", "did_dr", 
                            gsub("did2s_GATE_and_PValues", "did2s", 
                                 gsub("DoubleML_did_GATE_and_PValues", "DoubleML_did",
                                      gsub("OLS_GATE_and_PValues", "OLS",
                                      gsub("synthdid_GATE_and_PValues", "SDiD",
                                           gsub("DiD_BCF_GATE_and_PValues", "DiD_BCF", basename(file_names[i])))))))))
    
    current_df <- data_list[[file_names[i]]]
    current_means <- colMeans(current_df, na.rm = TRUE) # Added na.rm=TRUE
    current_sds <- apply(current_df, 2, sd, na.rm = TRUE) # Calculate standard deviation for current model's columns

    # Calculate relative difference for each column
    for (j in 1:length(did_bcf_means)) {
      col_name <- names(did_bcf_means)[j]
      
      # Ensure column exists in both dataframes before accessing means/sds
      if (!(col_name %in% names(current_means)) || !(col_name %in% names(did_bcf_means))) {
        warning(paste("Column '", col_name, "' not found in both dataframes for comparison. Skipping.", sep=""))
        next
      }

      if (did_bcf_means[j] == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_means[j] - did_bcf_means[j]) / did_bcf_means[j])
      }

      # Check if this is the smallest relative difference for the current linearity degree
      if (!is.na(relative_diff) && relative_diff < current_linearity_smallest_relative_diff) {
        current_linearity_smallest_relative_diff <- relative_diff
        current_linearity_best_column <- col_name
        current_linearity_best_model <- model_name
        current_linearity_best_did_bcf_sd <- did_bcf_sds[col_name] # Access by name for safety
        current_linearity_best_other_model_sd <- current_sds[col_name] # Access by name for safety
        current_linearity_best_did_bcf_mean <- did_bcf_means[col_name] # Access by name for safety
        current_linearity_best_other_model_mean <- current_means[col_name] # Store other model's mean
      }
      
      # Check if this is the overall smallest relative difference
      if (!is.na(relative_diff) && relative_diff < overall_smallest_relative_diff) {
        overall_smallest_relative_diff <- relative_diff
        overall_best_column <- col_name
        overall_best_model <- model_name
        overall_best_linearity_degree <- linearity_degree
        overall_best_did_bcf_sd <- did_bcf_sds[col_name]
        overall_best_other_model_sd <- current_sds[col_name]
        overall_best_num_rows <- num_rows 
        overall_best_did_bcf_mean <- did_bcf_means[col_name]
        overall_best_other_model_mean <- current_means[col_name]
      }
    }
  }
  
  # Print result for the current linearity degree
  if (current_linearity_smallest_relative_diff == Inf) {
    cat("  Could not find a meaningful smallest relative difference for linearity degree", linearity_degree, "(possibly due to DiD_BCF values being zero or missing).\n")
  } else {
    # Calculate power for 25% and 10% superiority based on current linearity's best result
    power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.25, 
                                             alpha=0.05)
    
    power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.10, 
                                             alpha=0.05)

    cat("  For linearity degree ", linearity_degree, ":\n",
        "    Number of rows: ", num_rows, "\n",
        "    Mean of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_mean), "\n",
        "    SD of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_sd), "\n",
        "    The column with the smallest relative difference from DiD_BCF is '", current_linearity_best_column, "' ",
        "in the '", current_linearity_best_model, "' model.\n",
        "    Relative difference: ", sprintf("%.4f", current_linearity_smallest_relative_diff), "\n",
        "    Mean of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_mean), "\n",
        "    SD of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_sd), "\n",
        "    Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model: ", sprintf("%.4f", power_25_percent), "\n", 
        "    Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model: ", sprintf("%.4f", power_10_percent), "\n", sep="")
  }
}


--- Processing for linearity degree = 1 ---
  Adjusted third column of DiD_BCF_GATE_and_PValues_linearity=1.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_and_PValues_linearity=1.xlsx by dividing by 100.
  For linearity degree 1:
    Number of rows: 100
    Mean of DiD_BCF for 'RMSE_overall': 0.2433
    SD of DiD_BCF for 'RMSE_overall': 0.1707
    The column with the smallest relative difference from DiD_BCF is 'RMSE_overall' in the 'did2s' model.
    Relative difference: 0.5189
    Mean of 'did2s' for 'RMSE_overall': 0.1171
    SD of 'did2s' for 'RMSE_overall': 0.0461
    Estimated Power For the Simulations for Detecting at least a 25% Superiority of the DiD-BCF model: 0.9265
    Estimated Power For the Simulations for Detecting at least a 10% Superiority of the DiD-BCF model: 0.2761

--- Processing for linearity degree = 2 ---
  Adjusted third column of DiD_BCF_GATE_and_PValues_linearity=2.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_and_PValues_lineari

In [16]:
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Placeholder for estimate_welch_power function.
# In a real scenario, you'd use a robust power analysis function from a package like 'pwr'.
estimate_welch_power <- function(n1, n2, sd1, sd2, delta, alpha) {
  if (is.na(sd1) || is.na(sd2) || (sd1 == 0 && sd2 == 0)) return(NA) 
  
  if (is.na(delta)) return(NA)

  se_delta <- sqrt(sd1^2/n1 + sd2^2/n2)
  if (se_delta == 0) return(NA) 
  
  z_alpha <- qnorm(1 - alpha/2) 
  
  d_prime <- delta / se_delta
  
  power_val <- pnorm(d_prime - z_alpha) + pnorm(-d_prime - z_alpha)
  
  power_val <- max(0, min(1, power_val))
  
  return(power_val)
}

# Outer loop for different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n", rep("-", 50), "\n", sep="") # Separator for clarity
  cat("--- Processing for linearity degree =", linearity_degree, "---\n")
  cat(rep("-", 50), "\n\n", sep="")

  # Define file names dynamically based on linearity_degree (for Excel files)
  file_names <- c(
    sprintf("DiD_BCF_GATE_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("did_dr_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("DoubleML_did_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree),
    sprintf("OLS_GATE_and_PValues_linearity=%d.xlsx", linearity_degree),
    sprintf("synthdid_GATE_and_PValues_linearity_degree=%d.xlsx", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each Excel file and select the first three columns
  for (file in file_names) {
    df <- read_excel(file)
    df_processed <- df[, 1:3] # Select only the first three columns
    
    # *** Removed the division by 100 for DiD_BCF and OLS files ***
    # The previous block for:
     if (ncol(df_processed) >= 3) {
       if (grepl("DiD_BCF_GATE_and_PValues", file) || grepl("OLS_GATE_and_PValues", file)) {
         df_processed[[3]] <- df_processed[[3]] / 100
         cat(sprintf("  Adjusted third column of %s by dividing by 100.\n", basename(file)))
       }
     }
    # is now removed.
    
    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  num_rows <- nrow(did_bcf_df) # Number of rows is consistent across models for a given linearity

  # Loop through each of the first three columns
  # Iterate from 1 to ncol(did_bcf_df) which will be 3 based on your selection
  for (col_idx in 1:ncol(did_bcf_df)) {
    col_name <- names(did_bcf_df)[col_idx]
    
    cat("\n--- Analyzing Column '", col_name, "' for linearity degree =", linearity_degree, "---\n", sep="")

    # Get DiD_BCF stats for the current column
    did_bcf_mean_col <- colMeans(did_bcf_df, na.rm = TRUE)[col_idx]
    did_bcf_sd_col <- apply(did_bcf_df, 2, sd, na.rm = TRUE)[col_idx]

    # Initialize variables for the best relative difference FOR THIS SPECIFIC COLUMN
    smallest_relative_diff_for_col <- Inf
    best_model_for_col <- ""
    best_other_model_sd_for_col <- NA
    best_other_model_mean_for_col <- NA

    # Compare DiD_BCF (for current column) against other models (for current column)
    # Start from the second file (index 2) as the first is DiD_BCF
    for (i in 2:length(file_names)) {
      # Extract clean model name
      model_name <- gsub(sprintf("_linearity_degree=%d\\.xlsx$", linearity_degree), "", 
                         gsub(sprintf("_linearity=%d\\.xlsx$", linearity_degree), "", 
                              gsub("did_dr_GATE_and_PValues", "did_dr", 
                                   gsub("DoubleML_did_GATE_and_PValues", "DoubleML_did",
                                        gsub("OLS_GATE_and_PValues", "OLS",
                                        gsub("synthdid_GATE_and_PValues", "SDiD",
                                             gsub("DiD_BCF_GATE_and_PValues", "DiD_BCF", basename(file_names[i]))))))))
      
      current_df <- data_list[[file_names[i]]]
      
      # Ensure the column exists in the current model's dataframe
      if (!(col_name %in% names(current_df))) {
        warning(paste("Column '", col_name, "' not found in '", model_name, "' dataframe. Skipping comparison for this model/column.", sep=""))
        next
      }

      current_mean_col <- colMeans(current_df, na.rm = TRUE)[col_idx]
      current_sd_col <- apply(current_df, 2, sd, na.rm = TRUE)[col_idx]

      if (did_bcf_mean_col == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_mean_col - did_bcf_mean_col) / did_bcf_mean_col)
      }

      # Check if this is the smallest relative difference for THIS COLUMN
      if (!is.na(relative_diff) && relative_diff < smallest_relative_diff_for_col) {
        smallest_relative_diff_for_col <- relative_diff
        best_model_for_col <- model_name
        best_other_model_sd_for_col <- current_sd_col
        best_other_model_mean_for_col <- current_mean_col
      }
    } # End of inner loop (comparing models for a specific column)

    # Print results for the current column
    if (smallest_relative_diff_for_col == Inf) {
      cat("  Could not find a meaningful smallest relative difference for column '", col_name, "' ",
          "(possibly due to DiD_BCF values being zero or missing).\n", sep="")
    } else {
      # Calculate power for 25% and 10% superiority based on current column's best result
      power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.25, 
                                               alpha=0.05)
      
      power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.10, 
                                               alpha=0.05)

      cat("  Number of rows: ", num_rows, "\n",
          "  Mean of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_mean_col), "\n",
          "  SD of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_sd_col), "\n",
          "  Smallest relative difference for '", col_name, "' is with the '", best_model_for_col, "' model.\n",
          "  Relative difference: ", sprintf("%.4f", smallest_relative_diff_for_col), "\n",
          "  Mean of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_mean_for_col), "\n",
          "  SD of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_sd_for_col), "\n",
          "  Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
          "  Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
    }
  } # End of loop for each column
} # End of loop for linearity degrees

cat("\n", rep("=", 70), "\n", sep="")
cat("Analysis Complete for all linearity degrees and columns.\n")
cat(rep("=", 70), "\n", sep="")


--------------------------------------------------
--- Processing for linearity degree = 1 ---
--------------------------------------------------

  Adjusted third column of DiD_BCF_GATE_and_PValues_linearity=1.xlsx by dividing by 100.
  Adjusted third column of OLS_GATE_and_PValues_linearity=1.xlsx by dividing by 100.

--- Analyzing Column 'RMSE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'RMSE_overall': 0.2433
  SD of DiD_BCF for 'RMSE_overall': 0.1707
  Smallest relative difference for 'RMSE_overall' is with the 'did_dr' model.
  Relative difference: 2.4808
  Mean of 'did_dr' for 'RMSE_overall': 0.8469
  SD of 'did_dr' for 'RMSE_overall': 0.1979
  Estimated Power (25% Superiority of DiD-BCF): 0.6433
  Estimated Power (10% Superiority of DiD-BCF): 0.1536

--- Analyzing Column 'MAE_overall' for linearity degree =1---
  Number of rows: 100
  Mean of DiD_BCF for 'MAE_overall': 0.2012
  SD of DiD_BCF for 'MAE_overall': 0.1491
  Smallest relative diffe

In [11]:
# Initialize variables to store the overall smallest relative difference and corresponding info
overall_smallest_relative_diff <- Inf
overall_best_column <- ""
overall_best_model <- ""
overall_best_linearity_degree <- NA
overall_best_did_bcf_sd <- NA
overall_best_other_model_sd <- NA
overall_best_num_rows <- NA
overall_best_did_bcf_mean <- NA
overall_best_other_model_mean <- NA 

# Loop through different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n--- Processing for linearity degree =", linearity_degree, "---\n")

  # Define file names dynamically based on linearity_degree (for CSVs)
  file_names <- c(
    sprintf("DiD_BCF_ATE_and_PValues_ATE_linearity=%d.csv", linearity_degree),
    sprintf("did_dr_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree),
    sprintf("did2s_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree), 
    sprintf("DoubleML_did_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree),
    sprintf("OLS_ATE_and_PValues_ATE_linearity=%d.csv", linearity_degree),
    sprintf("synthdid_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each CSV file, transpose, and select the first three columns
  for (file in file_names) {
    df <- read.csv(file, header = FALSE, stringsAsFactors = FALSE) 
    
    # Transpose the dataframe
    # Make the first column as row names for transposition
    row_names <- df[[1]] # Store the first column as new row names
    df_transposed <- as.data.frame(t(df[-1])) # Transpose excluding the first column
    colnames(df_transposed) <- row_names # Set original first column values as new column names

    # Now, select the first three columns of the TRANSPOSED dataframe
    # This means selecting the columns that correspond to the first three rows of the original data.
    # We need to make sure the transposed dataframe has at least 3 columns
    if (ncol(df_transposed) < 3) {
      warning(paste("Transposed data for", basename(file), "has less than 3 columns. Using all available columns."))
      df_processed <- df_transposed
    } else {
      df_processed <- df_transposed[, 0:3]
    }
    
    # No division by 100 needed anymore, so that block is removed.

    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Calculate DiD_BCF means, standard deviations, and number of rows
  # Note: After transposition, 'rows' are now the original columns.
  # So num_rows here refers to the number of original columns that became rows in the transposed data.
  did_bcf_means <- colMeans(did_bcf_df, na.rm = TRUE) 
  did_bcf_sds <- apply(did_bcf_df, 2, sd, na.rm = TRUE) 
  num_rows <- nrow(did_bcf_df) # Number of observations (originally rows of transposed data)

  # Initialize variables for the current linearity degree's best result
  current_linearity_smallest_relative_diff <- Inf
  current_linearity_best_column <- ""
  current_linearity_best_model <- ""
  current_linearity_best_did_bcf_sd <- NA
  current_linearity_best_other_model_sd <- NA
  current_linearity_best_did_bcf_mean <- NA 
  current_linearity_best_other_model_mean <- NA

  # Compare DiD_BCF with other models for the current linearity degree
  for (i in 2:length(file_names)) {
    # Extract clean model name
    model_name <- gsub(sprintf("_linearity_degree=%d\\.csv$", linearity_degree), "", 
                       gsub(sprintf("_linearity=%d\\.csv$", linearity_degree), "", 
                            gsub("did_dr_ATE_and_PValues", "did_dr", 
                            gsub("did2s_ATE_and_PValues", "did2s", 
                                 gsub("DoubleML_did_ATE_and_PValues", "DoubleML_did",
                                      gsub("OLS_ATE_and_PValues", "OLS",
                                      gsub("synthdid_ATE_and_PValues", "SDiD",
                                           gsub("DiD_BCF_ATE_and_PValues", "DiD_BCF", basename(file_names[i])))))))))
    
    current_df <- data_list[[file_names[i]]]
    current_means <- colMeans(current_df, na.rm = TRUE) 
    current_sds <- apply(current_df, 2, sd, na.rm = TRUE) 

    # Calculate relative difference for each column (which are now the original row identifiers)
    for (j in 1:length(did_bcf_means)) {
      col_name <- names(did_bcf_means)[j] # These are the original values from the first column

      # Ensure column exists in both dataframes before accessing means/sds
      if (!(col_name %in% names(current_means)) || !(col_name %in% names(did_bcf_means))) {
        warning(paste("Column '", col_name, "' not found in both transposed dataframes for comparison. Skipping.", sep=""))
        next
      }

      if (did_bcf_means[j] == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_means[j] - did_bcf_means[j]) / did_bcf_means[j])
      }

      # Check if this is the smallest relative difference for the current linearity degree
      if (!is.na(relative_diff) && relative_diff < current_linearity_smallest_relative_diff) {
        current_linearity_smallest_relative_diff <- relative_diff
        current_linearity_best_column <- col_name
        current_linearity_best_model <- model_name
        current_linearity_best_did_bcf_sd <- did_bcf_sds[col_name] 
        current_linearity_best_other_model_sd <- current_sds[col_name] 
        current_linearity_best_did_bcf_mean <- did_bcf_means[col_name] 
        current_linearity_best_other_model_mean <- current_means[col_name] 
      }
      
      # Check if this is the overall smallest relative difference
      if (!is.na(relative_diff) && relative_diff < overall_smallest_relative_diff) {
        overall_smallest_relative_diff <- relative_diff
        overall_best_column <- col_name
        overall_best_model <- model_name
        overall_best_linearity_degree <- linearity_degree
        overall_best_did_bcf_sd <- did_bcf_sds[col_name]
        overall_best_other_model_sd <- current_sds[col_name]
        overall_best_num_rows <- num_rows 
        overall_best_did_bcf_mean <- did_bcf_means[col_name]
        overall_best_other_model_mean <- current_means[col_name]
      }
    }
  }
  
  # Print result for the current linearity degree
  if (current_linearity_smallest_relative_diff == Inf) {
    cat("  Could not find a meaningful smallest relative difference for linearity degree", linearity_degree, "(possibly due to DiD_BCF values being zero or missing).\n")
  } else {
    power_25_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.25, 
                                             alpha=0.05)
    
    power_10_percent <- estimate_welch_power(n1=num_rows, n2=num_rows, 
                                             sd1=current_linearity_best_did_bcf_sd, 
                                             sd2=current_linearity_best_other_model_sd, 
                                             delta=current_linearity_best_did_bcf_mean * 0.10, 
                                             alpha=0.05)

    cat("  For linearity degree ", linearity_degree, ":\n",
        "    Number of observations (after transpose): ", num_rows, "\n",
        "    Mean of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_mean), "\n",
        "    SD of DiD_BCF for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_did_bcf_sd), "\n",
        "    The column (originally a row) with the smallest relative difference from DiD_BCF is '", current_linearity_best_column, "' ",
        "in the '", current_linearity_best_model, "' model.\n",
        "    Relative difference: ", sprintf("%.4f", current_linearity_smallest_relative_diff), "\n",
        "    Mean of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_mean), "\n",
        "    SD of '", current_linearity_best_model, "' for '", current_linearity_best_column, "': ", sprintf("%.4f", current_linearity_best_other_model_sd), "\n",
        "    Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
        "    Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
  }
}


--- Processing for linearity degree = 1 ---
  For linearity degree 1:
    Number of observations (after transpose): 100
    Mean of DiD_BCF for 'ATE_RMSE': 0.1420
    SD of DiD_BCF for 'ATE_RMSE': 0.0551
    The column (originally a row) with the smallest relative difference from DiD_BCF is 'ATE_RMSE' in the 'SDiDlinearity_degree=1.csv' model.
    Relative difference: 2.7056
    Mean of 'SDiDlinearity_degree=1.csv' for 'ATE_RMSE': 0.5261
    SD of 'SDiDlinearity_degree=1.csv' for 'ATE_RMSE': 0.0944
    Estimated Power (25% Superiority of DiD-BCF): 0.8977
    Estimated Power (10% Superiority of DiD-BCF): 0.2525

--- Processing for linearity degree = 2 ---
  For linearity degree 2:
    Number of observations (after transpose): 100
    Mean of DiD_BCF for 'ATE_RMSE': 0.3775
    SD of DiD_BCF for 'ATE_RMSE': 0.3414
    The column (originally a row) with the smallest relative difference from DiD_BCF is 'ATE_RMSE' in the 'SDiDlinearity_degree=2.csv' model.
    Relative difference: 0.6731
  

In [10]:
# Outer loop for different linearity degrees
for (linearity_degree in 1:3) {
  cat("\n", rep("-", 50), "\n", sep="") # Separator for clarity
  cat("--- Processing for linearity degree =", linearity_degree, "---\n")
  cat(rep("-", 50), "\n\n", sep="")

  # Define file names dynamically based on linearity_degree (for CSVs)
  file_names <- c(
    sprintf("DiD_BCF_ATE_and_PValues_ATE_linearity=%d.csv", linearity_degree),
    sprintf("did_dr_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree),
    sprintf("did2s_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree), 
    sprintf("DoubleML_did_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree),
    sprintf("OLS_ATE_and_PValues_ATE_linearity=%d.csv", linearity_degree),
    sprintf("synthdid_ATE_and_PValueslinearity_degree=%d.csv", linearity_degree)
  )

  # Initialize a list to store data frames for the current linearity degree
  data_list <- list()
  
  # Check if all files exist for the current linearity_degree
  all_files_exist <- TRUE
  for (file in file_names) {
    if (!file.exists(file)) {
      warning(paste("File not found:", file, "- Skipping this linearity degree."))
      all_files_exist <- FALSE
      break
    }
  }

  if (!all_files_exist) {
    next # Skip to the next linearity degree if files are missing
  }

  # Read each CSV file, transpose, and select the first three columns
  for (file in file_names) {
    df <- read.csv(file, header = FALSE, stringsAsFactors = FALSE) 
    
    # Transpose the dataframe
    # Make the first column as row names for transposition
    row_names <- df[[1]] # Store the first column as new row names
    df_transposed <- as.data.frame(t(df[-1])) # Transpose excluding the first column
    colnames(df_transposed) <- row_names # Set original first column values as new column names

    # Now, select the first three columns of the TRANSPOSED dataframe
    # This means selecting the columns that correspond to the first three rows of the original data.
    # We need to make sure the transposed dataframe has at least 3 columns
    if (ncol(df_transposed) < 3) {
      warning(paste("Transposed data for", basename(file), "has less than 3 columns. Using all available columns."))
      df_processed <- df_transposed
    } else {
      df_processed <- df_transposed[, 0:3]
    }
    
    data_list[[file]] <- df_processed
  }

  # Extract DiD_BCF data for the current linearity degree
  did_bcf_file_name <- file_names[1]
  did_bcf_df <- data_list[[did_bcf_file_name]]
  
  # Note: After transposition, 'rows' are now the original columns.
  # num_rows here refers to the number of original columns that became rows in the transposed data.
  num_observations_transposed <- nrow(did_bcf_df) 

  # Loop through each of the first three columns of the transposed data
  # Iterate from 1 to ncol(did_bcf_df) which will be 3 based on your selection
  for (col_idx in 1:ncol(did_bcf_df)) {
    col_name <- names(did_bcf_df)[col_idx]
    
    cat("\n--- Analyzing Column '", col_name, "' for linearity degree =", linearity_degree, "---\n", sep="")

    # Get DiD_BCF stats for the current column
    did_bcf_mean_col <- colMeans(did_bcf_df, na.rm = TRUE)[col_idx]
    did_bcf_sd_col <- apply(did_bcf_df, 2, sd, na.rm = TRUE)[col_idx]

    # Initialize variables for the best relative difference FOR THIS SPECIFIC COLUMN
    smallest_relative_diff_for_col <- Inf
    best_model_for_col <- ""
    best_other_model_sd_for_col <- NA
    best_other_model_mean_for_col <- NA

    # Compare DiD_BCF (for current column) against other models (for current column)
    # Start from the second file (index 2) as the first is DiD_BCF
    for (i in 2:length(file_names)) {
      # Extract clean model name
      model_name <- gsub(sprintf("_linearity_degree=%d\\.csv$", linearity_degree), "", 
                         gsub(sprintf("_linearity=%d\\.csv$", linearity_degree), "", 
                              gsub("did_dr_ATE_and_PValues", "did_dr", 
                              gsub("did2s_ATE_and_PValues", "did2s", 
                                   gsub("DoubleML_did_ATE_and_PValues", "DoubleML_did",
                                        gsub("OLS_ATE_and_PValues", "OLS",
                                        gsub("synthdid_ATE_and_PValues", "SDiD",
                                             gsub("DiD_BCF_ATE_and_PValues", "DiD_BCF", basename(file_names[i])))))))))
      
      current_df <- data_list[[file_names[i]]]
      
      # Ensure the column exists in the current model's dataframe
      if (!(col_name %in% names(current_df))) {
        warning(paste("Column '", col_name, "' not found in '", model_name, "' dataframe. Skipping comparison for this model/column.", sep=""))
        next
      }

      current_mean_col <- colMeans(current_df, na.rm = TRUE)[col_idx]
      current_sd_col <- apply(current_df, 2, sd, na.rm = TRUE)[col_idx]

      if (did_bcf_mean_col == 0) {
        relative_diff <- Inf
      } else {
        relative_diff <- abs((current_mean_col - did_bcf_mean_col) / did_bcf_mean_col)
      }

      # Check if this is the smallest relative difference for THIS COLUMN
      if (!is.na(relative_diff) && relative_diff < smallest_relative_diff_for_col) {
        smallest_relative_diff_for_col <- relative_diff
        best_model_for_col <- model_name
        best_other_model_sd_for_col <- current_sd_col
        best_other_model_mean_for_col <- current_mean_col
      }
    } # End of inner loop (comparing models for a specific column)

    # Print results for the current column
    if (smallest_relative_diff_for_col == Inf) {
      cat("  Could not find a meaningful smallest relative difference for column '", col_name, "' ",
          "(possibly due to DiD_BCF values being zero or missing for this column).\n", sep="")
    } else {
      # Calculate power for 25% and 10% superiority based on current column's best result
      power_25_percent <- estimate_welch_power(n1=num_observations_transposed, n2=num_observations_transposed, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.25, 
                                               alpha=0.05)
      
      power_10_percent <- estimate_welch_power(n1=num_observations_transposed, n2=num_observations_transposed, 
                                               sd1=did_bcf_sd_col, 
                                               sd2=best_other_model_sd_for_col, 
                                               delta=did_bcf_mean_col * 0.10, 
                                               alpha=0.05)

      cat("  Number of observations (after transpose): ", num_observations_transposed, "\n",
          "  Mean of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_mean_col), "\n",
          "  SD of DiD_BCF for '", col_name, "': ", sprintf("%.4f", did_bcf_sd_col), "\n",
          "  Smallest relative difference for '", col_name, "' is with the '", best_model_for_col, "' model.\n",
          "  Relative difference: ", sprintf("%.4f", smallest_relative_diff_for_col), "\n",
          "  Mean of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_mean_for_col), "\n",
          "  SD of '", best_model_for_col, "' for '", col_name, "': ", sprintf("%.4f", best_other_model_sd_for_col), "\n",
          "  Estimated Power (25% Superiority of DiD-BCF): ", sprintf("%.4f", power_25_percent), "\n", 
          "  Estimated Power (10% Superiority of DiD-BCF): ", sprintf("%.4f", power_10_percent), "\n", sep="")
    }
  } # End of loop for each column (originally rows)
} # End of loop for linearity degrees

cat("\n", rep("=", 70), "\n", sep="")
cat("Analysis Complete for all linearity degrees and selected columns.\n")
cat(rep("=", 70), "\n", sep="")


--------------------------------------------------
--- Processing for linearity degree = 1 ---
--------------------------------------------------


--- Analyzing Column 'ATE_RMSE' for linearity degree =1---
  Number of observations (after transpose): 100
  Mean of DiD_BCF for 'ATE_RMSE': 0.1420
  SD of DiD_BCF for 'ATE_RMSE': 0.0551
  Smallest relative difference for 'ATE_RMSE' is with the 'SDiDlinearity_degree=1.csv' model.
  Relative difference: 2.7056
  Mean of 'SDiDlinearity_degree=1.csv' for 'ATE_RMSE': 0.5261
  SD of 'SDiDlinearity_degree=1.csv' for 'ATE_RMSE': 0.0944
  Estimated Power (25% Superiority of DiD-BCF): 0.8977
  Estimated Power (10% Superiority of DiD-BCF): 0.2525

--- Analyzing Column 'ATE_MAE' for linearity degree =1---
  Number of observations (after transpose): 100
  Mean of DiD_BCF for 'ATE_MAE': 0.1214
  SD of DiD_BCF for 'ATE_MAE': 0.0475
  Smallest relative difference for 'ATE_MAE' is with the 'did2slinearity_degree=1.csv' model.
  Relative difference: 3.2742