In [1]:
#SIMULATION - POLICY INTERVENTION (Readme)
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Module conducts the following analysis:

# ---- Policy intervention simulation

# Module is input for:

# ---- Simulation-policy-effectiveness
# ---- Simulation-treatment

In [2]:
#SET-UP
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Make sure every package in `packages` is installed and attached.
#
# packages: character vector of package names.
#
# For each name, in order: if the namespace cannot be loaded, the package is
# installed from the CRAN cloud mirror (with dependencies); then, unless it is
# already on the search path, it is attached with start-up messages muted.
# Called for its side effects; the return value is that of the `for` loop.
check_and_load <- function(packages) {
  for (pkg in packages) {
    is_installed <- requireNamespace(pkg, quietly = TRUE)
    if (!is_installed) {
      message(paste("Installing missing package:", pkg))
      install.packages(pkg, dependencies = TRUE, repos = "https://cloud.r-project.org")
    }

    # Nothing left to do when the package is already attached.
    attached_now <- .packages()
    if (pkg %in% attached_now) {
      next
    }
    suppressPackageStartupMessages(library(pkg, character.only = TRUE))
  }
}

# --- Required Libraries ---
# Every package the script calls must be listed here so that check_and_load()
# installs it on a fresh machine. `withr` is only ever called with an explicit
# namespace (withr::with_seed in filter_intervention_group), but it still has
# to be installed, otherwise the "random" intervention fails at run time.
required_packages <- c(
  "dplyr",    # data manipulation (mutate, group_by, summarise, etc.)
  "Matrix",   # sparse matrix operations for spatial weights
  "FNN",      # k-nearest neighbors (get.knnx for adjacency matrix)
  "sf",       # spatial features (for coordinates, distances, geometry ops)
  "tidyr",    # tidy reshaping, replace_na
  "tibble",   # tibbles
  "withr"     # with_seed() for reproducible random targeting
)

check_and_load(required_packages)

In [3]:
#INPUTS AND SETTINGS
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# --- Calibration Parameters ---

# [m] Max distance for seeding initial adopters around Rotterdam
distance_cutoff_rotterdam <- 5e4
# Rotterdam coordinates (reference point for initialization), 1 x 2 matrix
rotterdam_coords <- matrix(c(4.4786, 51.9244), nrow = 1)
# Start year of diffusion in calibration set
start_year <- 1961
# Noise multiplier for second-pass calibration (to avoid overfitting)
noise_multiplier <- 0.5
# Max number of neighbors for KNN spatial weights
k_max_number <- 500
# [m] Distance cutoff for adjacency matrix (optimized in calibration)
km_cutoff <- 1.2e5
# Decay constant for exponential weights (not used in this version)
decay_constant <- 5e4

# --- Simulation Parameters ---

# Share of adopters at start of simulation (as fraction of eligible offtakers)
initial_share <- 0.01
# Fossil scenario: "carbon_price", "flat_carbon_price", or "no_carbon_price"
carbon_price_setting <- "carbon_price"
# Number of Monte Carlo simulation runs
n_simulations <- 2500
# First and last year of the prospective simulation
start_year_sim <- 2024
end_year <- 2100
# Fraction of plants targeted per sector-quantile group
intervention_volume <- 0.1
# Sectoral stratification granularity (quartiles)
quantile <- 4
# One RNG seed per Monte Carlo run, for reproducibility
seeds <- seq_len(n_simulations)
# Saturation scenario; switch to "restricted" / "extended" if needed
saturation_setting <- "central"
# Cost scenario; switch to conservative / progressive if needed
cost_setting <- "mean"


In [4]:
# DATA FILES
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Plant-level table; later stripped of its geometry and reduced to the model
# coefficients, distances and centrality columns used by the simulation.
offtakers <- readRDS(file = "offtakers_centrality.rds")
# Cost gaps by year and sector; later filtered on green_scenario / fossil_scenario.
cost_gap_data <- readRDS(file = "cost_gap_data.rds")
# Weights matrix multiplied with the adoption vector in every simulated year.
spatial_weights <- readRDS(file = "spatial_weights.rds")

In [5]:
# HELPER FUNCTIONS
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Pick the `k` rows of one stratum that receive the intervention.
#
# df_group: data frame for one group; must contain a `contribution` column.
# type:     "high" keeps the k largest contributions, "low" the k smallest;
#           any other value draws k rows at random.
# k:        number of rows to keep.
# seed:     RNG seed applied (via withr::with_seed) to the random draw only.
#
# Returns the selected rows of `df_group`.
filter_intervention_group <- function(df_group, type, k, seed) {
  if (type == "high") {
    return(
      df_group %>%
        arrange(desc(contribution)) %>%
        slice_head(n = k)
    )
  }

  if (type == "low") {
    return(
      df_group %>%
        arrange(contribution) %>%
        slice_head(n = k)
    )
  }

  # Fallback: seeded random selection.
  withr::with_seed(seed, slice_sample(df_group, n = k))
}

# Collapse one simulation run to yearly totals.
#
# run_result: list as returned by run_simulation(); uses its `adoption_plant`
#             and `policy_cost` tables, matched on year and plant_id.
#
# Returns one row per year with
#   annual_demand = sum(adoption * hydrogen_2050)
#   annual_cost   = sum(policy_cost)
# (missing values are ignored in both sums).
summarize_run <- function(run_result) {
  plant_year <- left_join(
    run_result$adoption_plant,
    run_result$policy_cost,
    by = c("year", "plant_id")
  )

  yearly <- group_by(plant_year, year)

  summarise(
    yearly,
    annual_demand = sum(adoption * hydrogen_2050, na.rm = TRUE),
    annual_cost   = sum(policy_cost, na.rm = TRUE),
    .groups = "drop"
  )
}


  # Logistic adoption probability.
  #
  # The linear predictor is
  #   beta0 + beta1 * spatial + beta2 * cost + beta3 * waterway +
  #   beta4 * pipeline + beta5 * spatial * cost
  # and the result is its inverse logit. All arguments are numeric and are
  # combined element-wise, so vectors of equal length give one probability
  # per element.
  adopt_prob <- function(beta0, beta1, spatial_influence_detrended,
                         beta2, cost_diff,
                         beta3, distance_to_waterway,
                         beta4, distance_to_pipeline,
                         beta5) {
    interaction_term <- beta5 * spatial_influence_detrended * cost_diff

    linear_predictor <- beta0 +
      beta1 * spatial_influence_detrended +
      beta2 * cost_diff +
      beta3 * distance_to_waterway +
      beta4 * distance_to_pipeline +
      interaction_term

    1 / (1 + exp(-linear_predictor))
  }

In [6]:
# POLICY SIMULATION
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Name of the saturation column selected by `saturation_setting`
# (e.g. "beta6_central"). Checked up front so that a typo in the setting fails
# here with a clear message instead of silently resolving to some other object.
saturation_col <- paste0("beta6_", saturation_setting)
if (!saturation_col %in% names(offtakers)) {
  stop(
    "Column '", saturation_col, "' not found in offtakers; ",
    "check saturation_setting.",
    call. = FALSE
  )
}

# Plain attribute table (geometry dropped) with only the columns the
# simulation reads.
offtakers_df <- offtakers %>%
  st_set_geometry(NULL) %>%
  # .data[[...]] looks the name up in the data only, unlike get(), which can
  # fall through to a global variable of the same name.
  mutate(beta6 = .data[[saturation_col]]) %>%   # sector saturation
  select(
    plant_id, sector, hydrogen_2050,
    beta0, beta1, beta2, beta3, beta4, beta5, beta6,
    distance_to_rotterdam, distance_to_waterway, distance_to_pipeline,
    betweenness, degree
  )

# Sector labels are used as *names* when indexing the per-year cost vector in
# run_simulation (cost_vec[sector]). If `sector` were stored as a factor, that
# indexing would use the integer codes instead, so coerce to character here,
# mirroring the as.character() applied to the cost table below.
sector             <- as.character(offtakers_df$sector)
plant_id           <- offtakers_df$plant_id
n_offtakers        <- nrow(offtakers_df)
unique_sectors     <- unique(sector)

# model coefficients (one value per plant)
beta0_vec <- offtakers_df$beta0
beta1_vec <- offtakers_df$beta1
beta2_vec <- offtakers_df$beta2
beta3_vec <- offtakers_df$beta3
beta4_vec <- offtakers_df$beta4
beta5_vec <- offtakers_df$beta5
beta6_vec <- offtakers_df$beta6   # saturation cap

# distances
distance_rotterdam <- offtakers_df$distance_to_rotterdam
distance_waterway  <- offtakers_df$distance_to_waterway
distance_pipeline  <- offtakers_df$distance_to_pipeline
hydrogen_2050      <- offtakers_df$hydrogen_2050

# indices for fast sector lookup
# sector_index:  row positions per sector name
# sector_factor: integer sector code per plant, numbered in order of first
#                appearance so it lines up with rowsum(..., reorder = FALSE)
# sector_counts: number of plants per sector code
sector_index  <- split(seq_len(n_offtakers), sector)
sector_factor <- as.integer(factor(sector, levels = unique_sectors))
n_sectors     <- length(unique_sectors)
sector_counts <- tabulate(sector_factor, nbins = n_sectors)

# cost lookup table: one data frame per year, addressed by the year as a string
cost_diff_lookup <- cost_gap_data %>%
  filter(
    green_scenario  == cost_setting,
    fossil_scenario == carbon_price_setting
  ) %>%
  select(year, sector, cost_diff) %>%
  mutate(sector = as.character(sector)) %>%
  split(~ year)

# An empty lookup means the scenario labels matched nothing; fail now rather
# than deep inside the first simulation run.
if (length(cost_diff_lookup) == 0) {
  stop(
    "No rows in cost_gap_data for green_scenario = '", cost_setting,
    "' and fossil_scenario = '", carbon_price_setting, "'.",
    call. = FALSE
  )
}

# which centrality metric to use as the targeting score
centrality_types <- list(
  betweenness = offtakers_df$betweenness,
  degree      = offtakers_df$degree
)

intervention_types <- c("baseline", "high", "low", "random")

# initial adoption seeding near Rotterdam
# Candidates are the plants within distance_cutoff_rotterdam; a fixed share of
# them (at least one) starts as adopter. The draw uses a fixed seed so every
# simulation run starts from the same initial state.
within_threshold <- which(distance_rotterdam <= distance_cutoff_rotterdam)
if (length(within_threshold) == 0) {
  stop(
    "No offtakers within distance_cutoff_rotterdam; ",
    "cannot seed initial adopters.",
    call. = FALSE
  )
}

n_initial <- max(1, round(initial_share * length(within_threshold)))

set.seed(42)
# Sample *positions* and index back into within_threshold. Calling
# sample(within_threshold, ...) directly would, for a single candidate with
# index k, draw from 1:k instead of returning k. For two or more candidates
# this is exactly what sample() does internally, so the draw is unchanged.
selected_indices <- within_threshold[
  sample.int(length(within_threshold), size = n_initial, replace = FALSE)
]

initial_adoption <- rep.int(0L, n_offtakers)
initial_adoption[selected_indices] <- 1L


#SIMULATION FUNCTION

# Simulate plant-level adoption for every year in start_year_sim:end_year
# under one targeting scheme.
#
# Arguments
#   intervention_type  "baseline" (no plant is targeted), "high", "low" or
#                      "random"; the latter three select plants per
#                      sector x quantile group via filter_intervention_group().
#                      Any other value is rejected.
#   seed               RNG seed: set with set.seed() for the adoption draws and
#                      forwarded to filter_intervention_group().
#   contribution_score Numeric score per offtaker (same order as the global
#                      plant vectors). Plants with an NA score are excluded
#                      from stratification and therefore never targeted.
#
# Returns a list with
#   adoption_plant          year x plant table: adoption, cost_diff (after the
#                           floor), original_cost_diff, hydrogen_2050, sector
#   policy_cost             year x plant policy cost
#   cost_diff               year x plant cost_diff (after the floor)
#   influence_plant         year x plant spatial influence and treated flag
#   influence_summary       yearly influence / adoption, treated vs untreated
#   treated_offtakers       treatment label per plant
#   sector_quantile_summary group means for targeted vs non-targeted plants
# The last four elements are NULL when no plant is targeted.
#
# Reads these globals: start_year_sim, end_year, n_offtakers, sector,
# unique_sectors, plant_id, hydrogen_2050, quantile, intervention_volume,
# initial_adoption, spatial_weights, cost_diff_lookup, beta0_vec..beta6_vec,
# distance_waterway, distance_pipeline, sector_factor, sector_counts,
# sector_index.
# Side effect: resets the global RNG state via set.seed(seed).
run_simulation <- function(intervention_type = "baseline",
                           seed = 19,
                           contribution_score) {

  # Fail fast on unknown labels. Without this check a mistyped type would be
  # treated as random targeting by filter_intervention_group() while the
  # treatment labels below would report every plant as "Untreated".
  intervention_type <- match.arg(
    intervention_type,
    c("baseline", "high", "low", "random")
  )

  set.seed(seed)

  years   <- start_year_sim:end_year
  n_years <- length(years)

  # preallocate all matrices for speed (rows = plants, columns = years)
  adoption_mat           <- matrix(0, n_offtakers, n_years)
  cost_diff_mat          <- matrix(0, n_offtakers, n_years)
  original_cost_diff_mat <- matrix(0, n_offtakers, n_years)
  policy_cost_mat        <- matrix(0, n_offtakers, n_years)

  # only filled when at least one plant is targeted
  influence_summary_list <- vector("list", n_years)
  influence_plant_list   <- vector("list", n_years)

  # stratification for targeting by sector x hydrogen quantile group
  # (`!!quantile` injects the global setting, not a column named `quantile`)
  stratification_df <- tibble(
    idx           = seq_len(n_offtakers),
    sector        = sector,
    hydrogen_2050 = hydrogen_2050,
    contribution  = contribution_score
  ) %>%
    filter(!is.na(contribution)) %>%
    group_by(sector) %>%
    mutate(quant_group = ntile(hydrogen_2050, !!quantile)) %>%
    ungroup()

  target_offtakers <- integer(0)

  # assign treated plants: a fixed fraction (rounded up) of every group
  if (intervention_type != "baseline") {
    targeted_df <- stratification_df %>%
      group_by(sector, quant_group) %>%
      group_modify(~ {
        k <- ceiling(intervention_volume * nrow(.x))
        filter_intervention_group(.x, intervention_type, k, seed)
      }) %>%
      ungroup()

    target_offtakers <- targeted_df$idx
  }

  has_targets <- length(target_offtakers) > 0

  # flag vector for treated plants
  treated_flag_vec <- logical(n_offtakers)
  treated_flag_vec[target_offtakers] <- TRUE

  # summary of treated groups
  sector_quantile_summary <- NULL
  if (has_targets) {
    sector_quantile_summary <- stratification_df %>%
      mutate(
        targeted = if_else(idx %in% target_offtakers, intervention_type, "none")
      ) %>%
      group_by(sector, quant_group, targeted) %>%
      summarise(
        mean_contribution  = mean(contribution),
        mean_hydrogen_2050 = mean(hydrogen_2050),
        n_plants           = n(),
        .groups = "drop"
      )
  }

  previous_adoption <- initial_adoption

  # continuous sectors accumulate smoothly (fractional adoption);
  # all other sectors adopt through a 0/1 draw
  continuous_sectors <- c("Heavy duty", "Aviation", "Shipping")
  continuous_flag    <- sector %in% continuous_sectors
  discrete_sectors   <- unique_sectors[!unique_sectors %in% continuous_sectors]

  # ---- yearly iteration ----
  for (tt in seq_len(n_years)) {
    year <- years[tt]

    # spatial influence from last year's adoption, centred on this year's mean
    spatial_influence <- as.numeric(spatial_weights %*% previous_adoption)
    spatial_influence[is.na(spatial_influence)] <- 0
    spatial_influence_detrended <- spatial_influence - mean(spatial_influence)

    # store influence metrics
    if (has_targets) {
      influence_summary_list[[tt]] <- tibble(
        year                        = year,
        treated_influence_received  = sum(spatial_influence[treated_flag_vec]),
        untreated_influence_received = sum(spatial_influence[!treated_flag_vec]),
        mean_adoption_treated        = mean(previous_adoption[treated_flag_vec]),
        mean_adoption_untreated      = mean(previous_adoption[!treated_flag_vec])
      )

      influence_plant_list[[tt]] <- tibble(
        year              = year,
        plant_id          = plant_id,
        spatial_influence = spatial_influence,
        treated           = treated_flag_vec
      )
    }

    # cost competitiveness: look up this year's gap per sector by name
    cost_year <- cost_diff_lookup[[as.character(year)]]
    if (is.null(cost_year)) {
      # Without this guard the failure would surface further down as
      # "replacement has length zero".
      stop(
        sprintf("No cost_diff data for year %s in cost_diff_lookup.", year),
        call. = FALSE
      )
    }
    cost_vec  <- cost_year$cost_diff
    names(cost_vec) <- cost_year$sector

    cost_diff <- cost_vec[sector]
    cost_diff[is.na(cost_diff)] <- 0   # sectors without cost data get a zero gap
    original_cost_diff <- cost_diff

    # subsidy floor: treated plants never face a negative cost gap
    if (has_targets) {
      cost_diff[target_offtakers] <- pmax(0, cost_diff[target_offtakers])
    }

    cost_diff_mat[, tt]          <- cost_diff
    original_cost_diff_mat[, tt] <- original_cost_diff

    # logistic probability
    p_raw <- adopt_prob(
      beta0_vec,
      beta1_vec, spatial_influence_detrended,
      beta2_vec, cost_diff,
      beta3_vec, distance_waterway,
      beta4_vec, distance_pipeline,
      beta5_vec
    )
    p_raw[is.na(p_raw)] <- 0

    # sector adoption means; reorder = FALSE keeps rows in order of first
    # appearance, matching the numbering of sector_factor / sector_counts
    sector_sum      <- rowsum(previous_adoption, group = sector_factor, reorder = FALSE)
    sector_mean     <- as.numeric(sector_sum) / sector_counts
    sector_mean_vec <- sector_mean[sector_factor]

    # remaining saturation: headroom up to the cap beta6 relative to the share
    # not yet adopted; the denominator is floored at 1e-6 to avoid dividing by
    # zero once a sector is fully adopted
    residual_share <- pmax(
      0,
      (beta6_vec - sector_mean_vec) / pmax(1 - sector_mean_vec, 1e-6)
    )

    residual_p <- p_raw * residual_share
    residual_p[is.na(residual_p)] <- 0

    # update adoption
    adoption <- previous_adoption

    # continuous sectors: smooth update, a fraction residual_p of the
    # not-yet-adopted share is added
    if (any(continuous_flag)) {
      idx_c <- which(continuous_flag)
      adoption[idx_c] <- 1 - (1 - adoption[idx_c]) * (1 - residual_p[idx_c])
    }

    # discrete sectors: Bernoulli draw; pmax keeps earlier adopters adopted
    for (s in discrete_sectors) {
      idx_s <- sector_index[[s]]
      adoption[idx_s] <- pmax(
        adoption[idx_s],
        rbinom(length(idx_s), 1, residual_p[idx_s])
      )
    }

    adoption_mat[, tt] <- adoption

    # policy cost: charged only where the floor was binding (original gap
    # negative, floored gap exactly 0), scaled by demand, gap and adoption.
    # NOTE(review): 33000 and 1e9 look like unit conversions - confirm.
    policy_cost_mat[, tt] <- ifelse(
      original_cost_diff < 0 & cost_diff == 0,
      hydrogen_2050 * -original_cost_diff * adoption * 33000 / 1e9,
      0
    )

    previous_adoption <- adoption
  }

  # reconstruct tibbles: matrices are flattened column by column, i.e. all
  # plants of the first year, then all plants of the second year, and so on
  year_rep   <- rep(years, each = n_offtakers)
  plant_rep  <- rep(plant_id, times = n_years)
  sector_rep <- rep(sector,   times = n_years)
  h2050_rep  <- rep(hydrogen_2050, times = n_years)

  adoption_plant_df <- tibble(
    year               = year_rep,
    plant_id           = plant_rep,
    sector             = sector_rep,
    adoption           = as.vector(adoption_mat),
    cost_diff          = as.vector(cost_diff_mat),
    original_cost_diff = as.vector(original_cost_diff_mat),
    hydrogen_2050      = h2050_rep
  )

  policy_cost_df <- tibble(
    year        = year_rep,
    plant_id    = plant_rep,
    policy_cost = as.vector(policy_cost_mat)
  )

  cost_diff_df <- tibble(
    year      = year_rep,
    plant_id  = plant_rep,
    cost_diff = as.vector(cost_diff_mat)
  )

  list(
    adoption_plant        = adoption_plant_df,
    policy_cost           = policy_cost_df,
    cost_diff             = cost_diff_df,
    influence_plant       = if (has_targets) bind_rows(influence_plant_list) else NULL,
    influence_summary     = if (has_targets) bind_rows(influence_summary_list) else NULL,
    treated_offtakers     = if (has_targets) tibble(
      plant_id  = plant_id,
      treatment = case_when(
        treated_flag_vec & intervention_type == "high"   ~ "Treated High",
        treated_flag_vec & intervention_type == "low"    ~ "Treated Low",
        treated_flag_vec & intervention_type == "random" ~ "Treated Random",
        TRUE                                             ~ "Untreated"
      )
    ) else NULL,
    sector_quantile_summary = sector_quantile_summary
  )
}


In [None]:
#SIMULATION CALL
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# results_all:      per scenario key, the first run's output with its numeric
#                   columns replaced by running means over all runs so far
# results_all_dist: per scenario key, the list of yearly summaries of every run
results_all <- list()
results_all_dist <- list()

# Fold the n-th observation into a running mean, column by column:
# new_mean = (old_mean * (n - 1) + observation) / n
fold_running_mean <- function(avg, obs, cols, n) {
  for (col in cols) {
    avg[[col]] <- (avg[[col]] * (n - 1) + obs[[col]]) / n
  }
  avg
}

for (run_idx in seq_len(n_simulations)) {
  run_seed <- seeds[run_idx]

  for (metric_name in names(centrality_types)) {

    metric_score <- centrality_types[[metric_name]]

    for (scenario in intervention_types) {

      scenario_key <- paste(metric_name, scenario, sep = "_")

      run_out     <- run_simulation(scenario, run_seed, metric_score)
      run_summary <- summarize_run(run_out)

      if (run_idx == 1) {

        # The first run initialises both stores. Elements that are never
        # averaged below (treated_offtakers, sector_quantile_summary) therefore
        # stay as produced by this first run.
        results_all[[scenario_key]]      <- run_out
        results_all_dist[[scenario_key]] <- list(run_summary)

      } else {

        results_all_dist[[scenario_key]][[run_idx]] <- run_summary

        avg <- results_all[[scenario_key]]

        # plant-level adoption: average every column except the identifiers
        id_cols <- c("year", "plant_id", "sector", "hydrogen_2050")
        avg$adoption_plant <- fold_running_mean(
          avg$adoption_plant,
          run_out$adoption_plant,
          setdiff(names(avg$adoption_plant), id_cols),
          run_idx
        )

        # policy cost
        avg$policy_cost <- fold_running_mean(
          avg$policy_cost, run_out$policy_cost, "policy_cost", run_idx
        )

        # cost diff
        avg$cost_diff <- fold_running_mean(
          avg$cost_diff, run_out$cost_diff, "cost_diff", run_idx
        )

        # influence plant (absent for runs without targeted plants)
        if (!is.null(run_out$influence_plant)) {
          avg$influence_plant <- fold_running_mean(
            avg$influence_plant,
            run_out$influence_plant,
            "spatial_influence",
            run_idx
          )
        }

        # influence summary (absent for runs without targeted plants)
        if (!is.null(run_out$influence_summary)) {
          avg$influence_summary <- fold_running_mean(
            avg$influence_summary,
            run_out$influence_summary,
            setdiff(names(avg$influence_summary), "year"),
            run_idx
          )
        }

        results_all[[scenario_key]] <- avg
      }

      message(sprintf("[%s] Finished run %d/%d for %s",
                      Sys.time(), run_idx, n_simulations, scenario_key))

      # free the per-run output; collect garbage during every fifth run
      rm(run_out)
      if (run_idx %% 5 == 0) gc()
    }
  }
}


saveRDS(results_all_dist, "results_all_dist_tryout_adj.rds")
#saveRDS(results_all, "results_all.rds")