In [1]:
#SIMULATION - MODEL (Readme)
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Module conducts the following analysis:

# ---- Prepares offtaker dataset for analysis (aggregation of heavy duty, etc.)
# ---- Calculate predictor variables on the H2 offtaker dataset
# ---- Simulates green H2 demand baseline projections
# ---- Figure 5: Baseline projections of expected green H2 demand before policy intervention
# ---- Supplementary Figure 5: Baseline projections of expected green hydrogen demand before policy intervention using wind as historical analogue 
#      (Uses Figure 5 code; produced when Empirics-Model is first run for Wind)
# ---- Supplementary Figure 8: Baseline projections of expected green hydrogen demand before policy intervention using wind as historical analogue 
#      (Uses Figure 5 code; produced when Empirics-Model and Simulation-Model are first run for carbon_price_setting = flat_carbon_price)
# ---- Extended Data Tables 2 and 3
# ---- Supplementary Figure 11: Map of baseline green H2 demand in central saturation + mean cost scenario, 2050

# Module is input for:

# ---- Network-centrality (requires offtaker dataset)
# ---- Simulation-policy-intervention

In [2]:
#SET-UP
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Set the PROJ_LIB environment variable for this R session to the conda PROJ data directory.
# NOTE(review): presumably needed so that sf/PROJ can locate its projection database;
# the path is specific to a conda installation - adjust when running elsewhere.
Sys.setenv(PROJ_LIB = "/opt/conda/share/proj")
# Echo the value so the notebook output shows which directory is in effect.
Sys.getenv("PROJ_LIB")

# Make sure every package named in `packages` is installed and attached.
#   packages: character vector of package names.
# Packages whose namespace cannot be loaded are installed from CRAN (with
# dependencies) first; packages that are already attached are left alone.
# Called for its side effects; the for loop yields NULL invisibly.
check_and_load <- function(packages) {
  for (pkg in packages) {
    is_installed <- requireNamespace(pkg, quietly = TRUE)
    if (!is_installed) {
      message(paste("Installing missing package:", pkg))
      install.packages(pkg, dependencies = TRUE, repos = "https://cloud.r-project.org")
    }

    is_attached <- pkg %in% (.packages())
    if (is_attached) {
      next
    }
    suppressPackageStartupMessages(library(pkg, character.only = TRUE))
  }
}

# required libraries
required_packages <- c(
  # data handling
  "tidyverse",   # dplyr, ggplot2, tidyr, tibble, etc.
  "data.table",  # fast data manipulation
  "readxl",      # read Excel
  "writexl",     # write Excel
  "jsonlite",    # JSON
  
  # spatial analysis
  "sf",          # spatial data
  "giscoR",      # EU/NUTS geodata
  "Matrix",      # sparse matrices for spatial weights
  "FNN",         # nearest-neighbor spatial weights
  "geosphere",
  
  # visualization
  "ggsci",       # Nature/NPG palettes
  "patchwork",   # combine ggplots
  "cowplot",      # facet plot

  # --- Simulation and forecasting utilities ---
  "forecast",     # Time-series and forecasting tools (trend extrapolation)
  "minpack.lm"   # Nonlinear least squares optimisation (Levenberg–Marquardt)
)

# --- Load all required packages (auto-install if missing) ------------
check_and_load(required_packages)

In [None]:
#INPUTS AND SETTINGS
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Simulation settings
initial_share <- 0.01                                     # Share of candidate offtakers initialised as adopters at the start of the simulation (1%)
carbon_price_setting <- "carbon_price"                    # Carbon price assumption: "flat_carbon_price" or escalating "carbon_price"
rotterdam_coords <- matrix(c(4.4786, 51.9244), nrow = 1)  # Rotterdam coordinates as a 1 x 2 matrix (lon, lat)
distance_cutoff_rotterdam <- 50000                        # Distance cutoff for initialization (NOTE(review): magnitude suggests metres, i.e. 50 km - confirm)
k_max_number <- 500                                       # Max number of neighbors (FNN KNN used for efficient creation of spatial weights matrix)
km_cutoff <- 120000                                       # Distance cutoff for adjacency matrix, compared against distances between projected coordinates
simulation_years <- 2024:2100                             # Simulation years
continuous_sectors <- c("Heavy duty", "Aviation", "Shipping")  # Sectors with continuous instead of binary adoption


# Graphics: resolution of plots rendered in the notebook
options(repr.plot.res = 600)

# CRS
crs <- 3035                                               # EPSG:3035 - LAEA Europe

# Saturation rates - reflecting competition vs other technologies.
#   version: one of "restricted", "central", "extended"; validated with
#            match.arg(), so the default call uses "restricted".
# Returns a named numeric vector with one saturation rate per sector.
define_saturation <- function(version = c("restricted", "central", "extended")) {
  version <- match.arg(version)

  # One rate per sector, in the order of the sector names assigned below.
  rates_by_version <- list(
    restricted = c(0, 0, 0, 0.7, 1, 0.5, 0, 0, 0, 0, 0, 0),
    central    = c(0, 0.3, 0, 0.85, 1, 0.65, 0.2, 0.2, 0.2, 0.2, 0.2, 0),
    extended   = c(1, 0.3, 0.7, 1, 1, 0.65, 1, 0.2, 0.8, 0.8, 1, 0.2)
  )

  rates <- rates_by_version[[version]]
  names(rates) <- c(
    "Power", "Heavy duty", "Pulp & paper", "Chemicals", "Refining",
    "Iron & steel", "Non-ferrous metals", "Other", "Aviation",
    "Shipping", "Non-metallic minerals", "Heat"
  )
  rates
}

In [None]:
#DATA FILES
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Paths
inet_pipesegments_geojson_path <- "/home/h1604190/Data1/INET_PipeSegments.geojson"
h2_database_path <- "/home/h1604190/Data/h2_database.xlsx"

# Load data files
pipe_segments <- st_read(inet_pipesegments_geojson_path)
grid_clean <- suppressWarnings(read_excel(h2_database_path))

# Offtaker table as WGS84 point features; lon/lat are coerced to numeric and
# kept as ordinary columns (remove = FALSE).
offtaker_table <- mutate(
  st_drop_geometry(grid_clean),
  lon = as.numeric(lon),
  lat = as.numeric(lat)
)
grid_clean_sf <- st_as_sf(offtaker_table, coords = c("lon", "lat"), crs = 4326, remove = FALSE)

# Port subsets used later for the distance-to-waterway fields
maritime_sf <- filter(grid_clean_sf, Industry == "Shipping")        # maritime port subset
iww_sf      <- filter(grid_clean_sf, Industry == "Inland Shipping") # inland port subset

# Outputs of the empirical module
second_pass_coefs <- readRDS("second_pass_coefs.rds") #comment out if there is no access to the data underlying the empirical module
cost_gap_data <- readRDS("cost_gap_data.rds") #comment out if there is no access to the data underlying the empirical module

# Load NUTS shapefiles (NUTS-2, 2021); assign EPSG:4258 only if no CRS is set, then convert to WGS84
nuts2_shapefile <- gisco_get_nuts(year = 2021, nuts_level = 2, resolution = "20")
if (is.na(st_crs(nuts2_shapefile))) {
  st_crs(nuts2_shapefile) <- 4258
}
nuts2_shapefile <- st_transform(nuts2_shapefile, crs = 4326)

In [None]:
# Print the structure of the loaded coefficient object as a quick visual check.
str(second_pass_coefs)

In [None]:
#SPECIAL INPUTS - use in case there is no access to the S&P Capital IQ data underlying the empirical model
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
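#Note: this chunk is only relevant when there is no access to the data underlying the empirics module. It rebuilds cost_gap_data from the
#cost competitiveness spreadsheets, so that the simulation can be run based on the results of the empirical analysis.
#The chunk is disabled by wrapping it in a single-quoted string: remove the opening and closing quote characters to execute it.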

'# Datafiles
#second_pass_coefs <- "/home/h1604190/Data/second_pass_coefs.rds"
cost_green_path <- "/home/h1604190/Data/cost_competitiveness_green.xlsx"
cost_fossil_path <- "/home/h1604190/Data/cost_competitiveness_fossil.xlsx"

cost_gap_green <- read_excel(cost_green_path) %>%
  pivot_longer(`2024`:`2050`, names_to = "year", values_to = "green_cost") %>%
  mutate(year = as.integer(year))
cost_gap_fossil <- read_excel(cost_fossil_path) %>%
  pivot_longer(`2024`:`2050`, names_to = "year", values_to = "fossil_cost") %>%
  mutate(year = as.integer(year))

# Cost forecast settings
future_years <- 2051:2100                   # Cost competitiveness forecast extension years
currency <- "EUR"                           # Set to EUR or USD
exchange_rate <- 1/1.0321                   # ECB as of 2 Jan 25
# H2 sector mapping
sector_mapping <- tribble(
  ~sector,                ~green_commodity,       ~fossil_commodity, 
  "Aviation",             "E-Kerosine",           "Kerosine", 
  "Chemicals",            "Green Hydrogen",       "Grey Hydrogen", 
  "Heat",                 "E-Methane",            "Natural Gas", 
  "Heavy duty",           "Green Hydrogen Mobility", "Diesel", 
  "Iron & steel",         "Green Hydrogen Steel", "Natural Gas", 
  "Non-ferrous metals",   "Green Hydrogen",       "Natural Gas",
  "Non-metallic minerals","Green Hydrogen",       "Natural Gas",
  "Other",                "Green Hydrogen",       "Natural Gas",
  "Power",                "E-Methane",            "Natural Gas",
  "Pulp & paper",         "Green Hydrogen",       "Natural Gas", 
  "Refining",             "Green Hydrogen",       "Grey Hydrogen",
  "Shipping",             "E-Methanol",           "Diesel"
)

# Forecast functions
wrights_law_forecast <- function(years, values, future_years) {
  C_2050 <- values[years == 2050]
  anchored <- function(t, r) C_2050 * exp(-r * (t - 2050))
  fit <- nlsLM(values ~ anchored(years, r),
               start = list(r = 0.05), lower = 0.001, upper = 1,
               control = nls.lm.control(maxiter = 500))
  r_est <- coef(fit)[["r"]]
  C_2050 * exp(-r_est * (future_years - 2050))
}

arima_forecast <- function(years, values, future_years) {
  if (length(unique(values)) < 5) return(rep(NA, length(future_years)))
  ts_data <- ts(values, start = min(years), frequency = 1)
  forecast(auto.arima(ts_data), h = length(future_years))$mean |> as.numeric()
}

ar1_forecast <- function(years, values, future_years) {
  if (length(unique(values)) < 5) return(rep(NA, length(future_years)))

  ts_data <- ts(values, start = min(years), frequency = 1)

  fit <- arima(
    ts_data,
    order = c(1, 0, 0),
    method = "ML",
    transform.pars = TRUE   # forces |phi| < 1
  )

  preds <- predict(fit, n.ahead = length(future_years))$pred
  as.numeric(preds)
}


# Forecast data
forecast_green <- cost_gap_green %>%
  group_by(commodity, green_scenario) %>%
  group_map(~{
    if (nrow(.x) < 5) return(NULL)
    tibble(
      commodity = .y$commodity,
      green_scenario = .y$green_scenario,
      year = future_years,
      green_cost = wrights_law_forecast(.x$year, .x$green_cost, future_years)
    )
  }) %>% bind_rows()

forecast_fossil <- cost_gap_fossil %>%
  group_by(commodity, fossil_scenario) %>%
  group_map(~{

    if (nrow(.x) < 5) return(NULL)

    is_flat <- (.y$fossil_scenario == "flat_carbon_price")

    tibble(
      commodity       = .y$commodity,
      fossil_scenario = .y$fossil_scenario,
      year            = future_years,
      fossil_cost     = if (is_flat) {
        # AR(1) for flat carbon price
        ar1_forecast(.x$year, .x$fossil_cost, future_years)
      } else {
        # ARIMA for all default carbon price
        arima_forecast(.x$year, .x$fossil_cost, future_years)
      }
    )
  }) %>%
  bind_rows()

# Combine data
green_all  <- bind_rows(cost_gap_green,  forecast_green)
fossil_all <- bind_rows(cost_gap_fossil, forecast_fossil)
scenario_grid <- expand.grid(
  green_scenario = unique(green_all$green_scenario),
  fossil_scenario = unique(fossil_all$fossil_scenario),
  year = sort(unique(c(green_all$year, fossil_all$year)))
)

# Map to sector
sector_data <- sector_mapping %>%
  crossing(scenario_grid) %>%
  left_join(green_all,  by = c("green_commodity" = "commodity", "green_scenario", "year")) %>%
  left_join(fossil_all, by = c("fossil_commodity" = "commodity", "fossil_scenario", "year")) %>%
  filter(!is.na(green_cost), !is.na(fossil_cost)) %>%
  mutate(
    across(c(green_cost, fossil_cost),
           ~ if (currency == "EUR") .x / exchange_rate else .x),
    cost_diff = fossil_cost - green_cost,
    scenario_pair = paste0("green: ", green_scenario, " | fossil: ", fossil_scenario)
  ) %>%
  select(
    sector, year, green_commodity, fossil_commodity,
    green_scenario, fossil_scenario, scenario_pair,
    green_cost, fossil_cost, cost_diff
  )

#Extract cost competitiveness values
cost_gap_data <- sector_data %>%
  filter(fossil_scenario == carbon_price_setting) #Based on carbon price setting in global settings
table(cost_gap_data$sector) #Check -> should be 231 for all sectors
saveRDS(cost_gap_data, "cost_gap_data.rds") #Save for use in simulation modules'

In [None]:
#DATA HANDLING
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Pipeline data: the `param` and `method` columns hold JSON text. Each entry is
# parsed into a data frame and then spread into separate columns prefixed
# "param_" / "method_".
pipe_segments_expanded <- pipe_segments %>%
  mutate(
    param  = map(param,  function(json_text) as.data.frame(fromJSON(json_text))),
    method = map(method, function(json_text) as.data.frame(fromJSON(json_text)))
  ) %>%
  unnest_wider(param, names_sep = "_") %>%
  unnest_wider(method, names_sep = "_")

# prepare offtaker data
# Build WGS84 points from the raw table, keep only those inside the
# lon [-30, 40] x lat [30, 72] window, and number the surviving rows.
offtaker_points <- st_as_sf(as.data.frame(grid_clean),
                            coords = c("lon", "lat"), crs = 4326)
point_xy <- st_coordinates(offtaker_points)
inside_window <- point_xy[, 1] >= -30 & point_xy[, 1] <= 40 &
  point_xy[, 2] >= 30 & point_xy[, 2] <= 72

grid_clean_sf <- offtaker_points %>%
  filter(inside_window) %>%
  mutate(index = row_number())

# Re-attach lon/lat as ordinary columns and add the great-circle distance to
# Rotterdam in km (distHaversine result divided by 1000).
kept_xy <- st_coordinates(grid_clean_sf)

grid_clean_sf <- grid_clean_sf %>%
  mutate(
    lon = kept_xy[, 1],
    lat = kept_xy[, 2],
    distance_to_rotterdam = distHaversine(cbind(lon, lat), rotterdam_coords) / 1000
  ) %>%
  # NOTE(review): as in the original, the object is still used as an sf object
  # with a geometry column further down, so this select is expected to leave
  # the active geometry in place - confirm against the sf dplyr methods.
  select(-geometry)

# heavy-duty aggregation to NUTS3
# Heavy-duty offtakers are collapsed to one row per (contact_country, NUTS3_code):
# hydrogen volumes are summed, the distance to Rotterdam is averaged, and the
# point geometries of the group are unioned.
grid_clean_hd <- grid_clean_sf %>% filter(Sector == "Heavy duty")

grid_clean_hd_aggregated <- grid_clean_hd %>%
  group_by(contact_country, NUTS3_code) %>%
  summarise(
    hydrogen_sum        = sum(hydrogen,        na.rm = TRUE),
    hydrogen_sum_2030   = sum(hydrogen_2030,   na.rm = TRUE),
    hydrogen_sum_2050   = sum(hydrogen_2050,   na.rm = TRUE),
    mean_distance_to_rotterdam = mean(distance_to_rotterdam, na.rm = TRUE),
    geometry = st_union(geometry),
    .groups = "drop"
  ) %>%
  mutate(centroid = st_centroid(geometry))

# representative points: the heavy-duty offtaker closest to each group centroid.
# `nearest_points` has one entry per aggregated row, in the same row order.
nearest_points <- st_nearest_feature(grid_clean_hd_aggregated$centroid, grid_clean_hd)
representative_points <- grid_clean_hd[nearest_points, c("NUTS3_code", "geometry")]
representative_xy <- st_coordinates(representative_points)

# Attach the representative coordinates by row position. Joining on NUTS3_code
# (as done previously) used the code of the *nearest point*, which may belong
# to a different region or occur in several groups; that produced NA
# coordinates (rows dropped later) or duplicated rows.
grid_clean_hd_aggregated <- grid_clean_hd_aggregated %>%
  mutate(
    lon = representative_xy[, 1],
    lat = representative_xy[, 2]
  ) %>%
  select(-centroid) %>%
  rename(
    distance_to_rotterdam = mean_distance_to_rotterdam
  ) %>%
  mutate(
    hydrogen       = hydrogen_sum,
    hydrogen_2030  = hydrogen_sum_2030,
    hydrogen_2050  = hydrogen_sum_2050,
    Sector         = "Heavy duty",
    Industry       = "Heavy duty trucks"
  )

# recombine sectors: all non-heavy-duty offtakers plus the aggregated heavy-duty rows
grid_clean_abm <- grid_clean_sf %>%
  filter(Sector != "Heavy duty") %>%
  bind_rows(grid_clean_hd_aggregated) %>%
  select(-geometry, -index)

grid_clean_abm_sf <- st_as_sf(grid_clean_abm, coords = c("lon","lat"), crs = 4326)

# final offtakers
# Drop the existing geometry, discard rows without a latitude, rebuild point
# geometry from lon/lat (WGS84) and project to the working CRS.
offtaker_table <- grid_clean_abm_sf %>%
  mutate(offtaker_id = row_number()) %>%
  rename(sector = Sector) %>%
  st_drop_geometry()
offtaker_table <- filter(offtaker_table, !is.na(lat))

simulation_data <- st_transform(
  st_as_sf(offtaker_table, coords = c("lon","lat"), crs = 4326),
  crs = crs
)

# infrastructure layers in the same CRS
maritime_sf      <- st_transform(maritime_sf, crs)
iww_sf           <- st_transform(iww_sf, crs)
pipe_segments_sf <- st_transform(pipe_segments, crs)

# distance fields: for every offtaker, the smallest st_distance() to any
# feature of the target layer, divided by 1000
nearest_km <- function(targets) {
  apply(st_distance(simulation_data, targets), 1, min) / 1000
}
distance_to_port     <- nearest_km(maritime_sf)
distance_to_iww      <- nearest_km(iww_sf)
distance_to_pipeline <- nearest_km(pipe_segments_sf)

# store
simulation_data <- simulation_data %>%
  mutate(
    plant_id              = row_number(),
    distance_to_port      = distance_to_port,
    distance_to_iww       = distance_to_iww,
    distance_to_pipeline  = distance_to_pipeline,
    # closest of maritime port and inland port
    distance_to_waterway  = pmin(distance_to_port, distance_to_iww, na.rm = TRUE)
  )


In [None]:
#EXTENDED DATA TABLES 2 and 3
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Count offtakers and summarise 2050 hydrogen volumes (total and mean) per
# group, ordered by descending number of offtakers.
#   data: offtaker table with a `hydrogen_2050` column
#   ...:  grouping column(s), passed on to group_by()
summarise_h2_2050 <- function(data, ...) {
  data %>%
    group_by(...) %>%
    summarise(
      n = n(),
      total_h2_2050 = sum(hydrogen_2050, na.rm = TRUE),
      avg_h2_2050 = mean(hydrogen_2050, na.rm = TRUE)
    ) %>%
    arrange(desc(n))
}

# by country
table_country <- summarise_h2_2050(simulation_data, contact_country)

# by sector
table_sector <- summarise_h2_2050(simulation_data, sector)

# by industry (not shown in paper)
table_industry <- summarise_h2_2050(simulation_data, Industry)

# View all results
print(table_country, n = Inf)
print(table_sector, n = Inf)
print(table_industry, n = Inf)

In [None]:
# SIMULATION SET-UP

# Offtaker table used by the simulation, with adoption state initialised to zero.
offtakers <- simulation_data %>%
  select(
    plant_id, installation_name, account_holder_name, sector,
    distance_to_rotterdam, distance_to_port, distance_to_iww,
    distance_to_pipeline, distance_to_waterway,
    hydrogen, hydrogen_2050, emissions, contact_country
  ) %>%
  mutate(
    adoption = 0,
    previous_adoption = 0,
    p = 0
  )

# initial adopters near Rotterdam
# distance_to_rotterdam is stored in km (haversine metres divided by 1000),
# while the cutoff (50000) is treated as metres, i.e. 50 km. Comparing the two
# directly made every offtaker count as "near Rotterdam", so convert first.
within_threshold <- which(
  offtakers$distance_to_rotterdam * 1000 <= distance_cutoff_rotterdam
)
if (length(within_threshold) == 0) {
  stop("No offtakers within distance_cutoff_rotterdam of Rotterdam; ",
       "cannot initialise adopters.", call. = FALSE)
}

set.seed(42)
n_initial <- max(1, round(initial_share * length(within_threshold)))
# Sample positions rather than values: sample(x, ...) with a single number x
# would draw from 1:x instead of returning x itself.
selected_indices <- within_threshold[
  sample.int(length(within_threshold), size = n_initial, replace = FALSE)
]

offtakers <- offtakers %>%
  mutate(previous_adoption = if_else(row_number() %in% selected_indices, 1, 0))

# logistic model coefficients
# Look up coefficient `name` in `second_pass_coefs` and return it as numeric;
# a name that is not present yields 0.
get_coef <- function(name) {
  if (!(name %in% names(second_pass_coefs))) {
    return(0)
  }
  as.numeric(second_pass_coefs[[name]])
}

# Coefficients of the adoption model, looked up by term name
beta0_scalar <- get_coef("(Intercept)")
beta1_scalar <- get_coef("spatial_influence_detrended")
beta2_scalar <- get_coef("cost_proxy_scaled")
beta3_scalar <- get_coef("distance_to_waterway")
beta4_scalar <- get_coef("distance_to_pipeline")
beta5_scalar <- get_coef("cost_proxy_scaled:spatial_influence_detrended")

# Store the same coefficient values on every offtaker row (columns beta0..beta5)
beta_columns <- list(
  beta0 = beta0_scalar,
  beta1 = beta1_scalar,
  beta2 = beta2_scalar,
  beta3 = beta3_scalar,
  beta4 = beta4_scalar,
  beta5 = beta5_scalar
)
offtakers <- mutate(offtakers, !!!beta_columns)

# logistic adoption probability
# Inverse-logit of the linear predictor
#   beta0 + beta1 * spatial + beta2 * cost + beta3 * waterway + beta4 * pipeline
#         + beta5 * spatial * cost
# All arguments are numeric and combined element-wise; returns values in (0, 1).
adopt_prob <- function(beta0, beta1, spatial_influence_detrended,
                       beta2, cost_diff,
                       beta3, distance_to_waterway,
                       beta4, distance_to_pipeline,
                       beta5) {
  linear_predictor <- beta0 +
    beta1 * spatial_influence_detrended +
    beta2 * cost_diff +
    beta3 * distance_to_waterway +
    beta4 * distance_to_pipeline +
    beta5 * spatial_influence_detrended * cost_diff

  1 / (1 + exp(-linear_predictor))
}

# saturation (β6)
# One column per saturation version: each offtaker gets the rate of its sector;
# sectors not listed by define_saturation() get 0.
saturation_columns <- lapply(
  c(beta6_restricted = "restricted",
    beta6_central    = "central",
    beta6_extended   = "extended"),
  function(version) {
    recode(offtakers$sector, !!!define_saturation(version), .default = 0)
  }
)
offtakers <- mutate(offtakers, !!!saturation_columns)

# spatial weights
# Build a binary neighbour matrix and its row-normalised version from the
# k nearest neighbours of every offtaker.
#   offtakers:       object whose st_coordinates() give one coordinate row per offtaker
#   distance_cutoff: neighbours further away than this are discarded
#                    (same units as the coordinates)
#   k_max:           maximum number of nearest neighbours searched per offtaker
# Returns list(W = row-normalised sparse matrix, neighbors_matrix = 0/1 sparse matrix).
compute_distance_weights <- function(offtakers, distance_cutoff = km_cutoff, k_max = k_max_number) {
  coords <- st_coordinates(offtakers)
  n <- nrow(coords)

  # A point set cannot supply more neighbours than it has points.
  k <- min(k_max, n)
  knn <- get.knnx(data = coords, query = coords, k = k)

  # nn.index / nn.dist are n x k matrices (one row per query point).
  # as.vector() flattens them column by column, so the matching row ids must
  # cycle 1..n once per column (times = k). Using each = k paired distances
  # and neighbour ids with the wrong source offtaker.
  i_vec <- rep(seq_len(n), times = k)
  j_vec <- as.vector(knn$nn.index)
  d_vec <- as.vector(knn$nn.dist)

  # Drop zero-distance hits (the point itself) and anything beyond the cutoff.
  valid <- which(d_vec > 0 & d_vec <= distance_cutoff)

  W <- sparseMatrix(i = i_vec[valid], j = j_vec[valid], x = 1, dims = c(n, n))
  # Row-normalise; rows without neighbours are divided by 1 and stay all-zero.
  W_norm <- W / pmax(rowSums(W), 1)

  list(W = W_norm, neighbors_matrix = W)
}

# Spatial weights for all offtakers: row-normalised matrix and 0/1 neighbour matrix
weights <- compute_distance_weights(offtakers)
neighbors_matrix <- weights[["neighbors_matrix"]]
spatial_weights  <- weights[["W"]]

# scenario grid: every combination of saturation version and green cost scenario
scenarios <- expand.grid(
  saturation       = c("restricted", "central", "extended"),
  green_scenario   = c("conservative", "progressive", "mean"),
  stringsAsFactors = FALSE
)

# simulation
# Runs the adoption simulation once per row of `scenarios` (saturation version x
# green cost scenario) over all `simulation_years`, and stores one long table of
# plant-level results per scenario in `all_results`, keyed by "<saturation> | <green_scenario>".
all_results <- list()
# Sectors updated with a continuous adoption share instead of a 0/1 draw
continuous_sectors <- c("Heavy duty", "Aviation", "Shipping")

for (i in seq_len(nrow(scenarios))) {

  sat <- scenarios$saturation[i]
  gs  <- scenarios$green_scenario[i]
  label <- paste(sat, "|", gs)
  message("Running scenario: ", label)

  # Pick the saturation column of this scenario as beta6.
  # (previous_adoption = previous_adoption leaves the column unchanged.)
  offtakers_run <- offtakers %>%
    mutate(
      beta6 = get(paste0("beta6_", sat)),
      previous_adoption = previous_adoption
    )

  # Cost differences for this green scenario and the configured carbon price
  # setting, split into one table per year (list names are the years).
  cost_diff_lookup <- cost_gap_data %>%
    filter(green_scenario == gs,
           fossil_scenario == carbon_price_setting) %>%
    select(year, sector, cost_diff) %>%
    split(.$year)

  # Every scenario starts from the same initial adopters.
  cumulative_adoption <- offtakers_run$previous_adoption
  uptake_years <- list()

  for (t in seq_along(simulation_years)) {

    current_year <- simulation_years[t]

    # spatial spillovers
    # Weighted adoption among neighbours, centred on its mean across offtakers.
    spatial_influence <- as.numeric(spatial_weights %*% cumulative_adoption)
    spatial_influence[is.na(spatial_influence)] <- 0
    spatial_detrended <- spatial_influence - mean(spatial_influence)

    # cost differences
    # One row per sector is kept (the first); sectors without a value get 0.
    cost_year <- cost_diff_lookup[[as.character(current_year)]]
    cost_year <- distinct(cost_year, sector, .keep_all = TRUE)

    cost_diff <- offtakers_run %>%
      select(sector) %>%
      left_join(cost_year, by = "sector") %>%
      mutate(cost_diff = replace_na(cost_diff, 0)) %>%
      pull(cost_diff)

    # logistic baseline probability
    # NOTE(review): beta2/beta5 are read under the term name "cost_proxy_scaled";
    # confirm that cost_diff passed here is on the same scale.
    p_raw <- adopt_prob(
      offtakers_run$beta0,
      offtakers_run$beta1, spatial_detrended,
      offtakers_run$beta2, cost_diff,
      offtakers_run$beta3, offtakers_run$distance_to_waterway,
      offtakers_run$beta4, offtakers_run$distance_to_pipeline,
      offtakers_run$beta5
    )
    # Offtakers with a missing predictor get probability 0.
    p_raw[is.na(p_raw)] <- 0

    # residual probability toward β6
    # Per sector: share of the not-yet-adopted fraction that may still adopt
    # before the sector mean reaches beta6; zero once the mean is at or above beta6.
    sector_summary <- tibble(
      sector = offtakers_run$sector,
      cumulative_adoption = cumulative_adoption,
      beta6 = offtakers_run$beta6
    ) %>%
      group_by(sector) %>%
      summarise(
        sector_mean = mean(cumulative_adoption),
        beta6       = mean(beta6),
        .groups = "drop"
      ) %>%
      mutate(
        residual_share = pmax(0, (beta6 - sector_mean) / pmax(1e-6, 1 - sector_mean))
      )
      
    # Scale each offtaker's baseline probability by its sector's residual share.
    residual_p <- tibble(sector = offtakers_run$sector, p = p_raw) %>%
      left_join(sector_summary, by = "sector") %>%
      transmute(residual_p = replace_na(p, 0) * replace_na(residual_share, 0)) %>%
      pull(residual_p)

    residual_p[is.na(residual_p)] <- 0

    # adoption update
    # Continuous sectors: adoption share a becomes 1 - (1 - a) * (1 - p).
    # Other sectors: one Bernoulli draw per offtaker; pmax() keeps earlier adopters adopted.
    # Random draws are made sector by sector in unique(sector) order and no seed
    # is set inside this loop, so results depend on the RNG state on entry.
    for (s in unique(offtakers_run$sector)) {
      idx_s <- which(offtakers_run$sector == s)

      if (s %in% continuous_sectors) {
        cumulative_adoption[idx_s] <-
          1 - (1 - cumulative_adoption[idx_s]) * (1 - residual_p[idx_s])
      } else {
        cumulative_adoption[idx_s] <- pmax(
          cumulative_adoption[idx_s],
          rbinom(length(idx_s), 1, residual_p[idx_s])
        )
      }
    }

    # H2 uptake
    # Adoption state times the offtaker's hydrogen_2050 value (used for every simulated year).
    h2_uptake <- cumulative_adoption * offtakers_run$hydrogen_2050

    # Plant-level record for this year; `probability` holds the residual-scaled probability.
    uptake_years[[t]] <- tibble(
      year           = current_year,
      sector         = offtakers_run$sector,
      plant_id       = offtakers_run$plant_id,
      adoption       = cumulative_adoption,
      probability    = residual_p,
      h2_uptake      = h2_uptake,
      scenario       = sat,
      green_scenario = gs
    )
  }

  all_results[[label]] <- bind_rows(uptake_years)
}

# results: stack all scenario runs into one long table
h2_all <- bind_rows(all_results)

# Total uptake per year and scenario combination, divided by 1000
# (the column keeps its historical name `avg_h2_uptake` although it is a sum)
h2_summary <- summarise(
  group_by(h2_all, year, scenario, green_scenario),
  avg_h2_uptake = sum(h2_uptake, na.rm = TRUE) / 1000,
  .groups = "drop"
)

# Persist the simulation outputs in the working directory
invisible(Map(
  saveRDS,
  list(h2_all, offtakers, neighbors_matrix, spatial_weights),
  c("h2_all.rds", "offtakers.rds", "neighbors_matrix.rds", "spatial_weights.rds")
))


In [None]:
#FIGURE 5: H2 DEMAND 2030, 2050, DIFFUSION
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Color palette for the three cost-competitiveness scenarios
grey_palette <- c(conservative = "grey20", mean = "grey50", progressive = "grey80")

# Sector names in alphabetical order
all_sectors <- sort(c(
  "Power","Heavy duty","Pulp & paper","Chemicals","Refining",
  "Iron & steel","Non-ferrous metals","Other","Aviation",
  "Shipping","Non-metallic minerals","Heat"
))

# One colour per sector, interpolated from ten NPG ("nrc") colours
npg_base <- pal_npg("nrc")(10)
sector_palette <- colorRampPalette(npg_base)(length(all_sectors))
names(sector_palette) <- all_sectors

# Switch color order to improve contrast: rotate the colours of three sectors
# (Iron & steel <- Non-ferrous metals, Heat <- Iron & steel, Non-ferrous metals <- Heat).
# The right-hand side is read before anything is overwritten.
recolour_targets <- c("Iron & steel", "Heat", "Non-ferrous metals")
recolour_sources <- c("Non-ferrous metals", "Iron & steel", "Heat")
sector_palette[recolour_targets] <- sector_palette[recolour_sources]

# Plot theme shared by the Figure 5 panels: minimal theme at 18 pt, no grid
# lines or tick marks, black panel border and axis lines, legend on the right.
text_18 <- element_text(size = 18)

plot_theme <- theme_minimal(base_size = 18) +
  theme(
    # grid, border, axes
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(1.2, "lines"),
    panel.border = element_rect(color = "black", fill = NA),
    axis.line = element_line(color = "black"),
    axis.ticks = element_blank(),
    # legend
    legend.position = "right",
    legend.box = "vertical",
    legend.title = element_text(face = "plain", size = 18),
    legend.text = text_18,
    legend.key.width = unit(1.2, "cm"),
    legend.key.height = unit(0.5, "cm"),
    # text
    axis.text.x = element_text(angle = 45, hjust = 1, size = 18),
    axis.text.y = text_18,
    axis.text.x.top = element_blank(),
    axis.text.y.right = element_blank(),
    axis.title.x = text_18,
    axis.title.y = element_blank(),
    strip.text = text_18,
    plot.title = element_text(hjust = 0.5, size = 18)
  )

# Order the saturation scenarios for facetting
scenario_levels <- c("restricted", "central", "extended")
h2_all$scenario <- factor(h2_all$scenario, levels = scenario_levels)

# Data Preparation
# Total uptake per year and scenario combination, divided by 1000
h2_summary <- summarise(
  group_by(h2_all, year, green_scenario, scenario),
  avg_h2_uptake = sum(h2_uptake, na.rm = TRUE)/1000,
  .groups = "drop"
)

# Sector totals for the two years shown as bars
bar_years <- c(2030, 2050)
sector_bars <- h2_all %>%
  filter(year %in% bar_years) %>%
  mutate(sector = factor(sector, levels = all_sectors)) %>%
  group_by(year, sector, green_scenario, scenario) %>%
  summarise(h2_uptake = sum(h2_uptake, na.rm = TRUE)/1000, .groups = "drop")

# Horizontal reference lines: 20 for 2030 and 50 for 2050
bar_ref_lines <- tibble(
  year = c(2030, 2050),
  yintercept = c(20, 50),
  label = "Policy Target"
)

# Line plot: uptake over time, one line per cost scenario and one facet row
# per saturation scenario, with dashed reference lines labelled "<year> Target".
line_base <- ggplot(
  h2_summary,
  aes(x = year, y = avg_h2_uptake, color = green_scenario)
)

line_layers <- line_base +
  geom_line(linewidth = 1.2) +
  geom_hline(
    aes(yintercept = yintercept, linetype = label),
    data = bar_ref_lines,
    inherit.aes = FALSE,
    color = "black", linewidth = 0.6
  ) +
  geom_text(
    aes(x = 2075, y = yintercept, label = paste0(year," Target")),
    data = bar_ref_lines,
    inherit.aes = FALSE,
    hjust = -0.1, vjust = -0.5,
    size = 5, color = "black"
  )

line_plot <- line_layers +
  facet_grid(rows = vars(scenario), scales = "fixed") +
  scale_color_manual(name = "Cost Competitiveness", values = grey_palette) +
  scale_linetype_manual(name = "",
                        values = c("Policy Target" = "dashed"),
                        guide = guide_legend(order = 2)) +
  labs(title = "Diffusion over Time", x = "Year") +
  plot_theme +
  # The row strips and the y axis are hidden in this panel
  theme(
    strip.text.y = element_blank(),
    axis.text.y  = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank()
  )

# Bar matrix: stacked sector bars per cost scenario, facetted by saturation
# scenario (rows, strips on the left) and year (columns), with the reference
# line of the matching year in each column.
scenario_labels <- c("restricted"="Restricted","central"="Central","extended"="Extended")
cost_labels <- c(
  conservative = "Conservative",
  mean         = "Mean",
  progressive  = "Progressive"
)

bar_layers <- ggplot(sector_bars,
                     aes(x = green_scenario, y = h2_uptake, fill = sector)) +
  geom_bar(stat = "identity", position = "stack", width = 0.7) +
  geom_hline(
    aes(yintercept = yintercept, linetype = label),
    data = bar_ref_lines,
    inherit.aes = FALSE,
    color = "black", linewidth = 0.6
  )

bar_matrix <- bar_layers +
  facet_grid(
    rows = vars(scenario),
    cols = vars(year),
    switch = "y",
    labeller = labeller(scenario = scenario_labels)
  ) +
  scale_fill_manual(name = "Sector", values = sector_palette, drop = FALSE) +
  scale_x_discrete(labels = cost_labels) +
  scale_linetype_manual(name = "",
                        values = c("Policy Target" = "dashed"),
                        guide = guide_legend(order = 2)) +
  labs(title = "Expected Green H₂ Demand by Sector",
       x = "Cost Competitiveness", y = "Green H₂ Demand (Mt)") +
  coord_cartesian(clip = "off") +
  plot_theme

# Combine: bars on the left, lines on the right, shared legend, panels tagged a/b
side_by_side <- bar_matrix | line_plot

combined <- (
  side_by_side +
    plot_layout(guides = "collect", widths = c(8,6)) +
    plot_annotation(tag_levels = "a")
) &
  theme(
    plot.tag = element_text(size = 18),
    legend.position = "right",
    axis.title.y = element_blank()
  )

# Shared y-axis title drawn once on the left edge of the combined figure
figure5 <- ggdraw(combined) +
  draw_label("Green H₂ Demand (Mt)",
             x = 0, y = 0.5, vjust = 1.5, angle = 90, size = 18)

# Show in the notebook at 16 x 10 in, then write the PDF
options(repr.plot.width = 16, repr.plot.height = 10, repr.plot.res = 600)
print(figure5)

ggsave("figure5.pdf", plot = figure5,
       device = cairo_pdf, width = 16, height = 10, units = "in", dpi = 800)

In [None]:
#SECTOR SHARES
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# total per (year,green_scenario,scenario)
totals <- summarise(
  group_by(sector_bars, year, green_scenario, scenario),
  h2_tot = sum(h2_uptake),
  .groups = "drop"
)

# restrict to the two sectors and compute each sector's share of the total
focus_sectors <- c("Chemicals","Iron & steel")

chem_steel_sep <- sector_bars %>%
  filter(sector %in% focus_sectors,
         year %in% c(2030,2050)) %>%
  left_join(totals, by = c("year","green_scenario","scenario")) %>%
  transmute(year, green_scenario, scenario, sector,
            share = h2_uptake / h2_tot)

chem_steel_sep

In [None]:
#2050 DEMAND
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Total green H2 uptake in 2050 per cost scenario and saturation scenario.
# The year filter previously selected 2030, contradicting the section title and
# the object name; it is applied first so that only 2050 rows are aggregated.
h2_2050 <- h2_all %>%
  filter(year == 2050) %>%
  group_by(year, green_scenario, scenario) %>%
  summarise(h2_uptake = sum(h2_uptake, na.rm = TRUE), .groups = "drop")

print(h2_2050)

In [None]:
#SUPPLEMENTARY FIGURE 11 - Map of baseline demand in central saturation + mean cost scenario in 2050
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

library(rnaturalearth)
library(rnaturalearthdata)

# Plant-level uptake for the single scenario shown on the map:
# year 2050, "central" scenario, "mean" green_scenario.
uptake_data <- h2_all %>%
  filter(
    year == "2050",
    scenario == "central",
    green_scenario == "mean"
  ) %>%
  select(plant_id, h2_uptake)

# Keep only offtakers that have an uptake value in that scenario.
offtakers_gs <- left_join(offtakers, uptake_data, by = "plant_id")
offtakers_gs <- filter(offtakers_gs, !is.na(h2_uptake))

# Attach the NUTS2 region each offtaker lies within. Both layers are
# reprojected to EPSG:3035 before the spatial join.
offtakers_nuts <- offtakers_gs %>%
  st_transform(3035) %>%
  st_join(st_transform(nuts2_shapefile, 3035), join = st_within)

# Sum uptake per NUTS2 region; the division by 1000 rescales the unit
# (presumably kt -> Mt, given the "Mt" legend -- confirm against the input data).
nuts2_uptake <- offtakers_nuts %>%
  st_drop_geometry() %>%
  group_by(NUTS_ID) %>%
  summarise(
    h2_uptake = sum(h2_uptake, na.rm = TRUE) / 1000,
    .groups = "drop"
  )

# Re-attach the polygons. The right join keeps every NUTS2 region, and regions
# without any offtaker get an uptake of 0 instead of NA.
nuts2_ready <- nuts2_uptake %>%
  right_join(nuts2_shapefile, by = "NUTS_ID") %>%
  mutate(h2_uptake = replace_na(h2_uptake, 0)) %>%
  st_as_sf() %>%
  st_transform(4326)

# Ten regions with the highest uptake; ID is the rank (1 = highest) used for
# the numbered bubbles, label_clean the region name shown in the bar chart.
top10_nuts2 <- nuts2_ready %>%
  arrange(desc(h2_uptake)) %>%
  slice_head(n = 10) %>%
  mutate(
    ID = row_number(),
    label_clean = NAME_LATN
  )

# One point guaranteed to lie inside each top-10 polygon (bubble anchor).
top10_centroids <- st_point_on_surface(top10_nuts2)

# Country outlines used as the map background.
world <- ne_countries(scale = "medium", returnclass = "sf")

# Upper bound of the fill scale.
max_h2 <- max(nuts2_ready$h2_uptake)

# Plot

# Shared text sizes (also used by the bar chart panel).
title_size <- 16
axis_size  <- 16

# Bubble anchor coordinates with their rank ID. Built once and reused by both
# the bubble layer and the bubble-label layer instead of being recomputed.
centroid_coords <- st_coordinates(top10_centroids) %>%
  as.data.frame() %>%
  mutate(ID = top10_nuts2$ID)

# Choropleth of uptake per NUTS2 region with numbered bubbles on the top 10.
p_map <- ggplot() +
  # Background countries. Polygon border width is set through `linewidth`
  # (ggplot2 >= 3.4), consistent with the top-10 outline layer below; the
  # former `size = 0.2` does not control polygon borders there.
  geom_sf(data = world, fill = "grey97", color = "grey85", linewidth = 0.2) +
  geom_sf(data = nuts2_ready, aes(fill = h2_uptake), color = NA) +
  # Black outline around the top-10 regions.
  geom_sf(data = top10_nuts2, color = "black", fill = NA, linewidth = 0.5) +

  # bubbles
  geom_point(
    data = centroid_coords,
    aes(X, Y),
    shape = 21,
    size = 6,
    fill = "white",
    color = "black",
    stroke = 1
  ) +

  # bubble labels
  geom_text(
    data = centroid_coords,
    aes(X, Y, label = ID),
    size = 4,
    fontface = "bold"
  ) +

  # Square-root transform keeps low-uptake regions distinguishable.
  scale_fill_viridis_c(
    name = "H2 Demand (Mt)",
    option = "mako", direction = -1,
    limits = c(0, max_h2),
    trans = "sqrt"
  ) +
  # Crop to the plotted longitude/latitude window.
  coord_sf(xlim = c(-15, 45), ylim = c(30, 75), expand = FALSE) +
  labs(
    title = "Expected green H₂ demand by NUTS-2 region, 2050",
    x = "Longitude", y = "Latitude"
  ) +
  theme_classic(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0, size = title_size),
    axis.text  = element_text(size = axis_size),
    axis.title = element_text(size = axis_size),
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = "right"
  )


# Bar chart: the top-10 regions ordered by uptake, each prefixed with the same
# numbered bubble that marks it on the map.
p_bar <- ggplot(
  top10_nuts2,
  aes(x = h2_uptake, y = reorder(label_clean, h2_uptake))
) +
  geom_col(fill = "#3C5488FF", width = 0.6) +
  # Rank bubble just left of the zero line (y is inherited from the plot aes).
  geom_point(
    aes(x = -0.02),
    shape = 21,
    size = 7,
    stroke = 1,
    color = "black",
    fill = "white"
  ) +
  # Rank number inside the bubble.
  geom_text(
    aes(x = -0.02, label = ID),
    size = 5,
    fontface = "bold"
  ) +
  # Value label to the right of each bar, one decimal place.
  geom_text(
    aes(x = h2_uptake + 0.15, label = sprintf("%.1f", h2_uptake)),
    size = 5,
    hjust = 0
  ) +
  # Fixed x range leaving room for the bubbles (left) and value labels (right).
  scale_x_continuous(
    limits = c(-0.1, 2.5),
    expand = expansion(mult = c(0, 0.02))
  ) +
  labs(
    title = "10 NUTS-2 regions with highest green H₂ demand, 2050",
    x = "Green H₂ Demand (Mt per year)",
    y = NULL
  ) +
  theme_classic(base_size = 14) +
  theme(
    plot.title  = element_text(size = title_size, hjust = 0),
    axis.title  = element_text(size = axis_size),
    axis.text   = element_text(size = axis_size),
    axis.text.y = element_text(
      size = axis_size,
      hjust = 0,
      margin = margin(r = -5)
    ),
    plot.margin = margin(5, 5, 5, 5)
  )


# Print and save
# Combine the map and the bar chart into one two-panel figure.
# The parentheses are required: `+` binds tighter than `|` in R, so without
# them the layout and the panel tags would be attached to `p_bar` alone rather
# than to the combined figure.
supplementary_figure_11 <- (p_map | p_bar) +
  plot_layout(
    widths = c(3, 3),
    guides = "collect"
  ) +
  plot_annotation(tag_levels = "a")


# Notebook display size / resolution, then render inline. The object printed
# and saved is the one assembled above (previously an undefined `final_plot`
# was printed and an invalid, hyphenated name was passed to ggsave()).
options(repr.plot.width = 22, repr.plot.height = 9, repr.plot.res = 800)
print(supplementary_figure_11)

ggsave(
  "supplementary-figure-11.pdf",
  supplementary_figure_11,
  device = cairo_pdf,
  width = 22, height = 9, units = "in", dpi = 800
)


In [None]:
#INLINE FIGURE ONLY -> COST COMPETITIVENESS VALUES AT ADOPTION
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Cost-gap rows for the fossil scenario selected by `carbon_price_setting`,
# reduced to the join keys plus the cost difference.
cost_gap_selected <- cost_gap_data %>%
  filter(fossil_scenario == carbon_price_setting) %>%
  select(year, sector, green_scenario, cost_diff)

# Attach the cost difference to every row of h2_all, drop rows without a
# match, and fix the factor level order used for facets and the legend.
h2_cost <- h2_all %>%
  left_join(cost_gap_selected, by = c("year", "sector", "green_scenario")) %>%
  filter(!is.na(cost_diff)) %>%
  mutate(
    green_scenario = factor(
      green_scenario,
      levels = c("conservative", "mean", "progressive")
    ),
    scenario = factor(
      scenario,
      levels = c("restricted", "central", "extended")
    )
  )

# Color palettes: one NPG ("nrc") color per green_scenario level.
nrc_colors <- pal_npg("nrc")(10)
green_scenario_palette <- c(
  conservative = nrc_colors[2],
  mean         = nrc_colors[3],
  progressive  = nrc_colors[4]
)

# Histogram
# Distribution of the cost difference, weighted by `adoption`, with one facet
# per `scenario` and overlaid semi-transparent bars per `green_scenario`
# (position = "identity", i.e. bars are overlaid, not stacked).
p_hist_adoption <- ggplot(h2_cost, aes(x = cost_diff, weight = adoption, fill = green_scenario)) +
  geom_histogram(bins = 50, position = "identity", alpha = 0.3, colour = "white") +
  facet_grid(. ~ scenario) +
  # Legend title typo fixed ("trajector" -> "trajectory").
  scale_fill_manual(name = "Cost competitiveness trajectory", values = green_scenario_palette) +
  # Unlabelled duplicate axes on the top/right close the panel frame.
  scale_x_continuous(sec.axis = dup_axis(name = NULL, labels = NULL)) +
  scale_y_continuous(sec.axis = dup_axis(name = NULL, labels = NULL)) +
  labs(
    title = "Adoption distribution by cost competitiveness",
    x = "Cost difference (EUR/MWh, Green – Fossil)",
    y = "Total adoption (weighted count)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.line   = element_line(color = "black"),
    axis.ticks  = element_line(color = "black"),
    axis.text   = element_text(size = 14),
    axis.title  = element_text(size = 14),
    plot.title  = element_text(size = 14, face = "plain", hjust = 0.5),
    strip.text  = element_text(size = 12, face = "plain"),
    legend.position = "bottom",
    legend.title    = element_text(face = "plain"),
    # hide duplicated tick labels
    axis.text.x.top    = element_blank(),
    axis.text.y.right  = element_blank(),
    axis.ticks.x.top   = element_blank(),
    axis.ticks.y.right = element_blank()
  )

# Render inline, then write to disk as a Cairo PDF.
p_hist_adoption
ggsave("hist_adoption_cost_diff.pdf", p_hist_adoption,
       device = cairo_pdf, width = 10, height = 6, units = "in", dpi = 800)