# Check Fortran updates

In [3]:
suppressMessages(library("rwwa"))

## Original synthesis functions

In [4]:
getsynmean <- function(data, sig_mod = 0) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower))^2 + (2*sig_mod)^2)
  w1 = sum(w)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum((w*(data$est - b))^2))
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * c(0,-1,1)))
}


################################################################################################################################
getsynchi2 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"]))^2 + (sig_mod)^2)
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"]))^2 + (sig_mod)^2)
    }
  }))
  return(chi2*4)
}


################################################################################################################################
synthesis <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt( (w_obs*(obs["est"]-obs["lower"]))^2 + (w_mod*(models["est"]-models["lower"]))^2 )/(w_obs+w_mod),
                      wmean + sqrt( (w_obs*(obs["est"]-obs["upper"]))^2 + (w_mod*(models["est"]-models["upper"]))^2 )/(w_obs+w_mod)),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2
  synth["u_wb"] <- umean + sqrt((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## 1. Rearrange getsynchi2 (no revision yet)

In [5]:
getsynmean_rev1 <- function(data, sig_mod = 0) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower))^2 + (2*sig_mod)^2)
  w1 = sum(w)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum((w*(data$est - b))^2))
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * c(0,-1,1)))
}


################################################################################################################################
getsynchi2_rev1 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean_rev1(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"])/2)^2 + (sig_mod)^2)    # MODIFIED
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"])/2)^2 + (sig_mod)^2)    # MODIFIED
    }
  }))
  return(chi2)
}


################################################################################################################################
synthesis_rev1 <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2_rev1(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2_rev1(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean_rev1(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt( (w_obs*(obs["est"]-obs["lower"]))^2 + (w_mod*(models["est"]-models["lower"]))^2 )/(w_obs+w_mod),
                      wmean + sqrt( (w_obs*(obs["est"]-obs["upper"]))^2 + (w_mod*(models["est"]-models["upper"]))^2 )/(w_obs+w_mod)),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2
  synth["u_wb"] <- umean + sqrt((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = 2*sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## 2. Revise getsynchi2

In [6]:
getsynmean_rev2 <- function(data, sig_mod = 0) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower))^2 + (2*sig_mod)^2)
  w1 = sum(w)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum((w*(data$est - b))^2))
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * c(0,-1,1)))
}


################################################################################################################################
getsynchi2_rev2 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean_rev2(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    }
  }))
  return(chi2)
}


################################################################################################################################
synthesis_rev2 <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2_rev2(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2_rev2(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean_rev2(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt( (w_obs*(obs["est"]-obs["lower"]))^2 + (w_mod*(models["est"]-models["lower"]))^2 )/(w_obs+w_mod),
                      wmean + sqrt( (w_obs*(obs["est"]-obs["upper"]))^2 + (w_mod*(models["est"]-models["upper"]))^2 )/(w_obs+w_mod)),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2
  synth["u_wb"] <- umean + sqrt((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = 2*sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## 3. Revise getsynmean from interval weighting to variance weighting, don't square weights

In [7]:
getsynmean_rev3 <- function(data, sig_mod = 0) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower)/3.96)^2 + (sig_mod)^2)
  w1 = sum(w)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum(w*(data$est - b)/1.96)^2)
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * 1.96*c(0,-1,1)))
}


################################################################################################################################
getsynchi2_rev3 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean_rev3(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    }
  }))
  return(chi2)
}


################################################################################################################################
synthesis_rev3 <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2_rev3(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2_rev3(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean_rev3(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt( (w_obs*(obs["est"]-obs["lower"]))^2 + (w_mod*(models["est"]-models["lower"]))^2 )/(w_obs+w_mod),
                      wmean + sqrt( (w_obs*(obs["est"]-obs["upper"]))^2 + (w_mod*(models["est"]-models["upper"]))^2 )/(w_obs+w_mod)),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2
  synth["u_wb"] <- umean + sqrt((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = 2*sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## 4. Don't square weights when weighting obs & models

In [37]:
getsynmean_rev4 <- function(data, sig_mod = 0) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower)/3.96)^2 + (sig_mod)^2)
  w1 = sum(w)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum(w*(data$est - b)/1.96)^2)
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * 1.96*c(0,-1,1)))
}


################################################################################################################################
getsynchi2_rev4 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean_rev4(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    }
  }))
  return(chi2)
}


################################################################################################################################
synthesis_rev4 <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2_rev4(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2_rev4(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean_rev4(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt( w_obs*((obs["est"]-obs["lower"])^2) + w_mod*((models["est"]-models["lower"])^2) )/(w_obs+w_mod),
                      wmean + sqrt( w_obs*((obs["est"]-obs["upper"])^2) + w_mod*((models["est"]-models["upper"])^2) )/(w_obs+w_mod)),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt(((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2)
  synth["u_wb"] <- umean + sqrt(((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = 2*sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## 5. Fix interval widths

In [55]:
getsynmean_rev5 <- function(data, sig_mod = 0, print_weights = F) {

  # calculate weight for each model based on inverse variance
  w = 1/(((data$upper - data$lower)/3.96)^2 + (sig_mod)^2)
  w1 = sum(w)
  if (print_weights) print(w/w1)

  # weighted sum of squares & bounds
  s1 <- sum(w*data$est) / w1
  ss2 <- apply(data[,c("est", "lower", "upper")], 2, function(b) sum(w*(data$est - b)/1.96)^2)
  ss2 <- sqrt(ss2) / w1

  # if non-zero model representation error, extend 95% intervals accordingly
  if (sig_mod > 0) { ss2 <- sqrt(ss2^2 + (sig_mod)^2) }

  return(s1 + (ss2 * 1.96*c(0,-1,1)))
}


################################################################################################################################
getsynchi2_rev5 <- function(data, sig_mod = 0) {

  # function to be minimized by finding sig_mod such that chi^2/mdof ~= 1

  # get best estimate of weighted mean
  s1 <- getsynmean_rev5(data, sig_mod)["est"]

  # compute chi2 by converting model intervals to standard deviations & adding sig_mod adjustment
  chi2 <- sum(apply(data[,c("est", "lower", "upper")], 1, function(x) {
    if(x["est"] > s1) {
      (x["est"] - s1)^2 / (((x["est"] - x["lower"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    } else {
      (x["est"] - s1)^2 / (((x["est"] - x["upper"])/1.96)^2 + (sig_mod)^2)    # MODIFIED
    }
  }))
  return(chi2)
}


################################################################################################################################
synthesis_rev5 <- function(obs_in = NA, models_in, synth_type = "abs") {

  if(is.na(unlist(obs_in))[1]) {
    no_obs <- T
    # create a dummy dataframe to avoid having to rewrite everything twice
    obs_in <- data.frame("est" = 0, "lower" = 0, "upper" = 0)
    rownames(obs_in) <- "dummy"
  } else {
    no_obs <- F
  }

  # relabel the data for easier reference later
  colnames(obs_in) <- colnames(models_in) <- c("est", "lower", "upper")

  if(!("model" %in% colnames(obs_in))) obs_in$model <- rownames(obs_in)
  if(!("model" %in% colnames(models_in))) models_in$model <- rownames(models_in)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  if(!synth_type %in% c("abs", "rel", "PR")) {
    cat(paste0("Synthesis type '",synth_type,"' not implemented - must be abs, rel or PR"), "\n")
  }

  if(synth_type == "PR") {
    obs_in[,c("est", "lower", "upper")] <- log(obs_in[,c("est", "lower", "upper")])
    models_in[,c("est", "lower", "upper")] <- log(models_in[,c("est", "lower", "upper")])
  } else if(synth_type == "rel") {
    obs_in[,c("est", "lower", "upper")] <- log(1+obs_in[,c("est", "lower", "upper")]/100)
    models_in[,c("est", "lower", "upper")] <- log(1+models_in[,c("est", "lower", "upper")]/100)
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get mean of intervals to estimate natural variability component

  # compute representation error from scatter of mean ($\sigma_{rep}$ in the paper)
  nobs = nrow(obs_in)
  obs <- apply(obs_in[,c("est", "lower", "upper"),drop = F], 2, mean)
  if(nobs == 1) {
    sig_obs = 0
  } else {
    s2 = sum((obs_in$est - obs[1])^2)
    sig_obs = sqrt(s2/(nobs-1))
  }

  # add representation error to individual observations
  obs_in$l_wb <- obs_in$est - sqrt((obs_in$est - obs_in$lower)**2 + (1.96*sig_obs)**2)
  obs_in$u_wb <- obs_in$est + sqrt((obs_in$est - obs_in$upper)**2 + (1.96*sig_obs)**2)

  # apply representation error to obs synthesis
  # we're working with confidence intervals here, so we extend them by adding (1.96sig_obs)^2 in quadrature
  obs[2] <- obs[1] - sqrt( (obs[1] - obs[2])**2 + (1.96*sig_obs)**2 )
  obs[3] <- obs[1] + sqrt( (obs[1] - obs[3])**2 + (1.96*sig_obs)**2 )

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # get initial estimate of model mean & calculate chi^2
  chi2 <- getsynchi2_rev5(models_in, sig_mod = 0)
  mdof <- nrow(models_in)-1

  if ( chi2/mdof > 1 ) {
    # find sig_mod such that chi^2/dof = 1
    sig_mod <- optim(0, function(x) {(getsynchi2_rev5(models_in, sig_mod = x) - (nrow(models_in)-1))^2},
                     method = "Brent", lower = 0, upper = 5)$par
  } else {
    sig_mod <- 0
  }

  # get weighted model mean
  models <- getsynmean_rev5(models_in, sig_mod = sig_mod)

  # add representation error to individual models
  models_in$l_wb <- models_in$est - sqrt((models_in$est - models_in$lower)**2 + (sig_mod)**2)
  models_in$u_wb <- models_in$est + sqrt((models_in$est - models_in$upper)**2 + (sig_mod)**2)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # weighted mean of models & obs (coloured bar)
  w_obs <- unname((obs["upper"] - obs["lower"])^{-2})
  w_mod <- unname((models["upper"] - models["lower"])^{-2})

  wmean <- (w_obs * obs["est"] + w_mod * models["est"]) / (w_obs + w_mod)
  synth <- setNames(c(wmean,
                      wmean - sqrt(( w_obs*(obs["est"]-obs["lower"])^2 + w_mod*(models["est"]-models["lower"])^2 )/(w_obs+w_mod)),
                      wmean + sqrt(( w_obs*(obs["est"]-obs["upper"])^2 + w_mod*(models["est"]-models["upper"])^2 )/(w_obs+w_mod))),
                    c("est", "lower", "upper"))


  # unweighted mean of obs and models
  umean <- (obs["est"] +  models["est"]) / 2
  synth["l_wb"] <- umean - sqrt(((obs["est"]-obs["lower"])^2 + (models["est"]-models["lower"])^2)/2)
  synth["u_wb"] <- umean + sqrt(((obs["est"]-obs["upper"])^2 + (models["est"]-models["upper"])^2)/2)
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # add group labels for easier plotting
  obs_in <- cbind(obs_in, "group" = "obs")
  obs <- data.frame(t(c("model" = "Observations", "group" = "obs_synth", obs)))
  models_in <- cbind(models_in, "group" = "models")
  models <- data.frame(t(c("model" = "Models", "group" = "model_synth", models)))
  synth <- data.frame(t(c("model" = "Synthesis", "group" = "synth", synth)))

  # combine all the data together in one dataframe
  res <- rbind.fill(obs_in, obs, models_in, models, synth)[,c("group", "model", "est", "lower", "upper", "l_wb", "u_wb")]
  for(cnm in c("est", "lower", "upper", "l_wb", "u_wb")) { res[,cnm] <- as.numeric(res[,cnm]) }

  # if only dummy obs, remove
  if(no_obs) {
    # drop all rows that don't relate to models
    res <- res[grepl("model", res$group),]
    sig_obs <- NA
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # reverse any transformations applied
  if(synth_type == "PR") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])
    sig_obs <- exp(sig_obs)
    sig_mod <- exp(sig_mod)
    umean <- exp(umean)
  } else if(synth_type == "rel") {
    res[,c("est", "lower", "upper", "l_wb", "u_wb")] <- 100*(exp(res[,c("est", "lower", "upper", "l_wb", "u_wb")])-1)
    sig_obs <- 100*(exp(sig_obs)-1)
    sig_mod <- 100*(exp(sig_mod)-1)
    umean <- 100*(exp(umean)-1)
  }

  return(list(synth_type = synth_type, sig_obs = sig_obs, sig_mod = 2*sig_mod, "chi2/dof" = chi2 / mdof, df = res, uw_mean = umean))
}

## Compare results

In [56]:
fnm <- "data/helene_potint_dI-abs.csv"
res <- read.csv(fnm, row.names = 2)
obs = res[res$src == "obs",-1]
models = res[res$src == "models",-1]

stype <- switch(strsplit(gsub(".csv","",fnm), "_")[[1]][3], "PR" = "PR", "dI-abs" = "abs", "dI-rel" = "rel")

synth_rev0 = synthesis(obs, models, synth_type = stype)         # should match current Climate Explorer code but white boxes on models are wrong (slightly wider)
synth_rev1 = synthesis_rev1(obs, models, synth_type = stype)    # rearranged getsynchi2 to remove 4x multiplier. chi2/dof same, sig_mod slightly larger
synth_rev2 = synthesis_rev2(obs, models, synth_type = stype)    # corrected 2x to 1.96x in getsynchi2; chi2/dof smaller but sig_mod is same. White boxes slightly narrower, model_synth est slightly lower
synth_rev3 = synthesis_rev3(obs, models, synth_type = stype)    # updates to getsynmean: convert intervals to SDs and remove weights from square. White boxes & model confint slightly wider than originally
synth_rev4 = synthesis_rev4(obs, models, synth_type = stype)    # 
synth_rev5 = synthesis_rev5(obs, models, synth_type = stype)    # 

syntheses <- sapply(ls(pattern = "synth_"), "get", simplify = F)

In [57]:
rbind(climexp = c(5.762, 2.89), 
      t(data.frame(sapply(syntheses, "[", c("chi2/dof", "sig_mod")))))

Unnamed: 0_level_0,chi2/dof,sig_mod,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
climexp,5.762,2.89,,,,,,,,,,,,
synth_rev0,5.76228,2.885292,,,,,,,,,,,,
synth_rev1,5.76228,2.908996,,,,,,,,,,,,
synth_rev2,5.534093,2.895292,,,,,,,,,,,,
synth_rev3,5.534093,2.872642,,,,,,,,,,,,
synth_rev4,5.534093,2.872642,,,,,,,,,,,,
synth_rev5,5.534093,2.872642,,,,,,,,,,,,


In [49]:
rbind(climexp = c("-", "-", 5.42, 3.82, 6.91, 2.12, 8.67),
      t(sapply(syntheses, function(s) s$df[s$df$model == "BCC-CSM2-MR",], simplify = T)))

Unnamed: 0_level_0,group,model,est,lower,upper,l_wb,u_wb,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
climexp,-,-,5.42,3.82,6.91,2.12,8.67,,,,,,,,,,,,,,
synth_rev0,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,2.122062,8.665602,,,,,,,,,,,,,,
synth_rev1,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,3.260114,7.500041,,,,,,,,,,,,,,
synth_rev2,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,3.264724,7.495257,,,,,,,,,,,,,,
synth_rev3,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,3.272317,7.487377,,,,,,,,,,,,,,
synth_rev4,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,3.272317,7.487377,,,,,,,,,,,,,,
synth_rev5,models,BCC-CSM2-MR,5.419148,3.823571,6.907285,3.272317,7.487377,,,,,,,,,,,,,,


In [50]:
rbind(climexp = c(3.86, 0.898, 6.80), 
      t(sapply(syntheses, function(s) round(s$df[s$df$group == "model_synth",3:5], 2), simplify = T)))

Unnamed: 0_level_0,est,lower,upper,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
climexp,3.86,0.898,6.8,,,,,,,,,,,,,,,,,,
synth_rev0,3.86,0.9,6.8,,,,,,,,,,,,,,,,,,
synth_rev1,3.67,2.09,5.23,,,,,,,,,,,,,,,,,,
synth_rev2,3.67,2.1,5.22,,,,,,,,,,,,,,,,,,
synth_rev3,3.85,0.69,6.96,,,,,,,,,,,,,,,,,,
synth_rev4,3.85,0.69,6.96,,,,,,,,,,,,,,,,,,
synth_rev5,3.85,0.69,6.96,,,,,,,,,,,,,,,,,,


In [51]:
rbind(climexp = c(4.04, 1.79, 6.15, 1.79, 6.17), 
      t(sapply(syntheses, function(s) round(s$df[s$df$group == "synth",-(1:2)], 2), simplify = T)))

Unnamed: 0_level_0,est,lower,upper,l_wb,u_wb,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
climexp,4.04,1.79,6.15,1.79,6.17,,,,,,,,,,,,,,,,
synth_rev0,4.04,1.79,6.15,1.79,6.17,,,,,,,,,,,,,,,,
synth_rev1,3.78,2.35,5.17,2.07,5.66,,,,,,,,,,,,,,,,
synth_rev2,3.78,2.35,5.16,2.07,5.66,,,,,,,,,,,,,,,,
synth_rev3,4.05,1.72,6.22,1.72,6.22,,,,,,,,,,,,,,,,
synth_rev4,4.05,-10.82,17.85,0.75,7.12,,,,,,,,,,,,,,,,
synth_rev5,4.05,0.75,7.12,0.75,7.12,,,,,,,,,,,,,,,,


In [52]:
# # Generated by synthesis version synthesis
# # synthesis update 2021-12-15
# # Change in intensity, past-present, RX1day, Earth, 2 yr
# # model chi2/dof =      5.762    
# # model uncertainty (2&sigma;)     2.89    
#  1900 2024    4.26       0.810        7.28       0.810        7.28      2 "ERA5 "
#  1900 2024    5.42        3.82        6.91        2.12        8.67      3 "BCC-CSM2-MR (1) "
#  1900 2024    2.31        1.19        3.27      -0.786        5.35      3 "CanESM5 (1) "
#  1900 2024    2.40        1.53        3.32      -0.614        5.43      3 "CMCC-ESM2 (1) "
#  1900 2024    5.64        3.92        7.21        2.28        8.92      3 "NorESM2-LM (1) "
#  1900 2024    4.07        1.93        5.87       0.479        7.47      3 "NorESM2-MM (1) "
#  1900 2024    3.86       0.898        6.80       0.898        6.80      4 "models "
#  1900 2024    4.04        1.79        6.15        1.79        6.17      5 "synthesis "

# Replicate climexp routines

In [36]:
getsynmean_cx <- function(data, sig_mod = 0, print_weights = F) {
    s1 <- 0
    ss2 <- rep(0,3)
    w1 <- 0
    weights <- c()
    data <- t(data)

    for (i in 1:ncol(data)) {
        if ( sig_mod > 0 ) {
            w <- 1/((data[3,i] - data[2,i])**2 + (2*sig_mod)**2)
        } else{
            w <- 1/(data[3,i] - data[2,i])**2
        }
        weights[i] <- w
        w1 <- w1 + w
        s1 = s1 + w*data[1,i]
        for (j in 2:3) {
            ss2[j] = ss2[j] + (w*(data[1,i]-data[j,i]))**2
        }
    }
    if(print_weights) print(weights / w1)
    s1 = s1/w1
    for (j in 2:3) {
        ss2[j] = sqrt(ss2[j])/w1
        if ( sig_mod > 0 ) {
            ss2[j] = sqrt(ss2[j]**2 + sig_mod**2)
        }
    }
    
    return(c("est" = s1, "lower" = s1 - ss2[2], "upper" = s1 + ss2[3]))
}    


getsynchi2_cx <- function(data, sig_mod = 0) {
    chi2 <- 0
    s1 <- getsynmean_cx(data, sig_mod)["est"]

    data <- t(data)

    for(i in 1:ncol(data)) {
        if(sig_mod > 0) {
            if (data[1,i] > s1) {
                chi2 = chi2 + (data[1,i]-s1)**2/((data[1,i]-data[2,i])**2 + sig_mod**2)
            } else {
                chi2 = chi2 + (s1-data[1,i])**2/((data[3,i]-data[1,i])**2 + sig_mod**2)
            }
        } else {
            if ( data[1,i] > s1 ) {
                chi2 = chi2 + ((data[1,i]-s1)/(data[1,i]-data[2,i]))**2
            } else {
                chi2 = chi2 + ((s1-data[1,i])/(data[3,i]-data[1,i]))**2
            }
        }
    }
    chi2 = chi2 * 4
    return(chi2)
}

getsynchi2_cx(models) / (nrow(models)-1)

In [19]:
# reflect updated code
getsynmean_cx2 <- function(data, sig_mod = 0, print_weights = F) {
    s1 <- 0
    ss2 <- rep(0,3)
    weights <- c()
    w1 <- 0
    data <- t(data)

    for (i in 1:ncol(data)) {
        if ( sig_mod > 0 ) {
            w <- 1/(((data[3,i] - data[2,i])/3.92)**2 + (sig_mod)**2)
        } else{
            w <- 1/((data[3,i] - data[2,i])/3.92)**2
        }
        weights[i] <- w
        w1 <- w1 + w
        s1 = s1 + w*data[1,i]
        for (j in 2:3) {
            ss2[j] = ss2[j] + w*((data[1,i]-data[j,i])/1.96)**2
        }
    }
    if(print_weights)print(weights / w1)

    s1 = s1/w1
    for (j in 2:3) {
        ss2[j] = sqrt(ss2[j]) / w1

        if ( sig_mod > 0 ) {
            ss2[j] = 1.96 * sqrt(ss2[j]/w1 + sig_mod**2)
        } else {
            ss2[j] = 1.96 * sqrt(ss2[j]/w1)
        }
    }
    return(c("est" = s1, "lower" = s1 - ss2[2], "upper" = s1 + ss2[3]))
}    


getsynchi2_cx2 <- function(data, sig_mod = 0) {
    chi2 <- 0
    s1 <- getsynmean_cx2(data, sig_mod)["est"]

    data <- t(data)

    for(i in 1:ncol(data)) {
        if(sig_mod > 0) {
            if (data[1,i] > s1) {
                chi2 = chi2 + (data[1,i]-s1)**2/(((data[1,i]-data[2,i])/1.96)**2 + sig_mod**2)
            } else {
                chi2 = chi2 + (s1-data[1,i])**2/(((data[3,i]-data[1,i])/1.96)**2 + sig_mod**2)
            }
        } else {
            if ( data[1,i] > s1 ) {
#                 chi2 = chi2 + ((data[1,i]-s1)/((data[1,i]-data[2,i]))/1.96)**2
                chi2 = chi2 + (data[1,i]-s1)^2/(((data[1,i]-data[2,i]))/1.96)**2
#                 chi2 = chi2 + ((data(1,i)-s1)/((data(1,i)-data(2,i))) / 1.96)**2
            } else {
#                 chi2 = chi2 + ((s1-data[1,i])/((data[3,i]-data[1,i]))/1.96)**2
                chi2 = chi2 + (s1-data[1,i])^2/(((data[3,i]-data[1,i]))/1.96)**2
            }
        }
    }
    return(chi2)
}

In [20]:
# revised CX method gives much lower value of chi2 (original CX code matches ori)
getsynchi2(models) / (nrow(models)-1)
getsynchi2_cx(models) / (nrow(models) - 1)
getsynchi2_cx2(models) / (nrow(models)-1)
getsynchi2_rev5(models) / (nrow(models)-1)

In [21]:
# all three methods give the same relative weights, so that's not where the problem lies
getsynmean_rev5(models, print_weights = T)
getsynmean_cx(models, print_weights = T)
getsynmean_cx2(models, print_weights = T)

[1] 0.13044033 0.28757550 0.38743087 0.11454875 0.08000456


[1] 0.13044033 0.28757550 0.38743087 0.11454875 0.08000456


[1] 0.13044033 0.28757550 0.38743087 0.11454875 0.08000456


In [22]:
# ss2 is 
getsynmean_cx(models)
getsynmean_cx2(models)

In [23]:
getsynmean_cx(models)[-(1:3)] / sum(getsynmean_cx(models)[-(1:3)])
getsynmean_cx2(models)[-(1:3)] / sum(getsynmean_cx2(models)[-(1:3)])

In [24]:
getsynmean_rev5(models)
