# **East Africa drought study - analysis of model data**

In [1]:
source("../wwa_nonstationary_fitting.r")

---
## **Observations**

### **Prep & testing**

In [2]:
# Read the observations/covariate table, then pin the covariate values that
# define the factual (2022) and counterfactual climates.
df <- read.csv("data/gmst-nino-cpc.csv")

# factual GMST is the 2022 value; the counterfactual is 1.2 lower
gmst_2022 <- df[["gmst"]][df$year == 2022]
gmst_cf <- gmst_2022 - 1.2

# the Nino covariate is held at its 2022 value in both climates
ondnino_2022 <- df[["nino_ond"]][df$year == 2022]
ondnino_cf <- ondnino_2022

In [75]:
# Wrapper function to get the best estimate and bootstrapped confidence intervals
# for the results spreadsheet.
#
# Arguments:
#   mdl              fitted model (as produced by fit_ns); this function reads its
#                    $par, $x, $ev_idx, $cov1, $cov2, $varnm, $covnm_1, $covnm_2,
#                    $dist, $type and $lower elements
#   cov1, cov2       covariate values describing the factual climate
#   cov1_cf, cov2_cf covariate values describing the counterfactual climate
#   seed             value passed to set.seed() before resampling (note: this
#                    resets the global RNG state)
#   nsamp            number of bootstrap resamples
#   dp               number of decimal places to round to; NA disables rounding
#   convert_logs     if TRUE and mdl$varnm starts with "log10" (or "log"), the
#                    event magnitude and counterfactual return level are
#                    back-transformed with 10^ (or exp) before the changes in
#                    intensity are computed
#
# Returns a matrix with one row per quantity (the model parameters, dispersion,
# event_magnitude, return_period, probability_ratio, abs_change_in_intensity,
# rel_change_in_intensity) and columns "bestimate", "2.5%" and "97.5%".
bootstrap_obs_results <- function(mdl, cov1, cov2, cov1_cf = NA, cov2_cf = NA, seed = 101, nsamp = 1000, dp = 5, convert_logs = TRUE) {

    # internal function giving the required results for a single fitted model
    # (convert_logs is taken from the enclosing function)
    get_mdl_values <- function(mdl, ev, cov1, cov2, cov1_cf = NA, cov2_cf = NA) {

        pars <- mdl$par

        # return periods are evaluated at the event value passed in as `ev`,
        # rather than relying on a variable from the enclosing scope
        # NOTE(review): presumably map_to_u returns the probability of an event
        # at least as extreme as x - confirm against wwa_nonstationary_fitting.r
        rp <- 1 / map_to_u(mdl, x = ev, cov1 = cov1, cov2 = cov2)
        rp_cf <- 1 / map_to_u(mdl, x = ev, cov1 = cov1_cf, cov2 = cov2_cf)

        # value with the factual return period under the counterfactual covariates
        rl_cf <- map_from_u(1 / rp, mdl, cov1 = cov1_cf, cov2 = cov2_cf)

        # if using log values, convert back to natural units
        if (convert_logs) {
            if (startsWith(mdl$varnm, "log10")) {
                ev <- 10^ev
                rl_cf <- 10^rl_cf
            } else if (startsWith(mdl$varnm, "log")) {
                ev <- exp(ev)
                rl_cf <- exp(rl_cf)
            }
        }

        c(pars,
          "dispersion" = unname(pars["sigma0"] / pars["mu0"]),
          "event_magnitude" = ev,
          "return_period" = rp,
          "probability_ratio" = rp_cf / rp,
          "abs_change_in_intensity" = (ev - rl_cf),
          "rel_change_in_intensity" = ((ev - rl_cf) / rl_cf) * 100
         )
    }

    # get best estimate from the observed data
    event_value <- mdl$x[mdl$ev_idx]
    mdl_res <- get_mdl_values(mdl, event_value, cov1, cov2, cov1_cf, cov2_cf)

    # rebuild the data used in the fit so that whole rows can be resampled
    mdl_df <- setNames(data.frame(mdl$x, mdl$cov1, mdl$cov2), c(mdl$varnm, mdl$covnm_1, mdl$covnm_2))
    n_obs <- nrow(mdl_df)

    # get bootstrap sample: refit the same model to rows resampled with replacement,
    # always evaluating at the originally observed event value
    set.seed(seed)
    boot_res <- vapply(seq_len(nsamp), function(i) {
        boot_df <- mdl_df[sample(seq_len(n_obs), n_obs, replace = TRUE), ]
        boot_mdl <- fit_ns(mdl$dist, mdl$type, boot_df, varnm = mdl$varnm, covnm_1 = mdl$covnm_1, covnm_2 = mdl$covnm_2, lower = mdl$lower)
        get_mdl_values(boot_mdl, event_value, cov1, cov2, cov1_cf, cov2_cf)
    }, FUN.VALUE = numeric(length(mdl_res)))

    # best estimate alongside the 95% bootstrap interval, one row per quantity
    boot_qq <- t(rbind("bestimate" = mdl_res, apply(boot_res, 1, quantile, c(0.025, 0.975), na.rm = TRUE)))
    if (!is.na(dp)) boot_qq <- round(boot_qq, dp)
    return(boot_qq)
}

#### **Check results against Climate Explorer**

In [79]:
# Test model: normal fit (fixed dispersion) to log10pr_ond with gmst as the only
# covariate. Logs are left untransformed (convert_logs = FALSE) so the output
# should give the same results as Climate Explorer.
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df,
           varnm = "log10pr_ond", covnm_1 = "gmst", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = FALSE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,2.11385,1.99395,2.26001
sigma0,0.19262,0.14105,0.24037
alpha,0.0756,-0.16343,0.27828
beta,0.0,0.0,0.0
dispersion,0.09112,0.06683,0.11084
event_magnitude,1.93781,1.93781,1.93781
return_period,9.27779,4.73671,36.1008
probability_ratio,0.51682,0.06253,4.53706
abs_change_in_intensity,0.08141,-0.17479,0.30212
rel_change_in_intensity,4.38518,-8.27384,18.47067


In [80]:
# Same test model, but fitting a lognormal distribution to pr_ond directly:
# dispersion, return period and probability ratio should be comparable to the
# log10 fit.
bootstrap_obs_results(
    fit_ns("lnorm", "fixeddisp", df,
           varnm = "pr_ond", covnm_1 = "gmst", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = FALSE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,4.86711,-0.08581,5.33036
sigma0,0.44359,0.32675,12.90564
alpha,0.17449,-0.60568,0.77568
beta,0.0,0.0,0.0
dispersion,0.09114,-3.57975,0.42307
event_magnitude,86.65775,86.65775,86.65775
return_period,9.2793,1.11803,35.59413
probability_ratio,0.51613,0.06257,10.59477
abs_change_in_intensity,14.84415,-655881100000000.0,43.76041
rel_change_in_intensity,20.67039,-100.0,102.39797


#### **Instability in lognormal fit**

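_The bootstrapped model fit is unstable when the lognormal distribution is used (the bootstrap confidence bounds become extreme), so prefer fitting a normal distribution to log-transformed precipitation and converting the results back to natural units._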

In [76]:
# lognormal fit of pr_ond on gmst + nino_ond, to illustrate the instability
bootstrap_obs_results(
    fit_ns("lnorm", "fixeddisp", df,
           varnm = "pr_ond", covnm_1 = "gmst", covnm_2 = "nino_ond", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,4.80534,-0.00591,46.01593
sigma0,0.38312,0.30291,183.5417
alpha,0.3111,-148.25073,3.50879
beta,0.18709,-7.72621,2.17995
dispersion,0.07973,-7.74113,1420148.0
event_magnitude,86.65775,86.65775,86.65775
return_period,7.26351,1.03753,27.0611
probability_ratio,0.31708,0.06809,133.6318
abs_change_in_intensity,24.57644,-inf,68.58379
rel_change_in_intensity,39.58751,-100.0,448.1868


In [77]:
# pr_ond ~ gmst + nino_ond
# (normal fit to log10pr_ond; results converted back from logs)
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df,
           varnm = "log10pr_ond", covnm_1 = "gmst", covnm_2 = "nino_ond", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,2.08734,1.96873,2.20702
sigma0,0.1665,0.12496,0.19207
alpha,0.13447,-0.07215,0.32123
beta,0.07967,0.0265,0.12575
dispersion,0.07977,0.05895,0.09303
event_magnitude,86.65774,86.65774,86.65774
return_period,7.35726,3.69988,30.5983
probability_ratio,0.31701,0.06059,2.08681
abs_change_in_intensity,24.47894,-17.13128,47.55208
rel_change_in_intensity,39.36863,-16.50587,121.6026


In [78]:
# Confirmation: a normal fit to the natural log of pr_ond gives almost the same
# fitted parameters as the lognormal fit, but without the model instability.
df[["lognpr_ond"]] <- log(df$pr_ond)
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df,
           varnm = "lognpr_ond", covnm_1 = "gmst", covnm_2 = "nino_ond", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,4.80631,4.5332,5.08185
sigma0,0.38336,0.28774,0.4423
alpha,0.30973,-0.16611,0.74022
beta,0.18346,0.06101,0.28957
dispersion,0.07976,0.05895,0.09304
event_magnitude,86.65775,86.65775,86.65775
return_period,7.36042,3.70003,30.59794
probability_ratio,0.31685,0.06059,2.08684
abs_change_in_intensity,24.48457,-17.13121,47.57549
rel_change_in_intensity,39.38124,-16.50581,121.73512


### **Results for CPC**

In [82]:
# Read the observations/covariate table, then pin the covariate values that
# define the factual (2022) and counterfactual climates.
df <- read.csv("data/gmst-nino-cpc.csv")

# factual GMST is the 2022 value; the counterfactual is 1.2 lower
gmst_2022 <- df[["gmst"]][df$year == 2022]
gmst_cf <- gmst_2022 - 1.2

# the Nino covariate is held at its 2022 value in both climates
ondnino_2022 <- df[["nino_ond"]][df$year == 2022]
ondnino_cf <- ondnino_2022

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Wrapper function to get the best estimate and bootstrapped confidence intervals
# for the results spreadsheet.
#
# Arguments:
#   mdl              fitted model (as produced by fit_ns); this function reads its
#                    $par, $x, $ev_idx, $cov1, $cov2, $varnm, $covnm_1, $covnm_2,
#                    $dist, $type and $lower elements
#   cov1, cov2       covariate values describing the factual climate
#   cov1_cf, cov2_cf covariate values describing the counterfactual climate
#   seed             value passed to set.seed() before resampling (note: this
#                    resets the global RNG state)
#   nsamp            number of bootstrap resamples
#   dp               number of decimal places to round to; NA disables rounding
#   convert_logs     if TRUE and mdl$varnm starts with "log10" (or "log"), the
#                    event magnitude and counterfactual return level are
#                    back-transformed with 10^ (or exp) before the changes in
#                    intensity are computed
#
# Returns a matrix with one row per quantity (the model parameters, dispersion,
# event_magnitude, return_period, probability_ratio, abs_change_in_intensity,
# rel_change_in_intensity) and columns "bestimate", "2.5%" and "97.5%".
bootstrap_obs_results <- function(mdl, cov1, cov2, cov1_cf = NA, cov2_cf = NA, seed = 101, nsamp = 1000, dp = 5, convert_logs = TRUE) {

    # internal function giving the required results for a single fitted model
    # (convert_logs is taken from the enclosing function)
    get_mdl_values <- function(mdl, ev, cov1, cov2, cov1_cf = NA, cov2_cf = NA) {

        pars <- mdl$par

        # return periods are evaluated at the event value passed in as `ev`,
        # rather than relying on a variable from the enclosing scope
        # NOTE(review): presumably map_to_u returns the probability of an event
        # at least as extreme as x - confirm against wwa_nonstationary_fitting.r
        rp <- 1 / map_to_u(mdl, x = ev, cov1 = cov1, cov2 = cov2)
        rp_cf <- 1 / map_to_u(mdl, x = ev, cov1 = cov1_cf, cov2 = cov2_cf)

        # value with the factual return period under the counterfactual covariates
        rl_cf <- map_from_u(1 / rp, mdl, cov1 = cov1_cf, cov2 = cov2_cf)

        # if using log values, convert back to natural units
        if (convert_logs) {
            if (startsWith(mdl$varnm, "log10")) {
                ev <- 10^ev
                rl_cf <- 10^rl_cf
            } else if (startsWith(mdl$varnm, "log")) {
                ev <- exp(ev)
                rl_cf <- exp(rl_cf)
            }
        }

        c(pars,
          "dispersion" = unname(pars["sigma0"] / pars["mu0"]),
          "event_magnitude" = ev,
          "return_period" = rp,
          "probability_ratio" = rp_cf / rp,
          "abs_change_in_intensity" = (ev - rl_cf),
          "rel_change_in_intensity" = ((ev - rl_cf) / rl_cf) * 100
         )
    }

    # get best estimate from the observed data
    event_value <- mdl$x[mdl$ev_idx]
    mdl_res <- get_mdl_values(mdl, event_value, cov1, cov2, cov1_cf, cov2_cf)

    # rebuild the data used in the fit so that whole rows can be resampled
    mdl_df <- setNames(data.frame(mdl$x, mdl$cov1, mdl$cov2), c(mdl$varnm, mdl$covnm_1, mdl$covnm_2))
    n_obs <- nrow(mdl_df)

    # get bootstrap sample: refit the same model to rows resampled with replacement,
    # always evaluating at the originally observed event value
    set.seed(seed)
    boot_res <- vapply(seq_len(nsamp), function(i) {
        boot_df <- mdl_df[sample(seq_len(n_obs), n_obs, replace = TRUE), ]
        boot_mdl <- fit_ns(mdl$dist, mdl$type, boot_df, varnm = mdl$varnm, covnm_1 = mdl$covnm_1, covnm_2 = mdl$covnm_2, lower = mdl$lower)
        get_mdl_values(boot_mdl, event_value, cov1, cov2, cov1_cf, cov2_cf)
    }, FUN.VALUE = numeric(length(mdl_res)))

    # best estimate alongside the 95% bootstrap interval, one row per quantity
    boot_qq <- t(rbind("bestimate" = mdl_res, apply(boot_res, 1, quantile, c(0.025, 0.975), na.rm = TRUE)))
    if (!is.na(dp)) boot_qq <- round(boot_qq, dp)
    return(boot_qq)
}

In [83]:
# pr_ond ~ gmst + nino_ond
# (normal fit to log10pr_ond; results converted back from logs)
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df,
           varnm = "log10pr_ond", covnm_1 = "gmst", covnm_2 = "nino_ond", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,2.08734,1.96873,2.20702
sigma0,0.1665,0.12496,0.19207
alpha,0.13447,-0.07215,0.32123
beta,0.07967,0.0265,0.12575
dispersion,0.07977,0.05895,0.09303
event_magnitude,86.65774,86.65774,86.65774
return_period,7.35726,3.69988,30.5983
probability_ratio,0.31701,0.06059,2.08681
abs_change_in_intensity,24.47894,-17.13128,47.55208
rel_change_in_intensity,39.36863,-16.50587,121.6026


In [84]:
# pr_mam ~ gmst
# (normal fit to log10pr_mam with gmst as the only model covariate; the cov2
# values are still passed to the wrapper, as in the other calls)
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df,
           varnm = "log10pr_mam", covnm_1 = "gmst", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,2.30264,2.18556,2.42336
sigma0,0.14889,0.11475,0.17262
alpha,-0.12474,-0.36445,0.09124
beta,0.0,0.0,0.0
dispersion,0.06466,0.04878,0.07624
event_magnitude,100.62882,100.62882,100.62882
return_period,10.88907,4.36724,52.71624
probability_ratio,6.93885,0.24862,3622.23391
abs_change_in_intensity,-36.53563,-150.36741,20.01821
rel_change_in_intensity,-26.63637,-59.90816,24.83328


In [89]:
# pr_24 ~ gmst
# (normal fit to log10pr24; the first row of df is excluded from the fit)
# NOTE(review): presumably the first row has no pr24 value - confirm against the data
bootstrap_obs_results(
    fit_ns("norm", "fixeddisp", df[-1, ],
           varnm = "log10pr24", covnm_1 = "gmst", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,2.93015,2.87098,3.0001
sigma0,0.08199,0.06249,0.09598
alpha,-0.0263,-0.14273,0.07319
beta,0.0,0.0,0.0
dispersion,0.02798,0.02122,0.03282
event_magnitude,588.2868,588.2868,588.2868
return_period,21.47824,8.05635,135.69069
probability_ratio,2.27386,0.0904,361.33541
abs_change_in_intensity,-42.06057,-267.51131,102.85892
rel_change_in_intensity,-6.6726,-31.25869,21.18943


In [91]:
# pet_24 ~ gmst (need to check model structure!)
# (normal "shift" fit to pet_24; the first row of df is excluded from the fit;
# varnm does not start with "log", so convert_logs has no effect here)
# NOTE(review): lower = TRUE is used here as for the precipitation fits - confirm
# that this is the intended setting for pet_24
bootstrap_obs_results(
    fit_ns("norm", "shift", df[-1, ],
           varnm = "pet_24", covnm_1 = "gmst", lower = TRUE),
    cov1 = gmst_2022, cov1_cf = gmst_cf,
    cov2 = ondnino_2022, cov2_cf = ondnino_cf,
    convert_logs = TRUE
)

Unnamed: 0,bestimate,2.5%,97.5%
mu0,104.5189,100.82351,107.8997
sigma0,4.37897,3.48799,5.04177
alpha,16.92236,11.39885,23.01569
beta,0.0,0.0,0.0
dispersion,0.0419,0.03329,0.04794
event_magnitude,121.41286,121.41286,121.41286
return_period,1.60274,1.22736,2.40824
probability_ratio,0.62393,0.41524,0.81477
abs_change_in_intensity,20.30683,13.67862,27.61882
rel_change_in_intensity,20.08469,12.69664,29.44625
