In [137]:
# Attach the packages used by this notebook. library() attaches only its first
# argument (a second positional argument is matched to `help`), so each
# package needs its own call.
suppressMessages({
    library("rwwa")
    library("lubridate")
})
# Akaike information criterion for a fitted model object.
#
# mdl: list-like fit whose `results` element holds `par` (the estimated
#      parameters) and `value` (presumably the minimised negative
#      log-likelihood, as stored for extRemes::fevd MLE fits -- TODO confirm).
# Returns 2 * (number of parameters) + 2 * results$value.
aic_fevd <- function(mdl) {
    fit <- mdl$results
    n_par <- length(fit$par)
    neg_loglik <- fit$value
    return(2 * n_par + 2 * neg_loglik)
}

# Region / station code: selects the input file and is used in figure file names and headings
rnm <- "CM"
# Name of the entry of the quantile vector `qq` that is used as the exceedance threshold
th <- "99%"

In [138]:
# Pick the input file from the code in `rnm`: two-character codes use the
# "ts-ES<code>_era5.csv" naming, anything else the "ghcnd_<code>.csv" naming
fnm <- if (nchar(rnm) == 2) {
    paste0("data/ts-ES", rnm, "_era5.csv")
} else {
    paste0("data/ghcnd_", rnm, ".csv")
}

# Size of one figure panel in pixels (figures are ires high and ires * nc wide)
ires <- 250

In [139]:
# Covariate table; the two columns are named year and gmst here
gmst <- read.table("ts/gmst.txt", col.names = c("year", "gmst"))
# Series selected by `fnm`; the code below uses its `time` and `pr` columns
df <- read.csv(fnm)

# Year and month are read from fixed character positions of the time stamp
# (characters 1-4 and 6-7; presumably an ISO "YYYY-MM-DD" string -- confirm)
df$year <- as.integer(substr(df$time, 1, 4))
df$month <- as.integer(substr(df$time, 6, 7))

# Attach the covariate through the shared `year` column (default inner join,
# so rows whose year is missing from gmst are dropped)
df <- merge(df, gmst)

# merge() sorts on the key (year) only and makes no promise about row order
# within a year, while the declustering and series plots further down treat
# rows as consecutive time steps -- so restore chronological order explicitly.
# Relies on `time` sorting chronologically as text (true for ISO time stamps).
df <- df[order(as.character(df$time), method = "radix"), ]
rownames(df) <- NULL

# Candidate thresholds: upper quantiles of pr over the years 1991-2020
qq <- quantile(df[(df$year >= 1991) & (df$year <= 2020), "pr"], c(0.95, .99, .995, .9975, .999))
round(t(data.frame(qq)), 1)

Unnamed: 0,95%,99%,99.5%,99.75%,99.9%
qq,7.5,14.2,17.2,19.5,22.3


In [140]:
# Decluster the series at the selected threshold (runs method with r = 1, i.e.
# clusters are separated by at least 1 day; clusterfun = "max") and keep the
# result as an extra column of df for easier use later
dc <- decluster(
    df$pr,
    threshold = qq[th],
    r = 1,
    method = "runs",
    clusterfun = "max"
)
df$pr_dc <- c(dc)

# Exploratory plots

In [141]:
# Rows at or above the selected threshold
df_x <- df[df$pr >= qq[th], ]

# Number of exceedances per year and per (month, year). The threshold is
# qq[th] throughout, so these summaries stay in step with df_x when `th` changes
n_peryear <- aggregate(df$pr >= qq[th], by = list("year" = df$year), FUN = "sum", simplify = TRUE)
n_perymonth <- aggregate(df$pr >= qq[th], by = list(df$month, df$year), FUN = "sum", simplify = TRUE)

# Mean count per calendar month across years (Group.1 is the month grouping of n_perymonth)
n_permonth <- aggregate(n_perymonth[, "x", drop = FALSE], by = list("cmonth" = n_perymonth$Group.1), FUN = "mean")
n_permonth$m_offset <- ((n_permonth$cmonth + 4) %% 12) + 1 # shift months so August = 1, ..., July = 12 (cut at start of August, driest part of year)

In [142]:
# Summary figure: declustered series, exceedance magnitudes and yearly exceedance counts.
# The file name carries the threshold label with "%" removed and "." replaced by "-".
nc <- 3
png(paste0("fig_gpd/", rnm, "_q", gsub("\\.", "-", gsub("%", "", th)), "_summary.png"), height = ires, width = ires * nc); {
    prep_window(c(1, nc))

    plot(dc, main = "Declustered exceedances")
    title("Declustered exceedances")

    plot(df_x$year, df_x$pr, main = "Exceedances")
    lines(df_x$year, fitted(loess(pr ~ year, df_x)), col = "blue", lty = "22", lwd = 2) # add a smoother through the size of the exceedances

    plot(n_peryear, main = "# exceedances per year", xlab = "Year")
    lines(n_peryear$year, fitted(loess(x ~ year, n_peryear)), col = "blue", lty = "22", lwd = 2) # add a smoother through number of exceedances

#     plot(n_permonth$m_offset, n_permonth$x, main = "# exceedances per calendar month", xaxt = "n", xlim = c(1,12), xlab = "Calendar month")
#     axis(1, at = 1:12, labels = c(8:12,1:7))
}; dev.off()

## Test sensitivity to threshold

In [143]:
# Threshold sensitivity figure for the point process model
nc <- 4
png(
    paste0("fig_gpd/", rnm, "_threshrange-plot-PP.png"),
    height = ires,
    width = ires * nc
)
{
    prep_window(c(1, nc))

    # Range of thresholds to scan: whole numbers bracketing the lowest and highest candidate quantile
    xlim <- c(floor(qq[1]), ceiling(qq[length(qq)]))

    # Fits over the threshold range (one interval per unit); candidate quantiles marked in red
    threshrange.plot(df$pr, r = xlim, nint = xlim[2] - xlim[1], set.panels = FALSE, type = "PP")
    abline(v = qq, col = adjustcolor("red", 0.5))

    # Mean residual life plot over the same range, with the quantiles marked and labelled
    mrlplot(df$pr, xlim = xlim)
    abline(v = qq, col = adjustcolor("red", 0.5))
    text(qq, y = 0, labels = names(qq), pos = 4, col = adjustcolor("red", 0.5))

    # Reading of the plots: anywhere up to ~24 should be pretty stable - somewhere between q99.5 and q99.75
    # For now the 99th percentile of the 1991-2020 climatology is used
}
dev.off()

In [144]:
# Threshold sensitivity figure for the generalised Pareto model
nc <- 4
png(
    paste0("fig_gpd/", rnm, "_threshrange-plot-GP.png"),
    height = ires,
    width = ires * nc
)
{
    prep_window(c(1, nc))

    # Range of thresholds to scan: whole numbers bracketing the lowest and highest candidate quantile
    xlim <- c(floor(qq[1]), ceiling(qq[length(qq)]))

    # Fits over the threshold range (one interval per unit); candidate quantiles marked in red
    threshrange.plot(df$pr, r = xlim, nint = xlim[2] - xlim[1], set.panels = FALSE, type = "GP")
    abline(v = qq, col = adjustcolor("red", 0.5))

    # Mean residual life plot over the same range, with the quantiles marked and labelled
    mrlplot(df$pr, xlim = xlim)
    abline(v = qq, col = adjustcolor("red", 0.5))
    text(qq, y = 0, labels = names(qq), pos = 4, col = adjustcolor("red", 0.5))

    # Reading of the plots: anywhere up to ~24 should be pretty stable - somewhere between q99.5 and q99.75
    # For now the 99th percentile of the 1991-2020 climatology is used
}
dev.off()

# Fit some example models

## GPD without declustering

In [145]:
# GP fit to df$pr above the threshold qq[th] (no declustering), with gmst passed via location.fun.
# NOTE(review): a GP model has threshold/scale/shape but no location parameter, so
# `location.fun = ~gmst` may have no effect for type = "GP" -- confirm against the
# extRemes::fevd documentation (scale.fun or threshold.fun may be what is intended)
mdl1 <- fevd(df$pr, data = df, threshold = qq[th], location.fun = ~gmst, type = "GP")

In [146]:
# Diagnostic figure for mdl1 (generalised Pareto, all points)
nc <- 4
png(
    paste0("fig_gpd/", rnm, "_q", gsub("\\.", "-", gsub("%", "", th)), "_GP-allpoints.png"),
    height = ires,
    width = ires * nc
)
{
    prep_window(c(1, nc), oma = c(0, 0, 2, 0))

    # Full series with the threshold drawn as a horizontal line
    plot(df$pr, main = "Exceedances")
    abline(h = qq[th], col = "magenta")

    # Q-Q diagnostic of the fit
    plot(mdl1, type = "qq2")
    title("Q-Q plot")

    # Return level plot
    plot(mdl1, type = "rl", rperiods = c(5, 20), main = "Return levels")

    # Overall heading, written in the outer margin
    mtext(paste0("Generalised Pareto, exceedances of ", th, " (no declustering) - ", rnm), side = 3, outer = TRUE, font = 2)
}
dev.off()

## GPD, declustering

In [147]:
# GP fit to the declustered series df$pr_dc above the threshold qq[th], with gmst passed via location.fun.
# NOTE(review): a GP model has threshold/scale/shape but no location parameter, so
# `location.fun = ~gmst` may have no effect for type = "GP" -- confirm against the
# extRemes::fevd documentation (scale.fun or threshold.fun may be what is intended)
mdl4 <- fevd(df$pr_dc, data = df, threshold = qq[th], location.fun = ~gmst, type = "GP")

In [148]:
# Diagnostic figure for mdl4 (generalised Pareto, declustered series)
nc <- 4
png(
    paste0("fig_gpd/", rnm, "_q", gsub("\\.", "-", gsub("%", "", th)), "_GP-declustered.png"),
    height = ires,
    width = ires * nc
)
{
    prep_window(c(1, nc), oma = c(0, 0, 2, 0))

    # Declustered series
    plot(dc, main = "Declustered points")
    title("Declustered points")

    # Q-Q diagnostic of the fit
    plot(mdl4, type = "qq2")
    title("Q-Q plot")

    # Return level plot
    plot(mdl4, type = "rl", rperiods = c(5, 20), main = "Return levels")

    # Overall heading, written in the outer margin
    mtext(paste0("Generalised Pareto, exceedances of ", th, " (declustered, 1 day between runs) - ", rnm), side = 3, outer = TRUE, font = 2)

}
dev.off()

## Point process model, no declustering

In [149]:
# Point process fit to df$pr above the threshold qq[th] (no declustering),
# with the location parameter modelled as a function of gmst
mdl2 <- fevd(df$pr, data = df, threshold = qq[th], location.fun = ~gmst, type = "PP")

In [150]:
# Diagnostic figure for mdl2 (point process model fitted to all points, no declustering)
nc <- 4
png(paste0("fig_gpd/", rnm, "_q", gsub("\\.", "-", gsub("%", "", th)), "_PP-allpoints.png"), height = ires, width = ires * nc); {
    prep_window(c(1, nc), oma = c(0, 0, 2, 0))

    # Full (undeclustered) series
    plot(df$pr, main = "Exceedances")

    # Q-Q diagnostic of the fit
    plot(mdl2, type = "qq2")
    title("Q-Q plot")

    # Return level plot
    plot(mdl2, type = "rl", rperiods = c(5, 20), main = "Return levels")

    # Z plot diagnostic
    plot(mdl2, type = "Zplot")
    title("Z plot")

    # Overall heading in the outer margin; mdl2 uses the undeclustered series,
    # so the label says so (it previously repeated the declustered wording)
    mtext(paste0("Point process, exceedances of ", th, " (no declustering) - ", rnm), side = 3, outer = TRUE, font = 2)

}; dev.off()

## Point process model, declustering

In [151]:
# Point process fit to the declustered series df$pr_dc above the threshold qq[th],
# with the location parameter modelled as a function of gmst
mdl3 <- fevd(df$pr_dc, data = df, threshold = qq[th], location.fun = ~gmst, type = "PP")

In [152]:
# Diagnostic figure for mdl3 (point process model, declustered series)
nc <- 4
png(
    paste0("fig_gpd/", rnm, "_q", gsub("\\.", "-", gsub("%", "", th)), "_PP-declustered.png"),
    height = ires,
    width = ires * nc
)
{
    prep_window(c(1, nc), oma = c(0, 0, 2, 0))

    # Declustered series
    plot(dc, main = "Declustered points")

    # Q-Q diagnostic of the fit
    plot(mdl3, type = "qq2")
    title("Q-Q plot")

    # Return level plot
    plot(mdl3, type = "rl", rperiods = c(5, 20), main = "Return levels")

    # Z plot diagnostic
    plot(mdl3, type = "Zplot")
    title("Z plot")

    # Overall heading, written in the outer margin
    mtext(paste0("Point process, exceedances of ", th, " (declustered, 1 day between runs) - ", rnm), side = 3, outer = TRUE, font = 2)
}
dev.off()

# Check model fits

- extract model parameters for comparison (not clear how to do this from CI object)

In [153]:
# The four fitted models, keyed by a display label, for side-by-side comparison.
# Open question: point process is always better than GP - but is this to do with
# how log-likelihood is calculated?
# NOTE(review): GP and PP likelihoods may not be built on the same data (excesses
# only vs. the whole point process), in which case their values are not directly
# comparable -- confirm before ranking GP against PP.
mlist <- list(
    "GPD, all" = mdl1,
    "PP, all" = mdl2,
    "GPD, declustered" = mdl4,
    "PP, declustered" = mdl3
)