-
Notifications
You must be signed in to change notification settings - Fork 21
/
model-validation.R
87 lines (83 loc) · 3.23 KB
/
model-validation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#' Evaluate nowcasts using proper scoring rules
#'
#' Acts as a wrapper to [scoringutils::score()]. In particular,
#' handling filtering nowcast summary output and linking this output to
#' observed data. See the documentation for the `scoringutils` package for more
#' on forecast scoring.
#'
#' @param nowcast A posterior nowcast or posterior prediction as returned by
#' [summary.epinowcast()], when used on the output of [epinowcast()].
#'
#' @param latest_obs A `data.frame` of the latest available observations as
#' produced by [enw_latest_data()] or otherwise.
#'
#' @param log Logical, defaults to FALSE. Should scores be calculated on the
#' log scale (with a 0.01 shift) for both observations and nowcasts. Scoring in
#' this way can be thought of as a relative score vs the more usual absolute
#' measure. It may be useful when targets are on very different scales or when
#' the forecaster is more interested in good all round performance versus good
#' performance for targets with large values.
#'
#' @param check Logical, defaults to FALSE. Should
#' [scoringutils::check_forecasts()] be used to check input nowcasts.
#'
#' @param round_to Integer defaults to 3. Number of digits to round scoring
#' output to.
#'
#' @param ... Additional arguments passed to [scoringutils::score()].
#'
#' @return A `data.table` as returned by [scoringutils::score()].
#' @family modelvalidation
#' @importFrom data.table setnames
#' @export
#' @examplesIf interactive()
#' library(data.table)
#' library(scoringutils)
#'
#' # Summarise example nowcast
#' nowcast <- enw_example("nowcast")
#' summarised_nowcast <- summary(nowcast)
#'
#' # Load latest available observations
#' obs <- enw_example("observations")
#'
#' # Keep the last 7 days of data
#' obs <- obs[reference_date > (max(reference_date) - 7)]
#'
#' # score on the absolute scale
#' scores <- enw_score_nowcast(summarised_nowcast, obs)
#' summarise_scores(scores, by = "location")
#'
#' # score overall on a log scale
#' log_scores <- enw_score_nowcast(summarised_nowcast, obs, log = TRUE)
#' summarise_scores(log_scores, by = "location")
enw_score_nowcast <- function(nowcast, latest_obs, log = FALSE,
check = FALSE, round_to = 3, ...) {
if (!requireNamespace("scoringutils")) {
stop("scoringutils is required for this function to work")
}
long_nowcast <- enw_quantiles_to_long(nowcast)
if (!is.null(long_nowcast[["mad"]])) {
long_nowcast[, "mad" := NULL]
}
latest_obs <- coerce_dt(latest_obs)
data.table::setnames(latest_obs, "confirm", "true_value", skip_absent = TRUE)
latest_obs[, report_date := NULL]
cols <- intersect(colnames(nowcast), colnames(latest_obs))
long_nowcast <- merge(long_nowcast, latest_obs, by = cols)
if (log) {
cols <- c("true_value", "prediction")
long_nowcast[, (cols) := purrr::map(.SD, ~ log(. + 0.01)), .SDcols = cols]
}
long_nowcast[, prediction := as.numeric(prediction)]
long_nowcast[, true_value := as.numeric(true_value)]
if (check) {
scoringutils::check_forecasts(long_nowcast)
}
scores <- scoringutils::score(long_nowcast, ...)
numeric_cols <- colnames(scores)[sapply(scores, is.numeric)]
scores <- scores[, (numeric_cols) := lapply(.SD, signif, digits = round_to),
.SDcols = numeric_cols
]
return(scores[])
}