-
Notifications
You must be signed in to change notification settings - Fork 1
/
supplement.Rmd
144 lines (125 loc) · 4.31 KB
/
supplement.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
---
title: "A new model to predict survival in patients with end-stage liver disease"
subtitle: >
Supplement
author:
- Sebastian Gibb^[mail@sebastiangibb.de]
- Thomas Berg
- Daniel Seehofer
- Berend Isermann
- Thorsten Kaiser
bibliography:
- "`r rprojroot::find_rstudio_root_file('analysis', 'bibliography', 'bibliography.bib')`"
output:
bookdown::html_document2:
toc: false
bookdown::word_document2:
keep_md: true
pandoc_args: # pandoc_args doesn't support r evaluation
- --csl=pandoc/csl/vancouver.csl
---
```{r knitr_setup, include = FALSE}
```
```{r knitr_setup_word, eval = !knitr::is_html_output(), include = FALSE}
knitr::opts_chunk$set(dpi = 1200, echo = FALSE)
```
```{r supplement_setup, echo = FALSE, message = FALSE}
library("ameld")
library("data.table")
library("targets")
library("mlr3")
library("mlr3misc")
library("mlr3proba")
library("mlr3viz")
library("viridis")
library("ggplot2")
```
# Survival plot
```{r survival.plot, fig.cap = "Survival plot. Kaplan-Meier survival estimate for the first 90 days of the analysed population.", fig.width = 8, fig.height = 5}
tar_load(zlog_data)
srv <- Surv(zlog_data$DaysAtRisk, zlog_data$Deceased)
col <- palette.colors(nlevels(zlog_data$MeldCategory))
srvfit <- survfit(srv ~ 1)
times <- c(2, 7, 14, 30, 90)
## calculate risk tables
sm <- summary(srvfit, times = times)
nrisk <- as.matrix(sm$n.risk)
ncumevents <- as.matrix(cumsum(sm$n.event))
rownames(nrisk) <- rownames(ncumevents) <- times
## keep old graphic parameters and restore them afterwards
old.par <- par(no.readonly = TRUE)
layout(matrix(1:3, nrow = 3), height = c(5, 1, 1))
plot_surv(
srvfit,
main = "Kaplan-Meier survival estimate to day 90",
times = times,
xmax = 90,
col = col[2],
cex = 1.2
)
par(mar = c(1.1, 5.1, 1.1, 2.1))
plot_table(
nrisk, at = times, main = "Number at risk",
xaxis = FALSE, cex.text = 1, ylabels = FALSE
)
par(mar = c(1.1, 5.1, 1.1, 2.1))
plot_table(
ncumevents, at = times, main = "Cumulative number of events",
xaxis = FALSE, cex.text = 1, ylabels = FALSE
)
par(old.par)
```
# Benchmark results of machine learning algorithms
```{r benchmark-plot, fig.cap = "Benchmark results of machine learning algorithms.", echo = FALSE, fig.width = 18, fig.height = 12}
tar_load(bmrk_results)
r <- bmrk_results$clone()
id <- grep("^scale", x = r$learners$learner_id, invert = TRUE, value = TRUE)
r$filter(task_id = "zlog_eldd", learner_id = id)
autoplot(r) +
geom_boxplot(aes(fill = learner_id)) +
geom_jitter(position = position_jitter(0.2)) +
scale_fill_viridis(discrete = TRUE) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
```
```{r benchmark-table, echo = FALSE}
agg <- bmrk_results$aggregate(msr("surv.cindex", id = "harrell"))
agg[, `:=`
(nr = NULL, resample_result = NULL, resampling_id = NULL, iters = NULL)
]
agg <- subset(agg, task_id == "zlog_eldd" & !grepl("^scale", learner_id))
agg[, task_id := NULL]
setorder(agg, -harrell)
knitr::kable(
agg,
col.names = c("Learner ID", "Harrell C"),
caption = paste0("Benchmark results. Ranking of the ", nrow(agg), " tested machine learning algorithms.")
)
```
# AUROC trend
```{r roc-plot, echo = FALSE, fig.width = 18, fig.height = 12, fig.cap = "Trend in AUROC. Trend in the area under the time-dependent receiver operating characteristic curve (AUROC) based on the nonparametric inverse probability of censoring weighting estimate (IPCW) for AMELD, MELD, MELD-Na and MELD-Plus7 as described in [@blanche2013]."}
tar_load(timeROC_MELD)
tar_load(timeROC_MELDNa)
tar_load(timeROC_MELDPlus7)
tar_load(timeROC_RCV)
m <- list(
AMELD = timeROC_RCV,
MELD = timeROC_MELD,
"MELD-Na" = timeROC_MELDNa,
"MELD-Plus7" = timeROC_MELDPlus7
)
plot_surv_roc_trend(m, xlab = "time [days]", col = viridisLite::viridis(5)[-2])
```
# Variable importance
```{r vimpbs, fig.width = 8, fig.height = 10, fig.cap = "Variable importance by frequency of bootstrap selections.", echo = FALSE, message = FALSE}
tar_load(bootrcv)
plot(bootrcv, what = "selected", cex = 0.5)
```
```{r vimprf, fig.width = 8, fig.height = 10, fig.cap = "Variable importance by logrank in random forest.", echo = FALSE, message = FALSE}
tar_load(rngr)
plot_dots(
sort(rngr$variable.importance),
main = "Variable importance (random forest)",
xlab = rngr$splitrule
)
```
# References