In [None]:
library(readr)

In [None]:
# Download the WASH Benefits example CSV from the tlverse-data repository and
# parse it with readr.
dat <- readr::read_csv(
  file = paste0(
    "https://raw.githubusercontent.com/tlverse/tlverse-data/master/",
    "wash-benefits/washb_data.csv"
  )
)

In [None]:
# Show the column specification readr used when parsing `dat`.
readr::spec(dat)

In [None]:
library(skimr)

# Display a per-column summary of `dat`.
skimr::skim(dat)

In [None]:
# Download the `ist_sample.csv` example file from the tlverse-handbook
# repository and parse it with readr.
ist <- readr::read_csv(
  file = paste0(
    "https://raw.githubusercontent.com/tlverse/tlverse-handbook/master/",
    "data/ist_sample.csv"
  )
)

In [None]:
# Show the column specification readr used when parsing `ist`.
readr::spec(ist)

In [None]:
# Display a per-column summary of `ist`.
skimr::skim(ist)

In [None]:
# Download the `NHEFS.csv` example file from the tlverse-handbook repository
# and parse it with readr.
nhefs_data <- readr::read_csv(
  file = paste0(
    "https://raw.githubusercontent.com/tlverse/tlverse-handbook/master/",
    "data/NHEFS.csv"
  )
)

In [None]:
# Show the column specification readr used when parsing `nhefs_data`.
readr::spec(nhefs_data)

In [None]:
# Display a per-column summary of `nhefs_data`.
skimr::skim(nhefs_data)

In [None]:
library(data.table)
library(origami)
library(knitr)
library(kableExtra)

# Read the WASH Benefits CSV with data.table's `fread()`, converting
# character columns to factors as they are loaded.
washb_data <- data.table::fread(
  input = paste0(
    "https://raw.githubusercontent.com/tlverse/tlverse-data/master/",
    "wash-benefits/washb_data.csv"
  ),
  stringsAsFactors = TRUE
)

In [None]:
# Restrict the data to (at most) the first `n_samp` rows, then render the
# first few rows as a LaTeX table.
n_samp <- 30
# `head()` never returns more rows than the table has, whereas indexing with
# `seq_len(n_samp)` pads the result with all-NA rows when the table is
# shorter than `n_samp`.
washb_data <- head(washb_data, n_samp)
head(washb_data) %>%
  kable(format = "latex")

In [None]:
library(sl3)
library(readr)
library(magrittr)

# Create a URL connection to `chspred.csv` on GitHub and parse it with readr;
# the first row of the file supplies the column names.
db_data <- url(
  "https://raw.githubusercontent.com/benkeser/sllecture/master/chspred.csv"
)
chspred <- db_data %>%
  read_csv(col_names = TRUE)

In [None]:
# Build the sl3 task: `mi` is the outcome and every other column of `chspred`
# is a covariate. Covariates are selected by name rather than by dropping the
# last column, so the outcome cannot end up among the predictors (and no real
# covariate is lost) if the column order of the CSV ever changes.
chspred_task <- make_sl3_Task(
  data = chspred,
  covariates = setdiff(colnames(chspred), "mi"),
  outcome = "mi"
)

In [None]:
# Candidate learners for the ensemble, each instantiated via `make_learner()`.
glm_learner <- make_learner(Lrnr_glm)

# glmnet learners at three values of `alpha`
lasso_learner <- make_learner(Lrnr_glmnet, alpha = 1)
ridge_learner <- make_learner(Lrnr_glmnet, alpha = 0)
enet_learner <- make_learner(Lrnr_glmnet, alpha = 0.5)

# fast GLM restricted to the covariates smoke, beta and waist
curated_glm_learner <- make_learner(
  Lrnr_glm_fast,
  covariates = c("smoke", "beta", "waist")
)

# remaining learners, all with their default settings
mean_learner <- make_learner(Lrnr_mean)
glm_fast_learner <- make_learner(Lrnr_glm_fast)
ranger_learner <- make_learner(Lrnr_ranger)
svm_learner <- make_learner(Lrnr_svm)
xgb_learner <- make_learner(Lrnr_xgboost)

In [None]:
# Screening pipeline: a correlation screener chained in front of the GLM
# learner.
screen_cor <- Lrnr_screener_correlation$new()
glm_pipeline <- Pipeline$new(screen_cor, glm_learner)

In [None]:
# Combine the screening pipeline and every individual learner into one Stack.
stack <- Stack$new(
  glm_pipeline,
  glm_learner,
  lasso_learner,
  ridge_learner,
  enet_learner,
  curated_glm_learner,
  mean_learner,
  glm_fast_learner,
  ranger_learner,
  svm_learner,
  xgb_learner
)

In [None]:
# Define the super learner over `stack`, train it on the task, and print the
# fitted object.
#
# The RNG is seeded first so that re-running this cell gives the same fit:
# training relies on random numbers (e.g. the random forest learner in the
# stack). NOTE(review): the seed value itself is arbitrary; cross-validation
# folds are presumably drawn during training as well -- confirm against the
# sl3 task documentation.
set.seed(34729)
sl <- Lrnr_sl$new(
  learners = stack
)
sl_fit <- sl$train(chspred_task)
sl_fit$print()

In [None]:
# Run `CV_lrnr_sl()` on the fitted super learner and the task, passing
# `loss_loglik_binomial` as the third argument, then display the result.
CVsl <- CV_lrnr_sl(sl_fit, chspred_task, loss_loglik_binomial)
CVsl

In [None]:
# Compute "permute"-type variable importance for the fitted super learner and
# plot it.
varimp <- importance(sl_fit, type = "permute")
importance_plot(
  varimp,
  main = "sl3 Variable Importance for Myocardial Infarction Prediction"
)