# Comparaison des méthodes d'apprentissage statistique

In [None]:
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(magrittr)
library(stringr)
library(tools)
library(patchwork)

In [None]:
# Set the repr.plot.* options to 12 x 8.
# NOTE(review): presumably the figure size used by the notebook kernel when
# rendering plots - confirm.
options(list(repr.plot.width = 12, repr.plot.height = 8))

In [None]:
#' Draw one boxplot of biases per learner library and print it.
#'
#' The library labels are chosen from the layout of `data`:
#' - exactly 5 columns: glm, glm_interaction, ranger, bayes_glm, speed_lm;
#' - otherwise, when `effect` contains "TMLE" or "one-step", the first four
#'   columns are labelled bayes_glm, glm_fast, ranger, nnet;
#' - otherwise the first four columns are labelled glm, glm_interaction,
#'   ranger, bayes_glm.
#'
#' @param data Data frame (or matrix) with one column of biases per library;
#'   every row contributes one observation to each library's boxplot.
#' @param n_sim Value displayed after "n = " in the plot title.
#' @param effect Character scalar inserted in the title; also used to pick
#'   the label set for 4-column inputs (see above).
#' @return The ggplot object, invisibly. The plot is printed as a side effect.
plot_biases <- function(data, n_sim, effect) {
  if (ncol(data) < 4) {
    stop("`data` must have at least 4 columns of biases.", call. = FALSE)
  }

  # Scalar condition, so use the short-circuit `||` rather than elementwise `|`.
  is_tmle_like <- grepl("TMLE", effect, fixed = TRUE) ||
    grepl("one-step", effect, fixed = TRUE)

  if (ncol(data) == 5) {
    libraries <- c("glm", "glm_interaction", "ranger", "bayes_glm", "speed_lm")
  } else if (is_tmle_like) {
    libraries <- c("bayes_glm", "glm_fast", "ranger", "nnet")
  } else {
    libraries <- c("glm", "glm_interaction", "ranger", "bayes_glm")
  }

  # Size every column from the actual row count instead of `n_sim`: when the
  # two differ, data.frame() would either fail or silently recycle the labels
  # and attach observations to the wrong library.
  n_rows <- nrow(data)
  data_ggp <- data.frame(
    x = rep(seq_len(n_rows), times = length(libraries)),
    y = unlist(
      lapply(seq_along(libraries), function(j) data[, j]),
      use.names = FALSE
    ),
    librairie = rep(libraries, each = n_rows)
  )

  p <- ggplot(data_ggp, aes(x, y, fill = librairie)) +
    geom_boxplot() +
    # Dashed reference line at zero bias.
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
    # lines; `size` is kept so older ggplot2 versions behave the same.
    geom_hline(yintercept = 0, linetype = "dashed",
               color = "black", size = 1) +
    ggtitle(paste0("Biais sur ", effect, " où n = ", n_sim)) +
    theme(legend.title = element_text(size = 16, face = "bold"),
          legend.text = element_text(size = 15),
          plot.title = element_text(size = 18, hjust = 0.5))
  print(p)
  invisible(p)
}

In [None]:
# Reference constants; neither is used in the visible part of this file.
# NOTE(review): presumably the true direct (`dir`) and indirect (`ind`)
# effects that the biases are measured against - confirm.
# `dir` shares its name with base::dir(); consider a more explicit name.
dir <- 0.124793
ind <- 0.3026875

In [None]:
# Values of n iterated over by every loop below: 100, 200, ..., 500.
n_sims <- seq(100, 500, by = 100)

## Données originales

### g-computation

In [None]:
# For each n in n_sims, read gcomp_sde_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sde_", n_sim, ".csv"))
  plot_biases(data, n_sim, "g-computation sur SDE")
}

In [None]:
# For each n in n_sims, read gcomp_sie_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sie_", n_sim, ".csv"))
  plot_biases(data, n_sim, "g-computation sur SIE")
}

### IPTW

In [None]:
# For each n in n_sims, read iptw_sde_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sde_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SDE")
}

In [None]:
# For each n in n_sims, read iptw_sie_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sie_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SIE")
}

### TMLE

In [None]:
# For each n in n_sims, read tmle_sde_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sde_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SDE")
}

In [None]:
# For each n in n_sims, read tmle_sie_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sie_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SIE")
}

### one-step

In [None]:
# For each n in n_sims, read onestep_sde_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sde_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SDE")
}

In [None]:
# For each n in n_sims, read onestep_sie_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sie_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SIE")
}

## Données avec problème de positivité

### g-computation

In [None]:
# For each n in n_sims, read gcomp_sde_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sde_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SDE avec problème de positivité")
}

In [None]:
# For each n in n_sims, read gcomp_sie_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sie_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SIE avec problème de positivité")
}

### IPTW

In [None]:
# For each n in n_sims, read iptw_sde_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sde_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SDE avec problème de positivité")
}

In [None]:
# For each n in n_sims, read iptw_sie_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sie_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SIE avec problème de positivité")
}

### TMLE

In [None]:
# For each n in n_sims, read tmle_sde_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sde_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SDE avec problème de positivité")
}

In [None]:
# For each n in n_sims, read tmle_sie_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sie_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SIE avec problème de positivité")
}

### one-step

In [None]:
# For each n in n_sims, read onestep_sde_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sde_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SDE avec problème de positivité")
}

In [None]:
# For each n in n_sims, read onestep_sie_posit_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sie_posit_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SIE avec problème de positivité")
}

## Variables baseline continues

### g-computation

In [None]:
# For each n in n_sims, read gcomp_sde_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sde_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SDE avec variables baseline continues")
}

In [None]:
# For each n in n_sims, read gcomp_sie_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sie_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SIE avec variables baseline continues")
}

### IPTW

In [None]:
# For each n in n_sims, read iptw_sde_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sde_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SDE avec variables baseline continues")
}

In [None]:
# For each n in n_sims, read iptw_sie_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sie_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SIE avec variables baseline continues")
}

### TMLE

In [None]:
# For each n in n_sims, read tmle_sde_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sde_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SDE avec variables baseline continues")
}

In [None]:
# For each n in n_sims, read tmle_sie_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sie_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SIE avec variables baseline continues")
}

### one-step

In [None]:
# For each n in n_sims, read onestep_sde_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sde_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "one-step sur SDE avec variables baseline continues")
}

In [None]:
# For each n in n_sims, read onestep_sie_quant_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sie_quant_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "one-step sur SIE avec variables baseline continues")
}

## Données complexes (Rudolph)

### g-computation

In [None]:
# For each n in n_sims, read gcomp_sde_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sde_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "g-computation sur SDE avec modèle complexe")
}

In [None]:
# For each n in n_sims, read gcomp_sie_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sie_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "g-computation sur SIE avec modèle complexe")
}

### IPTW

In [None]:
# For each n in n_sims, read iptw_sde_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sde_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SDE avec modèle complexe")
}

In [None]:
# For each n in n_sims, read iptw_sie_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sie_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "IPTW sur SIE avec modèle complexe")
}

### TMLE

In [None]:
# For each n in n_sims, read tmle_sde_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sde_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SDE avec modèle complexe")
}

In [None]:
# For each n in n_sims, read tmle_sie_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sie_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "TMLE sur SIE avec modèle complexe")
}

### one-step

In [None]:
# For each n in n_sims, read onestep_sde_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sde_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SDE avec modèle complexe")
}

In [None]:
# For each n in n_sims, read onestep_sie_rud_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sie_rud_", n_sim, ".csv"))
  plot_biases(data, n_sim, "one-step sur SIE avec modèle complexe")
}

## Variable de confusion continue (à faire ?)

### g-computation

In [None]:
# For each n in n_sims, read gcomp_sde_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sde_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SDE avec variable de confusion continue")
}

In [None]:
# For each n in n_sims, read gcomp_sie_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("gcomp_sie_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "g-computation sur SIE avec variable de confusion continue")
}

### IPTW

In [None]:
# For each n in n_sims, read iptw_sde_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sde_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "IPTW sur SDE avec variable de confusion continue")
}

In [None]:
# For each n in n_sims, read iptw_sie_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("iptw_sie_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "IPTW sur SIE avec variable de confusion continue")
}

### TMLE

In [None]:
# For each n in n_sims, read tmle_sde_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sde_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "TMLE sur SDE avec variable de confusion continue")
}

In [None]:
# For each n in n_sims, read tmle_sie_conf_<n>.csv and plot its biases.
for (n_sim in n_sims) {
  data <- read.csv(paste0("tmle_sie_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "TMLE sur SIE avec variable de confusion continue")
}

### one-step

In [None]:
# For each n in n_sims, read onestep_sde_conf_<n>.csv and plot its biases.
# The title includes "avec", matching the other "variable de confusion
# continue" plot titles in this file.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sde_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "one-step sur SDE avec variable de confusion continue")
}

In [None]:
# For each n in n_sims, read onestep_sie_conf_<n>.csv and plot its biases.
# The title includes "avec", matching the other "variable de confusion
# continue" plot titles in this file.
for (n_sim in n_sims) {
  data <- read.csv(paste0("onestep_sie_conf_", n_sim, ".csv"))
  plot_biases(data, n_sim,
              "one-step sur SIE avec variable de confusion continue")
}