<strong>Comparaison des méthodes d'apprentissage statistique</strong>

In [None]:
library(tidyr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(magrittr)
library(stringr)
library(tools)
library(patchwork)

In [None]:
# Size used by the notebook front end when rendering plots inline.
options(
    repr.plot.height = 12,
    repr.plot.width = 16
)

In [None]:
get_dataframes <- function(estimator, effect, n_sims) {
    ## Merge the CSV files of one estimator/effect pair into a single
    ## long-format data frame that ggplot2 can consume.
    ## Arguments:
    ##    estimator: estimator name as a character string (file-name prefix)
    ##    effect: effect name as a character string (second file-name part)
    ##    n_sims: numeric vector, any length; entry i labels the rows of the
    ##            i-th matching file. The labels only line up with the data
    ##            if the i-th file holds exactly n_sims[i] rows.
    ## Returns:
    ##    a data frame with columns `library` (name of the source column),
    ##    `bias` (cell value) and `n` (label taken from n_sims).
    ## Files are looked up in the current working directory and read in the
    ## order returned by list.files(), so n_sims must follow that order.
    pattern <- paste0("^", estimator, "_", effect, "_", "[0-9]")
    my_files <- list.files(pattern = pattern, ignore.case = TRUE)
    if (length(my_files) == 0) {
        stop("no file matching '", pattern, "' found in ", getwd(),
             call. = FALSE)
    }
    my_data <- lapply(my_files, read.csv)

    big_data <- bind_rows(my_data)

    ## gather() without a column selection stacks every column, one whole
    ## column after the other. This column-by-column order is what lets the
    ## label vector below be recycled once per source column.
    data_long <- gather(big_data, key = "library", value = "bias")

    ## One label per row of the stacked files: n_sims[i] repeated n_sims[i]
    ## times, for however many entries n_sims has.
    data_long$n <- rep(n_sims, times = n_sims)

    data_long
}

In [None]:
plot_biases <- function(estimator, effect, n_sims, main) {
    ## Draw boxplots of the bias, one box per `library` value, in one facet
    ## per value of `n`, with a dashed reference line at zero bias.
    ## Arguments:
    ##    estimator, effect, n_sims: forwarded unchanged to get_dataframes()
    ##    main: plot title as a character string
    ## Returns:
    ##    the ggplot object (not printed here).
    data_long <- get_dataframes(estimator, effect, n_sims)

    p <- ggplot(data_long, aes(y = bias, fill = library)) +
        geom_boxplot() +
        geom_hline(yintercept = 0, linetype = "dashed",
                   color = "black", size = 1) +
        facet_wrap(~n, nrow = 2) +
        ## `main` used to be accepted but silently ignored.
        labs(title = main) +
        ## Optional two-column legend:
        ## guides(fill = guide_legend(ncol = 2)) +
        theme(strip.text.x = element_text(size = 12, face = "bold"),
              ## Legend anchored by its top-right corner at the right edge,
              ## 45% of the way up the plot.
              legend.position = c(1., .45),
              legend.justification = c("right", "top"),
              legend.box.just = "right",
              legend.margin = margin(6, 6, 6, 6),
              legend.title = element_text(size = 13, face = "bold"),
              legend.text = element_text(size = 12))

    p
}

In [None]:
# Labels passed to plot_biases(): 100, 200, 300, 400, 500.
n_sims <- seq(100, 500, by = 100)

# Données originales

## g-computation

In [None]:
p <- plot_biases("gcomp", "sde", n_sims, "g-computation sur SDE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("gcomp", "sie", n_sims, "g-computation sur SIE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_sl_sie.png", plot = p)

## IPTW

In [None]:
p <- plot_biases("iptw", "sde", n_sims, "IPTW sur SDE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("iptw", "sie", n_sims, "IPTW sur SIE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_sl_sie.png", plot = p)

## TMLE

In [None]:
p <- plot_biases("tmle", "sde", n_sims, "TMLE sur SDE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("tmle", "sie", n_sims, "TMLE sur SIE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_sl_sie.png", plot = p)

## one-step

In [None]:
p <- plot_biases("onestep", "sde", n_sims, "one-step sur SDE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("onestep", "sie", n_sims, "one-step sur SIE")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_sl_sie.png", plot = p)

# Données avec problème de positivité

## g-computation

In [None]:
p <- plot_biases("gcomp", "sde_posit", n_sims, "g-computation sur SDE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_posit_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("gcomp", "sie_posit", n_sims, "g-computation sur SIE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_posit_sl_sie.png", plot = p)

## IPTW

In [None]:
p <- plot_biases("iptw", "sde_posit", n_sims, "IPTW sur SDE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_posit_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("iptw", "sie_posit", n_sims, "IPTW sur SIE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_posit_sl_sie.png", plot = p)

## TMLE

In [None]:
p <- plot_biases("tmle", "sde_posit", n_sims, "TMLE sur SDE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_posit_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("tmle", "sie_posit", n_sims, "TMLE sur SIE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_posit_sl_sie.png", plot = p)

## one-step

In [None]:
p <- plot_biases("onestep", "sde_posit", n_sims, "one-step sur SDE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_posit_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("onestep", "sie_posit", n_sims, "one-step sur SIE\net problème de positivité")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_posit_sl_sie.png", plot = p)

# Variables baseline continues

## g-computation

In [None]:
p <- plot_biases("gcomp", "sde_quant", n_sims, "g-computation sur SDE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_quant_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("gcomp", "sie_quant", n_sims, "g-computation sur SIE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_quant_sl_sie.png", plot = p)

## IPTW

In [None]:
p <- plot_biases("iptw", "sde_quant", n_sims, "IPTW sur SDE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_quant_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("iptw", "sie_quant", n_sims, "IPTW sur SIE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_quant_sl_sie.png", plot = p)

## TMLE

In [None]:
p <- plot_biases("tmle", "sde_quant", n_sims, "TMLE sur SDE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_quant_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("tmle", "sie_quant", n_sims, "TMLE sur SIE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_quant_sl_sie.png", plot = p)

## one-step

In [None]:
p <- plot_biases("onestep", "sde_quant", n_sims, "one-step sur SDE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_quant_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("onestep", "sie_quant", n_sims, "one-step sur SIE\net variables baseline quantitatives")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_quant_sl_sie.png", plot = p)

# Modèle Rudolph

## g-computation

In [None]:
p <- plot_biases("gcomp", "sde_rud", n_sims, "g-computation sur SDE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_rud_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("gcomp", "sie_rud", n_sims, "g-computation sur SIE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_rud_sl_sie.png", plot = p)

## IPTW

In [None]:
p <- plot_biases("iptw", "sde_rud", n_sims, "IPTW sur SDE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_rud_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("iptw", "sie_rud", n_sims, "IPTW sur SIE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_rud_sl_sie.png", plot = p)

## TMLE

In [None]:
p <- plot_biases("tmle", "sde_rud", n_sims, "TMLE sur SDE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_rud_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("tmle", "sie_rud", n_sims, "TMLE sur SIE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_rud_sl_sie.png", plot = p)

## one-step

In [None]:
p <- plot_biases("onestep", "sde_rud", n_sims, "one-step sur SDE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_rud_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("onestep", "sie_rud", n_sims, "one-step sur SIE\net modèle complexe (Rudolph)")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/onestep_rud_sl_sie.png", plot = p)

# Variable de confusion continue

## g-computation

In [None]:
p <- plot_biases("gcomp", "sde_conf", n_sims, "g-computation sur SDE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_conf_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("gcomp", "sie_conf", n_sims, "g-computation sur SIE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/gcomp_conf_sl_sie.png", plot = p)

## IPTW

In [None]:
p <- plot_biases("iptw", "sde_conf", n_sims, "IPTW sur SDE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_conf_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("iptw", "sie_conf", n_sims, "IPTW sur SIE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/iptw_conf_sl_sie.png", plot = p)

## TMLE

In [None]:
p <- plot_biases("tmle", "sde_conf", n_sims, "TMLE sur SDE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_conf_sl_sde.png", plot = p)

In [None]:
p <- plot_biases("tmle", "sie_conf", n_sims, "TMLE sur SIE\net variable de confusion quantitative")
print(p)
# Save `p` explicitly rather than whatever last_plot() happens to hold.
# To force the notebook size, add: width = 16, height = 12, units = "in"
ggsave("C:/Users/luis_/Downloads/tmle_conf_sl_sie.png", plot = p)