<strong>Comparaison des méthodes d'apprentissage statistique</strong>

In [None]:
library(tidyr)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(magrittr)
library(stringr)
library(tools)
library(patchwork)

In [None]:
# Default size of the figures rendered in the notebook.
# NOTE(review): presumably read by the Jupyter R kernel (repr package) and
# expressed in inches - confirm.
options(
    repr.plot.width = 16,
    repr.plot.height = 12
)

In [None]:
get_dataframes <- function(estimator, effect, n_sims) {
    ## Read every CSV file of the working directory whose name starts with
    ## "<estimator>_<effect>_<digit>" and merge them into one long data
    ## frame, which is the layout ggplot2 expects.
    ## arguments:
    ##    estimator: estimator name, as a character string
    ##    effect: effect label, as a character string
    ##    n_sims: vector holding one label per matching file, given in the
    ##            order list.files() returns the files (alphabetical)
    ## returns:
    ##    a data frame with the columns `library` (name of the source
    ##    column), `bias` (value of the cell) and `n` (label of the file the
    ##    value was read from)
    pattern <- paste0("^", estimator, "_", effect, "_", "[0-9]")
    my_files <- list.files(pattern = pattern, ignore.case = TRUE)

    ## Fail early with an explicit message instead of mislabelling rows.
    if (length(my_files) == 0) {
        stop("no file matching '", pattern, "' found in ", getwd(),
             call. = FALSE)
    }
    if (length(my_files) != length(n_sims)) {
        stop("found ", length(my_files), " file(s) matching '", pattern,
             "' but `n_sims` has ", length(n_sims), " element(s)",
             call. = FALSE)
    }

    my_data <- lapply(my_files, read.csv)

    ## Label each row with the n_sims entry of its file, using the number of
    ## rows actually read rather than assuming file i holds n_sims[i] rows.
    rows_per_file <- vapply(my_data, nrow, integer(1))
    n_per_row <- rep(n_sims, times = rows_per_file)

    big_data <- bind_rows(my_data)
    source_cols <- colnames(big_data)

    data_long <- pivot_longer(big_data,
                              cols = everything(),
                              names_to = "library",
                              values_to = "bias")
    ## pivot_longer() emits the cells row by row, so every source row
    ## contributes one entry per column, in column order.
    data_long$n <- rep(n_per_row, each = length(source_cols))

    ## Restore the column-by-column row order and the plain data.frame class
    ## that the previous gather_()-based implementation returned.
    data_long <- data_long[order(match(data_long$library, source_cols)), ]
    as.data.frame(data_long)
}

In [None]:
plot_biases <- function(estimator, effect, n_sims, main) {
    ## Draw one boxplot of `bias` per `library`, with one facet per value
    ## of `n`, from the files selected by get_dataframes().
    ## arguments:
    ##    estimator: estimator name, forwarded to get_dataframes()
    ##    effect: effect label, forwarded to get_dataframes()
    ##    n_sims: vector forwarded to get_dataframes(); its values end up in
    ##            the `n` column used for the facets
    ##    main: title displayed above the plot
    ## returns:
    ##    the ggplot object (nothing is drawn until it is printed)
    data_long <- get_dataframes(estimator, effect, n_sims)

    ggplot(data_long, aes(y = bias, fill = library)) +
    geom_boxplot() +
    ## Dashed reference line at a bias of zero.
    geom_hline(yintercept = 0, linetype = "dashed",
               color = "black", size = 1) +
    facet_wrap(~n, nrow = 2) +
    ## `main` used to be accepted but silently ignored; show it as the title.
    ggtitle(main) +
    # guides(fill = guide_legend(ncol = 2)) +
    ## The legend is drawn inside the plotting area, anchored by its
    ## top-right corner at the right edge, 45% of the height.
    ## NOTE(review): presumably meant to fill the empty facet slot left by
    ## five panels on two rows - confirm.
    theme(strip.text.x = element_text(size = 12, face = "bold"),
          legend.position = c(1., .45),
          legend.justification = c("right", "top"),
          legend.box.just = "right",
          legend.margin = margin(6, 6, 6, 6),
          legend.title = element_text(size = 13, face = "bold"),
          legend.text = element_text(size = 12))
}

In [None]:
# Values passed as `n_sims` to every plot_biases() call below: 100 to 500
# in steps of 100.
n_sims <- seq(100, 500, by = 100)

# Données originales

In [None]:
# "sde" estimator, "ws" files: build the figure, display it, then save it.
p <- plot_biases(
    estimator = "sde",
    effect = "ws",
    n_sims = n_sims,
    main = "workshop sur SDE"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sde_ws.png", plot = p)

In [None]:
# "sie" estimator, "ws" files: build the figure, display it, then save it.
p <- plot_biases(
    estimator = "sie",
    effect = "ws",
    n_sims = n_sims,
    main = "workshop sur SIE"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sie_ws.png", plot = p)

# Données avec problème de positivité

In [None]:
# "sde" estimator, "ws_posit" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sde",
    effect = "ws_posit",
    n_sims = n_sims,
    main = "workshop sur SDE et positivité"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sde_ws_posit.png", plot = p)

In [None]:
# "sie" estimator, "ws_posit" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sie",
    effect = "ws_posit",
    n_sims = n_sims,
    main = "workshop sur SIE et positivité"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sie_ws_posit.png", plot = p)

# Variables baseline continues

In [None]:
# "sde" estimator, "ws_quant" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sde",
    effect = "ws_quant",
    n_sims = n_sims,
    main = "workshop sur SDE et variables baseline continues\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sde_ws_quant.png", plot = p)

In [None]:
# "sie" estimator, "ws_quant" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sie",
    effect = "ws_quant",
    n_sims = n_sims,
    main = "workshop sur SIE et variables baseline continues\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sie_ws_quant.png", plot = p)

# Modèle Rudolph

In [None]:
# "sde" estimator, "ws_rud" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sde",
    effect = "ws_rud",
    n_sims = n_sims,
    main = "workshop sur SDE et modèle Rudolph\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sde_ws_rud.png", plot = p)

In [None]:
# "sie" estimator, "ws_rud" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sie",
    effect = "ws_rud",
    n_sims = n_sims,
    main = "workshop sur SIE et modèle Rudolph\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sie_ws_rud.png", plot = p)

# Variable de confusion continue

In [None]:
# "sde" estimator, "ws_conf" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sde",
    effect = "ws_conf",
    n_sims = n_sims,
    main = "workshop sur SDE et variable intermédiaire continue\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sde_ws_conf.png", plot = p)

In [None]:
# "sie" estimator, "ws_conf" files: build the figure, display it, save it.
p <- plot_biases(
    estimator = "sie",
    effect = "ws_conf",
    n_sims = n_sims,
    main = "workshop sur SIE et variable intermédiaire continue\n"
)
print(p)
# The plot is passed explicitly rather than picked up through last_plot().
ggsave(filename = "sie_ws_conf.png", plot = p)