In [None]:
library(tidyverse)

library(latex2exp)

In [None]:
# Notebook figure size: width and height are both set to 10 (repr options).
options(
    repr.plot.height = 10,
    repr.plot.width = 10
)

# Colorblind-friendly palette of eight hex colors.
cbPalette <- c(
    '#56B4E9', '#0072B2', '#999999', '#CC79A7',
    '#F0E442', '#009E73', '#E69F00', '#D55E00'
)

In [None]:
# Read 'Regression--All--Full_Set.csv' from the working directory into df1.
df1 <- read_csv(file = 'Regression--All--Full_Set.csv')

In [None]:
# Reshape df1 to long format, one row per stacked value:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb; the resulting columns are the same.
df1 <- df1 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id))

In [None]:
# Value against k for every Method in df1 except 'sgd'.
# Train_Test drives both line type and color; facet rows are Method,
# facet columns are Type. The y axis is limited to [-1, 1].
ggplot(
    data = filter(df1, Method != 'sgd'),
    mapping = aes(k, Value, linetype = Train_Test, colour = Train_Test)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method ~ Type) +
    scale_x_continuous(breaks = 6:10) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--All--Sampling.csv' from the working directory into df2.
df2 <- read_csv(file = 'Regression--All--Sampling.csv')

In [None]:
# Reshape df2 to long format, one row per stacked value:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb; the resulting columns are the same.
df2 <- df2 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id))

In [None]:
# Value against k for every Method in df2 except 'sgd'.
# Train_Test drives both line type and color; facet rows are Method,
# facet columns are Type. The y axis is limited to [-1, 1].
ggplot(
    data = filter(df2, Method != 'sgd'),
    mapping = aes(k, Value, linetype = Train_Test, colour = Train_Test)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method ~ Type) +
    scale_x_continuous(breaks = 9:16) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Subset--Full_Set.csv' from the working directory into df3.
df3 <- read_csv(file = 'Regression--Subset--Full_Set.csv')

In [None]:
# Reshape df3 to long format, one row per stacked value:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb; the resulting columns are the same.
df3 <- df3 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id))

In [None]:
# Value against k for every Method in df3 except 'sgd'.
# Train_Test drives both line type and color; facet rows are Method,
# facet columns are Type. The y axis is limited to [-1, 1].
ggplot(
    data = filter(df3, Method != 'sgd'),
    mapping = aes(k, Value, linetype = Train_Test, colour = Train_Test)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method ~ Type) +
    scale_x_continuous(breaks = 6:10) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Subset--Sampling.csv' from the working directory into df4.
df4 <- read_csv(file = 'Regression--Subset--Sampling.csv')

In [None]:
# Reshape df4 to long format, one row per stacked value:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb; the resulting columns are the same.
df4 <- df4 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id))

In [None]:
# Value against k for every Method in df4 except 'sgd'.
# Train_Test drives both line type and color; facet rows are Method,
# facet columns are Type. The y axis is limited to [-1, 1].
ggplot(
    data = filter(df4, Method != 'sgd'),
    mapping = aes(k, Value, linetype = Train_Test, colour = Train_Test)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method ~ Type) +
    scale_x_continuous(breaks = 9:16) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Tree--All--Full_Set.csv' from the working directory into df5.
df5 <- read_csv(file = 'Regression--Tree--All--Full_Set.csv')

In [None]:
# Reshape df5 to long format and spell out the Method abbreviations:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character;
#   * Method codes ('br', 'dtr', 'etr', 'etsr', 'rfr') are mapped to long names.
# The recoded values are built in a temporary Method1 column that then replaces
# Method, so Method ends up as the last column.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb.
df5 <- df5 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id),
           Method1 = case_when(Method == 'br' ~ 'bagging',
                               Method == 'dtr' ~ 'decisiontree',
                               Method == 'etr' ~ 'extratree',
                               Method == 'etsr' ~ 'extratrees',
                               Method == 'rfr' ~ 'randomforest',
                               # Keep any code not listed above as-is rather
                               # than silently turning it into NA.
                               TRUE ~ Method)) %>%
    select(-Method) %>%
    rename(Method = Method1)

In [None]:
# Value against k for df5: line type follows Train_Test, color follows Pruning.
# Facet rows are the Method/Pruning combinations, facet columns are Type.
# The y axis is limited to [-1, 1].
ggplot(
    data = df5,
    mapping = aes(k, Value, linetype = Train_Test, colour = Pruning)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method + Pruning ~ Type) +
    scale_x_continuous(breaks = 6:10) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Tree--All--Sampling.csv' from the working directory into df6.
df6 <- read_csv(file = 'Regression--Tree--All--Sampling.csv')

In [None]:
# Reshape df6 to long format and spell out the Method abbreviations:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character;
#   * Method codes ('br', 'dtr', 'etr', 'etsr', 'rfr') are mapped to long names.
# The recoded values are built in a temporary Method1 column that then replaces
# Method, so Method ends up as the last column.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb.
df6 <- df6 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id),
           Method1 = case_when(Method == 'br' ~ 'bagging',
                               Method == 'dtr' ~ 'decisiontree',
                               Method == 'etr' ~ 'extratree',
                               Method == 'etsr' ~ 'extratrees',
                               Method == 'rfr' ~ 'randomforest',
                               # Keep any code not listed above as-is rather
                               # than silently turning it into NA.
                               TRUE ~ Method)) %>%
    select(-Method) %>%
    rename(Method = Method1)

In [None]:
# Value against k for df6: line type follows Train_Test, color follows Pruning.
# Facet rows are the Method/Pruning combinations, facet columns are Type.
# The y axis is limited to [-1, 1].
ggplot(
    data = df6,
    mapping = aes(k, Value, linetype = Train_Test, colour = Pruning)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method + Pruning ~ Type) +
    scale_x_continuous(breaks = 9:16) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Tree--Subset--Full_Set.csv' from the working directory into df7.
df7 <- read_csv(file = 'Regression--Tree--Subset--Full_Set.csv')

In [None]:
# Reshape df7 to long format and spell out the Method abbreviations:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character;
#   * Method codes ('br', 'dtr', 'etr', 'etsr', 'rfr') are mapped to long names.
# The recoded values are built in a temporary Method1 column that then replaces
# Method, so Method ends up as the last column.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb.
df7 <- df7 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id),
           Method1 = case_when(Method == 'br' ~ 'bagging',
                               Method == 'dtr' ~ 'decisiontree',
                               Method == 'etr' ~ 'extratree',
                               Method == 'etsr' ~ 'extratrees',
                               Method == 'rfr' ~ 'randomforest',
                               # Keep any code not listed above as-is rather
                               # than silently turning it into NA.
                               TRUE ~ Method)) %>%
    select(-Method) %>%
    rename(Method = Method1)

In [None]:
# Value against k for df7: line type follows Train_Test, color follows Pruning.
# Facet rows are the Method/Pruning combinations, facet columns are Type.
# The y axis is limited to [-1, 1].
ggplot(
    data = df7,
    mapping = aes(k, Value, linetype = Train_Test, colour = Pruning)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method + Pruning ~ Type) +
    scale_x_continuous(breaks = 6:10) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)

In [None]:
# Read 'Regression--Tree--Subset--Sampling.csv' from the working directory into df8.
df8 <- read_csv(file = 'Regression--Tree--Subset--Sampling.csv')

In [None]:
# Reshape df8 to long format and spell out the Method abbreviations:
#   * every column from Train_Score through Test_EVS is stacked into `Value`;
#   * the former column name is split at '_' into `Train_Test` and `Type`;
#   * `k` is coerced to integer and `Id` to character;
#   * Method codes ('br', 'dtr', 'etr', 'etsr', 'rfr') are mapped to long names.
# The recoded values are built in a temporary Method1 column that then replaces
# Method, so Method ends up as the last column.
# A single plain mutate() is used for the coercions instead of the superseded
# mutate_at(vars(...), list(...)) scoped verb.
df8 <- df8 %>%
    pivot_longer(Train_Score:Test_EVS,
                 names_to = 'Type1', values_to = 'Value') %>%
    separate(Type1, into = c('Train_Test', 'Type'), sep = '_') %>%
    mutate(k = as.integer(k),
           Id = as.character(Id),
           Method1 = case_when(Method == 'br' ~ 'bagging',
                               Method == 'dtr' ~ 'decisiontree',
                               Method == 'etr' ~ 'extratree',
                               Method == 'etsr' ~ 'extratrees',
                               Method == 'rfr' ~ 'randomforest',
                               # Keep any code not listed above as-is rather
                               # than silently turning it into NA.
                               TRUE ~ Method)) %>%
    select(-Method) %>%
    rename(Method = Method1)

In [None]:
# Value against k for df8: line type follows Train_Test, color follows Pruning.
# Facet rows are the Method/Pruning combinations, facet columns are Type.
# The y axis is limited to [-1, 1].
ggplot(
    data = df8,
    mapping = aes(k, Value, linetype = Train_Test, colour = Pruning)
) +
    geom_point() +
    geom_line() +
    facet_grid(Method + Pruning ~ Type) +
    scale_x_continuous(breaks = 9:16) +
    scale_y_continuous(limits = c(-1, 1)) +
    scale_colour_manual(values = cbPalette)