In [2]:
source("utils/plot.R")

In [3]:
# Notebook-wide defaults for rendered plot width/height and plot scaling.
options(
    repr.plot.width = 4,
    repr.plot.height = 3,
    jupyter.plot_scale = 1
)

In [4]:
# Load the parameters table, keeping only the `uuid` and `sweep_mode` columns.
params <- snakemake@input$parameters %>%
    read_tsv(col_types = cols()) %>%
    select(uuid, sweep_mode)

### Selection strength regressions

In [5]:
# Load the selection-strength table and attach each row's sweep mode from
# `params`, matching rows on `uuid`. dplyr joins take the key through `by`
# (there is no `on` argument), so the key is spelled out explicitly here.
# `sweepmode_factor()` is defined outside this notebook; presumably it turns
# the raw sweep-mode values into an ordered/labelled factor -- TODO confirm.
selstrength <- read_tsv(snakemake@input$selstrength, col_types = cols()) %>%
    inner_join(params, by = "uuid") %>%
    mutate(sweep_mode = sweepmode_factor(sweep_mode))

In [6]:
# Predicted vs. true log selection coefficient, one facet per sweep mode,
# with a dashed reference line drawn by geom_abline() using its defaults.
selstrength_fig <- ggplot(data = selstrength) +
    geom_point(
        mapping = aes(
            x = true_log_selection_coefficient,
            y = predicted_log_selection_coefficient,
            colour = sweep_mode
        ),
        size = 0.5
    ) +
    geom_abline(linetype = "dashed") +
    facet_wrap(facets = vars(sweep_mode)) +
    # Plotted values are logs; tick labels show 10^value (y rounded to 2 dp).
    scale_x_continuous(labels = function(v) 10^v) +
    scale_y_continuous(labels = function(v) round(10^v, digits = 2)) +
    labs(x = "True s", y = "Predicted s") +
    sweeps_colour +
    sweeps_theme +
    # Colour duplicates the facet variable, so the legend is hidden.
    theme(legend.position = "none")

In [7]:
# Render the selection-strength figure as this cell's output.
selstrength_fig

### Sweep mode confusion matrices

In [8]:
# True and predicted sweep-mode labels, one row per classified item.
sweepmode_raw <- read_tsv(snakemake@input$sweepmode, col_types = cols()) %>%
    select(true_label, predicted_label)

# Long-form confusion matrix: one row per (true_label, predicted_label) pair
# with its count `n`, plus the share of that pair within its true label.
# Note that `percent` is stored as a proportion in [0, 1]; only
# `percent_label` is scaled to a percentage string.
sweepmode_confmat <- sweepmode_raw %>%
    table() %>%
    as_tibble() %>%
    mutate(
        true_label = sweepmode_factor_short(true_label),
        predicted_label = sweepmode_factor_short(predicted_label)
    ) %>%
    # Normalise within each true label so the cells of one true label sum to 1.
    group_by(true_label) %>%
    mutate(
        percent = n / sum(n),
        percent_label = paste0(round(percent * 100, 1), "%")
    ) %>%
    # Drop the grouping so later operations on this tibble are not silently
    # performed per true label.
    ungroup()

In [9]:
# Confusion-matrix heatmap: tile fill encodes the within-true-label share,
# and each tile is annotated with that share as a percentage string.
confmat_fig <- ggplot(sweepmode_confmat) +
    geom_tile(aes(x = true_label, y = predicted_label, fill = percent)) +
    geom_text(
        aes(
            x = true_label,
            y = predicted_label,
            label = percent_label,
            colour = percent < 0.5
        )
    ) +
    # Name the values so the text colour is tied to the logical level itself:
    # white where the share is >= 0.5, black where it is < 0.5. Unnamed values
    # are assigned by position, which would give the wrong colour when only
    # one of TRUE/FALSE occurs in the data.
    scale_colour_manual(values = c("FALSE" = "white", "TRUE" = "black")) +
    # Reverse the y axis order of the predicted labels.
    scale_y_discrete(limits = rev) +
    scale_fill_distiller(palette = 3, direction = 1) +
    sweeps_theme +
    labs(x = "True", y = "Predicted") +
    theme(
        legend.position = "none",
        panel.grid = element_blank(),
        panel.spacing = unit(0.3, "in")
    )

In [10]:
# Render the confusion-matrix figure as this cell's output.
confmat_fig

### Plot it all together

In [11]:
# Combined figure in a single column: the selection-strength panels first
# (label A), then a nested grid (label B) holding the confusion matrix
# followed by a NULL entry.
all_fig <- plot_grid(
    selstrength_fig,
    plot_grid(confmat_fig, NULL),
    labels = c("A", "B"),
    ncol = 1
)

In [12]:
# Render the combined figure as this cell's output.
all_fig

In [13]:
# Save the combined figure to the Snakemake-declared `figure` output.
# `sweeps_save()` is defined outside this notebook; `width` and `asp` are
# passed through as given.
sweeps_save(
    snakemake@output$figure,
    all_fig,
    width = 5,
    asp = 1
)

### Get metrics

In [14]:
# Selection-strength error metrics per sweep mode, reshaped to long form
# with one row per (sweep_mode, metric) pair.
metrics_sel_by_sm <- selstrength %>%
    group_by(sweep_mode) %>%
    summarise(
        selstrength_rmse = rmse(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        ),
        selstrength_mre = mean_relative_error(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        )
    ) %>%
    pivot_longer(
        cols = !sweep_mode,
        names_to = "metric",
        values_to = "value"
    )

# Overall sweep-mode classification accuracy, as a single long-form row.
metrics_sweepmode <- tibble(
    sweep_mode = "All",
    metric = "accuracy",
    value = accuracy(
        sweepmode_raw[["true_label"]],
        sweepmode_raw[["predicted_label"]]
    )
)

# Selection-strength error metrics pooled over all sweep modes. The metric
# names must match the column names produced in `metrics_sel_by_sm`
# (`selstrength_rmse`, `selstrength_mre`) so that the same metric carries one
# name across the per-mode and pooled rows.
metrics_all_sel <- tibble(
    sweep_mode = c("All", "All"),
    metric = c("selstrength_rmse", "selstrength_mre"),
    value = c(
        rmse(
            selstrength$true_log_selection_coefficient,
            selstrength$predicted_log_selection_coefficient
        ),
        mean_relative_error(
            selstrength$true_log_selection_coefficient,
            selstrength$predicted_log_selection_coefficient
        )
    )
)

# Stack per-mode metrics, pooled metrics and classification accuracy into a
# single long-form table (columns: sweep_mode, metric, value).
metrics <- bind_rows(metrics_sel_by_sm, metrics_all_sel, metrics_sweepmode)

In [15]:
# Write the long-form metrics table to the Snakemake-declared `metrics` output.
write_tsv(metrics, snakemake@output$metrics)