In [2]:
# Load the shared code in utils/plot.R.
# NOTE(review): the packages and helpers used below (e.g. sweepmode_factor,
# sweeps_theme, sweeps_colour, sweeps_save, rmse, accuracy) are not defined in
# this notebook and are presumably provided by this file — confirm.
source("utils/plot.R")

In [3]:
# Notebook display settings: plot width/height and the Jupyter plot scale.
options(
    repr.plot.width=4,
    repr.plot.height=3,
    jupyter.plot_scale=1
)

In [4]:
# Parameter table from the Snakemake input, reduced to the two columns used
# in this notebook: the `uuid` key and the `sweep_mode` label.
params <- snakemake@input$parameters %>%
    read_tsv(col_types=cols()) %>%
    select(uuid, sweep_mode)

### Selection strength regressions

In [5]:
# Selection-strength predictions, annotated with the sweep mode from `params`.
# The join key is given via `by=`: dplyr joins have no `on=` argument, so
# `on="uuid"` is either silently ignored (falling back to a natural join on
# every shared column) or rejected, depending on the dplyr version.
# NOTE(review): assumes the selstrength table has no `sweep_mode` column of its
# own, so `uuid` is the only shared column — confirm.
selstrength <- read_tsv(snakemake@input$selstrength, col_types=cols()) %>%
    inner_join(params, by="uuid") %>%
    mutate(sweep_mode=sweepmode_factor(sweep_mode))

In [6]:
# Preview 10 randomly chosen rows of the joined table.
selstrength %>%
    slice_sample(n=10)

In [7]:
# Scatter of predicted against true selection coefficients, coloured and
# faceted by sweep mode, with a dashed geom_abline() reference line.
# Both axes plot the log-scale columns; tick labels are shown as 10^value.
selstrength_fig <- ggplot(selstrength) +
    geom_point(
        aes(
            x=true_log_selection_coefficient,
            y=predicted_log_selection_coefficient,
            colour=sweep_mode
        )
    ) +
    geom_abline(linetype="dashed") +
    facet_wrap(vars(sweep_mode)) +
    scale_x_continuous(labels=function(v) 10^v) +
    scale_y_continuous(labels=function(v) 10^v) +
    labs(x="True sel. coefficient", y="Predicted sel. coefficient") +
    sweeps_colour +
    sweeps_theme +
    # The facets already identify the sweep mode, so the legend is hidden.
    theme(legend.position="none")

In [8]:
# Display the selection-strength figure as the cell output.
selstrength_fig

### Sweep mode confusion matrices

In [20]:
# Sweep-mode classification results, keeping only the true and predicted
# label columns.
sweepmode_raw <- snakemake@input$sweepmode %>%
    read_tsv(col_types=cols()) %>%
    select(true_label, predicted_label)

In [21]:
# Confusion matrix in long form: one row per (true_label, predicted_label)
# pair, with `n` the count produced by table().
# `percent` is the fraction (0-1) of each true label's rows that received the
# given predicted label; `percent_label` is that value as a "xx.xx%" string.
sweepmode_confmat <- sweepmode_raw %>%
    table %>%
    as_tibble %>%
    mutate(
        true_label=sweepmode_factor_short(true_label),
        predicted_label=sweepmode_factor_short(predicted_label)
    ) %>%
    group_by(true_label) %>%
    mutate(
        percent=n/sum(n),
        percent_label=paste0(round(percent*100, 2), '%')
    ) %>%
    # Drop the grouping so it does not leak into later operations on this table.
    ungroup()

In [10]:
# Display the confusion-matrix table as the cell output.
sweepmode_confmat

In [11]:
# Confusion-matrix heatmap: tiles filled by the per-true-label fraction, each
# annotated with its percentage label.
confmat_fig <- ggplot(sweepmode_confmat) +
    geom_tile(aes(x=true_label, y=predicted_label, fill=percent)) +
    geom_text(aes(x=true_label, y=predicted_label, label=percent_label, colour=percent<0.5)) +
    # Text is white where percent < 0.5 and black otherwise. The values are
    # named so the mapping stays correct when only TRUE or only FALSE occurs;
    # unnamed values are assigned by position to whichever levels are present.
    scale_colour_manual(values=c("FALSE"='black', "TRUE"='white')) +
    # Reverse the order of the y axis categories.
    scale_y_discrete(limits=rev) +
    scale_fill_distiller(palette=3, direction=-1) +
    sweeps_theme +
    labs(x='True', y='Predicted') +
    theme(
        legend.position='none',
        panel.grid=element_blank(),
        panel.spacing=unit(0.3, "in")
    )

In [12]:
# Display the confusion-matrix figure as the cell output.
confmat_fig

### Sweep mode ROC curves

In [13]:
# ROC curve points from the Snakemake input, with `reference_label` converted
# by sweepmode_factor_short().
sweepmode_roc <- snakemake@input$sweepmode_roc %>%
    read_tsv(col_types=cols()) %>%
    mutate(reference_label=sweepmode_factor_short(reference_label))

In [14]:
# ROC curves: one line per reference label, plus a dashed geom_abline()
# reference line.
roc_fig <- ggplot(sweepmode_roc) +
    geom_line(
        aes(
            x=false_positive_rate,
            y=true_positive_rate,
            colour=reference_label
        )
    ) +
    geom_abline(linetype="dashed") +
    labs(x="False positive rate", y="True positive rate") +
    guides(colour=guide_legend(title="Reference")) +
    sweeps_colour +
    sweeps_theme +
    # Anchor the legend's bottom-right corner to the bottom-right of the plot.
    theme(
        legend.justification=c(1, 0),
        legend.position=c(1, 0),
        legend.background=element_rect(colour="white")
    )

In [15]:
# Display the ROC figure as the cell output.
roc_fig

### Plot it all together

In [16]:
# Panels B and C: confusion matrix and ROC curves in one grid, each given a
# one-line margin on all four sides.
classification_fig <- plot_grid(
    confmat_fig + theme(plot.margin=unit(rep(1, 4), "lines")),
    roc_fig + theme(plot.margin=unit(rep(1, 4), "lines")),
    labels=c("B", "C")
)

In [17]:
# Full figure: the selection-strength panel (labelled A) stacked above the
# classification grid, which already carries its own B/C labels.
all_fig <- plot_grid(
    selstrength_fig,
    classification_fig,
    labels=c("A", NA),
    nrow=2
)

In [18]:
# Save the combined figure to the Snakemake figure output via sweeps_save().
sweeps_save(
    snakemake@output$figure,
    all_fig,
    width=6,
    asp=4/3
)

## Get metrics

In [23]:
# One-row summary table: two error metrics computed on the log-scale
# selection-coefficient columns, plus the sweep-mode classification accuracy.
# NOTE(review): rmse(), mean_relative_error() and accuracy() are not defined in
# this notebook; presumably they come from utils/plot.R — confirm.
metrics <- tibble(
    selstrength_rmse=rmse(
        selstrength$true_log_selection_coefficient,
        selstrength$predicted_log_selection_coefficient
    ),
    selstrength_mre=mean_relative_error(
        selstrength$true_log_selection_coefficient,
        selstrength$predicted_log_selection_coefficient
    ),
    sweepmode_accuracy=accuracy(
        sweepmode_raw$true_label,
        sweepmode_raw$predicted_label
    )
)

In [25]:
# Write the metrics table to the Snakemake metrics output as TSV.
write_tsv(
    metrics,
    snakemake@output$metrics
)