In [16]:
source("utils/plot.R")

In [3]:
# Notebook display settings: inline plot dimensions (repr.plot.*) and the
# Jupyter plot scale factor.
options(
    repr.plot.width = 4,
    repr.plot.height = 3,
    jupyter.plot_scale = 1
)

In [4]:
# Explicit readr column types used when loading the parameter tables.
# The two swept_* columns are read as text; the remaining columns listed here
# are parsed as numbers. Columns not named here fall back to cols()' default.
col_spec <- cols(
    swept_mutations = col_character(),
    adaptive_mutation_rate = col_number(),
    selection_region_size = col_number(),
    swept_frequencies = col_character(),
    actual_frequency_at_selection = col_number(),
    num_starting_lineages = col_number(),
    num_surviving_lineages = col_number(),
    frequency_at_selection = col_number()
)

In [5]:
# Load the simulation parameter tables for both bottleneck scenarios, tag each
# row with the scenario it came from, and derive timings relative to the start
# of the bottleneck.
params <- bind_rows(
    # The argument names become the values of `bottleneck_strength` (via `.id`).
    `0.05` = read_tsv(snakemake@input$parameters_weak, col_types = col_spec),
    `0.01` = read_tsv(snakemake@input$parameters_strong, col_types = col_spec),
    .id = "bottleneck_strength"
) %>%
    filter(sweep_mode %in% c("hard", "rnm (true)", "sgv (true)")) %>%
    select(
        uuid, sweep_mode, slim_generations, selection_generation,
        demography, bottleneck_strength
    ) %>%
    mutate(
        # e.g. "0.05" -> "5% Bottleneck"
        bottleneck_label = paste0(as.double(bottleneck_strength) * 100, "% Bottleneck")
    ) %>%
    # Split `demography` on commas into five pieces; the first three are kept
    # and the last two are discarded (NA entries in `into`).
    separate(
        "demography",
        sep = ",",
        into = c("crash_size", "bottleneck_duration", "bottleneck_start", NA, NA)
    ) %>%
    # Each kept piece is split on "=": the left-hand side is dropped and the
    # right-hand side is type-converted (convert = TRUE).
    separate("crash_size", sep = "=", into = c(NA, "crash_size"), convert = TRUE) %>%
    separate("bottleneck_duration", sep = "=", into = c(NA, "bottleneck_duration"), convert = TRUE) %>%
    separate("bottleneck_start", sep = "=", into = c(NA, "bottleneck_start"), convert = TRUE) %>%
    mutate(
        relative_starttime = selection_generation - bottleneck_start,
        relative_fixtime = slim_generations - bottleneck_start
    ) %>%
    # Keep only rows with a strictly positive relative_fixtime.
    filter(relative_fixtime > 0)

### Selection strength regressions

In [6]:
# Selection-strength prediction tables for both scenarios, stacked and joined
# onto the filtered parameter table. The right join keeps every row of
# `params`, matched by `uuid`.
selstrength <- bind_rows(
    read_tsv(snakemake@input$selstrength_weak, col_types = cols()),
    read_tsv(snakemake@input$selstrength_strong, col_types = cols())
) %>%
    right_join(params, by = "uuid") %>%
    # Recode sweep_mode with the project helper sweepmode_factor().
    mutate(sweep_mode = sweepmode_factor(sweep_mode))

In [7]:
# Scatter plot of predicted vs. true (log) selection coefficient, one panel per
# bottleneck label. Points are coloured by whether relative_fixtime is smaller
# than bottleneck_duration.
selstrength_fig <- ggplot(selstrength) +
    geom_point(aes(
        x = true_log_selection_coefficient,
        y = predicted_log_selection_coefficient,
        colour = relative_fixtime < bottleneck_duration
    ), size = 1) +
    # Colours are named by logical level so TRUE is always dark red and FALSE
    # always grey. Unnamed values are assigned positionally to whichever levels
    # occur, which swaps the colours when only one level is present.
    scale_colour_manual(values = c("FALSE" = "grey", "TRUE" = "darkred")) +
    # Default abline (slope 1, intercept 0): the line of perfect prediction.
    geom_abline(linetype = "dashed") +
    facet_wrap(vars(bottleneck_label)) +
    # Data are plotted on the log scale; tick labels show 10^value instead.
    scale_x_continuous(labels = function(x) 10^x) +
    scale_y_continuous(labels = function(x) 10^x) +
    labs(
        x = "True sel. coefficient",
        y = "Predicted sel. coefficient"
    ) +
    sweeps_theme +
    theme(
        legend.position = "none"
    )

In [8]:
# Evaluate the plot object so it is rendered as this cell's output.
selstrength_fig

### Sweep mode confusion matrices

In [27]:
# Sweep-mode classification tables for both scenarios, stacked and joined onto
# the filtered parameter table (right join on `uuid`), then reduced to the
# label columns needed for the confusion matrix.
sweepmode_raw <- bind_rows(
    read_tsv(snakemake@input$sweepmode_weak, col_types = cols()),
    read_tsv(snakemake@input$sweepmode_strong, col_types = cols())
) %>%
    right_join(params, by = "uuid") %>%
    select(bottleneck_label, true_labels, predicted_labels) %>%
    # Keep only rows whose true label is one of the three listed sweep modes;
    # rows with a missing true label are dropped as well.
    filter(true_labels %in% c("hard", "rnm (true)", "sgv (true)"))

In [28]:
# Confusion-matrix counts per bottleneck label: cross-tabulate
# (bottleneck_label, true_labels, predicted_labels) and convert the table to a
# long tibble with the count in column `n`.
sweepmode_confmat <- sweepmode_raw %>%
    table() %>%
    as_tibble() %>%
    mutate(
        true_label = sweepmode_factor_short(true_labels),
        predicted_label = sweepmode_factor_short(predicted_labels)
    ) %>%
    # Normalise within each (scenario, true label) so the predicted-label
    # fractions for one true label sum to 1.
    group_by(bottleneck_label, true_label) %>%
    mutate(
        # `percent` is a fraction in [0, 1]; only the label is scaled to percent.
        percent = n / sum(n),
        percent_label = paste0(round(percent * 100, 2), "%")
    ) %>%
    # Drop the grouping so later operations on this tibble are not silently
    # applied per group.
    ungroup()

In [10]:
# Print the confusion-matrix table as this cell's output.
sweepmode_confmat

In [11]:
# Confusion-matrix heatmap, one panel per bottleneck label: tiles are filled by
# the per-true-label fraction and annotated with the percentage text.
confmat_fig <- ggplot(sweepmode_confmat) +
    geom_tile(aes(x = true_label, y = predicted_label, fill = percent)) +
    geom_text(aes(
        x = true_label,
        y = predicted_label,
        label = percent_label,
        colour = percent < 0.5
    )) +
    facet_wrap(vars(bottleneck_label)) +
    # Text colours are named by logical level: white where the fraction is below
    # 0.5, black otherwise. Unnamed values are assigned positionally and swap
    # when only one of the two levels occurs in the data.
    scale_colour_manual(values = c("FALSE" = "black", "TRUE" = "white")) +
    # Reverse the order of the y axis categories.
    scale_y_discrete(limits = rev) +
    scale_fill_distiller(palette = 3, direction = -1) +
    sweeps_theme +
    labs(x = "True", y = "Predicted") +
    theme(
        legend.position = "none",
        panel.grid = element_blank(),
        panel.spacing = unit(0.3, "in")
    )

In [12]:
# Evaluate the plot object so it is rendered as this cell's output.
confmat_fig

### Plot it all together

In [13]:
# Stack the regression panel (A) above the confusion-matrix panel (B).
# `align` must be one of "none", "h", "v" or "hv"; "l" is not a valid value.
# "v" combined with axis = "lr" lines up the left and right plot edges of the
# two vertically stacked panels.
all_fig <- plot_grid(
    selstrength_fig, confmat_fig,
    nrow = 2,
    labels = c("A", "B"),
    align = "v",
    axis = "lr"
)

In [14]:
# Write the combined figure to the Snakemake `figure` output path using the
# project helper sweeps_save().
sweeps_save(
    snakemake@output$figure,
    all_fig,
    width = 5,
    asp = 1
)

## Get metrics

In [35]:
# Sweep-mode classification accuracy per bottleneck label.
metrics_sweepmode <- sweepmode_raw %>%
    group_by(bottleneck_label) %>%
    summarize(
        sweepmode_accuracy = accuracy(true_labels, predicted_labels)
    )

# Selection-strength error metrics per bottleneck label (computed on the log
# selection coefficients), combined with the sweep-mode accuracy into a single
# table keyed by bottleneck_label.
metrics <- selstrength %>%
    group_by(bottleneck_label) %>%
    summarize(
        selstrength_rmse = rmse(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        ),
        selstrength_mre = mean_relative_error(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        )
    ) %>%
    inner_join(metrics_sweepmode, by = "bottleneck_label")

In [38]:
# Write the combined metrics table as TSV to the Snakemake `metrics` output path.
write_tsv(
    metrics,
    snakemake@output$metrics
)