In [2]:
source("utils/plot.R")

In [3]:
# Configure notebook plot rendering: repr plot width/height and the Jupyter plot scale.
options(repr.plot.width=4, repr.plot.height=3, jupyter.plot_scale=1)

In [4]:
# Column types used by read_offcenter_parameters() when reading the
# simulation-parameter TSV files. The two swept_* columns are kept as text;
# every other listed column is parsed with col_number(). Each column is listed
# exactly once. Columns not named here (e.g. `uuid` and `sweep_mode`, which
# later cells rely on) are left to readr's default handling in cols().
col_spec <- cols(
    swept_mutations=col_character(),
    adaptive_mutation_rate=col_number(),
    selection_region_size=col_number(),
    swept_frequencies=col_character(),
    actual_frequency_at_selection=col_number(),
    num_starting_lineages=col_number(),
    num_surviving_lineages=col_number(),
    frequency_at_selection=col_number(),
    num_restarts=col_number(),
    actual_frequency_at_sampling=col_number(),
    dominance_coefficient=col_number(),
    frequency_at_sampling=col_number(),
    selection_coefficient=col_number(),
    selection_coordinate=col_number(),
    selection_generation=col_number(),
    log_selection_coefficient=col_number()
)

In [5]:
# Display the parameter inputs supplied through the snakemake object; a later
# cell passes each of them to read_offcenter_parameters().
snakemake@input$parameters

In [6]:
# Turn off-center distances into facet labels of the form "Off by <v/1000>kb".
#
# v: numeric vector of distances; each value is divided by 1000 for the label.
# Returns a factor whose levels are fixed, in order, to "Off by 0.5kb" and
# "Off by 10kb". Any distance that does not produce one of those two labels
# becomes NA.
offcenter_factor <- function(v) {
    known_labels <- c("Off by 0.5kb", "Off by 10kb")
    factor(paste0("Off by ", v/1000, "kb"), levels=known_labels)
}

# Read one simulation-parameter TSV with the shared `col_spec` column types and
# add an `offcenter_distance` column derived from the file name.
#
# filename: path of the TSV file to read. The first run of digits found in this
#   string is converted to an integer and stored in `offcenter_distance`.
#   NOTE(review): the match is taken from the whole path, so digits in a parent
#   directory would be picked up first; confirm against the input path layout.
# Returns the table produced by read_tsv() with the extra column appended.
read_offcenter_parameters <- function(filename) {
    parameter_table <- read_tsv(filename, col_types=col_spec)
    mutate(
        parameter_table,
        offcenter_distance=as.integer(str_extract(filename, "\\d+"))
    )
}

In [7]:
# Read every parameter file, stack the results, keep only rows whose sweep_mode
# is 'hard', 'rnm (true)' or 'sgv (true)', and retain just the join key, the
# off-center distance and the log selection coefficient.
parameters <- snakemake@input$parameters %>%
    lapply(read_offcenter_parameters) %>%
    bind_rows() %>%
    filter(sweep_mode %in% c('hard', 'rnm (true)', 'sgv (true)')) %>%
    select(uuid, offcenter_distance, log_selection_coefficient)

### Selection strength regressions

In [8]:
# Stack the selection-strength result files (column types guessed by readr),
# right-join them onto `parameters` by uuid so that every retained parameter
# row is kept, and attach the facet label for each off-center distance.
selstrength <- snakemake@input$selstrength %>%
    lapply(read_tsv, col_types=cols()) %>%
    bind_rows() %>%
    right_join(parameters, by='uuid') %>%
    mutate(offcenter_label=offcenter_factor(offcenter_distance))

In [9]:
# Scatter of predicted against true log selection coefficient, one facet per
# off-center label, with a dashed geom_abline() reference line.
# Tick labels on both axes are displayed as 10^value.
# NOTE(review): this presumably undoes a log10 transform of s; confirm upstream.
selstrength_fig <- ggplot(selstrength) +
    geom_point(
        aes(
            x=true_log_selection_coefficient,
            y=predicted_log_selection_coefficient
        ),
        colour='grey',
        size=0.5
    ) +
    geom_abline(linetype='dashed') +
    facet_wrap(vars(offcenter_label), nrow=1) +
    scale_x_continuous(labels=function(x) 10^x) +
    scale_y_continuous(labels=function(x) 10^x) +
    labs(x="True s", y="Predicted s") +
    sweeps_theme

In [10]:
# Render the selection-strength figure in the notebook output.
selstrength_fig

### Sweep mode accuracy, by selection bracket

In [11]:
# Stack the sweep-mode result files (column types guessed by readr), keep the
# join key plus the true and predicted labels, right-join onto `parameters` by
# uuid, and drop the uuid once the join is done.
sweepmode_raw <- snakemake@input$sweepmode %>%
    lapply(read_tsv, col_types=cols()) %>%
    bind_rows() %>%
    select(uuid, true_labels, predicted_labels) %>%
    right_join(parameters, by='uuid') %>%
    select(
        offcenter_distance,
        true_labels,
        predicted_labels,
        log_selection_coefficient
    )

In [12]:
# Sweep-mode accuracy per off-center distance, split by selection-coefficient
# bracket, plus an "All" row per distance computed over every bracket.

# Bracket edges for the selection coefficient: 10^-2 up to 10^2 in steps of
# half an exponent.
sel_breaks <- 10^(c(-2, -1.5, -1.0, -0.5, 0, 0.5, 1.0, 1.5, 2))

# Accuracy within each (distance, bracket) cell. The coefficient is converted
# back from its log before being binned with cut().
sweepmode <- sweepmode_raw %>%
    mutate(selection_bracket=cut(10^log_selection_coefficient, breaks=sel_breaks, dig.lab=3)) %>%
    group_by(offcenter_distance, selection_bracket) %>%
    summarize(
        accuracy=accuracy(true_labels, predicted_labels)
    ) %>%
    # summarize() drops only the last grouping variable, so the result would
    # still be grouped by offcenter_distance. Ungroup here so the mutate()
    # below (in particular fct_reorder()) works on the whole table instead of
    # building the label factor separately inside each distance group.
    ungroup()

# Remember the bracket order before bind_rows() mixes the factor column with
# the character "All" entries.
selection_bracket_levels <- levels(sweepmode$selection_bracket)

# Overall accuracy per distance, labelled "All".
baseline <- sweepmode_raw %>%
    group_by(offcenter_distance) %>%
    summarize(
        selection_bracket="All",
        accuracy=accuracy(true_labels, predicted_labels)
    )

sweepmode <- bind_rows(sweepmode, baseline) %>%
    mutate(
        # Restore the bracket ordering, with "All" as the final level.
        selection_bracket=factor(selection_bracket, levels=c(selection_bracket_levels, 'All')),
        # Turn cut()'s "(a,b]" labels into "a-b" for the axis.
        selection_bracket_label=str_replace_all(selection_bracket, c(
            "\\("="",
            "\\]"="",
            ","="-"
        )),
        # Order the display labels the same way as the underlying brackets.
        selection_bracket_label=fct_reorder(selection_bracket_label, as.integer(selection_bracket)),
        offcenter_label=offcenter_factor(offcenter_distance)
    )

In [13]:
# Bar chart of sweep-mode accuracy per selection bracket, one facet per
# off-center label. The theme() call is applied after sweeps_theme so that its
# settings take precedence: no legend, wider panel spacing, x tick labels
# rotated 45 degrees, and facet strip text removed.
sweepmode_fig <- ggplot(sweepmode) +
    geom_col(
        aes(x=selection_bracket_label, y=accuracy)
    ) +
    facet_wrap(vars(offcenter_label), nrow=1) +
    sweeps_theme +
    labs(x='Selection coefficient', y='Accuracy') +
    theme(
        axis.text.x=element_text(angle=45, hjust=1),
        strip.text=element_blank(),
        panel.spacing=unit(0.3, "in"),
        legend.position='none'
    )

In [14]:
# Render the sweep-mode accuracy figure in the notebook output.
sweepmode_fig

### Plot it all together

In [15]:
# Stack the two figures into a single two-row panel labelled A and B.
# plot_grid()'s `align` accepts "none", "h", "v" or "hv"; vertical alignment
# ('v') is the mode that `axis='lr'` refines, lining up the left and right
# edges of the stacked plots.
all_fig <- plot_grid(
    selstrength_fig,
    sweepmode_fig,
    nrow=2,
    labels=c('A', 'B'),
    align='v',
    axis='lr'
)

In [16]:
# Save the combined figure to the Snakemake `figure` output via sweeps_save().
# NOTE(review): asp=1 is presumably the aspect ratio; confirm against the
# definition of sweeps_save().
sweeps_save(snakemake@output$figure, all_fig, asp=1)

## Get metrics

In [17]:
# Summary metrics per off-center label.

# Sweep-mode classification accuracy for each label.
metrics_sweepmode <- sweepmode_raw %>%
    mutate(offcenter_label=offcenter_factor(offcenter_distance)) %>%
    group_by(offcenter_label) %>%
    summarize(
        sweepmode_accuracy=accuracy(true_labels, predicted_labels)
    )

# Selection-strength errors (RMSE and mean relative error on the log
# coefficients) for each label, combined with the accuracy table above so the
# result has one row per label present in both tables.
metrics <- selstrength %>%
    group_by(offcenter_label) %>%
    summarize(
        selstrength_rmse=rmse(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        ),
        selstrength_mre=mean_relative_error(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        )
    ) %>%
    inner_join(metrics_sweepmode, by="offcenter_label")

In [18]:
# Write the per-label metrics table to the Snakemake `metrics` output as TSV.
write_tsv(metrics, snakemake@output$metrics)