In [2]:
source("utils/plot.R")

In [3]:
# Default dimensions and scaling for plots rendered inline in the notebook.
options(
    repr.plot.width=4,
    repr.plot.height=3,
    jupyter.plot_scale=1
)

In [72]:
# Column types for the simulation-parameter TSV files; passed to read_tsv()
# by read_ne_rec_parameters(). Columns not listed here fall back to readr's
# default type guessing.
# Every column is declared exactly once (selection_coordinate and
# selection_generation used to be listed twice, which was redundant and
# invited the two copies drifting apart).
col_spec <- cols(
    swept_mutations=col_character(),
    adaptive_mutation_rate=col_number(),
    selection_region_size=col_number(),
    swept_frequencies=col_character(),
    actual_frequency_at_selection=col_number(),
    num_starting_lineages=col_number(),
    num_surviving_lineages=col_number(),
    frequency_at_selection=col_number(),
    num_restarts=col_number(),
    actual_frequency_at_sampling=col_number(),
    dominance_coefficient=col_number(),
    frequency_at_sampling=col_number(),
    selection_coefficient=col_number(),
    selection_coordinate=col_number(),
    selection_generation=col_number(),
    log_selection_coefficient=col_number()
)

In [171]:
# Turn raw dataset identifiers into a factor of display labels.
#
# v: character vector of dataset identifiers. The substrings
#    'popsize-higher', 'popsize-lower', 'recombination-higher' and
#    'recombination-lower' are replaced by their display labels.
# Returns a factor with levels "Higher Ne", "Lower Ne", "Higher r",
# "Lower r" (in that order). Any element that is not exactly one of those
# labels after replacement becomes NA.
dataset_factor <- function(v) {
    # Pattern -> label lookup; its values double as the factor levels.
    label_lookup <- c(
        'popsize-higher'="Higher Ne",
        'popsize-lower'="Lower Ne",
        'recombination-higher'="Higher r",
        'recombination-lower'="Lower r"
    )
    relabelled <- str_replace_all(v, label_lookup)
    factor(relabelled, levels=unname(label_lookup))
}

# Read one parameter TSV and tag every row with the dataset it came from.
#
# filename: path to a tab-separated parameter file, parsed with `col_spec`.
# Returns the parsed table with an added `dataset` column holding the part
# of the file's base name before the first "_"; the rest of the base name
# is discarded.
read_ne_rec_parameters <- function(filename) {
    params <- read_tsv(filename, col_types=col_spec)
    # Store the base name in a temporary `filename` column so it can be split.
    params <- mutate(params, filename=basename(filename))
    # NA in `into` drops the piece after the first underscore.
    separate(params, "filename", sep="_", into=c("dataset", NA))
}

In [172]:
# Read every parameter file supplied by Snakemake, stack them into one table,
# keep only the 'hard', 'rnm (true)' and 'sgv (true)' sweep modes, and retain
# the columns used by the joins and rescaling further down.
parameters <- snakemake@input$parameters %>%
    lapply(read_ne_rec_parameters) %>%
    bind_rows() %>%
    filter(sweep_mode %in% c('hard', 'rnm (true)', 'sgv (true)')) %>%
    select(uuid, dataset, recombination_rate, diploid_population_size)

### Selection strength regressions

In [193]:
# Fixed recombination rate used as the divisor when converting predicted
# selection coefficients to s/r (see predicted_log_sr below).
# NOTE(review): presumably the rate the model was trained with - confirm.
TRAINING_REC <- 1.619e-7

In [192]:
# Selection-strength predictions joined to the simulation parameters.
# right_join keeps every row of `parameters`, so parameter rows without a
# matching prediction are retained with NA in the prediction columns.
# The *_log_sr columns are log10 of s divided by a recombination rate:
# predictions use the fixed TRAINING_REC, true values use each row's own
# recombination_rate.
selstrength <- snakemake@input$selstrength %>%
    lapply(read_tsv, col_types=cols(true_log_selection_coefficient=col_number())) %>%
    bind_rows() %>%
    right_join(parameters, by='uuid') %>%
    mutate(
        dataset_label=dataset_factor(dataset),
        predicted_log_sr=log10((10^predicted_log_selection_coefficient) / TRAINING_REC),
        true_log_sr=log10((10^true_log_selection_coefficient) / recombination_rate)
    )

In [246]:
# Predicted vs. true log selection coefficient, one panel per dataset.
# Tick labels are back-transformed with 10^x so the axes read as s; the
# dashed geom_abline() is drawn with its default slope and intercept.
selstrength_fig <- ggplot(data=selstrength) +
    geom_point(
        mapping=aes(
            x=true_log_selection_coefficient,
            y=predicted_log_selection_coefficient
        ),
        colour='grey',
        size=0.5
    ) +
    geom_abline(linetype='dashed') +
    facet_wrap(vars(dataset_label), nrow=1) +
    scale_x_continuous(labels=function(x) 10^x) +
    scale_y_continuous(labels=function(x) 10^x) +
    labs(x="True s", y="Predicted s") +
    sweeps_theme

In [247]:
# Display the selection-coefficient figure in the notebook output.
selstrength_fig

In [248]:
# Predicted vs. true log10(s/r), one panel per dataset, with a dashed
# default geom_abline() for reference. Facet strip labels are blanked out
# via strip.text.
sr_fig <- ggplot(data=selstrength) +
    geom_point(
        mapping=aes(x=true_log_sr, y=predicted_log_sr),
        colour='blue',
        size=0.5
    ) +
    geom_abline(linetype='dashed') +
    facet_wrap(vars(dataset_label), nrow=1) +
    labs(x="True s/r", y="Predicted s/r") +
    sweeps_theme +
    theme(strip.text=element_blank())

In [249]:
# Display the s/r figure in the notebook output.
sr_fig

### Sweep mode confusion matrices

In [250]:
# Sweep-mode classifications (true and predicted labels) joined to the
# simulation parameters by uuid. right_join keeps every row of `parameters`;
# only dataset and the two label columns are carried forward.
sweepmode_raw <- snakemake@input$sweepmode %>%
    lapply(read_tsv, col_types=cols(true_ix=col_number())) %>%
    bind_rows() %>%
    select(uuid, true_labels, predicted_labels) %>%
    right_join(parameters, by='uuid') %>%
    select(dataset, true_labels, predicted_labels)

In [251]:
# Per-dataset confusion matrix in long form.
# table() counts every dataset x true label x predicted label combination
# and as_tibble() turns the counts into a column `n`. Within each
# (dataset, true label) group, `percent` is the fraction (0-1) of that true
# label assigned to each predicted label, and `percent_label` is the same
# value formatted as a percentage with one decimal place.
sweepmode_confmat <- sweepmode_raw %>%
    table() %>%
    as_tibble() %>%
    mutate(
        true_labels=sweepmode_factor_short(true_labels),
        predicted_labels=sweepmode_factor_short(predicted_labels)
    ) %>%
    group_by(dataset, true_labels) %>%
    mutate(
        percent=n/sum(n),
        percent_label=paste0(round(percent*100, 1), '%'),
        dataset_label=dataset_factor(dataset)
    ) %>%
    # Drop the grouping so later verbs on this table are not silently
    # applied per (dataset, true label) group.
    ungroup()

In [252]:
# Confusion-matrix heatmaps, one panel per dataset: tiles are filled by the
# per-true-label fraction and annotated with the formatted percentage.
confmat_fig <- ggplot(sweepmode_confmat) +
    geom_tile(aes(x=true_labels, y=predicted_labels, fill=percent)) +
    geom_text(aes(x=true_labels, y=predicted_labels, label=percent_label, colour=percent<0.5), size=2.5) +
    facet_wrap(vars(dataset_label), nrow=1) +
    # Text is black where percent < 0.5 and white otherwise. The values are
    # named by logical level so the mapping holds even when only one of
    # TRUE/FALSE occurs in the data; an unnamed vector is assigned by
    # position and would hand 'white' to TRUE in that case.
    scale_colour_manual(values=c('FALSE'='white', 'TRUE'='black')) +
    # Reverse the y axis order.
    scale_y_discrete(limits=rev) +
    scale_fill_distiller(palette=3, direction=1) +
    sweeps_theme +
    labs(x='True', y='Predicted') +
    theme(
        legend.position='none',
        panel.grid=element_blank(),
        panel.spacing=unit(0.3, "in"),
        strip.text=element_blank()
    )

In [253]:
# Display the confusion-matrix figure in the notebook output.
confmat_fig

### Plot it all together

In [254]:
# Stack the three figures as rows A-C of a single panel.
# align='v' together with axis='lr' lines up the left and right plot edges
# across the stacked rows. plot_grid's `align` only understands 'none',
# 'h', 'v' and 'hv'; the former value 'l' requested no alignment at all,
# which left the `axis` setting without effect.
all_fig <- plot_grid(
    selstrength_fig,
    sr_fig,
    confmat_fig,
    nrow=3,
    labels=c('A', 'B', 'C'),
    axis='lr',
    align='v'
)

In [255]:
# Save the combined figure to the Snakemake-declared output path.
# NOTE(review): sweeps_save is not defined in this file (presumably it comes
# from utils/plot.R); the meaning of `width` and `asp` should be checked there.
sweeps_save(snakemake@output$figure, all_fig, width=7, asp=1.15)

## Get metrics

In [259]:
# Per-dataset summary metrics. rmse(), mean_relative_error() and accuracy()
# are not defined in this file (presumably provided by utils/plot.R).

# Sweep-mode classification accuracy for each dataset.
metrics_sweepmode <- sweepmode_raw %>%
    mutate(dataset_label=dataset_factor(dataset)) %>%
    group_by(dataset_label) %>%
    summarize(sweepmode_accuracy=accuracy(true_labels, predicted_labels))

# Selection-coefficient regression errors for each dataset, combined with the
# accuracy above; inner_join keeps only datasets present in both summaries.
metrics <- selstrength %>%
    group_by(dataset_label) %>%
    summarize(
        selstrength_rmse=rmse(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        ),
        selstrength_mre=mean_relative_error(
            true_log_selection_coefficient,
            predicted_log_selection_coefficient
        )
    ) %>%
    inner_join(metrics_sweepmode, by="dataset_label")

In [261]:
# Write the per-dataset metrics table to the Snakemake-declared output path
# as a tab-separated file.
write_tsv(metrics, snakemake@output$metrics)