In [2]:
source("utils/plot.R")

In [3]:
# Load the CLUES table given by the Snakemake "clues" input, parsing `locus`
# as an integer, and add a human-readable position label per row.
data <- mutate(
    read_tsv(
        snakemake@input[["clues"]],
        col_types = cols(locus = col_integer())
    ),
    locus_str = paste0("Pos. ", scales::comma(locus))
)
# Disabled step, kept for reference: restrict to rows whose log_prob lies
# above the 75th percentile.
# data <- filter(data, log_prob > quantile(log_prob, 0.75))

In [4]:
# Sweep region bounds taken from the Snakemake config, divided by 1000
# (presumably converting bp to kb -- confirm against the config units).
region_limits <- c(
    snakemake@config$sweep_region_start / 1000,
    snakemake@config$sweep_region_end / 1000
)

# One selection-coefficient estimate per locus.
# The first row of each locus is taken deterministically (rather than a
# random row) so that the downstream boxplot is reproducible between runs.
# NOTE(review): the same 0 < s < 0.197 window is applied to the multi-panel
# figure further down; presumably 0.197 is the upper edge of the CLUES
# search grid -- confirm.
selstrens <- data %>%
    group_by(locus) %>%
    slice_head(n = 1) %>%
    ungroup() %>%
    select(locus, s_estimate) %>%
    filter(s_estimate > 0, s_estimate < 0.197)

# SNP position -> tree index lookup from the ARG info table
# (semicolon-delimited), keeping only rows with `is_not_mapping == 0`.
trees <- read_delim(
    snakemake@input[["arg_info"]],
    delim = ";",
    col_types = cols()
) %>%
    filter(is_not_mapping == 0) %>%
    select(locus = pos_of_snp, tree_index)

### CLUES results boxplot

In [5]:
# Horizontal boxplot of the per-locus CLUES selection coefficients with the
# median printed next to the box.
median_s <- median(selstrens$s_estimate)

# NOTE(review): ylim() discards observations outside [0, 0.075] before the
# boxplot statistics are computed, whereas the label below uses the median of
# all rows in `selstrens` (which may reach 0.197). Confirm this is intended.
results_fig <- ggplot(selstrens) +
    geom_boxplot(aes(y = s_estimate)) +
    xlim(-0.5, 1) +
    ylim(0, 0.075) +
    # annotate() draws the label exactly once; geom_text() with constant
    # aesthetics would draw one overlapping copy per row of `selstrens`.
    annotate(
        "text",
        x = 0.6,
        y = median_s,
        label = paste0("Median s = ", signif(median_s, 3))
    ) +
    coord_flip() +
    labs(title = paste0("CLUES estimates for the ", nrow(selstrens), " middle sites")) +
    turkana_theme +
    turkana_colour +
    theme(
        legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_blank()
    )
turkana_save(snakemake@output[["results"]], results_fig, asp = 2.5)

### Individual site plots

In [6]:
# Write one PDF per locus showing the CLUES posterior (exp(log_prob)) over
# generations and allele frequency, into the Snakemake `plot_dir` param.
for (loc in unique(data$locus)) {

    this <- data %>% filter(locus == loc)

    # `trees` only holds rows with is_not_mapping == 0, so a locus may have no
    # match; in that case omit the tree suffix instead of failing with
    # "subscript out of bounds".
    tree_index <- filter(trees, locus == loc)$tree_index
    title <- paste0("Pos. ", scales::comma(loc))
    if (length(tree_index) > 0) {
        title <- paste0(title, " (tree ", tree_index[[1]], ")")
    }

    # Rounded to 3 significant digits, matching the boxplot and panel labels.
    s <- this$s_estimate[[1]]
    subtitle <- paste0("s = ", signif(s, 3))

    clues <- ggplot(this) +
        geom_raster(aes(x = gen, y = freq, fill = exp(log_prob))) +
        # Time runs from the past (left) to the present (right).
        scale_x_reverse() +
        labs(
            title = title,
            subtitle = subtitle,
            x = "Generations ago",
            y = "Allele freq."
        ) +
        turkana_theme +
        theme(
            legend.position = "none",
            axis.text.x = element_text(angle = 45, hjust = 1),
            plot.subtitle = element_text(size = 10, face = "italic")
        )

    turkana_save(
        file.path(snakemake@params[["plot_dir"]], paste0("clues_", loc, ".pdf")),
        clues, width = 3, asp = 1
    )

}

### Multi-panel with multiple sites for the paper

In [89]:
# Rows for the multi-panel figure: loci whose estimate lies strictly inside
# (0, 0.197), with a two-line caption (position, then s) attached.
panels_data <- mutate(
    filter(data, s_estimate > 0, s_estimate < 0.197),
    caption_str = paste0("Pos. ", scales::comma(locus), "\n", "s = ", s_estimate)
)

# Unique (locus, s_estimate) pairs, used for the per-panel text labels.
s_estimates <- distinct(panels_data, locus, s_estimate)

In [94]:
# Multi-panel figure: one facet per locus showing exp(log_prob) over
# generations and allele frequency, with the s estimate printed in white
# inside each panel.
fig <- ggplot(panels_data) +
    geom_raster(aes(x = gen, y = freq, fill = exp(log_prob))) +
    geom_text(
        data = s_estimates,
        aes(x = 280, y = 1.0, label = paste0("s = ", signif(s_estimate, 3))),
        colour = "white",
        hjust = 0,
        vjust = 1
    ) +
    # Facet label is built from `locus`, which both layer data frames carry.
    facet_wrap(
        vars(paste0("Pos. ", scales::label_comma(drop0trailing = TRUE)(locus)))
    ) +
    scale_x_reverse() +
    labs(x = "Generations ago", y = "Allele freq.") +
    turkana_theme +
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.subtitle = element_text(size = 10, face = "italic"),
        panel.grid = element_blank()
    )

turkana_save("fig/paper/clues.pdf", fig, width = 6, asp = 1)