In [1]:
library(tidyverse)

── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.3     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.4.0     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



In [2]:
# Load the tab-separated signals table and attach the labels used for plotting:
#   str_ws       - "<window_size>bp subwindows" as a factor, levels in natural
#                  (numeric-aware) order
#   case         - factor whose levels follow the order listed below
#   feature_full - display name of each statistic, levels in the listed order
df <- read_delim("../output/signals/signals.csv", delim = "\t", col_types = cols()) %>%
  mutate(
    str_ws = paste0(window_size, "bp subwindows"),
    str_ws = factor(str_ws, levels = str_sort(unique(str_ws), numeric = TRUE)),
    # Identity recode: every label maps to itself, so only the level order
    # is affected. setNames(nm = v) names the vector by its own values.
    case = recode_factor(
      case,
      !!!setNames(nm = c(
        "Ace",
        "CHKoV",
        "Cyp",
        "Average hard sweep\n(s=0.01)",
        "Hard sweep example 1\n(s=0.01)",
        "Hard sweep example 2\n(s=0.01)",
        "Average hard sweep\n(s=100)",
        "Hard sweep example 1\n(s=100)",
        "Hard sweep example 2\n(s=100)"
      ))
    ),
    feature_full = recode_factor(
      feature,
      pi = "Pi",
      snps = "# SNPs",
      haps = "# Haplotypes",
      H1 = "H1",
      H12 = "H12",
      H2overH1 = "H2/H1",
      tajD = "Tajima's D"
    )
  )

In [3]:
# Pick 5 window sizes to use: the minimum, the three quartiles and the maximum
# of the window_size column.
# type = 1 (inverse empirical CDF) makes quantile() return only values that
# actually occur in the data. The default (type 7) may interpolate between two
# neighbouring window sizes; such a value matches no row in the %in% filter
# below, so that window size would silently vanish from the figures.
acceptable_ws <- quantile(
    df$window_size,
    probs = c(0, 0.25, 0.5, 0.75, 1),
    type = 1
)
df <- df %>%
    filter(window_size %in% acceptable_ws)

### Empirical sweeps

In [43]:
# Keep only the three empirical cases and derive the x coordinate.
# NOTE(review): subtracting 10 presumably centres the position index on the
# middle subwindow, and window_size / 2 presumably is the step between
# subwindows in bp -- confirm against the code that writes signals.csv.
data <- filter(df, case %in% c("Ace", "Cyp", "CHKoV"))
data <- mutate(
  data,
  position = position - 10,
  posinbp = position * (window_size / 2)
)

# One line per statistic; rows = case, columns = subwindow size.
# The y axis text and title are blanked in the theme below.
fig <- ggplot(data, aes(x = posinbp / 1000, y = value, colour = feature_full)) +
  geom_line() +
  facet_grid(
    rows = vars(case),
    cols = vars(str_ws),
    switch = "y",
    scales = "free_x"
  ) +
  scale_colour_brewer(palette = "Dark2", name = "Statistic") +
  labs(x = "Position (kb)", y = "Normalized statistic") +
  theme_minimal() +
  theme(
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0)
  )

ggsave(
  "../fig/methods/empirical-signals.pdf",
  plot = fig,
  width = 10,
  height = 4
)

### Averages and examples of sweeps

In [41]:

# Keep every case whose label contains "ard sweep" and flag the averaged ones.
# NOTE(review): subtracting 10 presumably centres the position index on the
# middle subwindow, and window_size / 2 presumably is the step between
# subwindows in bp -- confirm against the code that writes signals.csv.
data <- filter(df, str_detect(case, "ard sweep"))
data <- mutate(
  data,
  isAverage = str_detect(case, "Average"),
  position = position - 10,
  posinbp = position * (window_size / 2)
)

# Same layout as the empirical figure, but lines of "Average" cases are drawn
# thicker (0.8) than the individual examples (0.5); the size legend is hidden.
fig <- ggplot(
  data,
  aes(x = posinbp / 1000, y = value, colour = feature_full, size = isAverage)
) +
  geom_line() +
  facet_grid(
    rows = vars(case),
    cols = vars(str_ws),
    switch = "y",
    scales = "free_x"
  ) +
  scale_colour_brewer(palette = "Dark2", name = "Statistic") +
  scale_size_manual(values = c(0.5, 0.8), guide = "none") +
  labs(x = "Position (kb)", y = "Normalized statistic") +
  theme_minimal() +
  theme(
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    strip.text.y.left = element_text(angle = 0)
  )

ggsave(
  "../fig/methods/averages-and-examples.pdf",
  plot = fig,
  width = 10,
  height = 6
)

### Internal: 21x21x7 heatmap

In [6]:
library(cowplot)

In [7]:
# Re-read the full signals table (all window sizes) and keep the 'Cyp' case.
data <- read_tsv("../output/signals/signals.csv", col_types = cols())
data <- filter(data, case == "Cyp")

# ws_id: rank of each window size among the distinct sizes (1 = smallest),
# used as an evenly spaced y coordinate for the heatmap tiles.
data <- mutate(data, ws_id = as.numeric(factor(window_size)))

# Per-feature heatmaps, filled in by the loop that follows.
figs <- list()

In [8]:
# Build one bare heatmap per feature (position on x, window-size rank on y,
# value as a light-grey-to-black fill) and store it in `figs` under the
# feature's name. Axes, legend and padding are all removed.
for (feat in unique(data$feature)) {
  figs[[feat]] <- data %>%
    filter(feature == feat) %>%
    ggplot(aes(x = position, y = ws_id, fill = value)) +
    geom_tile() +
    scale_fill_gradient(low = "lightgrey", high = "black") +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    theme_void() +
    theme(legend.position = "none")
}

# Overlay the per-feature heatmaps on a single canvas as a diagonal stack:
# each panel is drawn at 60% of the canvas and shifted along the diagonal,
# the first figure getting the largest offset and the last one none.
n_figs <- length(figs)
if (n_figs == 0) {
    # Fail with a clear message instead of an out-of-bounds subscript error
    # (seq(1, 0) would iterate over c(1, 0)).
    stop("`figs` is empty: no heatmaps to compose", call. = FALSE)
}

# Loop invariants, computed once.
scale <- 0.6
# Offsets spread the panels evenly over the free (1 - scale) margin; a single
# panel needs no offset (and would otherwise divide by zero).
step <- if (n_figs > 1) (1 - scale) / (n_figs - 1) else 0

fig <- ggdraw()
for (ix in seq_along(figs)) {
    j <- n_figs - ix
    x <- step * j
    y <- step * j
    fig <- fig + draw_plot(figs[[ix]], x = x, y = y, scale = scale, halign = 0, valign = 0)
}
ggsave('../fig/internal/data-representation.pdf', width=4, height=4, plot=fig)