In [2]:
library(tidyverse)

In [3]:
library(cowplot)

In [4]:
source("utils/plot.R")

In [8]:
# Total number of allele copies scored per site. Sites whose alternate-allele
# count is 0 or N are dropped as non-segregating, and N is the denominator
# used to turn minor-allele counts into frequencies.
# NOTE(review): presumably 2 x 110 diploid samples - confirm against the input.
N <- 220

In [16]:
# Build the folded site-frequency spectrum from the first Snakemake input:
#   1. keep only segregating sites (alternate count strictly between 0 and N),
#   2. fold every alternate count onto its minor-allele count,
#   3. total the number of sites per minor-allele count,
#   4. attach the matching minor-allele frequency.
# col_types=cols() lets readr guess the column types.
sfs <- read_tsv(snakemake@input[[1]], col_types=cols()) %>%
    filter(num_alternate != 0 & num_alternate != N) %>%
    group_by(num_minor=pmin(num_alternate, N - num_alternate)) %>%
    summarise(num_sites=sum(num_sites)) %>%
    mutate(freq=num_minor/N)

In [23]:
# Calibrate theta on the singleton class so that observed/expected equals 1
# at num_minor == 1.
#
# The singleton row is looked up by value rather than by position: if the
# input happens to contain no singletons, the first row of `sfs` would be a
# different frequency class and theta would be silently mis-estimated.
reference <- sfs$num_sites[which(sfs$num_minor == 1)]
if (length(reference) != 1) {
    stop("expected exactly one row with num_minor == 1 in sfs, found ",
         length(reference), call.=FALSE)
}
# Expected-count coefficient of the singleton class: (1/i + 1/(N - i)) at i = 1.
phi_1 <- 1 + 1/(N - 1)
theta <- reference/phi_1

In [35]:
# Add the neutral expectation for every folded frequency class and the
# observed/expected ratio.
# Expected folded SFS from https://doi.org/10.1006/tpbi.1995.1025:
#   theta * (1/i + 1/(N - i)) / (1 + [i == N - i])
# The divisor is 2 only for the class where minor and major counts coincide,
# and 1 everywhere else.
sfs <- mutate(
    sfs,
    expected=theta*(1/num_minor + 1/(N - num_minor))/
        ifelse(num_minor == N - num_minor, 2, 1),
    scaled=num_sites/expected
)

In [57]:
# Minor-allele counts to highlight in the figure, with the matching frequency
# (x position of the marker) and the text label drawn next to it.
weirdness <- tibble(
    location=c(46, 108),
    freq=location/N,
    label=paste('n =', location)
)

In [59]:
# Scatter plot of the observed/expected ratio of the folded SFS against
# minor-allele frequency, with dashed, labelled reference lines at the
# frequencies listed in `weirdness`.
fig <- ggplot(sfs) +
    # Reference lines and labels go first so the points are drawn on top.
    geom_vline(data=weirdness, aes(xintercept=freq), colour='darkred', linetype='dashed') +
    # hjust=1 right-aligns the label; nudge_x shifts it just left of its line.
    geom_text(data=weirdness, aes(x=freq, y=0.75, label=label), colour='darkred', hjust=1, nudge_x=-0.01) +
    geom_point(aes(x=freq, y=scaled), size=1) +
    # NOTE(review): no layer above maps the colour aesthetic, so this scale and
    # the legend settings below currently appear to have no visible effect.
    scale_colour_brewer(palette="Dark2") +
    labs(
        x = "Frequency",
        y = "Observed/Expected",
        title = "Genome-wide SFS"
    ) +
    # basic_theme is not defined in this script; presumably it comes from
    # utils/plot.R - confirm.
    basic_theme +
    theme(
        legend.position='top',
        legend.title=element_blank(),
        # No trailing comma after the last argument: it would pass an empty
        # argument to theme(), which not every ggplot2 release tolerates.
        panel.grid.major.y = element_blank()
    )

# Write the figure to the first Snakemake output. Only the width and aspect
# ratio are given; base_height is passed as NULL.
save_plot(
    filename=snakemake@output[[1]],
    plot=fig,
    base_width=4,
    base_asp=4/3,
    base_height=NULL
)