-
Notifications
You must be signed in to change notification settings - Fork 3
/
exercise_1_grid.R
66 lines (52 loc) · 2.28 KB
/
exercise_1_grid.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
library(slendr)
setup_env()
library(tidyverse)
TRUE_NE <- 6543
simulate_afs <- function(Ne) {
model_path <- tempfile()
on.exit(unlink(model_path, recursive = TRUE, force = TRUE))
pop <- population("pop", N = Ne, time = 100000)
model <- compile_model(pop, generation_time = 1, direction = "backward",
path = model_path, overwrite = TRUE, force = TRUE)
ts <-
msprime(model, sequence_length = 10e6, recombination_rate = 1e-8) %>%
ts_mutate(mutation_rate = 1e-8)
samples <- ts_samples(ts) %>% sample_n(10) %>% pull(name)
afs <- ts_afs(ts, list(samples), polarised = TRUE)
afs
}
Ne_grid <- seq(from = 1000, to = 30000, by = 1000)
if (file.exists("solutions/data/exercise_1_grid.rds")) {
afs_grid <- readRDS("solutions/data/exercise_1_grid.rds")
} else {
afs_grid <- lapply(Ne_grid, simulate_afs)
dir.create("solutions/data", showWarnings = TRUE, recursive = TRUE)
saveRDS(afs_grid, "solutions/data/exercise_1_grid.rds")
}
afs_observed <- c(2520, 1449, 855, 622, 530, 446, 365, 334, 349, 244, 264, 218,
133, 173, 159, 142, 167, 129, 125, 143)
# Plot the observed AFS and overlay the simulated AFS vectors on top of it
plot(afs_observed, type = "b", col = "red", lwd = 3, xlab = "allele count bin", ylab = "count")
for (i in seq_along(Ne_grid)) {
lines(afs_grid[[i]], lwd = 0.5)
}
# Compute mean-squared error of the AFS produced by each Ne value across the grid
errors <- sapply(afs_grid, function(afs) {
sum((afs - afs_observed)^2) / length(afs)
})
# plot the errors, highlight the most likely value
plot(Ne_grid, errors)
abline(v = Ne_grid[which.min(errors)], col = "green")
abline(v = TRUE_NE, col = "purple")
legend("topright", legend = c(paste("inferred Ne =", Ne_grid[which.min(errors)]),
paste("true Ne =", TRUE_NE)),
col = c("green", "purple"), lty = 2)
# Plot the AFS again, highlighting the most likely spectrum
plot(afs_observed, type = "b", col = "red", lwd = 1, xlab = "allele count bin", ylab = "count")
for (i in seq_along(Ne_grid)) {
color <- if (i == which.min(errors)) "green" else "gray"
width <- if (i == which.min(errors)) 2 else 0.5
lines(afs_grid[[i]], lwd = width, col = color)
}
legend("topright", legend = paste("Ne =", Ne_grid[which.min(errors)]),
col = "green", lty = 2)