# Install

To install, first create the conda environment from `environment.yml`, then run the following in R:

```R
install.packages("jackalope", type="source")
```

Note: see <https://lucasnell.github.io/jackalope/index.html#enabling-openmp> for instructions on enabling OpenMP, which is required for jackalope to use multiple threads.

# Salmonella

This simulates *Salmonella* data using a reference genome and a tree.

In [1]:
# Attach jackalope, the package used for every simulation step below.
library(jackalope)

# Confirm that jackalope was compiled with OpenMP support.
# `using_openmp()` is not exported, hence the `:::` accessor.
jackalope:::using_openmp()

In [2]:
# Read the gzipped reference genome FASTA.
reference <- read_fasta(file.path("input", "S_HeidelbergSL476.fasta.gz"))

# Rename the chromosome; the result is not re-assigned because
# `set_names()` is called for its effect on `reference` itself.
reference$set_names("NC_011083")

# Display a summary of the reference genome.
reference

< Set of 1 chromosomes >
# Total size: 4,888,768 bp
  name                             chromosome                             length
NC_011083  AGAGATTACGTCTGGTTGCAAGAGATCA...ATAACGTGCTGTAATTTTTAAAATAATA   4888768

In [3]:
library(ape)

# Read the phylogeny and re-root it on the tip labelled "reference".
# `resolve.root = TRUE` asks ape to return a bifurcating root.
tree <- read.tree(file.path("input", "salmonella.tre"))
tree <- root(tree, outgroup = "reference", resolve.root = TRUE)

# Uncomment to draw the tree in the notebook:
# options(repr.plot.width = 8, repr.plot.height = 12)
# plot(tree)

# Display a summary of the rooted tree.
tree


Phylogenetic tree with 60 tips and 59 internal nodes.

Tip labels:
  reference, SH12-013, SH12-014, SH10-015, SH10-014, SH11-002, ...
Node labels:
  Root, 1.000000, 0.000000, 0.000000, 1.000000, 0.000000, ...

Rooted; includes branch lengths.

In [4]:
# Substitution model: JC69 with the parameters below.
sub <- sub_JC69(lambda = 1e-5, mu = 1, invariant = 0.5)

# Insertions and deletions are disabled (NULL); the commented-out calls
# show the indel settings that can be switched back on.
ins <- NULL  # indels(rate = 1e-4, max_length = 60, a = 1.60)
del <- NULL  # indels(rate = 1e-4, max_length = 60, a = 1.51)

# Evolve the reference along the tree to create the haplotypes.
ref_haplotypes <- create_haplotypes(
  reference,
  haps_phylo(tree),
  sub = sub,
  ins = ins,
  del = del
)

# Display a summary of the simulated haplotypes.
ref_haplotypes

                              << haplotypes object >>
# Haplotypes: 60
# Mutations: 991

                          << Reference genome info: >>
< Set of 1 chromosomes >
# Total size: 4,888,768 bp
  name                             chromosome                             length
NC_011083  AGAGATTACGTCTGGTTGCAAGAGATCA...ATAACGTGCTGTAATTTTTAAAATAATA   4888768

In [5]:
# Write the simulated haplotypes' variants as a compressed VCF using the
# "output/haplotypes" prefix.
write_vcf(
  ref_haplotypes,
  out_prefix = "output/haplotypes",
  compress = TRUE
)

In [6]:
# Sequencing design: target per-sample coverage and read length (bp).
coverage <- 30
read_length <- 250

# One simulated sample per tree tip. `tip.label` is spelled out in full:
# `tree$tip` only worked through `$` partial matching.
n_samples <- length(tree$tip.label)

# Total reads over all samples:
#   genome size * coverage * samples / read length.
# `sum()` keeps this a single number even if the reference has more than
# one chromosome (`sizes()` is summed over all of them).
genome_size <- sum(reference$sizes())
n_reads <- round((genome_size * coverage * n_samples) / read_length)
cat("Number of reads:", n_reads)

# Make sure the output directory exists before writing reads; this is a
# no-op when it is already there.
reads_prefix <- "output/reads/initial"
dir.create(dirname(reads_prefix), recursive = TRUE, showWarnings = FALSE)

# Simulate paired-end Illumina reads, written as separate bgzip-compressed
# files (`sep_files = TRUE`).
# NOTE(review): `n_threads = 32` is hard-coded; lower it on machines with
# fewer cores.
illumina(
  ref_haplotypes,
  out_prefix = reads_prefix,
  sep_files = TRUE,
  n_reads = n_reads,
  frag_mean = read_length * 2 + 50,
  frag_sd = 100,
  compress = TRUE,
  comp_method = "bgzip",
  n_threads = 32,
  paired = TRUE,
  read_length = read_length
)

Number of reads: 35199130