Skip to content

Commit

Permalink
version 0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
emmanuelparadis authored and gaborcsardi committed Jan 2, 2013
1 parent b46bc96 commit 16f410a
Show file tree
Hide file tree
Showing 15 changed files with 195 additions and 97 deletions.
18 changes: 9 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
Package: coalescentMCMC
Version: 0.1
Date: 2012-10-02
Version: 0.2
Date: 2013-01-02
Title: MCMC Algorithms for the Coalescent
Author: Emmanuel Paradis
Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
Depends: ape, phangorn, adegenet
Depends: ape, pegas, phangorn, adegenet, coda
ZipData: no
Description: coalescentMCMC provides a flexible framework for
coalescent analyses in R. It includes a main function running
the MCMC algorithm, auxilliary functions for tree
rearrangement, and some functions to compute population genetic
parameters.
coalescent analyses in R. It includes a main function running the
MCMC algorithm, auxilliary functions for tree rearrangement, and some
functions to compute population genetic parameters.
License: GPL (>= 2)
Packaged: 2012-10-06 05:50:53 UTC; paradis
Packaged: 2013-07-19 10:01:33 UTC; paradis
NeedsCompilation: yes
Repository: CRAN
Date/Publication: 2012-10-08 14:13:10
Date/Publication: 2013-07-19 16:23:13
22 changes: 14 additions & 8 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
0632cc7589c46bbf4981f0a778f1ef2a *COPYING
f61636fd6fd3b627a8310a2a8fe64787 *DESCRIPTION
683157750d2460689cd787a3c35a251b *NAMESPACE
a046ffd3ed9110166958666b77a64f51 *R/coalescentMCMC.R
41db9f31b72a1a1d407dcdc7ef1f3442 *DESCRIPTION
67aaf5cffbeea97b2eeaf843d51736a8 *NAMESPACE
cbba597e045c358f3de88d65790d0ddf *NEWS
76fc210f685a966a80af3d766bf2a5f9 *R/coalescentMCMC.R
a52942fca58804291fa4cca2a5e9c3dd *R/dcoal.R
bec3a8f0654c0c3a8c5dec49c6e49f42 *R/treeOperators.R
01edab916944e326887dcf69a832c144 *R/treeOperators.R
d3bc00b2363b4155824eff32034801ad *R/zzz.R
e5199c70ce61c9060f806ed82cb289d4 *Thanks
dfa3ae7f0e0bc0918691ff5894094c03 *inst/doc/CoalescentModels.R
38240cef46b41eeb490ec9514930a48c *inst/doc/CoalescentModels.Rnw
237b4a43f436fee20190a325f62e3b9a *inst/doc/CoalescentModels.pdf
0bcdbabfb7092b7640dfcfbef43d486a *inst/doc/CoalescentModels.pdf
89de4cff6c4b5e549a113a2a9fbdb011 *inst/doc/Running_coalescentMCMC.R
e23289a4d68e33f3c7c9a15ab6b41b7a *inst/doc/Running_coalescentMCMC.Rnw
30fc10e423f522decaf2285509e34aa3 *inst/doc/Running_coalescentMCMC.pdf
025957c8904dfd4a81e78a623f9344a5 *man/coalesceMCMC.Rd
9a296a8f19c56174dc7e784bfaa19c79 *inst/doc/Running_coalescentMCMC.pdf
f5d8bb0591f7b77f4f0741657243cce6 *man/coalesceMCMC.Rd
9aa980aea36886b18ac6a09b823765da *man/coalescentMCMC-internal.Rd
75cee3886d17bba64ac13c52d9e87beb *man/dcoal.Rd
822affb72dc7c49b0b62f4d05bb47acf *man/treeOperators.Rd
57079d8225ffe76e970003dbf772df44 *src/coalescentMCMC.c
2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars
412493cc6a8a250e740281af9e1c9086 *src/coalescentMCMC.c
38240cef46b41eeb490ec9514930a48c *vignettes/CoalescentModels.Rnw
9d144704316ef46e6c8ec412bc1fde3e *vignettes/Running_coalescentMCMC-001.pdf
8dc91bbd23b6a474511cec93c97a6534 *vignettes/Running_coalescentMCMC-002.pdf
Expand Down
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
useDynLib(coalescentMCMC)

exportPattern("^[^\\.]")

import(ape)
import(phangorn)

import(pegas)
import(coda)
6 changes: 6 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CHANGES IN coalescentMCMC VERSION 0.2


NEW FEATURES

o The main MCMC has been completely rewritten.
57 changes: 38 additions & 19 deletions R/coalescentMCMC.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
## coalescentMCMC.R (2012-09-26)
## coalescentMCMC.R (2013-07-19)

## Run MCMC for Coalescent Trees

## Copyright 2012 Emmanuel Paradis
## Copyright 2012-2013 Emmanuel Paradis

## This file is part of the R-package `coalescentMCMC'.
## See the file ../COPYING for licensing issues.

coalescentMCMC <-
function(x, ntrees = 1e4, burnin = ntrees,
function(x, ntrees = 3000, burnin = 1000, frequency = 1,
tree0 = NULL, quiet = FALSE)
{
if (is.null(tree0)) {
Expand All @@ -19,47 +19,66 @@ coalescentMCMC <-

n <- Ntip(tree0)
nodeMax <- 2*n - 1
nOut <- ntrees# + burnin

TREES <- vector("list", ntrees)
LL <- numeric(ntrees)
getlogLik <- function(phy, X) phangorn::pml(phy, X)$logLik #phangorn:::pml6(phy, X)

lnL0 <- phangorn::pml(tree0, X)$logLik
TREES <- vector("list", nOut)
LL <- numeric(nOut)
TREES[[1L]] <- tree0
lnL0 <- getlogLik(tree0, X)
LL[[1L]] <- lnL0

i <- j <- 0L # j: number of tree proposals
i <- 2L
j <- 0L # number of accepted trees
k <- 0L # number of sampled trees

if (!quiet) {
cat("Running the Markov chain:\n")
cat("Nb of proposed trees Nb of accepted trees\n")
cat(" Number of trees to output:", ntrees, "\n")
cat(" Burn-in period:", burnin, "\n")
cat(" Sampling frequency:", frequency, "\n")
cat(" Number of generations to run:", ntrees * frequency + burnin, "\n")
cat("Generation Nb of accepted trees\n")
}

while (i <= ntrees) {
while (k < nOut) {
if (!quiet)
cat("\r ", j, " ", i, " ")
j <- j + 1L
cat("\r ", i, " ", j, " ")

tr.b <- NeighborhoodRearrangement(tree0, n, nodeMax)
if (!(i %% frequency) && i > burnin) {
k <- k + 1L
TREES[[k]] <- tr.b
}
## do TipInterchange() every 10 steps:
## tr.b <-
## if (!i %% 10) TipInterchange(TREES[[i]], n)
## else NeighborhoodRearrangement(TREES[[i]], n, nodeMax)
### see the note about CladeInterchange() in 'dcoal.R'
lnL.b <- phangorn::pml(tr.b, X)$logLik
lnL.b <- getlogLik(tr.b, X)
LL[[i]] <- lnL.b
i <- i + 1L
ACCEPT <- if (is.na(lnL.b)) FALSE else {
if (lnL.b >= lnL0) TRUE
else rbinom(1, 1, exp(lnL.b - lnL0))
}
if (ACCEPT) {
j <- j + 1L
lnL0 <- lnL.b
tree0 <- tr.b
if (j > burnin) {
i <- i + 1L
LL[i] <- lnL0
TREES[[i]] <- tree0
}
}
}

dim(LL) <- c(i - 1, 1)
colnames(LL) <- "logLik"
LL <- mcmc(LL, start = 1, end = i - 1)

class(TREES) <- "multiPhylo"
assign(".TREES", TREES, envir = .coalescentMCMCenv)
## TREES <- .compressTipLabel(TREES)
if (!quiet) cat("\nDone.\n")
list(trees = TREES, logLik = LL)
#list(mcmc = LL, trees = TREES)
LL
}

getMCMCtrees <- function() get(".TREES", envir = .coalescentMCMCenv)
99 changes: 58 additions & 41 deletions R/treeOperators.R
Original file line number Diff line number Diff line change
@@ -1,70 +1,87 @@
## treeOperators.R (2012-09-19)
## treeOperators.R (2013-01-10)

## Trees Operators for Running MCMC

## Copyright 2012 Emmanuel Paradis
## Copyright 2012-2013 Emmanuel Paradis

## This file is part of the R-package `coalescentMCMC'.
## See the file ../COPYING for licensing issues.

getIndexEdge <- function(tip, edge)
## 'integer(1)' mustn't be substituted by '0L' except if 'DUP = TRUE':
.C("get_single_index_integer", as.integer(edge[, 2L]),
as.integer(tip), integer(1L), PACKAGE = "coalescentMCMC",
NAOK = TRUE, DUP = FALSE)[[3L]]

getIndexEdge2 <- function(node, edge)
.C("get_two_index_integer", as.integer(edge[, 1L]),
as.integer(node), integer(2L), PACKAGE = "coalescentMCMC",
NAOK = TRUE, DUP = FALSE)[[3L]]

NeighborhoodRearrangement <- function(phy, n, nodeMax)
{
##phy <- tr <- rcoal(n <- 10); nodeMax <- 2*n - 1
##plot(tr); nodelabels(); axisPhylo()

THETA <- pegas::theta.tree(phy, 1)$theta
bt <- c(rep(0, n), branching.times(phy))
target <- sample((n + 2):nodeMax, size = 1)
i1 <- which(phy$edge[, 2] == target)
parent <- phy$edge[i1, 1]
i2 <- which(phy$edge[, 1] == target) # the 2 descendants from 'target'
i3 <- which(phy$edge[, 1] == parent) # this includes i1, so:
i3 <- i3[!i3 %in% i1]
sister <- phy$edge[i3, 2]
select <- sample(c(TRUE, FALSE))
i2.move <- i2[select]
i2.stay <- i2[!select]
phy$edge[i3, 2] <- child2move <- phy$edge[i2.move, 2]
child2stay <- phy$edge[i2.stay, 2]
phy$edge[i2.move, 2] <- sister
## select one internal node excluding the root
target <- sample((n + 2):nodeMax, size = 1L)

e <- phy$edge # local copy

### i1, i2, and i3 are edge indices
### target, anc, and sister are node indices

## i1 <- which(e2 == target)
i1 <- getIndexEdge(target, e)
anc <- e[i1, 1L] # the ancestor of 'target'
## i2 <- which(e1 == target)
i2 <- getIndexEdge2(target, e) # the 2 edges where 'target' is basal
## i3 <- which(e1 == anc)
i3 <- getIndexEdge2(anc, e) # this includes i1, so:
i3 <- i3[i3 != i1]
sister <- e[i3, 2L] # the sister-node of 'target'
sel <- sample(2L, size = 1L)
i2.move <- i2[sel]
i2.stay <- i2[-sel]
phy$edge[i3, 2L] <- child2move <- e[i2.move, 2L]
child2stay <- e[i2.stay, 2L]
phy$edge[i2.move, 2L] <- sister

## now adjust branch lengths:
## adjust the branch length that was subtending 'sister':
phy$edge.length[i3] <- bt[parent] - bt[child2move]
## random age for 'target' between the ones of 'sister' and 'parent':
## newage <- runif(1, bt[sister], bt[parent])
newage <- (bt[parent] + max(bt[sister], bt[child2stay]))/2
phy$edge.length[i1] <- bt[parent] - newage
phy$edge.length[i3] <- bt[anc] - bt[child2move]
## random age for 'target' between the ones of 'sister' and 'anc':
agemin <- max(bt[sister], bt[child2stay])
pmax <- 1 - exp(-THETA * (bt[anc] - agemin))
p <- runif(1, 0, pmax)
newage <- -log(1 - p) / THETA + agemin
## alternative with average of ages:
## newage <- (bt[anc] + max(bt[sister], bt[child2stay]))/2
phy$edge.length[i1] <- bt[anc] - newage
phy$edge.length[i2.move] <- newage - bt[sister]
## adjust the branch length below the child that has not been moved:
phy$edge.length[i2.stay] <- newage - bt[child2stay]

attr(phy, "order") <- NULL
phy <- reorder(phy)
newNb <- integer(nodeMax)
newNb[n + 1L] <- n + 1L
sndcol <- phy$edge[, 2] > n
phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <- (n + 2):nodeMax
phy$edge[, 1] <- newNb[phy$edge[, 1]]
sndcol <- phy$edge[, 2L] > n
phy$edge[sndcol, 2L] <- newNb[phy$edge[sndcol, 2L]] <- (n + 2):nodeMax
phy$edge[, 1L] <- newNb[phy$edge[, 1L]]
phy
##plot(phy); nodelabels(); axisPhylo()
}

TipInterchange <- function(phy, n)
{
getIndexEdge <- function(tip, edge)
## 'integer(1)' mustn't be substituted by '0L' except if 'DUP = TRUE':
.C("get_single_index_integer", as.integer(edge[, 2L]),
as.integer(tip), integer(1L),
PACKAGE = "coalescentMCMC", NAOK = TRUE, DUP = FALSE)[[3L]]

e <- phy$edge
repeat {
chosen <- sample(n, size = 2L)
i1 <- getIndexEdge(chosen[1], phy$edge)
i2 <- getIndexEdge(chosen[2], phy$edge)
## check that the two tips in 'chosen' are not sisters
if (phy$edge[i1, 1L] != phy$edge[i2, 1L]) break
k <- sample(n, size = 2L)
i1 <- getIndexEdge(k[1], e)
i2 <- getIndexEdge(k[2], e)
## check that the two tips in 'k' are not sisters
if (e[i1, 1L] != e[i2, 1L]) break
}

##phy$edge[c(i2, i1), 2] <- chosen
phy$tip.label[chosen] <- phy$tip.label[rev(chosen)]
phy$tip.label[k] <- phy$tip.label[rev(k)]
phy
}

Expand Down
10 changes: 10 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
## zzz.R (2013-07-19)

## Library Loading

## Copyright 2013 Emmanuel Paradis

## This file is part of the R-package `coalescentMCMC'.
## See the file ../COPYING for licensing issues.

.coalescentMCMCenv <- new.env()
9 changes: 9 additions & 0 deletions inst/doc/CoalescentModels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### R code from vignette source 'CoalescentModels.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: CoalescentModels.Rnw:20-21
###################################################
options(width=60)


Binary file modified inst/doc/CoalescentModels.pdf
Binary file not shown.
9 changes: 9 additions & 0 deletions inst/doc/Running_coalescentMCMC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### R code from vignette source 'Running_coalescentMCMC.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: Running_coalescentMCMC.Rnw:20-21
###################################################
options(width=60)


Binary file modified inst/doc/Running_coalescentMCMC.pdf
Binary file not shown.
28 changes: 18 additions & 10 deletions man/coalesceMCMC.Rd
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
\name{coalescentMCMC}
\alias{coalescentMCMC}
\alias{getMCMCtrees}
\title{Run MCMC for Coalescent Trees}
\description{
This function runs a simple Markov chain Monte Carlo (MCMC) algorithm
to generate a set of trees which is returned with their likelihoods.
This function runs a Markov chain Monte Carlo (MCMC) algorithm to
generate a set of trees which is returned with their likelihoods.

\code{getMCMCtrees} extracts the trees from the last MCMC run.
}
\usage{
coalescentMCMC(x, ntrees = 1e4, burnin = ntrees,
coalescentMCMC(x, ntrees = 3000, burnin = 1000, frequency = 1,
tree0 = NULL, quiet = FALSE)

getMCMCtrees()
}
\arguments{
\item{x}{a set of DNA sequences, typically an object of class
\code{"DNAbin"} or \code{"phyDat"}.}
\item{ntrees}{the number of trees to output.}
\item{burnin}{the number of trees to discard as ``burn-in''.}
\item{frequency}{the frequency at which trees are sampled.}
\item{tree0}{the initial tree of the chain; by default, a UPGMA
tree with a JC69 distance is generated.}
\item{quiet}{a logical specifying whether to print the numbers of trees
Expand All @@ -25,13 +29,16 @@ coalescentMCMC(x, ntrees = 1e4, burnin = ntrees,
the ``neighborhood rearrangement'' operation (Kuhner et al., 1995) and
Hastings's ratio for acceptance/rejection of the proposed tree. This
can be modified easily.
The number of generations of the chain is determined by: `frequency'
multiplied by `ntrees' + `burnin'. Only the `ntrees' trees are output
whereas all the log-likelihood values are output.
}
\value{
a list with the following elements:
an object of class \code{"coda"} with the log-likelihood of each tree.

\item{trees}{an object of class \code{"multiPhylo"} containing the
`ntrees' trees.}
\item{logLik}{a numerical vector with the log-likelihood of each tree.}
The list of trees is returned in the hidden object \code{.TREES} which
can be extracted with \code{getMCMCtrees}.
}
\references{
Hastings, W. K. (1970) Monte Carlo sampling methods using Markov
Expand All @@ -48,7 +55,8 @@ coalescentMCMC(x, ntrees = 1e4, burnin = ntrees,
\examples{
\dontrun{
data(woodmouse)
system.time(out <- coalescentMCMC(woodmouse)) # circa 2.5 mins
plot(out$logLik, type = "l")
system.time(out <- coalescentMCMC(woodmouse)) # circa 12 sec.
plot(out)
getMCMCtrees() # returns 3000 trees
}}
\keyword{models}

0 comments on commit 16f410a

Please sign in to comment.