Skip to content

Commit

Permalink
version 1.0-0
Browse files Browse the repository at this point in the history
  • Loading branch information
Christoph Heibl authored and gaborcsardi committed Oct 30, 2014
0 parents commit 83855c9
Show file tree
Hide file tree
Showing 116 changed files with 19,319 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,17 @@
Package: megaptera
Type: Package
Title: MEGAPhylogeny Techniques in R
Version: 1.0-0
Date: 2014-10-30
Author: Christoph Heibl
Maintainer: Christoph Heibl <christoph.heibl@gmx.net>
Depends: R (>= 2.10), snowfall, ips
Imports: ape, seqinr, methods, RPostgreSQL, XML
Suggests: knitr
Description: Given a set of taxa and marker, the goal is build a complete and correct sequence alignment for phylogenetic analysis.
License: GPL (>= 2)
VignetteBuilder: knitr
Packaged: 2014-11-13 06:55:15 UTC; stoffi
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2014-11-13 08:44:56
115 changes: 115 additions & 0 deletions MD5
@@ -0,0 +1,115 @@
3e3aed980babc3def9fc8afb5d0a65a6 *DESCRIPTION
95a6567b025c7f17ff22eec12e531526 *NAMESPACE
48558e57bdba209cae6a134f80911bbd *R/EFetchLocus.R
786c38d04e8a0d384f39b1f318fd6864 *R/MAD.R
a9d4e41467bd0c9ad5483ee37659fb40 *R/NCBI.wrap.R
fc8e7e2e3b74cc74db844f3bc5cb5b6f *R/addNbAcc.R
b5b41feced1455a8833b8f3fc960c151 *R/addTip.R
0da22490035327ac468070c2f7a202c1 *R/alignGenus.R
9d4cc4105ebd561464ec941812553677 *R/calcDistToRef.R
c895d3c9137988ae48465b6447331d6c *R/check.Clades.R
c610bd26dfe216598c7c24b62172f478 *R/check.Coverage.R
06b3550f5f2298d63fdd184d57af0dfb *R/check.Markers.R
3f8136abba8feece2c10786fa73c0676 *R/check.NameChanges.R
285d072dda773a0ae5d32e36f89cd25c *R/check.Species.R
a45c51cd591c8358e0851f47beed5c38 *R/check.TaxAgainstPhy.R
c9c7fb32411e1d614676e8c952facbb4 *R/coverage.R
9dad9c8e2b7bbd3e2090de4bb7a56fe0 *R/dbAutofillTaxonomy.R
87dcc5d7f95cf818d3c2ffc0056e211e *R/dbDeleteSpec.R
dc4ea6ea4afcced33d7ddc838218ed16 *R/dbPars.R
d8205ccd165788178b33fb3fe207b194 *R/dbReadDNA.R
6dad0ba7b85b17788d36db5e078d17f9 *R/dbReadReference.R
adfc74497e5b4192b818adbc748429de *R/dbReadTaxomony.R
c4bb63d99156752cc75057dfc6bb3095 *R/dbUpdateLocus.R
1d694d672cf192859c1ceb16364d14f3 *R/dbUpdateTaxonomy.R
498622834c18a4f1ea955ae390200d03 *R/dbWriteDNA.R
7772cac5fee3967939b9f9757b5b0d11 *R/dbWriteString.r
94d087225730331de5d15c867d0fccfc *R/downloadSequences.R
720051a59f3cf97716dd4c3992306f14 *R/esearch.R
bd1559c454e178a22cc19fb126b2f89a *R/extractLocus.R
0ef7a14bb085049f8db73646f671334f *R/filter.alignment.R
7e8ea880a587fae85d2e450b2dcacebe *R/fixTaxonomy.R
f34603e1877ffc4c2193b0e809cb8b18 *R/fleshoutGuidetree.R
899486c798287f644f981239f4382d5b *R/funX.R
438e9ff9a203ca84e83efd680e2ab1e6 *R/getLineage.R
96a37217d38151205a4a98ee951a4d5d *R/is.alignable.R
05aa91a37cd609f97bec285e8c10a024 *R/locus.R
36f47fc7afe5b3360398814ccbb1cb06 *R/mafft.merge.R
43e2cbe34a3844b94192f596e64d26c7 *R/markersGenbank.R
8ee75eab357a95a0947003ef1963e992 *R/maxDist.R
0c10569fb1d6b90aec1a4aae7c223652 *R/meanPairwiseIdentity.R
3a7285842949c7621ca91451eab4ec08 *R/megapteraPars.R
a988f7ccfcad6e1661f54a5dffe3fd4a *R/megapteraProj.R
979cbb2a17ae81a25c5ee4c07e944214 *R/megaptera_heatmap.R
8c9737677a3b5c377d86740568c714bf *R/myDist.R
6daa04c9154670e3d7ee3d44b2216369 *R/ncbiLineage.R
3a5298c1c14cb68a3a0d81fc87438640 *R/ncbiTaxonomy.R
9c6833936d3ef669d0135c78b293400e *R/nucleotideOverlap.R
de64b49214edc4f858235343fc9cadf6 *R/prune.phylo.rank.R
4a2a54e49d442fb57bed27bd533bcf58 *R/revCompTest.R
e33d00ed84d8bd86f6f9b157c232d595 *R/slog.R
72e7968dd9780ff8df9e52c4b96e8d39 *R/sortRanks.R
f67ead5593b7796f679c175c7945258c *R/specCons.R
77fbc80ac283b790fee6abd80e0f587e *R/speciesConsensus.R
ccb08a0a4fd21e7eb8f01defaabf2bae *R/splitGiTaxon.R
9c0bfe88dae41035b93d82d938eeca11 *R/sql.conform.R
da08c1da1469c25479e6d18d835ab589 *R/sql.wrap.R
2930c9ee1d8239713fde29caa059d675 *R/sqlTaxonomyHeader.R
8faa34c7d41837dd7e0087eb60b204f1 *R/stepA.R
d52bb04309566eb59de98554884a8717 *R/stepB.R
c571da0e3a4925704c580e798a5bff50 *R/stepC.R
f1c559011a25d937155d1b5dea4a2adf *R/stepD.R
9f9d5608cc3461cdfd1e763c82d02a1d *R/stepE.R
4b86fceca9327964c59a06f40a99b065 *R/stepF.R
88d834c57530bbd08ef82db9d83a34da *R/stepG.R
c325e8d611695fdb81baf383666e2cd0 *R/stepH.R
21b5be601e2db3399ad172845daa8249 *R/strip.infraspec.R
6d1c968a7430cbee1ceca24311bd51a6 *R/strip.spec.R
343524b9710043e94345be91f6b4eb0f *R/supermatrix.R
aca5f9ae1fe8a2b3b50cb3a88e0c9c3d *R/tax2tree.R
c103e729936acc2deaa410ae2be3166d *R/taxon.R
8f5eca0c280d8437834b832522004042 *R/term.R
3d90307fd26718a2f7f5453e1203d0c0 *R/whereToInsert.R
4c4533140cbf7701c87075c56ea21e98 *R/write.dna.spectable.R
d4417e83c58fb0f16d7ffd2fa6227728 *build/vignette.rds
536d055a97992ba61844b8119354ab4d *data/NCBIeaa.rda
fca17fc47d7990b3649f6b36eace1536 *data/cetacea.rda
ef33d2859b6753e69f684c416f2bb415 *data/input.rda
b5d74a849cba3e7c7e47db7ff6597254 *inst/doc/Getting-started.pdf
0ba17b221c72c2683ea8389075bb80a1 *inst/doc/Getting-started.rnw
92eb780cecc6fecf296e06109175a44d *man/NCBIeaa.Rd
315abc42430e84f0b3937a45295da5a4 *man/addTip.Rd
2db24e84fb95999509024ebb31205d55 *man/cetacea.Rd
5a67e67ec729f3554e146e1a35bb9972 *man/check.Function.Rd
3aeffee7d45bcb5701c9893c15bf7d86 *man/dbPars-class.Rd
2a1363e484830a4b6d9ff8e230ac2d89 *man/dbPars.Rd
e0b4704c047c52f5115e86974d2cc564 *man/dbReadTaxonomy.Rd
9ac9b2f87e39f041c0450397b4a06fdd *man/dbUpdateTaxonomy.Rd
9433a79ccbfd43e5b938090097b91d30 *man/fleshoutGuidetree.Rd
cc891c29089f309a8be0816fa61a4413 *man/io.dna.db.Rd
eb29477f635b2ce4a5eaa7956d2e8ee5 *man/locus-class.Rd
5806678388ec51384827a7dc599ec8e0 *man/locus.Rd
0cd50c7657d9b02ed75948025bd55ada *man/megaptera-internal.Rd
3c20241fc0f31a0e87d600ea46e3c8b4 *man/megaptera-package.Rd
bcffee8df4c4096d46f9f390d1de80ba *man/megapteraPars-class.Rd
92f6c5a1cdd8ae4c8fc27c2bd7c7d642 *man/megapteraPars.Rd
760806a74a7ce058d5a95e4e5ccf1d51 *man/megapteraProj-class.Rd
6db1da549850bd9aa4fa18d860e658e2 *man/megapteraProj.Rd
106bef8a86b94f9099c3976ff99fed26 *man/ncbiTaxonomy.Rd
d4da731131c18d024ea7b4009fc83ded *man/prune.phylo.rank.Rd
9c46f3adcb0a7ae2bab89681be5f15e3 *man/setLocus.Rd
952b16f00d8c07d43a3ebda87dd2f6cd *man/stepA.Rd
31ea4fc056c939065a2d0d48727a16d9 *man/stepB.Rd
3c7a2b6dfacdd13df32faf11c5c61d14 *man/stepC.Rd
39c9e7611bc109dc58d530fd70cc14f6 *man/stepD.Rd
62b72235fe55fc51c0d65fef3d580159 *man/stepE.Rd
80dfe7f151d3a03e062acd1677518f9f *man/stepF.Rd
b8b0ff9d20f2d3395f2339588795377c *man/stepG.Rd
5c80b9116e16a058408e754e6cf1d62c *man/stepH.Rd
1871697f9f30305ab9ea98c203362dbf *man/strip.infraspec.Rd
6cf13af3dfc61d6a416231e22ca0586e *man/taxon-class.Rd
87ea7bcb79f08fc988c86b53a4426b92 *man/taxon.Rd
1e64525ddbb9ac19b002a03ae12c7f83 *vignettes/Getting-started-concordance.tex
0ba17b221c72c2683ea8389075bb80a1 *vignettes/Getting-started.rnw
f023eb3b1bd56bc5c1480431479542de *vignettes/biology.bib
1e545dcdf6ef2b91204ca1b717455fb4 *vignettes/jbiogeo2.bst
14 changes: 14 additions & 0 deletions NAMESPACE
@@ -0,0 +1,14 @@
exportPattern("^[[:alpha:]]+")

import(ape)
importFrom(ips, deleteEmptyCells, trimEnds, mafft, gblocks,
write.fas, write.phy, write.nex, read.fas,
descendants, sister, noi, fixNodes,
tipHeights, terminal.clades)
import(methods)
importFrom(RPostgreSQL, PostgreSQL, dbConnect,
dbDisconnect, dbSendQuery, dbGetQuery,
dbWriteTable, dbReadTable, dbExistsTable, dbRemoveTable)
importFrom(seqinr, comp, c2s, s2c)
import(snowfall)
import(XML)
27 changes: 27 additions & 0 deletions R/EFetchLocus.R
@@ -0,0 +1,27 @@
## package: megaptera
## called by: markersGenbank
## author: Christoph Heibl (at gmx.net)
## last update: 2014-07-15

EFetchLocus <- function (gi){

## retrieve sequence information
## -----------------------------
x <- paste("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/",
"efetch.fcgi?db=nucleotide&id=", gi,
"&rettype=gb&retmode=xml", sep = "")
# xml <- scan(x, what = "c", quiet = TRUE, sep = "\n")
# write(xml, "test2.xml"); system("open -t test2.xml")
# x <- "test.xml"
x <- xmlTreeParse(x, getDTD = FALSE,
useInternalNodes = TRUE)

## get locus
## ---------
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='gene']/GBQualifier_value", xmlValue)
if ( is.null(gene) )
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='product']/GBQualifier_value", xmlValue)
if ( is.null(gene) )
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='note']/GBQualifier_value", xmlValue)
unique(gene)
}
11 changes: 11 additions & 0 deletions R/MAD.R
@@ -0,0 +1,11 @@
MAD <- function(DNAbin, model = "JC69"){

if ( nrow(DNAbin) > 1 ){
p.uncorr <- dist.dna(DNAbin, model = "raw", pairwise.deletion = TRUE)
p.corr <- dist.dna(DNAbin, model = model, pairwise.deletion = TRUE)
x <- mad(p.corr - p.uncorr, na.rm = TRUE)
} else {
x <- 0
}
x
}
10 changes: 10 additions & 0 deletions R/NCBI.wrap.R
@@ -0,0 +1,10 @@
NCBI.wrap <- function(x, BOOLEAN = "OR", field = "all"){
x <- paste(x, "[xxx]", sep = "")
x <- paste(x, collapse = paste(rep(" ", 2), collapse = BOOLEAN))
x <- paste("(", x, ")", sep = "")
xx <- vector()
for (i in field){
xx <- c(xx, gsub("xxx", i, x))
}
xx
}
17 changes: 17 additions & 0 deletions R/addNbAcc.R
@@ -0,0 +1,17 @@
addNbAcc <- function(tip, dbname = "fungi", pw = ""){
conn <- dbConnect(PostgreSQL(), dbname = dbname, user = "postgres", password = pw)
tip <- dbGetQuery(conn, paste("SELECT * FROM taxonomy WHERE spec='",
tip, "'", sep = ""))
dbDisconnect(conn)
tip <- tip[, grep("spec|_sel", names(tip))]
names(tip) <- gsub("_sel", "", names(tip))
tip[is.na(tip)] <- 0
tip <- tip[, tip[1, ] > 0]
tip <- paste(names(tip), tip, sep = "=")
tip <- gsub("^(spec=|_)", "", tip)
tip[1] <- gsub("_", " ", tip[1])
tip[1] <- paste("italic(\"", tip[1], "\")", sep = "")
tip[-1] <- paste("plain(\"", tip[-1], "\")", sep = "")
tip <- paste(tip, collapse = "*\" \"*")
parse(text = tip)
}
117 changes: 117 additions & 0 deletions R/addTip.R
@@ -0,0 +1,117 @@
addTip <- function(phy, tip, tax, insert = "crown", stem.edge = 0.5){

if ( !inherits(phy, "phylo") ) stop("'phy' is not of class 'phylo'")
# if ( !is.ultrametric(phy) ) stop("'phy' must be ultrametric")

tip <- gsub(" ", "_", tip)

if ( tip %in% phy$tip.label ) stop("'tip' is already contained in 'phy'")
insert <- match.arg(insert, c("crown", "stem", "randomly"))

# number of tips, internal nodes and edges:
nt <- Ntip(phy); ni <- Nnode(phy); ne <- Nedge(phy)

it <- ifelse(insert == "randomly", insert, paste("at", insert))
cat(" add", it, "of ")
an <- whereToInsert(phy, tax, tip)

if ( an <= nt ){

# cat(" - add to one congeneric species")
## add tip to one congeneric
## -------------------------
pretip <- an

## an: This is now the subtending (or stem) node
## of the node where the new tip is to be inserted
an <- phy$edge[phy$edge[, 2] == pretip, 1]

## new internal node number
newinternal <- descendants(phy, an, type = "i")
if ( length(newinternal) == 0 ){
newinternal <- an + 2
} else {
newinternal <- max(descendants(phy, an, type = "i")) + 2
}


# increase node number greater than 'pretip' by 1
# to create a gap to insert new tip at number pretip + 1
phy$edge[phy$edge > pretip] <- phy$edge[phy$edge > pretip] + 1
an <- an + 1 # step ancestral node accordingly
## create splits above and below gap
id <- which(phy$edge[, 1] == an & phy$edge[, 2] == pretip)
upper <- 1:id; lower <- (id + 1):ne
phy$edge[phy$edge >= newinternal] <- phy$edge[phy$edge >= newinternal] + 1
phy$edge[id, 2] <- newinternal

# add edges
phy$edge <- rbind(phy$edge[upper, ],
c(newinternal, pretip),
c(newinternal, pretip + 1),
phy$edge[lower, ])

# add edge.lengths
phy$edge.length <- c(phy$edge.length[head(upper, -1)],
phy$edge.length[id] * stem.edge,
rep(phy$edge.length[id] * (1 - stem.edge), 2),
phy$edge.length[lower])

# add tip.label
phy$tip.label <- c(phy$tip.label[1:pretip],
tip,
phy$tip.label[(pretip + 1):nt])

# ajust internal node number
phy$Nnode <- ni + 1 # and not nt, which would be valid
# only for binary trees!

} else {

## add tip to more than one congenerics or a higher rank
## -----------------------------------------------------
# cat(" - add to more congeneric or higher rank relatives")

if ( insert == "stem" ) an <- noi(phy, strip.spec(tip),
regex = TRUE, stem = TRUE)
if ( insert == "randomly" ) {
an <- descendants(phy, an, type = "i")
an <- sample(an, 1)
}

# crown group age:
if ( is.ultrametric(phy) ){
ael <- branching.times(phy)[an - Ntip(phy)]
} else {
tip.heights <- tipHeights(extract.clade(phy, an))
ael <- runif(1, min(tip.heights), max(tip.heights))
}

# number of tip to be inserted: the smallest tip number,
# because the existing tip numberd will be increased by 1
newtip <- min(descendants(phy, an, "t"))

# increase number of internal nodes by 1
phy$edge[phy$edge >= newtip] <- phy$edge[phy$edge >= newtip] + 1
an <- an + 1

# insert before id (after would be more difficult)
id <- min(which(phy$edge[, 1] == an))
upper <- 1:(id - 1); lower <- id:ne

# add edge
phy$edge <- rbind(phy$edge[upper, ],
c(an, newtip),
phy$edge[lower, ])
# add edge.length
phy$edge.length <- c(phy$edge.length[upper],
ael,
phy$edge.length[lower])
# add tip.label
phy$tip.label <- c(phy$tip.label[1:(newtip - 1)],
tip,
phy$tip.label[newtip:nt])
}
# fixNodes(phy)
phy
}

0 comments on commit 83855c9

Please sign in to comment.