Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 83855c9
Showing
116 changed files
with
19,319 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Package: megaptera | ||
Type: Package | ||
Title: MEGAPhylogeny Techniques in R | ||
Version: 1.0-0 | ||
Date: 2014-10-30 | ||
Author: Christoph Heibl | ||
Maintainer: Christoph Heibl <christoph.heibl@gmx.net> | ||
Depends: R (>= 2.10), snowfall, ips | ||
Imports: ape, seqinr, methods, RPostgreSQL, XML | ||
Suggests: knitr | ||
Description: Given a set of taxa and marker, the goal is build a complete and correct sequence alignment for phylogenetic analysis. | ||
License: GPL (>= 2) | ||
VignetteBuilder: knitr | ||
Packaged: 2014-11-13 06:55:15 UTC; stoffi | ||
NeedsCompilation: no | ||
Repository: CRAN | ||
Date/Publication: 2014-11-13 08:44:56 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
3e3aed980babc3def9fc8afb5d0a65a6 *DESCRIPTION | ||
95a6567b025c7f17ff22eec12e531526 *NAMESPACE | ||
48558e57bdba209cae6a134f80911bbd *R/EFetchLocus.R | ||
786c38d04e8a0d384f39b1f318fd6864 *R/MAD.R | ||
a9d4e41467bd0c9ad5483ee37659fb40 *R/NCBI.wrap.R | ||
fc8e7e2e3b74cc74db844f3bc5cb5b6f *R/addNbAcc.R | ||
b5b41feced1455a8833b8f3fc960c151 *R/addTip.R | ||
0da22490035327ac468070c2f7a202c1 *R/alignGenus.R | ||
9d4cc4105ebd561464ec941812553677 *R/calcDistToRef.R | ||
c895d3c9137988ae48465b6447331d6c *R/check.Clades.R | ||
c610bd26dfe216598c7c24b62172f478 *R/check.Coverage.R | ||
06b3550f5f2298d63fdd184d57af0dfb *R/check.Markers.R | ||
3f8136abba8feece2c10786fa73c0676 *R/check.NameChanges.R | ||
285d072dda773a0ae5d32e36f89cd25c *R/check.Species.R | ||
a45c51cd591c8358e0851f47beed5c38 *R/check.TaxAgainstPhy.R | ||
c9c7fb32411e1d614676e8c952facbb4 *R/coverage.R | ||
9dad9c8e2b7bbd3e2090de4bb7a56fe0 *R/dbAutofillTaxonomy.R | ||
87dcc5d7f95cf818d3c2ffc0056e211e *R/dbDeleteSpec.R | ||
dc4ea6ea4afcced33d7ddc838218ed16 *R/dbPars.R | ||
d8205ccd165788178b33fb3fe207b194 *R/dbReadDNA.R | ||
6dad0ba7b85b17788d36db5e078d17f9 *R/dbReadReference.R | ||
adfc74497e5b4192b818adbc748429de *R/dbReadTaxomony.R | ||
c4bb63d99156752cc75057dfc6bb3095 *R/dbUpdateLocus.R | ||
1d694d672cf192859c1ceb16364d14f3 *R/dbUpdateTaxonomy.R | ||
498622834c18a4f1ea955ae390200d03 *R/dbWriteDNA.R | ||
7772cac5fee3967939b9f9757b5b0d11 *R/dbWriteString.r | ||
94d087225730331de5d15c867d0fccfc *R/downloadSequences.R | ||
720051a59f3cf97716dd4c3992306f14 *R/esearch.R | ||
bd1559c454e178a22cc19fb126b2f89a *R/extractLocus.R | ||
0ef7a14bb085049f8db73646f671334f *R/filter.alignment.R | ||
7e8ea880a587fae85d2e450b2dcacebe *R/fixTaxonomy.R | ||
f34603e1877ffc4c2193b0e809cb8b18 *R/fleshoutGuidetree.R | ||
899486c798287f644f981239f4382d5b *R/funX.R | ||
438e9ff9a203ca84e83efd680e2ab1e6 *R/getLineage.R | ||
96a37217d38151205a4a98ee951a4d5d *R/is.alignable.R | ||
05aa91a37cd609f97bec285e8c10a024 *R/locus.R | ||
36f47fc7afe5b3360398814ccbb1cb06 *R/mafft.merge.R | ||
43e2cbe34a3844b94192f596e64d26c7 *R/markersGenbank.R | ||
8ee75eab357a95a0947003ef1963e992 *R/maxDist.R | ||
0c10569fb1d6b90aec1a4aae7c223652 *R/meanPairwiseIdentity.R | ||
3a7285842949c7621ca91451eab4ec08 *R/megapteraPars.R | ||
a988f7ccfcad6e1661f54a5dffe3fd4a *R/megapteraProj.R | ||
979cbb2a17ae81a25c5ee4c07e944214 *R/megaptera_heatmap.R | ||
8c9737677a3b5c377d86740568c714bf *R/myDist.R | ||
6daa04c9154670e3d7ee3d44b2216369 *R/ncbiLineage.R | ||
3a5298c1c14cb68a3a0d81fc87438640 *R/ncbiTaxonomy.R | ||
9c6833936d3ef669d0135c78b293400e *R/nucleotideOverlap.R | ||
de64b49214edc4f858235343fc9cadf6 *R/prune.phylo.rank.R | ||
4a2a54e49d442fb57bed27bd533bcf58 *R/revCompTest.R | ||
e33d00ed84d8bd86f6f9b157c232d595 *R/slog.R | ||
72e7968dd9780ff8df9e52c4b96e8d39 *R/sortRanks.R | ||
f67ead5593b7796f679c175c7945258c *R/specCons.R | ||
77fbc80ac283b790fee6abd80e0f587e *R/speciesConsensus.R | ||
ccb08a0a4fd21e7eb8f01defaabf2bae *R/splitGiTaxon.R | ||
9c0bfe88dae41035b93d82d938eeca11 *R/sql.conform.R | ||
da08c1da1469c25479e6d18d835ab589 *R/sql.wrap.R | ||
2930c9ee1d8239713fde29caa059d675 *R/sqlTaxonomyHeader.R | ||
8faa34c7d41837dd7e0087eb60b204f1 *R/stepA.R | ||
d52bb04309566eb59de98554884a8717 *R/stepB.R | ||
c571da0e3a4925704c580e798a5bff50 *R/stepC.R | ||
f1c559011a25d937155d1b5dea4a2adf *R/stepD.R | ||
9f9d5608cc3461cdfd1e763c82d02a1d *R/stepE.R | ||
4b86fceca9327964c59a06f40a99b065 *R/stepF.R | ||
88d834c57530bbd08ef82db9d83a34da *R/stepG.R | ||
c325e8d611695fdb81baf383666e2cd0 *R/stepH.R | ||
21b5be601e2db3399ad172845daa8249 *R/strip.infraspec.R | ||
6d1c968a7430cbee1ceca24311bd51a6 *R/strip.spec.R | ||
343524b9710043e94345be91f6b4eb0f *R/supermatrix.R | ||
aca5f9ae1fe8a2b3b50cb3a88e0c9c3d *R/tax2tree.R | ||
c103e729936acc2deaa410ae2be3166d *R/taxon.R | ||
8f5eca0c280d8437834b832522004042 *R/term.R | ||
3d90307fd26718a2f7f5453e1203d0c0 *R/whereToInsert.R | ||
4c4533140cbf7701c87075c56ea21e98 *R/write.dna.spectable.R | ||
d4417e83c58fb0f16d7ffd2fa6227728 *build/vignette.rds | ||
536d055a97992ba61844b8119354ab4d *data/NCBIeaa.rda | ||
fca17fc47d7990b3649f6b36eace1536 *data/cetacea.rda | ||
ef33d2859b6753e69f684c416f2bb415 *data/input.rda | ||
b5d74a849cba3e7c7e47db7ff6597254 *inst/doc/Getting-started.pdf | ||
0ba17b221c72c2683ea8389075bb80a1 *inst/doc/Getting-started.rnw | ||
92eb780cecc6fecf296e06109175a44d *man/NCBIeaa.Rd | ||
315abc42430e84f0b3937a45295da5a4 *man/addTip.Rd | ||
2db24e84fb95999509024ebb31205d55 *man/cetacea.Rd | ||
5a67e67ec729f3554e146e1a35bb9972 *man/check.Function.Rd | ||
3aeffee7d45bcb5701c9893c15bf7d86 *man/dbPars-class.Rd | ||
2a1363e484830a4b6d9ff8e230ac2d89 *man/dbPars.Rd | ||
e0b4704c047c52f5115e86974d2cc564 *man/dbReadTaxonomy.Rd | ||
9ac9b2f87e39f041c0450397b4a06fdd *man/dbUpdateTaxonomy.Rd | ||
9433a79ccbfd43e5b938090097b91d30 *man/fleshoutGuidetree.Rd | ||
cc891c29089f309a8be0816fa61a4413 *man/io.dna.db.Rd | ||
eb29477f635b2ce4a5eaa7956d2e8ee5 *man/locus-class.Rd | ||
5806678388ec51384827a7dc599ec8e0 *man/locus.Rd | ||
0cd50c7657d9b02ed75948025bd55ada *man/megaptera-internal.Rd | ||
3c20241fc0f31a0e87d600ea46e3c8b4 *man/megaptera-package.Rd | ||
bcffee8df4c4096d46f9f390d1de80ba *man/megapteraPars-class.Rd | ||
92f6c5a1cdd8ae4c8fc27c2bd7c7d642 *man/megapteraPars.Rd | ||
760806a74a7ce058d5a95e4e5ccf1d51 *man/megapteraProj-class.Rd | ||
6db1da549850bd9aa4fa18d860e658e2 *man/megapteraProj.Rd | ||
106bef8a86b94f9099c3976ff99fed26 *man/ncbiTaxonomy.Rd | ||
d4da731131c18d024ea7b4009fc83ded *man/prune.phylo.rank.Rd | ||
9c46f3adcb0a7ae2bab89681be5f15e3 *man/setLocus.Rd | ||
952b16f00d8c07d43a3ebda87dd2f6cd *man/stepA.Rd | ||
31ea4fc056c939065a2d0d48727a16d9 *man/stepB.Rd | ||
3c7a2b6dfacdd13df32faf11c5c61d14 *man/stepC.Rd | ||
39c9e7611bc109dc58d530fd70cc14f6 *man/stepD.Rd | ||
62b72235fe55fc51c0d65fef3d580159 *man/stepE.Rd | ||
80dfe7f151d3a03e062acd1677518f9f *man/stepF.Rd | ||
b8b0ff9d20f2d3395f2339588795377c *man/stepG.Rd | ||
5c80b9116e16a058408e754e6cf1d62c *man/stepH.Rd | ||
1871697f9f30305ab9ea98c203362dbf *man/strip.infraspec.Rd | ||
6cf13af3dfc61d6a416231e22ca0586e *man/taxon-class.Rd | ||
87ea7bcb79f08fc988c86b53a4426b92 *man/taxon.Rd | ||
1e64525ddbb9ac19b002a03ae12c7f83 *vignettes/Getting-started-concordance.tex | ||
0ba17b221c72c2683ea8389075bb80a1 *vignettes/Getting-started.rnw | ||
f023eb3b1bd56bc5c1480431479542de *vignettes/biology.bib | ||
1e545dcdf6ef2b91204ca1b717455fb4 *vignettes/jbiogeo2.bst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
|
||
import(ape) | ||
importFrom(ips, deleteEmptyCells, trimEnds, mafft, gblocks, | ||
write.fas, write.phy, write.nex, read.fas, | ||
descendants, sister, noi, fixNodes, | ||
tipHeights, terminal.clades) | ||
import(methods) | ||
importFrom(RPostgreSQL, PostgreSQL, dbConnect, | ||
dbDisconnect, dbSendQuery, dbGetQuery, | ||
dbWriteTable, dbReadTable, dbExistsTable, dbRemoveTable) | ||
importFrom(seqinr, comp, c2s, s2c) | ||
import(snowfall) | ||
import(XML) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
## package: megaptera | ||
## called by: markersGenbank | ||
## author: Christoph Heibl (at gmx.net) | ||
## last update: 2014-07-15 | ||
|
||
EFetchLocus <- function (gi){ | ||
|
||
## retrieve sequence information | ||
## ----------------------------- | ||
x <- paste("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/", | ||
"efetch.fcgi?db=nucleotide&id=", gi, | ||
"&rettype=gb&retmode=xml", sep = "") | ||
# xml <- scan(x, what = "c", quiet = TRUE, sep = "\n") | ||
# write(xml, "test2.xml"); system("open -t test2.xml") | ||
# x <- "test.xml" | ||
x <- xmlTreeParse(x, getDTD = FALSE, | ||
useInternalNodes = TRUE) | ||
|
||
## get locus | ||
## --------- | ||
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='gene']/GBQualifier_value", xmlValue) | ||
if ( is.null(gene) ) | ||
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='product']/GBQualifier_value", xmlValue) | ||
if ( is.null(gene) ) | ||
gene <- xpathSApply(x, "//GBQualifier[GBQualifier_name='note']/GBQualifier_value", xmlValue) | ||
unique(gene) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
MAD <- function(DNAbin, model = "JC69"){ | ||
|
||
if ( nrow(DNAbin) > 1 ){ | ||
p.uncorr <- dist.dna(DNAbin, model = "raw", pairwise.deletion = TRUE) | ||
p.corr <- dist.dna(DNAbin, model = model, pairwise.deletion = TRUE) | ||
x <- mad(p.corr - p.uncorr, na.rm = TRUE) | ||
} else { | ||
x <- 0 | ||
} | ||
x | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
NCBI.wrap <- function(x, BOOLEAN = "OR", field = "all"){ | ||
x <- paste(x, "[xxx]", sep = "") | ||
x <- paste(x, collapse = paste(rep(" ", 2), collapse = BOOLEAN)) | ||
x <- paste("(", x, ")", sep = "") | ||
xx <- vector() | ||
for (i in field){ | ||
xx <- c(xx, gsub("xxx", i, x)) | ||
} | ||
xx | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
addNbAcc <- function(tip, dbname = "fungi", pw = ""){ | ||
conn <- dbConnect(PostgreSQL(), dbname = dbname, user = "postgres", password = pw) | ||
tip <- dbGetQuery(conn, paste("SELECT * FROM taxonomy WHERE spec='", | ||
tip, "'", sep = "")) | ||
dbDisconnect(conn) | ||
tip <- tip[, grep("spec|_sel", names(tip))] | ||
names(tip) <- gsub("_sel", "", names(tip)) | ||
tip[is.na(tip)] <- 0 | ||
tip <- tip[, tip[1, ] > 0] | ||
tip <- paste(names(tip), tip, sep = "=") | ||
tip <- gsub("^(spec=|_)", "", tip) | ||
tip[1] <- gsub("_", " ", tip[1]) | ||
tip[1] <- paste("italic(\"", tip[1], "\")", sep = "") | ||
tip[-1] <- paste("plain(\"", tip[-1], "\")", sep = "") | ||
tip <- paste(tip, collapse = "*\" \"*") | ||
parse(text = tip) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
addTip <- function(phy, tip, tax, insert = "crown", stem.edge = 0.5){ | ||
|
||
if ( !inherits(phy, "phylo") ) stop("'phy' is not of class 'phylo'") | ||
# if ( !is.ultrametric(phy) ) stop("'phy' must be ultrametric") | ||
|
||
tip <- gsub(" ", "_", tip) | ||
|
||
if ( tip %in% phy$tip.label ) stop("'tip' is already contained in 'phy'") | ||
insert <- match.arg(insert, c("crown", "stem", "randomly")) | ||
|
||
# number of tips, internal nodes and edges: | ||
nt <- Ntip(phy); ni <- Nnode(phy); ne <- Nedge(phy) | ||
|
||
it <- ifelse(insert == "randomly", insert, paste("at", insert)) | ||
cat(" add", it, "of ") | ||
an <- whereToInsert(phy, tax, tip) | ||
|
||
if ( an <= nt ){ | ||
|
||
# cat(" - add to one congeneric species") | ||
## add tip to one congeneric | ||
## ------------------------- | ||
pretip <- an | ||
|
||
## an: This is now the subtending (or stem) node | ||
## of the node where the new tip is to be inserted | ||
an <- phy$edge[phy$edge[, 2] == pretip, 1] | ||
|
||
## new internal node number | ||
newinternal <- descendants(phy, an, type = "i") | ||
if ( length(newinternal) == 0 ){ | ||
newinternal <- an + 2 | ||
} else { | ||
newinternal <- max(descendants(phy, an, type = "i")) + 2 | ||
} | ||
|
||
|
||
# increase node number greater than 'pretip' by 1 | ||
# to create a gap to insert new tip at number pretip + 1 | ||
phy$edge[phy$edge > pretip] <- phy$edge[phy$edge > pretip] + 1 | ||
an <- an + 1 # step ancestral node accordingly | ||
## create splits above and below gap | ||
id <- which(phy$edge[, 1] == an & phy$edge[, 2] == pretip) | ||
upper <- 1:id; lower <- (id + 1):ne | ||
phy$edge[phy$edge >= newinternal] <- phy$edge[phy$edge >= newinternal] + 1 | ||
phy$edge[id, 2] <- newinternal | ||
|
||
# add edges | ||
phy$edge <- rbind(phy$edge[upper, ], | ||
c(newinternal, pretip), | ||
c(newinternal, pretip + 1), | ||
phy$edge[lower, ]) | ||
|
||
# add edge.lengths | ||
phy$edge.length <- c(phy$edge.length[head(upper, -1)], | ||
phy$edge.length[id] * stem.edge, | ||
rep(phy$edge.length[id] * (1 - stem.edge), 2), | ||
phy$edge.length[lower]) | ||
|
||
# add tip.label | ||
phy$tip.label <- c(phy$tip.label[1:pretip], | ||
tip, | ||
phy$tip.label[(pretip + 1):nt]) | ||
|
||
# ajust internal node number | ||
phy$Nnode <- ni + 1 # and not nt, which would be valid | ||
# only for binary trees! | ||
|
||
} else { | ||
|
||
## add tip to more than one congenerics or a higher rank | ||
## ----------------------------------------------------- | ||
# cat(" - add to more congeneric or higher rank relatives") | ||
|
||
if ( insert == "stem" ) an <- noi(phy, strip.spec(tip), | ||
regex = TRUE, stem = TRUE) | ||
if ( insert == "randomly" ) { | ||
an <- descendants(phy, an, type = "i") | ||
an <- sample(an, 1) | ||
} | ||
|
||
# crown group age: | ||
if ( is.ultrametric(phy) ){ | ||
ael <- branching.times(phy)[an - Ntip(phy)] | ||
} else { | ||
tip.heights <- tipHeights(extract.clade(phy, an)) | ||
ael <- runif(1, min(tip.heights), max(tip.heights)) | ||
} | ||
|
||
# number of tip to be inserted: the smallest tip number, | ||
# because the existing tip numberd will be increased by 1 | ||
newtip <- min(descendants(phy, an, "t")) | ||
|
||
# increase number of internal nodes by 1 | ||
phy$edge[phy$edge >= newtip] <- phy$edge[phy$edge >= newtip] + 1 | ||
an <- an + 1 | ||
|
||
# insert before id (after would be more difficult) | ||
id <- min(which(phy$edge[, 1] == an)) | ||
upper <- 1:(id - 1); lower <- id:ne | ||
|
||
# add edge | ||
phy$edge <- rbind(phy$edge[upper, ], | ||
c(an, newtip), | ||
phy$edge[lower, ]) | ||
# add edge.length | ||
phy$edge.length <- c(phy$edge.length[upper], | ||
ael, | ||
phy$edge.length[lower]) | ||
# add tip.label | ||
phy$tip.label <- c(phy$tip.label[1:(newtip - 1)], | ||
tip, | ||
phy$tip.label[newtip:nt]) | ||
} | ||
# fixNodes(phy) | ||
phy | ||
} |
Oops, something went wrong.