Skip to content

Commit

Permalink
v0.99.24 added plot_sample_network, plot_richness_estimates
Browse files Browse the repository at this point in the history
* Advanced network/graph plotting/visualization wrappers:
make_sample_network(), plot_sample_network()
* Advanced alpha diversity wrappers: plot_richness_estimates(),
estimate_richness()
* Added taxafilter() function for even more convenient filtering of
species/taxa
* Added reshape dependency for explicit use of melt() function.
  • Loading branch information
joey711 committed Mar 1, 2012
1 parent 60b96bd commit 0ef911b
Show file tree
Hide file tree
Showing 17 changed files with 965 additions and 159 deletions.
Binary file modified .DS_Store
Binary file not shown.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: phyloseq
Version: 0.99.23
Date: 2012-02-21
Version: 0.99.24
Date: 2012-02-29
Title: Handling and analysis of high-throughput
phylogenetic sequence data.
Description: phyloseq is a set of classes, wrappers, and
Expand All @@ -15,6 +15,7 @@ Imports:
vegan (>= 2.0),
foreach (>= 1.3),
plyr (>= 1.7),
reshape (>= 0.8.4),
RJSONIO (>= 0.98)
Depends:
R (>= 2.14.0),
Expand Down Expand Up @@ -52,3 +53,4 @@ Collate:
'assignment-methods.r'
'sampleData-class.R'
'extend_vegan.R'
'network-methods.R'
17 changes: 15 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ export("sampleData<-")
export("taxTab<-")
export(access)
export(calcplot)
export(estimate_richness)
export(export_env_file)
export(filter_taxa)
export(filterfunSample)
export(getslots.phyloseq)
export(getTaxa)
Expand All @@ -16,10 +18,13 @@ export(import_pyrotagger_tab)
export(import_qiime)
export(import_RDP_cluster)
export(import)
export(makenetwork)
export(make_sample_network)
export(merge_phyloseq)
export(phyloseq)
export(plot_ordination_phyloseq)
export(plot_richness_estimates)
export(plot_sample_network)
export(plot_taxa_bar)
export(plot_tree_phyloseq)
export(rank.names)
export(rm_outlierf)
Expand Down Expand Up @@ -82,10 +87,18 @@ exportMethods(tre)
exportMethods(UniFrac)
exportMethods(vegdist)
import(foreach)
import(igraph)
import(vegan)
importFrom(igraph,degree)
importFrom(igraph,delete.vertices)
importFrom(igraph,get.edgelist)
importFrom(igraph,graph.adjacency)
importFrom(igraph,layout.fruchterman.reingold)
importFrom(igraph,V)
importFrom(multtest,mt.maxT)
importFrom(multtest,mt.minP)
importFrom(plyr,laply)
importFrom(plyr,ldply)
importFrom(reshape,melt)
importFrom(RJSONIO,fromJSON)
importFrom(vegan,diversity)
importFrom(vegan,estimateR)
73 changes: 73 additions & 0 deletions R/extend_vegan.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,77 @@ setMethod("vegdist", "phyloseq", function(x, method = "bray", binary = FALSE,
x <- otuTable(x)
vegdist(x, method, binary, diag, upper, na.rm, ...)
})
################################################################################
################################################################################
#' Summarize richness estimates
#'
#' Performs a number of standard richness estimates, and returns the results
#' as a \code{data.frame}. Can operate on the cumulative population of all
#' samples in the dataset, or by repeating the richness estimates for each
#' sample individually.
#' NOTE: You must use untrimmed datasets
#' for meaningful results, as these estimates (and even the ``observed'' richness)
#' are highly dependent on the number of singletons. You can always trim the data
#' later on if needed, just not before using this function.
#'
#' @usage estimate_richness(physeq, split=TRUE)
#'
#' @param physeq (Required). \code{\link{phyloseq-class}}, or alternatively,
#' an \code{\link{otuTable-class}}. The data about which you want to estimate
#' the richness.
#'
#' @param split (Optional). Logical. Should a separate set of richness estimates
#' be performed for each sample? Or alternatively, pool all samples and
#' estimate richness of the entire set.
#'
#' @return A \code{data.frame} of the richness estimates, and their standard error.
#'
#' @seealso
#' Check out the custom plotting function, \code{\link{plot_richness_estimates}},
#' for easily showing the results of different estimates, with method-specific
#' error-bars. Also check out the internal functions borrowed from the \code{vegan}
#' package:
#' \code{\link[vegan]{estimateR}},
#' \code{\link[vegan]{diversity}}
#'
#' @importFrom vegan estimateR
#' @importFrom vegan diversity
#' @export
#' @examples
#' data(GlobalPatterns)
#' ( S.GP <- estimate_richness(GlobalPatterns) )
#' # # Make the plots
#' # plot_richness_estimates(GlobalPatterns, "SampleType")
#' # plot_richness_estimates(GlobalPatterns, "SampleType", "SampleType")
#' # For more plotting examples, see plot_richness_estimates()
#'
estimate_richness <- function(physeq, split=TRUE){
# Check for singletons, and then warning if they are missing.
# These metrics only really meaningful if singletons are included.
if( !any(otuTable(physeq)==1) ){
warning("The experiment object you have provided does not have\n",
"any singletons. This is highly suspicious. Results of richness\n",
"estimates are probably unreliable, or wrong, if you have already\n",
"trimmed low-abundance taxa from the data.\n",
"\n",
"It is recommended that you find the un-trimmed data and retry.",
)
}

# If we are not splitting sample-wise, sum the species. Else, enforce orientation.
if( !split ){
OTU <- speciesSums(physeq)
} else if( split ){
OTU <- as(otuTable(physeq), "matrix")
if( speciesAreRows(physeq) ){ OTU <- t(OTU) }
}

# Some standard richness parameters
richness <- round(estimateR(OTU))
shannon <- round(diversity( OTU ), 2)
simpson <- round(diversity( OTU, index="simpson"), 2)
# # fisher <- round(fisher.alpha( OTU))

return( t(rbind(richness, shannon, simpson)) )
}
################################################################################
139 changes: 139 additions & 0 deletions R/network-methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
################################################################################
#' Make sample-wise microbiome network (igraph)
#'
#' A specialized function for creating graphical models of microbiome samples
#' based on a user-defined ecological distance and threshold.
#' The graph is ultimately built with tools from the
#' \code{\link[igraph]{igraph}}-package.
#'
#' @usage make_sample_network(physeq, keep.isolates=FALSE, threshold = 0,
#' max.dist = 0.4, dist.fun = function(x) vegdist(x, "jaccard"), presence.absence=FALSE)
#'
#' @param physeq (Required). Default \code{NULL}.
#' A \code{\link{phyloseq-class}} object on which \code{g} is based.
#'
#' @param keep.isolates (Optional). Default \code{FALSE}. Logical.
#' Whether to keep isolates (un-connected samples, not microbial isolates)
#' in the graphical model that is returned. Default results in isolates
#' being removed from the object.
#'
#' @param threshold (Optional). Default \code{1}.
#' The minimum number of individuals of each taxa across all
#' samples. Taxa below this value are ignored during network construction.
#' Note that this is extremely simple filtering. More advanced filtering
#' can be performed prior to this function using the
#' \code{\link{filterfunSample}} or \code{\link{filter_taxa}} functions.
#'
#' @param max.dist (Optional). Default \code{0.4}.
#' The maximum ecological distance (as defined by \code{dist.fun})
#' allowed between two samples to still consider them ``connected''
#' by an edge in the graphical model.
#'
#' @param dist.fun (Optional). Default \code{function(x) vegdist(x, "jaccard")}.
#' A function that takes a \code{\link{phyloseq-class}} object
#' (or \code{\link{otuTable-class}}) and returns a sample-wise distance
#' matrix. Currently, any method in \code{\link{vegdist}} is supported,
#' as well as \code{\link{UniFrac}} (although this may take some time to run
#' you probably want to run this separately, save, and then provide the
#' distance-object directly. See below for alternatives).
#' Most ecological distance functions that take a matrix in \code{vegan-package}
#' format (samples are rows) are supported.
#'
#' Alternatively, if you have already calculated the sample-wise distance
#' object, this can be provided as \code{dist.fun} instead (see examples).
#' A third alternative is to provide a character string indicating the
#' desired method of distance calculation. For the moment, this is limited
#' to the distance methods available in \code{\link{vegdist}}.
#'
#' @param presence.absence (Optional). Default \code{FALSE}.
#' Whether the abundances values should be transformed to binary
#' presence-absence values prior to distance calculation and network
#' construction.
#'
#' @return A \code{\link[igraph]{igraph}}-class object.
#'
#' @seealso
#' \code{\link{plot_sample_network}}
#'
#' @importFrom igraph graph.adjacency
#' @importFrom igraph V
#' @importFrom igraph delete.vertices
#' @importFrom igraph degree
#'
#' @export
#'
#' @examples
#' # # Example plots with Enterotype Dataset
#' data(enterotype)
#'
#' ig <- make_sample_network(enterotype, FALSE, max.dist=0.3)
#' plot_sample_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.3, label=NULL)
#'
#' # ig <- make_sample_network(enterotype, FALSE, max.dist=0.2)
#' # plot_sample_network(ig, enterotype, color="SeqTech", shape="Enterotype", line_weight=0.3, label=NULL)
#'
#' # # Three methods of choosing/providing distance/distance-method
#' # Provide method name available to vegdist
#' ig <- make_sample_network(enterotype, FALSE, max.dist=0.3, dist.fun="jaccard")
#' # Provide distance object, already computed
#' ih <- make_sample_network(enterotype, FALSE, max.dist=0.3, dist.fun=vegdist(enterotype, "jaccard"))
#' # Provide explicit function. Can be custom function.
#' ii <- make_sample_network(enterotype, FALSE, max.dist=0.3, dist.fun=function(x){vegdist(x, "jaccard")})
#' # The have equal results:
#' all.equal(ig, ih)
#' all.equal(ig, ii)
#' #
make_sample_network <- function(physeq, keep.isolates=FALSE, threshold = 0,
max.dist = 0.4, dist.fun = function(x) vegdist(x, "jaccard"), presence.absence=FALSE){

abund <- otuTable(physeq)
if (speciesAreRows(abund)) {
abund <- t(abund)
}

# Keep only those taxa (columns) that are above the threshold
abundance <- abund[, colSums(abund) > threshold]

if( presence.absence ){
# Convert to presence/absence matrix
abundance <- (abundance > threshold) - 0
}

# Calculate or asign sample-wise distance matrix
if( class(dist.fun) == "dist" ){
# If dist.fun a distance object, use it rather than re-calculate
sample.dist <- dist.fun
if( attributes(sample.dist)$Size != nsamples(physeq) ){
stop("nsamples(physeq) does not match size of dist object in dist.fun")
}
if( !setequal(attributes(sample.dist)$Labels, sample.names(physeq)) ){
stop("sample.names does not exactly match dist-indices")
}
} else if( class(dist.fun) == "character" ){
sample.dist <- vegdist(abundance, method=dist.fun)
} else {
sample.dist <- dist.fun(abundance)
}

# coerce distance-matrix back into vanilla matrix, Sample Distance Matrix, SaDiMa
SaDiMa <- as.matrix(sample.dist)

# Add Inf to the diagonal to avoid self-connecting edges (inefficient)
SaDiMa <- SaDiMa + diag(Inf, nsamples(physeq), nsamples(physeq))

# Convert distance matrix to coincidence matrix, CoMa, using max.dist
CoMa <- SaDiMa < max.dist

# Calculate the igraph-formatted network
ig <- graph.adjacency(CoMa, mode="lower")

# If keeping isolates, done. Else, remove them, then return igraph.
if( keep.isolates ){
return(ig)
} else {
isolates <- V(ig)[degree(ig) == 0]
ig.no.isol <- delete.vertices(ig, V(ig)[degree(ig) == 0])
return(ig.no.isol)
}
}
################################################################################
Loading

0 comments on commit 0ef911b

Please sign in to comment.