From b83f7037979b190bc769bf14cf4f367f31599fb2 Mon Sep 17 00:00:00 2001 From: Benjamin Blonder Date: Tue, 24 Nov 2015 07:46:52 +0000 Subject: [PATCH] version 0.6.2 --- DESCRIPTION | 10 +++++----- MD5 | 18 ++++++++++-------- NAMESPACE | 5 +++-- R/make_netassoc_network.R | 29 ++++++++++++----------------- R/pairwise_association.R | 9 +++++++++ R/partial_correlation.R | 3 +-- R/plot.R | 4 ++-- man/make_netassoc_network.Rd | 34 +++++++++++++++++++++++----------- man/netassoc-package.Rd | 4 ++-- man/pairwise_association.Rd | 33 +++++++++++++++++++++++++++++++++ man/plot_netassoc_network.Rd | 4 ++-- 11 files changed, 102 insertions(+), 51 deletions(-) create mode 100644 R/pairwise_association.R create mode 100644 man/pairwise_association.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0c3dfea..6b244cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,15 @@ Package: netassoc Type: Package Title: Inference of Species Associations from Co-Occurrence Data -Version: 0.6.0 -Date: 2015-08-26 +Version: 0.6.2 +Date: 2015-11-23 Author: Benjamin Blonder, Naia Morueta-Holme Maintainer: Benjamin Blonder Description: Infers species associations from community matrices. Uses local and (optional) regional-scale co-occurrence data by comparing observed partial correlation coefficients between species to those estimated from regional species distributions. Extends Gaussian graphical models to a null modeling framework. Provides interface to a variety of inverse covariance matrix estimation methods. License: GPL-3 -Depends: igraph +Depends: igraph, infotheo Imports: rags2ridges, corpcor, huge, vegan NeedsCompilation: no -Packaged: 2015-09-09 17:24:16 UTC; benjaminblonder +Packaged: 2015-11-23 21:40:00 UTC; benjaminblonder Repository: CRAN -Date/Publication: 2015-09-09 21:19:28 +Date/Publication: 2015-11-24 07:46:52 diff --git a/MD5 b/MD5 index a18b609..af71c8a 100644 --- a/MD5 +++ b/MD5 @@ -1,11 +1,13 @@ -82260df8240b082ada9f47a00c054317 *DESCRIPTION -f2dc20ae5f05ced7d041f87c925f41b6 *NAMESPACE +cb225004cdc2bce35851bc7b34149c24 *DESCRIPTION +9f25089fefe79e2a8c12e30938a3fd6d *NAMESPACE c39460d085cf3ce914cf94344b1f47c3 *R/generate_nul_resample.R -e7657f7b50c214b6f461f12decb9d62e *R/make_netassoc_network.R -b09a0db03fbdd4cd99b9ac739ab71a0f *R/partial_correlation.R -f604d7aef6e660454cae70cb17c8a9ee *R/plot.R -960dff4b833ab53ac8f1adfd533d2257 *man/make_netassoc_network.Rd -18f84bb3a4de3bea3605babafd4ffff5 *man/netassoc-package.Rd +0da717e3c3000fa8d3c0549220db32c0 *R/make_netassoc_network.R +7b95b8c6278534ba92d0d10d054c829e *R/pairwise_association.R +19a162e0145463d76e3b70b479c5d452 *R/partial_correlation.R +0ce37ad1bcd9a0de4ee01bb565d04f47 *R/plot.R +e37052ada824878367e60059827bfc2f *man/make_netassoc_network.Rd +8f11cf7649b395ffd95a9d9eb3afd309 *man/netassoc-package.Rd +93226c98aac9dc52931e74d962f4fa5e *man/pairwise_association.Rd 21095f107a35447ed412eb29de5e3bb3 *man/partial_correlation.Rd b533e6b2c50de93c38845bb5833d404e *man/plot_netassoc_matrix.Rd -db0acf932b7487440e2ef9a22327c715 *man/plot_netassoc_network.Rd +4f5ff85ce0e4f9ab4738fd8423da502c *man/plot_netassoc_network.Rd diff --git a/NAMESPACE b/NAMESPACE index a6c463d..34b69fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,12 @@ -export(plot_netassoc_network, make_netassoc_network, partial_correlation, plot_netassoc_matrix) +export(plot_netassoc_network, make_netassoc_network, partial_correlation, plot_netassoc_matrix, pairwise_association) import(igraph) -importFrom(rags2ridges, optPenalty.aLOOCV) +importFrom(rags2ridges, optPenalty.LOOCVauto) importFrom(corpcor, decompose.invcov) importFrom(corpcor, invcov.shrink) importFrom(vegan, permatfull) importFrom(huge, huge) importFrom(huge, huge.select) +import(infotheo) importFrom("grDevices", "colorRampPalette", "gray", "rgb") importFrom("graphics", "box", "image", "plot", "title") importFrom("stats", "cor", "cov", "na.omit", "p.adjust", "sd") \ No newline at end of file diff --git a/R/make_netassoc_network.R b/R/make_netassoc_network.R index 51cd476..7128d12 100644 --- a/R/make_netassoc_network.R +++ b/R/make_netassoc_network.R @@ -1,4 +1,4 @@ -make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], method="shrinkage", p.method="fdr", alpha=0.05, numnulls=1000, plot=TRUE,plot.legend=TRUE, plot.title=TRUE, verbose=TRUE) +make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], method="partial_correlation", args=list(method="shrinkage",verbose=FALSE), p.method="fdr", alpha=0.05, numnulls=1000, plot=TRUE,plot.legend=TRUE, plot.title=TRUE, verbose=TRUE) { if (is.matrix(obs) & is.null(dimnames(obs))) { @@ -38,7 +38,8 @@ make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], met } if (verbose==TRUE) { cat('Calculating observed co-occurrence scores...\n') } - pcor_obs <- partial_correlation(obs, method, verbose=FALSE) + pcor_obs <- do.call(method, args=c(list(mat=obs), args)) + #pcor_obs <- partial_correlation(obs, method, verbose=FALSE) if (plot==TRUE) { @@ -48,10 +49,6 @@ make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], met # create null distributions pcor_nul_all <- array(dim=c(nsp, nsp, numnulls)) - if (method=="glasso") - { - warning("Graphical lasso may not produce non-singular null distribution - SES and p-values may be unreasonable.\nProceed with caution.") - } for (k in 1:numnulls) { # Generating null @@ -63,7 +60,8 @@ make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], met nul_resampled <- generate_nul_resample(nul, obs) - pcor_nul <- partial_correlation(nul_resampled, method, verbose=FALSE) + #pcor_nul <- partial_correlation(nul_resampled, method, verbose=FALSE) + pcor_nul <- do.call(method, args=c(list(mat=nul_resampled), args)) pcor_nul_all[,,k] <- pcor_nul } @@ -90,18 +88,15 @@ make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], met } - - - - pcor_ses <- (pcor_obs - pcor_nul_mean) / pcor_nul_sd pcor_pvalues <- apply(absobs_minus_absnul, c(1,2), function(x) { mean(x<0) }) - + diag(pcor_pvalues) <- NA # control false discovery rate - if (verbose==TRUE) { cat('Adjusting p-values for multiple comparisons...\n') } - pcor_pvalues_adjusted <- matrix(p.adjust(pcor_pvalues, method=p.method, n=((nsp)*(nsp-1)/2)),nrow=nsp,ncol=nsp) + if (verbose==TRUE) { cat('Adjusting p-values for multiple comparisons... ') } + + pcor_pvalues_adjusted <- matrix(p.adjust(pcor_pvalues, method=p.method),nrow=nsp,ncol=nsp) dimnames(pcor_pvalues_adjusted) <- dimnames(pcor_obs) pcor_ses_trimmed <- pcor_ses @@ -132,9 +127,9 @@ make_netassoc_network <- function(obs, nul=vegan::permatfull(obs)$perm[[1]], met pcor_ses_trimmed_neg_net <- pcor_ses_trimmed_neg pcor_ses_trimmed_neg_net[is.na(pcor_ses_trimmed_neg_net)] <- 0 - network_all <- graph.adjacency(pcor_ses_trimmed_net,mode='upper',weighted=T) - network_pos <- graph.adjacency(pcor_ses_trimmed_pos_net,mode='upper',weighted=T) - network_neg <- graph.adjacency(pcor_ses_trimmed_neg_net,mode='upper',weighted=T) + network_all <- graph.adjacency(pcor_ses_trimmed_net,mode='directed',weighted=T) + network_pos <- graph.adjacency(pcor_ses_trimmed_pos_net,mode='directed',weighted=T) + network_neg <- graph.adjacency(pcor_ses_trimmed_neg_net,mode='directed',weighted=T) if (plot==TRUE) { diff --git a/R/pairwise_association.R b/R/pairwise_association.R new file mode 100644 index 0000000..4ff1bee --- /dev/null +++ b/R/pairwise_association.R @@ -0,0 +1,9 @@ +pairwise_association <- function(mat, method="condentropy") +{ + nsp <- nrow(mat) + m_spxsp <- outer(1:nsp, 1:nsp, FUN=Vectorize(function(i,j) { do.call(method, args=list(X=mat[i,], Y=mat[j,])) })) + + diag(m_spxsp) <- NA + + return(m_spxsp) +} \ No newline at end of file diff --git a/R/partial_correlation.R b/R/partial_correlation.R index 2776852..26c903b 100644 --- a/R/partial_correlation.R +++ b/R/partial_correlation.R @@ -13,7 +13,7 @@ partial_correlation <- function(mat, method, verbose=FALSE) else if (method=="ridge") { # use auto-selected lambda penalty parameter based on approximate leave one out cross validation - invcov <- rags2ridges::optPenalty.aLOOCV(t(mat), lambdaMin=1e-3,lambdaMax=1e4,step=100,graph=FALSE,verbose=verbose)$optPrec + invcov <- rags2ridges::optPenalty.LOOCVauto(t(mat), lambdaMin=1e-3,lambdaMax=1e4)$optPrec } else if (method=="exact") { @@ -32,7 +32,6 @@ partial_correlation <- function(mat, method, verbose=FALSE) if (verbose==TRUE) {cat('Calculating correlations...\n')} pcor <- cor(t(mat)) } - pcor[lower.tri(pcor,diag=TRUE)] <- NA return(pcor) } \ No newline at end of file diff --git a/R/plot.R b/R/plot.R index 12fed3e..8c14bea 100644 --- a/R/plot.R +++ b/R/plot.R @@ -8,7 +8,7 @@ plot_netassoc_network <- function(network, layout = layout.fruchterman.reingold( vertex.label.family = "sans", edge.width = NULL, edge.color = NULL, - edge.arrow.size = 0.05, + edge.arrow.size = 0.2, vertex.label.cex = 0.5, legend=TRUE, ...) @@ -35,7 +35,7 @@ plot_netassoc_network <- function(network, layout = layout.fruchterman.reingold( } else { - edge.color <- ifelse(E(network)$weight > 0, rgb(0,0,1,0.8),rgb(1,0,0,0.8)) + edge.color <- ifelse(E(network)$weight > 0, rgb(0,0,1),rgb(1,0,0)) if (length(E(network)$weight)>0) { diff --git a/man/make_netassoc_network.Rd b/man/make_netassoc_network.Rd index 21f739d..996a95d 100644 --- a/man/make_netassoc_network.Rd +++ b/man/make_netassoc_network.Rd @@ -5,23 +5,27 @@ Infer species-association network } \description{ -Infers a species association network by determining which co-occurrence patterns between species are more or less likely than expected under a null model of community assembly. +Infers a species association network by determining which co-occurrence patterns between species are more or less likely than expected under a null model of community assembly. Defaults to estimation of association using a robust shrinkage estimator for inverse covariance matrices. } \usage{ -make_netassoc_network(obs, nul = vegan::permatfull(obs)$perm[[1]], - method="shrinkage", p.method="fdr", alpha=0.05, numnulls=1000, +make_netassoc_network(obs, nul=vegan::permatfull(obs)$perm[[1]], + method="partial_correlation", args=list(method="shrinkage",verbose=FALSE), + p.method="fdr", alpha=0.05, numnulls=1000, plot=TRUE,plot.legend=TRUE, plot.title=TRUE, verbose=TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obs}{ -A m x n community matrix describing the abundance of m species at n sites. Represents the observed data. +A m x n community matrix describing the abundance or presence/absence of m species at n sites. Represents the observed data. } \item{nul}{ -A m x n community matrix describing the abundance of m species at n sites. Represents the regional null expectation data. The default value is a resampling of the observed data that preserves row and column sums. +A m x n community matrix describing the abundance or presence/absence of m species at n sites. Represents the regional null expectation data. The default value is a resampling of the observed data that preserves row and column sums, but this default method is not recommended. } \item{method}{ -The method used to calculate relationships between species. See \code{\link{partial_correlation}} for options. Defaults to a robust shrinkage estimator for inverse covariance matrices. +The name of a function used to calculate relationships between species. The function must accept at least the arguments \code{mat}, a m x n (species x site) matrix. Defaults to \code{\link{partial_correlation}}. +} + \item{args}{ +A list of additional arguments to be passed to the \code{method} function. } \item{p.method}{ The method used to correct p-values for multiple comparisons. See \code{\link{p.adjust}} for options. @@ -57,9 +61,13 @@ Steps taken are: The resulting network can be analyzed using functions from the \code{igraph} network package. -This process builds a Gaussian graphical model via estimating an inverse covariance matrix (precision matrix, which can be used to calculate partial correlation coefficients) for all species pairs. This graph is then compared to a distribution of null graphs, such that the final output is a graph with edge weights corresponding to standardized effect sizes after correction for multiple comparisons. +The user should specify a \code{nul} matrix of the same dimensionality as \code{obs} based on some regional distribution modeling approach (e.g. MaxEnt). The default reshuffling method is not recommended but provided to allow immediate output from the function. + +This process by default builds a Gaussian graphical model via estimating an inverse covariance matrix (precision matrix, which can be used to calculate partial correlation coefficients) for all species pairs. This graph is then compared to a distribution of null graphs, such that the final output is a graph with edge weights corresponding to standardized effect sizes after correction for multiple comparisons. A range of different methods are provided in \code{\link{partial_correlation}} for estimating relationships between species. Note that while a method is provided for the graphical lasso (L1-regularization) its use is not recommended, as it will produce very sparse null networks and then a narrow (or singular) distribution of null edge weights. + +The inverse covariance methods implemented in \code{\link{partial_correlation}} result in symmetric association metrics. Non-symmetric metrics (e.g. describing predation or commensalism) are possible mathematically but their usage is not well-established. For an example of how to implement these, see \code{\link{pairwise_association}}. } \value{ A list with the following components: @@ -72,9 +80,6 @@ A list with the following components: \item{\code{network_pos}}{An \code{igraph} object representing an association network including only positive associations} \item{\code{network_pos}}{An \code{igraph} object representing an association network including only negative associations} } -\references{ -Morueta-Holme, N., Blonder, B., et al. in preparation. -} \seealso{vegan::permat} \examples{ @@ -88,8 +93,15 @@ m_obs[1,1:(nsi/2)] <- rpois(n=nsi/2,lambda=20) m_obs[2,1:(nsi/2)] <- rpois(n=nsi/2,lambda=20) n <- make_netassoc_network(m_obs, m_nul, - method="shrinkage",p.method='fdr', + method="partial_correlation",args=list(method="shrinkage"), + p.method='fdr', numnulls=100, plot=TRUE,alpha=0.05) + +# experimental demonstration of non-symmetric metrics +#n <- make_netassoc_network(m_obs, m_nul, +# method="pairwise_association",args=list(method="condentropy"), +# p.method='fdr', +# numnulls=100, plot=TRUE,alpha=0.05) n$network_all } \ No newline at end of file diff --git a/man/netassoc-package.Rd b/man/netassoc-package.Rd index 349426b..de15387 100644 --- a/man/netassoc-package.Rd +++ b/man/netassoc-package.Rd @@ -3,7 +3,7 @@ \alias{netassoc} \docType{package} \title{ -Inference of Species Assocations from Co-Occurrence Data +Inference of Species Associations from Co-Occurrence Data } \description{ Infers species associations from community matrices. Uses local and (optional) regional-scale co-occurrence data by comparing observed partial correlation coefficients between species to those estimated from regional species distributions. Extends Gaussian graphical models to a null modeling framework. Provides interface to a variety of inverse covariance matrix estimation methods. @@ -14,6 +14,6 @@ Benjamin Blonder, Naia Morueta-Holme Maintainer: Benjamin Blonder } \references{ -Morueta-Holme, N., Blonder, B., et al. IN PREPARATION. +Morueta-Holme, N., Blonder, B., et al. A network approach for inferring species associations from co-occurrence data. (in review) } \keyword{ package } \ No newline at end of file diff --git a/man/pairwise_association.Rd b/man/pairwise_association.Rd new file mode 100644 index 0000000..b5de55c --- /dev/null +++ b/man/pairwise_association.Rd @@ -0,0 +1,33 @@ +\name{pairwise_association} +\alias{pairwise_association} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Pairwise associations +} +\description{ +Computes pairwise associations between every row (species) in a species x site matrix. Note that usage of this function is advantageous when non-symmetric association metrics are desired, but the pairwise computation will prevent accounting for indirect effects between species. As such this function should be considered preliminary, and its use experimental. +} +\usage{ +pairwise_association(mat, method = "condentropy") +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{mat}{ +A m x n (species x site) matrix +} + \item{method}{ +The name of a function to call to calculate an association score. Must take two vector arguments (X,Y) and return a single numeric value. Default argument uses conditional information entropy statistic, although other functions (e.g. Jaccard similarity) are possible. +} +} +\value{ +A n x n (species x species) matrix with NA diagonal values. May be non-symmetric depending on the method used. +} +\examples{ +nsp <- 10 +nsi <- 50 +m_obs <- floor(matrix(rpois(nsp*nsi,lambda=5),ncol=nsi,nrow=nsp)) +m_obs[1,1:(nsi/2)] <- rpois(n=nsi/2,lambda=20) + +spxsp <- pairwise_association(m_obs, method="condentropy") +image(spxsp) +} \ No newline at end of file diff --git a/man/plot_netassoc_network.Rd b/man/plot_netassoc_network.Rd index b20b6ed..0236bcb 100644 --- a/man/plot_netassoc_network.Rd +++ b/man/plot_netassoc_network.Rd @@ -5,7 +5,7 @@ Plots species association network } \description{ -Draws a network of species associations. By default edge widths are proportional to association strength and edge color reflects assocation type (blue, positive; red, negative). +Draws a network of species associations. By default edge widths are proportional to association strength and edge color reflects association type (blue, positive; red, negative). } \usage{ plot_netassoc_network(network, layout = layout.fruchterman.reingold(network), @@ -16,7 +16,7 @@ plot_netassoc_network(network, layout = layout.fruchterman.reingold(network), vertex.label.family = "sans", edge.width = NULL, edge.color = NULL, - edge.arrow.size = 0.05, + edge.arrow.size = 0.2, vertex.label.cex = 0.5, legend = TRUE, ...)