version 1.1.2

cran · Feb 3, 2024 · 83fcb66 · 83fcb66
commit 83fcb66
Show file tree

Hide file tree

Showing 30 changed files with 1,714 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,25 @@
+Type: Package
+Package: PCGII
+Title: Partial Correlation Graph with Information Incorporation
+Version: 1.1.2
+Authors@R: c(person("Hao", "Wang", email = "haydo.wang@outlook.com", role = c("aut", "cre")), person("Yumou", "Qiu", role = "aut"), person("Peng", "Liu", role = "aut"))
+Description: Large-scale gene expression studies allow gene network construction to uncover associations among genes. This package is developed for estimating and testing partial correlation graphs with prior information incorporated. 
+License: MIT + file LICENSE
+URL: https://haowang47.github.io/PCGII/
+Depends: R(>= 4.3.0)
+Imports: stats, corpcor (>= 1.6.10), glmnet, igraph (>= 1.5.0.1),
+        Matrix, dplyr (>= 1.1.4)
+Suggests: mvtnorm, tidyverse, knitr, rmarkdown, testthat (>= 3.0.0)
+VignetteBuilder: knitr
+Config/testthat/edition: 3
+Encoding: UTF-8
+RoxygenNote: 7.2.3
+Language: en-US
+NeedsCompilation: no
+Packaged: 2024-01-31 16:30:07 UTC; haowang
+Author: Hao Wang [aut, cre],
+  Yumou Qiu [aut],
+  Peng Liu [aut]
+Maintainer: Hao Wang <haydo.wang@outlook.com>
+Repository: CRAN
+Date/Publication: 2024-02-02 18:30:05 UTC
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2024
+COPYRIGHT HOLDER: PCGII authors
diff --git a/MD5 b/MD5
@@ -0,0 +1,29 @@
+33be0546f690e4d101dddf9e1722d073 *DESCRIPTION
+870d398d0d24f2c7f4cd15e5094e4c3a *LICENSE
+089a5a9573bb45cb19fc78e4e8c25c08 *NAMESPACE
+a63181bf25d199c3738ce5b95b0a3c95 *R/PCGII.R
+36ba1981d552cedbd26118db2e93febb *R/clevel.R
+355f9c91568f78022942e4438db34461 *R/inference.R
+f0816c8648ab47d2c42c0c084c8aedc2 *R/makeBlockDiag.R
+dad26b4dbc66f1a80a627de6bf1ff913 *R/make_random_precision_mat.R
+2362fab64e4e2aa2397798a8b3820555 *R/make_sf_precision_mat.R
+33f927e4a24b3a2fc4ca1a6c28e9a6c5 *R/sigs2mat.R
+63e265cf267d11041ec1e6eb071ad999 *R/undirected_prior.R
+21b9f8b5865e3ef9b4671ce99a4f972f *README.md
+afea5cf0e676e88b6d05beea54435360 *build/vignette.rds
+59327d43ff9e61c63d6fd79927d04179 *inst/doc/Introduction.R
+93e4bfe007a7d8146dbe27e60961a4ab *inst/doc/Introduction.Rmd
+dc03d934cc98344d149412db75e5a58e *inst/doc/Introduction.html
+0cb98d12476e9d809f0bcf9643362f52 *man/PCGII.Rd
+fb189ac160ba906d8f081b39bc634e61 *man/clevel.Rd
+2cf6d4c0882315f32c4e33dc908a2b03 *man/inference.Rd
+4af97c8d5dac0aad4b4caebd16fa1b90 *man/makeBlockDiag.Rd
+5d17cc1112c5e85366c5ae0faa691437 *man/make_random_precision_mat.Rd
+bb894cb4600728ae61ceda7f3241865a *man/make_sf_precision_mat.Rd
+cf583738168edaf8149465976dd379d7 *man/sigs2mat.Rd
+30a1013707942b90c77dc7ae2dd87263 *man/undirected_prior.Rd
+407443e5a6604bda9bf109ca2abfd56f *tests/testthat.R
+df39653db99f2158257c3e00402438cc *tests/testthat/test-makeBlockDiag.R
+0e042ba3bde1089e7830a771104b3f41 *tests/testthat/test-make_random_precision_mat.R
+cd93c502837a289df919ab1392721266 *tests/testthat/test-make_sf_precision_mat.R
+93e4bfe007a7d8146dbe27e60961a4ab *vignettes/Introduction.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,21 @@
+# Generated by roxygen2: do not edit by hand
+
+export(PCGII)
+export(clevel)
+export(inference)
+export(makeBlockDiag)
+export(make_random_precision_mat)
+export(make_sf_precision_mat)
+export(sigs2mat)
+export(undirected_prior)
+importFrom(Matrix,bdiag)
+importFrom(corpcor,is.positive.definite)
+importFrom(dplyr,arrange)
+importFrom(glmnet,glmnet)
+importFrom(igraph,as_adjacency_matrix)
+importFrom(igraph,sample_gnp)
+importFrom(igraph,sample_pa)
+importFrom(stats,pnorm)
+importFrom(stats,predict)
+importFrom(stats,runif)
+importFrom(stats,sd)
diff --git a/R/PCGII.R b/R/PCGII.R
@@ -0,0 +1,113 @@
+#' Get the estimated partial correlation graph with information incorporation
+#'
+#' @description
+#' PCGII() is the function to apply the proposed method to get the estimated partial correlation graph with information incorporation. Remark: mathematical standardization will be automatically done within the function.
+#'
+#' @export PCGII
+#' @importFrom stats sd
+#' @importFrom glmnet glmnet
+#' @importFrom stats predict
+#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature.
+#' @param prior The prior set, a k by 2 dataframe, in which each row corresponds to a pair of nodes (any omics features) that are connected under prior belief. Note, prior input has to be dataframe.
+#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n).
+#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p).
+#' @examples
+#' library(PCGII)
+#' library(corpcor)
+#' library(glmnet)
+#' library(igraph)
+#' library(Matrix)
+#' library(mvtnorm)
+#' # Simulating data
+#' set.seed(1234567)
+#' n=50 # sample size
+#' p=30 # number of nodes
+#'
+#' Omega=make_random_precision_mat(eta=.01, p=p)
+#'
+#' # population covariance matrix, which is used to generate data
+#' Sigma=solve(Omega)
+#' # simulate expression data
+#' X = rmvnorm(n = n, sigma = Sigma)
+#'
+#' lam=2*sqrt(log(p)/n) ## fixed lambda
+#'
+#' # directed prior network
+#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE))
+#' colnames(prior_set)=c("row", "col")
+#' prior_set=undirected_prior(prior_set)
+#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam)
+#' inference_out=inference(list=PCGII_out)
+#' diag(inference_out)=0
+#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
+#'    plot(net, vertex.size=4,
+#'         vertex.label.dist=0.5,
+#'         vertex.color="red",
+#'         edge.arrow.size=0.5,
+#'         layout=layout_in_circle(net))
+PCGII=function(df, prior, lambda){
+  n = dim(df)[1]; p = dim(df)[2]
+  t0=2
+  IndMatrix = matrix(1, p, p) - diag(rep(1, p))
+  Eresidual = matrix(0, n, p) # regression residuals matrix n*p
+  CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1
+  meanX = colMeans(df)
+  X = t(t(df) - meanX)
+  XS = matrix(0, n, p)
+  # XS: Standardized X
+  for (i in 1 : p){
+    XS[, i] = X[, i] / sd(X[, i])
+  }
+
+  colnames(X)=colnames(df)
+  colnames(XS)=colnames(df)
+
+  if(missing(lambda)){
+    shat=sqrt(n/(log(p)^3))
+    lambda=sqrt(2*(2+0.01)*log(p/shat)/n)
+  }
+
+  default_penalty=rep(1,p-1)
+  for (i in 1 : p){
+    penalty_fac=default_penalty
+    temp.node=prior[with(prior,row==i),'col']
+
+    for(nds in temp.node){
+      if (nds < i) {penalty_fac[nds]=0} else {penalty_fac[nds-1]=0}
+    }
+
+    out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda, penalty.factor=penalty_fac)
+    Coef = out$beta
+    CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd)
+
+    out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda)
+    Predict = predict(out, XS[, -i], type = "link")
+    Eresidual[, i] = X[, i] - Predict
+  }
+
+  CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance
+  Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper )
+
+  for (i in 1 : (p - 1)){
+    for (j in (i + 1) : p){
+      temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1]
+      Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j])
+      Est[j, i] = Est[i, j]
+    }
+  }
+
+  EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) )
+
+  kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 )  # forth moment, a number
+
+  SE=sqrt((kappa*(1-EstThresh^2))^2/n)
+
+  tscore=Est/SE
+
+  return(list(Est=Est,
+              tscore=tscore,
+              kappa=kappa,
+              EstThresh=EstThresh,
+              n=n, p=p))
+
+}
diff --git a/R/clevel.R b/R/clevel.R
@@ -0,0 +1,100 @@
+#' Get the estimated partial correlation graph without information incorporation
+#'
+#' @description
+#' clevel() is the function to apply the method originally proposed in paper "Qiu, Y., & Zhou, X. H. (2020). Estimating c-level partial correlation graphs with application to brain imaging". It is used to get the estimated partial correlation graph without information incorporation. Remark: mathematical standardization will be automatically done within the function.
+#'
+#' @importFrom stats sd
+#' @importFrom glmnet glmnet
+#' @importFrom stats predict
+#' @export clevel
+#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature.
+#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n).
+#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p).
+#' @examples
+#' library(PCGII)
+#' library(corpcor)
+#' library(glmnet)
+#' library(igraph)
+#' library(Matrix)
+#' library(mvtnorm)
+#' # Simulating data
+#' set.seed(1234567)
+#' n=50 # sample size
+#' p=30 # number of nodes
+#'
+#' Omega=make_random_precision_mat(eta=.01, p=p)
+#'
+#' # population covariance matrix, which is used to generate data
+#' Sigma=solve(Omega)
+#' # simulate expression data
+#' X = rmvnorm(n = n, sigma = Sigma)
+#'
+#' lam=2*sqrt(log(p)/n) ## fixed lambda
+#'
+#' CLEVEL_out=clevel(df=X, lambda = lam)
+#' inference_out=inference(list=CLEVEL_out)
+#' diag(inference_out)=0
+#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
+#'    plot(net,
+#'         vertex.size=4,
+#'         vertex.label.dist=0.5,
+#'         vertex.color="red",
+#'         edge.arrow.size=0.5,
+#'         layout=layout_in_circle(net))
+clevel=function(df, lambda){
+  n = dim(df)[1]; p = dim(df)[2]
+  t0=2
+  IndMatrix = matrix(1, p, p) - diag(rep(1, p))
+  Eresidual = matrix(0, n, p) # regression residuals matrix n*p
+  CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1
+  meanX = colMeans(df)
+  X = t(t(df) - meanX)
+  XS = matrix(0, n, p)
+  # XS: Standardized X
+  for (i in 1 : p){
+    XS[, i] = X[, i] / sd(X[, i])
+  }
+
+  colnames(X)=colnames(df)
+  colnames(XS)=colnames(df)
+
+  if(missing(lambda)){
+    shat=sqrt(n/(log(p)^3))
+    lambda=sqrt(2*(2+0.01)*log(p/shat)/n)
+  }
+
+  for (i in 1 : p){
+    out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda)
+
+    Coef = out$beta
+    Predict = predict(out, XS[, -i], type = "link")
+    CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd)
+    Eresidual[, i] = X[, i] - Predict
+  }
+
+  CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance
+  Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper )
+
+  for (i in 1 : (p - 1)){
+    for (j in (i + 1) : p){
+      temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1]
+      Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j])
+      Est[j, i] = Est[i, j]
+    }
+  }
+
+  # sparse partial correlation estimation with threshold (rho ~ in the paper)
+  EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) )
+
+  kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 )  # forth moment, a number
+
+  SE=sqrt((kappa*(1-EstThresh^2))^2/n)
+
+  tscore=Est/SE
+
+  return(list(Est=Est,
+              tscore=tscore,
+              kappa=kappa,
+              EstThresh=EstThresh,
+              n=n, p=p))
+}
diff --git a/R/inference.R b/R/inference.R
@@ -0,0 +1,74 @@
+#' Conduct simultaneous inference of estimated partial correlations
+#'
+#' @description
+#' Inference() is the function to conduct simultaneous inference of estimated partial correlations.
+#'
+#' @export inference
+#' @importFrom stats pnorm
+#' @param list A list returned by either `PCGII()` or `clevel()`.
+#' @param alpha A pre-determined False Discovery Rate. Nominal FDR is set at 0.05 by default.
+#' @returns An adjacency matrix of significant partial correlations.
+#' @examples
+#' library(igraph)
+#' library(PCGII)
+#' library(mvtnorm)
+#' # Simulating data
+#' set.seed(1234567)
+#' n=50 # sample size
+#' p=30 # number of nodes
+#'
+#' Omega=make_random_precision_mat(eta=.01, p=p)
+#'
+#' # population covariance matrix, which is used to generate data
+#' Sigma=solve(Omega)
+#' # simulate expression data
+#' X = rmvnorm(n = n, sigma = Sigma)
+#'
+#' lam=2*sqrt(log(p)/n) ## fixed lambda
+#'
+#' # directed prior network
+#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE))
+#' colnames(prior_set)=c("row", "col")
+#' prior_set=undirected_prior(prior_set)
+#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam)
+#' inference_out=inference(list=PCGII_out)
+#' diag(inference_out)=0
+#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
+#'    plot(net, vertex.size=4,
+#'         vertex.label.dist=0.5,
+#'         vertex.color="red",
+#'         edge.arrow.size=0.5,
+#'         layout=layout_in_circle(net))
+inference=function(list, alpha=0.05){
+  Est=list$Est
+  tscore=list$tscore
+  kappa=list$kappa
+  EstThresh=list$EstThresh
+  n=list$n; p=list$p;
+  t0=2; tau = seq(0, 3.5, 0.01); smax = n / 2; lentau = length(tau)
+
+
+  resprop = list() # selected edges with different tau's, a list of 351 elements
+  rejectprop = c()
+  for (i in 1 : lentau){ # tau vary from 0 to 3.50 by 0.01, length=351
+    Threshold = tau[i] * sqrt(kappa * log(p) / n) * (1 - EstThresh^2)
+
+    # c=0
+    SRec = 1 * (abs(Est) > Threshold) # selected edge (matrix with 0 & 1) at tau[i]
+    NoNSRec = 1 * (SRec == 0)
+    resprop[[i]] = which(SRec == 1, arr.ind = TRUE) # select those significant edges at tau[i], off-diagonal elements, first columns, then second columns
+    rejectprop = c(rejectprop, max(1, (sum(SRec) - p)))
+  }
+
+  # c=0
+  FDPprop = 2 * (p * (p - 1)) * ( 1 - pnorm( tau * sqrt(log(p)) ) ) / rejectprop # FDP corresponding to each tau (page 10)
+
+  FDPresprop = c()
+
+  # determine thresholding parameter tau by controling FDP
+  if (sum(FDPprop <= alpha) > 0) tauprop = min(c(2, tau[FDPprop <= alpha]))
+  if (sum(FDPprop <= alpha) == 0) tauprop = 2
+  Threshold = tauprop * sqrt(kappa * log(p) / n) * (1 - EstThresh^2)
+  SRec = 1 * (abs(Est) > Threshold);
+  return(SRec)
+}
diff --git a/R/makeBlockDiag.R b/R/makeBlockDiag.R
@@ -0,0 +1,28 @@
+#' Generate block-diagonal matrix of size p by p
+#'
+#' @description
+#' A utility function generates block-diagonal matrix of size p by p with blocks B1, B2, ..., Bk. Each block matrix is of size blocksize by blocksize. The off-diagonal elements in block matrix are generated from uniform (min.beta, max.beta). The diagonal elements in block matrix are generated from uniform (1, 1.25).
+#'
+#' @importFrom Matrix bdiag
+#' @importFrom corpcor is.positive.definite
+#' @importFrom stats runif
+#' @export makeBlockDiag
+#' @param blocksize A positive integer, the dimension of the block matrix. Note, 'blocksize' has to be a factor of 'p'.
+#' @param p A positive integer, the size of the block-diagonal matrix.
+#' @param min.beta A positive number, lower limits of the uniform distribution.
+#' @param max.beta A positive number, upper limits of the uniform distribution.
+#' @returns A block-diagonal matrix of size 'p' by 'p'.
+#' @examples
+#' mat = makeBlockDiag(blocksize=4, p=20)
+makeBlockDiag=function(blocksize=4, p=20, min.beta=0.3, max.beta=0.9){
+  # blocksize has to be a factor of p
+  reps=p/blocksize
+  S=list()
+  for (i in 1:reps) {
+    bd=matrix(runif(1, min.beta, max.beta), blocksize, blocksize)
+    diag(bd)=runif(1,1,1.25)
+    while(!is.positive.definite(bd)){diag(bd)=diag(bd)+0.01}
+    S[[i]]=bd
+  }
+  as.matrix(Matrix::bdiag(S))
+}