Skip to content

Commit

Permalink
version 1.1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Hao Wang authored and cran-robot committed Feb 3, 2024
0 parents commit 83fcb66
Show file tree
Hide file tree
Showing 30 changed files with 1,714 additions and 0 deletions.
25 changes: 25 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Type: Package
Package: PCGII
Title: Partial Correlation Graph with Information Incorporation
Version: 1.1.2
Authors@R: c(person("Hao", "Wang", email = "haydo.wang@outlook.com", role = c("aut", "cre")), person("Yumou", "Qiu", role = "aut"), person("Peng", "Liu", role = "aut"))
Description: Large-scale gene expression studies allow gene network construction to uncover associations among genes. This package is developed for estimating and testing partial correlation graphs with prior information incorporated.
License: MIT + file LICENSE
URL: https://haowang47.github.io/PCGII/
Depends: R(>= 4.3.0)
Imports: stats, corpcor (>= 1.6.10), glmnet, igraph (>= 1.5.0.1),
Matrix, dplyr (>= 1.1.4)
Suggests: mvtnorm, tidyverse, knitr, rmarkdown, testthat (>= 3.0.0)
VignetteBuilder: knitr
Config/testthat/edition: 3
Encoding: UTF-8
RoxygenNote: 7.2.3
Language: en-US
NeedsCompilation: no
Packaged: 2024-01-31 16:30:07 UTC; haowang
Author: Hao Wang [aut, cre],
Yumou Qiu [aut],
Peng Liu [aut]
Maintainer: Hao Wang <haydo.wang@outlook.com>
Repository: CRAN
Date/Publication: 2024-02-02 18:30:05 UTC
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2024
COPYRIGHT HOLDER: PCGII authors
29 changes: 29 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
33be0546f690e4d101dddf9e1722d073 *DESCRIPTION
870d398d0d24f2c7f4cd15e5094e4c3a *LICENSE
089a5a9573bb45cb19fc78e4e8c25c08 *NAMESPACE
a63181bf25d199c3738ce5b95b0a3c95 *R/PCGII.R
36ba1981d552cedbd26118db2e93febb *R/clevel.R
355f9c91568f78022942e4438db34461 *R/inference.R
f0816c8648ab47d2c42c0c084c8aedc2 *R/makeBlockDiag.R
dad26b4dbc66f1a80a627de6bf1ff913 *R/make_random_precision_mat.R
2362fab64e4e2aa2397798a8b3820555 *R/make_sf_precision_mat.R
33f927e4a24b3a2fc4ca1a6c28e9a6c5 *R/sigs2mat.R
63e265cf267d11041ec1e6eb071ad999 *R/undirected_prior.R
21b9f8b5865e3ef9b4671ce99a4f972f *README.md
afea5cf0e676e88b6d05beea54435360 *build/vignette.rds
59327d43ff9e61c63d6fd79927d04179 *inst/doc/Introduction.R
93e4bfe007a7d8146dbe27e60961a4ab *inst/doc/Introduction.Rmd
dc03d934cc98344d149412db75e5a58e *inst/doc/Introduction.html
0cb98d12476e9d809f0bcf9643362f52 *man/PCGII.Rd
fb189ac160ba906d8f081b39bc634e61 *man/clevel.Rd
2cf6d4c0882315f32c4e33dc908a2b03 *man/inference.Rd
4af97c8d5dac0aad4b4caebd16fa1b90 *man/makeBlockDiag.Rd
5d17cc1112c5e85366c5ae0faa691437 *man/make_random_precision_mat.Rd
bb894cb4600728ae61ceda7f3241865a *man/make_sf_precision_mat.Rd
cf583738168edaf8149465976dd379d7 *man/sigs2mat.Rd
30a1013707942b90c77dc7ae2dd87263 *man/undirected_prior.Rd
407443e5a6604bda9bf109ca2abfd56f *tests/testthat.R
df39653db99f2158257c3e00402438cc *tests/testthat/test-makeBlockDiag.R
0e042ba3bde1089e7830a771104b3f41 *tests/testthat/test-make_random_precision_mat.R
cd93c502837a289df919ab1392721266 *tests/testthat/test-make_sf_precision_mat.R
93e4bfe007a7d8146dbe27e60961a4ab *vignettes/Introduction.Rmd
21 changes: 21 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by roxygen2: do not edit by hand

export(PCGII)
export(clevel)
export(inference)
export(makeBlockDiag)
export(make_random_precision_mat)
export(make_sf_precision_mat)
export(sigs2mat)
export(undirected_prior)
importFrom(Matrix,bdiag)
importFrom(corpcor,is.positive.definite)
importFrom(dplyr,arrange)
importFrom(glmnet,glmnet)
importFrom(igraph,as_adjacency_matrix)
importFrom(igraph,sample_gnp)
importFrom(igraph,sample_pa)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,runif)
importFrom(stats,sd)
113 changes: 113 additions & 0 deletions R/PCGII.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#' Get the estimated partial correlation graph with information incorporation
#'
#' @description
#' PCGII() is the function to apply the proposed method to get the estimated partial correlation graph with information incorporation. Remark: mathematical standardization will be automatically done within the function.
#'
#' @export PCGII
#' @importFrom stats sd
#' @importFrom glmnet glmnet
#' @importFrom stats predict
#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature.
#' @param prior The prior set, a k by 2 dataframe, in which each row corresponds to a pair of nodes (any omics features) that are connected under prior belief. Note, prior input has to be dataframe.
#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n).
#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p).
#' @examples
#' library(PCGII)
#' library(corpcor)
#' library(glmnet)
#' library(igraph)
#' library(Matrix)
#' library(mvtnorm)
#' # Simulating data
#' set.seed(1234567)
#' n=50 # sample size
#' p=30 # number of nodes
#'
#' Omega=make_random_precision_mat(eta=.01, p=p)
#'
#' # population covariance matrix, which is used to generate data
#' Sigma=solve(Omega)
#' # simulate expression data
#' X = rmvnorm(n = n, sigma = Sigma)
#'
#' lam=2*sqrt(log(p)/n) ## fixed lambda
#'
#' # directed prior network
#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE))
#' colnames(prior_set)=c("row", "col")
#' prior_set=undirected_prior(prior_set)
#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam)
#' inference_out=inference(list=PCGII_out)
#' diag(inference_out)=0
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
#' plot(net, vertex.size=4,
#' vertex.label.dist=0.5,
#' vertex.color="red",
#' edge.arrow.size=0.5,
#' layout=layout_in_circle(net))
PCGII=function(df, prior, lambda){
n = dim(df)[1]; p = dim(df)[2]
t0=2
IndMatrix = matrix(1, p, p) - diag(rep(1, p))
Eresidual = matrix(0, n, p) # regression residuals matrix n*p
CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1
meanX = colMeans(df)
X = t(t(df) - meanX)
XS = matrix(0, n, p)
# XS: Standardized X
for (i in 1 : p){
XS[, i] = X[, i] / sd(X[, i])
}

colnames(X)=colnames(df)
colnames(XS)=colnames(df)

if(missing(lambda)){
shat=sqrt(n/(log(p)^3))
lambda=sqrt(2*(2+0.01)*log(p/shat)/n)
}

default_penalty=rep(1,p-1)
for (i in 1 : p){
penalty_fac=default_penalty
temp.node=prior[with(prior,row==i),'col']

for(nds in temp.node){
if (nds < i) {penalty_fac[nds]=0} else {penalty_fac[nds-1]=0}
}

out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda, penalty.factor=penalty_fac)
Coef = out$beta
CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd)

out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda)
Predict = predict(out, XS[, -i], type = "link")
Eresidual[, i] = X[, i] - Predict
}

CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance
Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper )

for (i in 1 : (p - 1)){
for (j in (i + 1) : p){
temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1]
Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j])
Est[j, i] = Est[i, j]
}
}

EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) )

kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 ) # forth moment, a number

SE=sqrt((kappa*(1-EstThresh^2))^2/n)

tscore=Est/SE

return(list(Est=Est,
tscore=tscore,
kappa=kappa,
EstThresh=EstThresh,
n=n, p=p))

}
100 changes: 100 additions & 0 deletions R/clevel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' Get the estimated partial correlation graph without information incorporation
#'
#' @description
#' clevel() is the function to apply the method originally proposed in paper "Qiu, Y., & Zhou, X. H. (2020). Estimating c-level partial correlation graphs with application to brain imaging". It is used to get the estimated partial correlation graph without information incorporation. Remark: mathematical standardization will be automatically done within the function.
#'
#' @importFrom stats sd
#' @importFrom glmnet glmnet
#' @importFrom stats predict
#' @export clevel
#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature.
#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n).
#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p).
#' @examples
#' library(PCGII)
#' library(corpcor)
#' library(glmnet)
#' library(igraph)
#' library(Matrix)
#' library(mvtnorm)
#' # Simulating data
#' set.seed(1234567)
#' n=50 # sample size
#' p=30 # number of nodes
#'
#' Omega=make_random_precision_mat(eta=.01, p=p)
#'
#' # population covariance matrix, which is used to generate data
#' Sigma=solve(Omega)
#' # simulate expression data
#' X = rmvnorm(n = n, sigma = Sigma)
#'
#' lam=2*sqrt(log(p)/n) ## fixed lambda
#'
#' CLEVEL_out=clevel(df=X, lambda = lam)
#' inference_out=inference(list=CLEVEL_out)
#' diag(inference_out)=0
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
#' plot(net,
#' vertex.size=4,
#' vertex.label.dist=0.5,
#' vertex.color="red",
#' edge.arrow.size=0.5,
#' layout=layout_in_circle(net))
clevel=function(df, lambda){
n = dim(df)[1]; p = dim(df)[2]
t0=2
IndMatrix = matrix(1, p, p) - diag(rep(1, p))
Eresidual = matrix(0, n, p) # regression residuals matrix n*p
CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1
meanX = colMeans(df)
X = t(t(df) - meanX)
XS = matrix(0, n, p)
# XS: Standardized X
for (i in 1 : p){
XS[, i] = X[, i] / sd(X[, i])
}

colnames(X)=colnames(df)
colnames(XS)=colnames(df)

if(missing(lambda)){
shat=sqrt(n/(log(p)^3))
lambda=sqrt(2*(2+0.01)*log(p/shat)/n)
}

for (i in 1 : p){
out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda)

Coef = out$beta
Predict = predict(out, XS[, -i], type = "link")
CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd)
Eresidual[, i] = X[, i] - Predict
}

CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance
Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper )

for (i in 1 : (p - 1)){
for (j in (i + 1) : p){
temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1]
Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j])
Est[j, i] = Est[i, j]
}
}

# sparse partial correlation estimation with threshold (rho ~ in the paper)
EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) )

kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 ) # forth moment, a number

SE=sqrt((kappa*(1-EstThresh^2))^2/n)

tscore=Est/SE

return(list(Est=Est,
tscore=tscore,
kappa=kappa,
EstThresh=EstThresh,
n=n, p=p))
}
74 changes: 74 additions & 0 deletions R/inference.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Conduct simultaneous inference of estimated partial correlations
#'
#' @description
#' Inference() is the function to conduct simultaneous inference of estimated partial correlations.
#'
#' @export inference
#' @importFrom stats pnorm
#' @param list A list returned by either `PCGII()` or `clevel()`.
#' @param alpha A pre-determined False Discovery Rate. Nominal FDR is set at 0.05 by default.
#' @returns An adjacency matrix of significant partial correlations.
#' @examples
#' library(igraph)
#' library(PCGII)
#' library(mvtnorm)
#' # Simulating data
#' set.seed(1234567)
#' n=50 # sample size
#' p=30 # number of nodes
#'
#' Omega=make_random_precision_mat(eta=.01, p=p)
#'
#' # population covariance matrix, which is used to generate data
#' Sigma=solve(Omega)
#' # simulate expression data
#' X = rmvnorm(n = n, sigma = Sigma)
#'
#' lam=2*sqrt(log(p)/n) ## fixed lambda
#'
#' # directed prior network
#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE))
#' colnames(prior_set)=c("row", "col")
#' prior_set=undirected_prior(prior_set)
#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam)
#' inference_out=inference(list=PCGII_out)
#' diag(inference_out)=0
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected")
#' plot(net, vertex.size=4,
#' vertex.label.dist=0.5,
#' vertex.color="red",
#' edge.arrow.size=0.5,
#' layout=layout_in_circle(net))
inference=function(list, alpha=0.05){
Est=list$Est
tscore=list$tscore
kappa=list$kappa
EstThresh=list$EstThresh
n=list$n; p=list$p;
t0=2; tau = seq(0, 3.5, 0.01); smax = n / 2; lentau = length(tau)


resprop = list() # selected edges with different tau's, a list of 351 elements
rejectprop = c()
for (i in 1 : lentau){ # tau vary from 0 to 3.50 by 0.01, length=351
Threshold = tau[i] * sqrt(kappa * log(p) / n) * (1 - EstThresh^2)

# c=0
SRec = 1 * (abs(Est) > Threshold) # selected edge (matrix with 0 & 1) at tau[i]
NoNSRec = 1 * (SRec == 0)
resprop[[i]] = which(SRec == 1, arr.ind = TRUE) # select those significant edges at tau[i], off-diagonal elements, first columns, then second columns
rejectprop = c(rejectprop, max(1, (sum(SRec) - p)))
}

# c=0
FDPprop = 2 * (p * (p - 1)) * ( 1 - pnorm( tau * sqrt(log(p)) ) ) / rejectprop # FDP corresponding to each tau (page 10)

FDPresprop = c()

# determine thresholding parameter tau by controling FDP
if (sum(FDPprop <= alpha) > 0) tauprop = min(c(2, tau[FDPprop <= alpha]))
if (sum(FDPprop <= alpha) == 0) tauprop = 2
Threshold = tauprop * sqrt(kappa * log(p) / n) * (1 - EstThresh^2)
SRec = 1 * (abs(Est) > Threshold);
return(SRec)
}
28 changes: 28 additions & 0 deletions R/makeBlockDiag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#' Generate block-diagonal matrix of size p by p
#'
#' @description
#' A utility function generates block-diagonal matrix of size p by p with blocks B1, B2, ..., Bk. Each block matrix is of size blocksize by blocksize. The off-diagonal elements in block matrix are generated from uniform (min.beta, max.beta). The diagonal elements in block matrix are generated from uniform (1, 1.25).
#'
#' @importFrom Matrix bdiag
#' @importFrom corpcor is.positive.definite
#' @importFrom stats runif
#' @export makeBlockDiag
#' @param blocksize A positive integer, the dimension of the block matrix. Note, 'blocksize' has to be a factor of 'p'.
#' @param p A positive integer, the size of the block-diagonal matrix.
#' @param min.beta A positive number, lower limits of the uniform distribution.
#' @param max.beta A positive number, upper limits of the uniform distribution.
#' @returns A block-diagonal matrix of size 'p' by 'p'.
#' @examples
#' mat = makeBlockDiag(blocksize=4, p=20)
makeBlockDiag=function(blocksize=4, p=20, min.beta=0.3, max.beta=0.9){
# blocksize has to be a factor of p
reps=p/blocksize
S=list()
for (i in 1:reps) {
bd=matrix(runif(1, min.beta, max.beta), blocksize, blocksize)
diag(bd)=runif(1,1,1.25)
while(!is.positive.definite(bd)){diag(bd)=diag(bd)+0.01}
S[[i]]=bd
}
as.matrix(Matrix::bdiag(S))
}

0 comments on commit 83fcb66

Please sign in to comment.