-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 83fcb66
Showing
30 changed files
with
1,714 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
Type: Package | ||
Package: PCGII | ||
Title: Partial Correlation Graph with Information Incorporation | ||
Version: 1.1.2 | ||
Authors@R: c(person("Hao", "Wang", email = "haydo.wang@outlook.com", role = c("aut", "cre")), person("Yumou", "Qiu", role = "aut"), person("Peng", "Liu", role = "aut")) | ||
Description: Large-scale gene expression studies allow gene network construction to uncover associations among genes. This package is developed for estimating and testing partial correlation graphs with prior information incorporated. | ||
License: MIT + file LICENSE | ||
URL: https://haowang47.github.io/PCGII/ | ||
Depends: R(>= 4.3.0) | ||
Imports: stats, corpcor (>= 1.6.10), glmnet, igraph (>= 1.5.0.1), | ||
Matrix, dplyr (>= 1.1.4) | ||
Suggests: mvtnorm, tidyverse, knitr, rmarkdown, testthat (>= 3.0.0) | ||
VignetteBuilder: knitr | ||
Config/testthat/edition: 3 | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
Language: en-US | ||
NeedsCompilation: no | ||
Packaged: 2024-01-31 16:30:07 UTC; haowang | ||
Author: Hao Wang [aut, cre], | ||
Yumou Qiu [aut], | ||
Peng Liu [aut] | ||
Maintainer: Hao Wang <haydo.wang@outlook.com> | ||
Repository: CRAN | ||
Date/Publication: 2024-02-02 18:30:05 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2024 | ||
COPYRIGHT HOLDER: PCGII authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
33be0546f690e4d101dddf9e1722d073 *DESCRIPTION | ||
870d398d0d24f2c7f4cd15e5094e4c3a *LICENSE | ||
089a5a9573bb45cb19fc78e4e8c25c08 *NAMESPACE | ||
a63181bf25d199c3738ce5b95b0a3c95 *R/PCGII.R | ||
36ba1981d552cedbd26118db2e93febb *R/clevel.R | ||
355f9c91568f78022942e4438db34461 *R/inference.R | ||
f0816c8648ab47d2c42c0c084c8aedc2 *R/makeBlockDiag.R | ||
dad26b4dbc66f1a80a627de6bf1ff913 *R/make_random_precision_mat.R | ||
2362fab64e4e2aa2397798a8b3820555 *R/make_sf_precision_mat.R | ||
33f927e4a24b3a2fc4ca1a6c28e9a6c5 *R/sigs2mat.R | ||
63e265cf267d11041ec1e6eb071ad999 *R/undirected_prior.R | ||
21b9f8b5865e3ef9b4671ce99a4f972f *README.md | ||
afea5cf0e676e88b6d05beea54435360 *build/vignette.rds | ||
59327d43ff9e61c63d6fd79927d04179 *inst/doc/Introduction.R | ||
93e4bfe007a7d8146dbe27e60961a4ab *inst/doc/Introduction.Rmd | ||
dc03d934cc98344d149412db75e5a58e *inst/doc/Introduction.html | ||
0cb98d12476e9d809f0bcf9643362f52 *man/PCGII.Rd | ||
fb189ac160ba906d8f081b39bc634e61 *man/clevel.Rd | ||
2cf6d4c0882315f32c4e33dc908a2b03 *man/inference.Rd | ||
4af97c8d5dac0aad4b4caebd16fa1b90 *man/makeBlockDiag.Rd | ||
5d17cc1112c5e85366c5ae0faa691437 *man/make_random_precision_mat.Rd | ||
bb894cb4600728ae61ceda7f3241865a *man/make_sf_precision_mat.Rd | ||
cf583738168edaf8149465976dd379d7 *man/sigs2mat.Rd | ||
30a1013707942b90c77dc7ae2dd87263 *man/undirected_prior.Rd | ||
407443e5a6604bda9bf109ca2abfd56f *tests/testthat.R | ||
df39653db99f2158257c3e00402438cc *tests/testthat/test-makeBlockDiag.R | ||
0e042ba3bde1089e7830a771104b3f41 *tests/testthat/test-make_random_precision_mat.R | ||
cd93c502837a289df919ab1392721266 *tests/testthat/test-make_sf_precision_mat.R | ||
93e4bfe007a7d8146dbe27e60961a4ab *vignettes/Introduction.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(PCGII) | ||
export(clevel) | ||
export(inference) | ||
export(makeBlockDiag) | ||
export(make_random_precision_mat) | ||
export(make_sf_precision_mat) | ||
export(sigs2mat) | ||
export(undirected_prior) | ||
importFrom(Matrix,bdiag) | ||
importFrom(corpcor,is.positive.definite) | ||
importFrom(dplyr,arrange) | ||
importFrom(glmnet,glmnet) | ||
importFrom(igraph,as_adjacency_matrix) | ||
importFrom(igraph,sample_gnp) | ||
importFrom(igraph,sample_pa) | ||
importFrom(stats,pnorm) | ||
importFrom(stats,predict) | ||
importFrom(stats,runif) | ||
importFrom(stats,sd) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#' Get the estimated partial correlation graph with information incorporation | ||
#' | ||
#' @description | ||
#' PCGII() is the function to apply the proposed method to get the estimated partial correlation graph with information incorporation. Remark: mathematical standardization will be automatically done within the function. | ||
#' | ||
#' @export PCGII | ||
#' @importFrom stats sd | ||
#' @importFrom glmnet glmnet | ||
#' @importFrom stats predict | ||
#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature. | ||
#' @param prior The prior set, a k by 2 dataframe, in which each row corresponds to a pair of nodes (any omics features) that are connected under prior belief. Note, prior input has to be dataframe. | ||
#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n). | ||
#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p). | ||
#' @examples | ||
#' library(PCGII) | ||
#' library(corpcor) | ||
#' library(glmnet) | ||
#' library(igraph) | ||
#' library(Matrix) | ||
#' library(mvtnorm) | ||
#' # Simulating data | ||
#' set.seed(1234567) | ||
#' n=50 # sample size | ||
#' p=30 # number of nodes | ||
#' | ||
#' Omega=make_random_precision_mat(eta=.01, p=p) | ||
#' | ||
#' # population covariance matrix, which is used to generate data | ||
#' Sigma=solve(Omega) | ||
#' # simulate expression data | ||
#' X = rmvnorm(n = n, sigma = Sigma) | ||
#' | ||
#' lam=2*sqrt(log(p)/n) ## fixed lambda | ||
#' | ||
#' # directed prior network | ||
#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE)) | ||
#' colnames(prior_set)=c("row", "col") | ||
#' prior_set=undirected_prior(prior_set) | ||
#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam) | ||
#' inference_out=inference(list=PCGII_out) | ||
#' diag(inference_out)=0 | ||
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected") | ||
#' plot(net, vertex.size=4, | ||
#' vertex.label.dist=0.5, | ||
#' vertex.color="red", | ||
#' edge.arrow.size=0.5, | ||
#' layout=layout_in_circle(net)) | ||
PCGII=function(df, prior, lambda){ | ||
n = dim(df)[1]; p = dim(df)[2] | ||
t0=2 | ||
IndMatrix = matrix(1, p, p) - diag(rep(1, p)) | ||
Eresidual = matrix(0, n, p) # regression residuals matrix n*p | ||
CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1 | ||
meanX = colMeans(df) | ||
X = t(t(df) - meanX) | ||
XS = matrix(0, n, p) | ||
# XS: Standardized X | ||
for (i in 1 : p){ | ||
XS[, i] = X[, i] / sd(X[, i]) | ||
} | ||
|
||
colnames(X)=colnames(df) | ||
colnames(XS)=colnames(df) | ||
|
||
if(missing(lambda)){ | ||
shat=sqrt(n/(log(p)^3)) | ||
lambda=sqrt(2*(2+0.01)*log(p/shat)/n) | ||
} | ||
|
||
default_penalty=rep(1,p-1) | ||
for (i in 1 : p){ | ||
penalty_fac=default_penalty | ||
temp.node=prior[with(prior,row==i),'col'] | ||
|
||
for(nds in temp.node){ | ||
if (nds < i) {penalty_fac[nds]=0} else {penalty_fac[nds-1]=0} | ||
} | ||
|
||
out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda, penalty.factor=penalty_fac) | ||
Coef = out$beta | ||
CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd) | ||
|
||
out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda) | ||
Predict = predict(out, XS[, -i], type = "link") | ||
Eresidual[, i] = X[, i] - Predict | ||
} | ||
|
||
CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance | ||
Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper ) | ||
|
||
for (i in 1 : (p - 1)){ | ||
for (j in (i + 1) : p){ | ||
temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1] | ||
Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j]) | ||
Est[j, i] = Est[i, j] | ||
} | ||
} | ||
|
||
EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) ) | ||
|
||
kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 ) # forth moment, a number | ||
|
||
SE=sqrt((kappa*(1-EstThresh^2))^2/n) | ||
|
||
tscore=Est/SE | ||
|
||
return(list(Est=Est, | ||
tscore=tscore, | ||
kappa=kappa, | ||
EstThresh=EstThresh, | ||
n=n, p=p)) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#' Get the estimated partial correlation graph without information incorporation | ||
#' | ||
#' @description | ||
#' clevel() is the function to apply the method originally proposed in paper "Qiu, Y., & Zhou, X. H. (2020). Estimating c-level partial correlation graphs with application to brain imaging". It is used to get the estimated partial correlation graph without information incorporation. Remark: mathematical standardization will be automatically done within the function. | ||
#' | ||
#' @importFrom stats sd | ||
#' @importFrom glmnet glmnet | ||
#' @importFrom stats predict | ||
#' @export clevel | ||
#' @param df The main expression dataset, an n by p matrix, in which each row corresponds to a sample and each column represents expression/abundance of an omics feature. | ||
#' @param lambda The regularization parameter, used in the node-wise regression. If missing, default lambda will be used which is at the order of sqrt(2*log(p)/n). | ||
#' @returns A list. The list contains estimated partial correlation matrix (Est), sparse partial correlation estimation matrix with threshold (EstThresh), estimated kappa (kappa), estimated test statistics matrix of partial correlations (tscore), sample size (n) and number of nodes (p). | ||
#' @examples | ||
#' library(PCGII) | ||
#' library(corpcor) | ||
#' library(glmnet) | ||
#' library(igraph) | ||
#' library(Matrix) | ||
#' library(mvtnorm) | ||
#' # Simulating data | ||
#' set.seed(1234567) | ||
#' n=50 # sample size | ||
#' p=30 # number of nodes | ||
#' | ||
#' Omega=make_random_precision_mat(eta=.01, p=p) | ||
#' | ||
#' # population covariance matrix, which is used to generate data | ||
#' Sigma=solve(Omega) | ||
#' # simulate expression data | ||
#' X = rmvnorm(n = n, sigma = Sigma) | ||
#' | ||
#' lam=2*sqrt(log(p)/n) ## fixed lambda | ||
#' | ||
#' CLEVEL_out=clevel(df=X, lambda = lam) | ||
#' inference_out=inference(list=CLEVEL_out) | ||
#' diag(inference_out)=0 | ||
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected") | ||
#' plot(net, | ||
#' vertex.size=4, | ||
#' vertex.label.dist=0.5, | ||
#' vertex.color="red", | ||
#' edge.arrow.size=0.5, | ||
#' layout=layout_in_circle(net)) | ||
clevel=function(df, lambda){ | ||
n = dim(df)[1]; p = dim(df)[2] | ||
t0=2 | ||
IndMatrix = matrix(1, p, p) - diag(rep(1, p)) | ||
Eresidual = matrix(0, n, p) # regression residuals matrix n*p | ||
CoefMatrix = matrix(0, p, p - 1) # regression coefficient matrix p*p-1 | ||
meanX = colMeans(df) | ||
X = t(t(df) - meanX) | ||
XS = matrix(0, n, p) | ||
# XS: Standardized X | ||
for (i in 1 : p){ | ||
XS[, i] = X[, i] / sd(X[, i]) | ||
} | ||
|
||
colnames(X)=colnames(df) | ||
colnames(XS)=colnames(df) | ||
|
||
if(missing(lambda)){ | ||
shat=sqrt(n/(log(p)^3)) | ||
lambda=sqrt(2*(2+0.01)*log(p/shat)/n) | ||
} | ||
|
||
for (i in 1 : p){ | ||
out = glmnet(XS[, -i], X[, i], family = "gaussian", lambda = lambda) | ||
|
||
Coef = out$beta | ||
Predict = predict(out, XS[, -i], type = "link") | ||
CoefMatrix[i, ] = as.numeric(Coef) / apply(X[, -i], 2, sd) | ||
Eresidual[, i] = X[, i] - Predict | ||
} | ||
|
||
CovRes = t(Eresidual) %*% Eresidual / n # residuals covariance | ||
Est = matrix(1, p, p) # estimated partial correlation (rho hat in the paper ) | ||
|
||
for (i in 1 : (p - 1)){ | ||
for (j in (i + 1) : p){ | ||
temp = Eresidual[, i] * Eresidual[, j] + Eresidual[, i]^2 * CoefMatrix[j, i] + Eresidual[, j]^2 * CoefMatrix[i, j - 1] | ||
Est[i, j] = mean(temp) / sqrt(diag(CovRes)[i] * diag(CovRes)[j]) | ||
Est[j, i] = Est[i, j] | ||
} | ||
} | ||
|
||
# sparse partial correlation estimation with threshold (rho ~ in the paper) | ||
EstThresh = Est * ( abs(Est) >= (t0 * sqrt(log(p) / n) * IndMatrix) ) | ||
|
||
kappa = (n / 3) * mean( colSums(Eresidual^4) / (colSums(Eresidual^2))^2 ) # forth moment, a number | ||
|
||
SE=sqrt((kappa*(1-EstThresh^2))^2/n) | ||
|
||
tscore=Est/SE | ||
|
||
return(list(Est=Est, | ||
tscore=tscore, | ||
kappa=kappa, | ||
EstThresh=EstThresh, | ||
n=n, p=p)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#' Conduct simultaneous inference of estimated partial correlations | ||
#' | ||
#' @description | ||
#' Inference() is the function to conduct simultaneous inference of estimated partial correlations. | ||
#' | ||
#' @export inference | ||
#' @importFrom stats pnorm | ||
#' @param list A list returned by either `PCGII()` or `clevel()`. | ||
#' @param alpha A pre-determined False Discovery Rate. Nominal FDR is set at 0.05 by default. | ||
#' @returns An adjacency matrix of significant partial correlations. | ||
#' @examples | ||
#' library(igraph) | ||
#' library(PCGII) | ||
#' library(mvtnorm) | ||
#' # Simulating data | ||
#' set.seed(1234567) | ||
#' n=50 # sample size | ||
#' p=30 # number of nodes | ||
#' | ||
#' Omega=make_random_precision_mat(eta=.01, p=p) | ||
#' | ||
#' # population covariance matrix, which is used to generate data | ||
#' Sigma=solve(Omega) | ||
#' # simulate expression data | ||
#' X = rmvnorm(n = n, sigma = Sigma) | ||
#' | ||
#' lam=2*sqrt(log(p)/n) ## fixed lambda | ||
#' | ||
#' # directed prior network | ||
#' prior_set=as.data.frame(matrix(data=c(5,6, 28,24), nrow=2, ncol=2, byrow = TRUE)) | ||
#' colnames(prior_set)=c("row", "col") | ||
#' prior_set=undirected_prior(prior_set) | ||
#' PCGII_out=PCGII(df=X, prior=prior_set, lambda = lam) | ||
#' inference_out=inference(list=PCGII_out) | ||
#' diag(inference_out)=0 | ||
#' net=graph_from_adjacency_matrix(inference_out, mode = "undirected") | ||
#' plot(net, vertex.size=4, | ||
#' vertex.label.dist=0.5, | ||
#' vertex.color="red", | ||
#' edge.arrow.size=0.5, | ||
#' layout=layout_in_circle(net)) | ||
inference=function(list, alpha=0.05){ | ||
Est=list$Est | ||
tscore=list$tscore | ||
kappa=list$kappa | ||
EstThresh=list$EstThresh | ||
n=list$n; p=list$p; | ||
t0=2; tau = seq(0, 3.5, 0.01); smax = n / 2; lentau = length(tau) | ||
|
||
|
||
resprop = list() # selected edges with different tau's, a list of 351 elements | ||
rejectprop = c() | ||
for (i in 1 : lentau){ # tau vary from 0 to 3.50 by 0.01, length=351 | ||
Threshold = tau[i] * sqrt(kappa * log(p) / n) * (1 - EstThresh^2) | ||
|
||
# c=0 | ||
SRec = 1 * (abs(Est) > Threshold) # selected edge (matrix with 0 & 1) at tau[i] | ||
NoNSRec = 1 * (SRec == 0) | ||
resprop[[i]] = which(SRec == 1, arr.ind = TRUE) # select those significant edges at tau[i], off-diagonal elements, first columns, then second columns | ||
rejectprop = c(rejectprop, max(1, (sum(SRec) - p))) | ||
} | ||
|
||
# c=0 | ||
FDPprop = 2 * (p * (p - 1)) * ( 1 - pnorm( tau * sqrt(log(p)) ) ) / rejectprop # FDP corresponding to each tau (page 10) | ||
|
||
FDPresprop = c() | ||
|
||
# determine thresholding parameter tau by controling FDP | ||
if (sum(FDPprop <= alpha) > 0) tauprop = min(c(2, tau[FDPprop <= alpha])) | ||
if (sum(FDPprop <= alpha) == 0) tauprop = 2 | ||
Threshold = tauprop * sqrt(kappa * log(p) / n) * (1 - EstThresh^2) | ||
SRec = 1 * (abs(Est) > Threshold); | ||
return(SRec) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#' Generate block-diagonal matrix of size p by p | ||
#' | ||
#' @description | ||
#' A utility function generates block-diagonal matrix of size p by p with blocks B1, B2, ..., Bk. Each block matrix is of size blocksize by blocksize. The off-diagonal elements in block matrix are generated from uniform (min.beta, max.beta). The diagonal elements in block matrix are generated from uniform (1, 1.25). | ||
#' | ||
#' @importFrom Matrix bdiag | ||
#' @importFrom corpcor is.positive.definite | ||
#' @importFrom stats runif | ||
#' @export makeBlockDiag | ||
#' @param blocksize A positive integer, the dimension of the block matrix. Note, 'blocksize' has to be a factor of 'p'. | ||
#' @param p A positive integer, the size of the block-diagonal matrix. | ||
#' @param min.beta A positive number, lower limits of the uniform distribution. | ||
#' @param max.beta A positive number, upper limits of the uniform distribution. | ||
#' @returns A block-diagonal matrix of size 'p' by 'p'. | ||
#' @examples | ||
#' mat = makeBlockDiag(blocksize=4, p=20) | ||
makeBlockDiag=function(blocksize=4, p=20, min.beta=0.3, max.beta=0.9){ | ||
# blocksize has to be a factor of p | ||
reps=p/blocksize | ||
S=list() | ||
for (i in 1:reps) { | ||
bd=matrix(runif(1, min.beta, max.beta), blocksize, blocksize) | ||
diag(bd)=runif(1,1,1.25) | ||
while(!is.positive.definite(bd)){diag(bd)=diag(bd)+0.01} | ||
S[[i]]=bd | ||
} | ||
as.matrix(Matrix::bdiag(S)) | ||
} |
Oops, something went wrong.