Skip to content

Commit

Permalink
version 3.8-1
Browse files Browse the repository at this point in the history
  • Loading branch information
Anhui Huang authored and cran-robot committed May 4, 2023
1 parent 96047f0 commit 0e17322
Show file tree
Hide file tree
Showing 10 changed files with 242 additions and 73 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,16 +1,16 @@
Package: sparseSEM
Type: Package
Title: Sparse-Aware Maximum Likelihood for Structural Equation Models
Version: 3.8
Version: 3.8-1
Date: 2023-04-17
Authors@R: c(person("Anhui", "Huang", role=c("aut","ctb", "cre" ), email="anhuihuang@gmail.com"))
Maintainer: Anhui Huang <anhuihuang@gmail.com>
Description: Provides Sparse-aware maximum likelihood for structural equation models in inferring network structure (topology).
License: GPL
Packaged: 2023-04-20 21:41:45 UTC; anhui
Packaged: 2023-04-30 03:29:22 UTC; anhui
NeedsCompilation: yes
Repository: CRAN
Suggests: knitr
VignetteBuilder: knitr
Date/Publication: 2023-04-21 12:40:22 UTC
Date/Publication: 2023-05-04 11:10:02 UTC
Author: Anhui Huang [aut, ctb, cre]
18 changes: 9 additions & 9 deletions MD5
@@ -1,8 +1,8 @@
1b05d8af566d0bffa415ba8a72a06ef7 *DESCRIPTION
b7e54aa02846438c9b4f0739037123ec *DESCRIPTION
bb9f69b9616594cc1ef2c9e4eb5e5ac6 *NAMESPACE
8cd359134055c6ecbaadf520941084f5 *R/elasticNetSML.R
a584cfc82d00e74072985292295b1182 *R/elasticNetSMLcv.R
bb70a33048c3ca09bf30ee48faeeaac6 *R/elasticNetSMLpoint.R
6e6de2a876f1c72a84eb604631aa375f *R/elasticNetSML.R
e947b0a889d59c929ec417dcbdf1ed0a *R/elasticNetSMLcv.R
55812cecde6c9c0ccd876bb614910679 *R/elasticNetSMLpoint.R
cbb8c7e8520229faa4c6a0dd8777fa0a *R/lassoSML.R
bad019d38fef5c94082020c9de652c61 *build/vignette.rds
8b29113c7289ac8136cc2af61567dd89 *data/B.rda
Expand All @@ -15,15 +15,15 @@ a0d563ead9fa839619f86608431501e4 *inst/doc/sparseSEM.pdf
e434b8cffaeee8b604eaccc4d6ed6777 *man/Missing.Rd
7a7bcf5e61f9256b3f7720efceed6a1c *man/X.Rd
6ca57e348ffdb690bfab41db260ba484 *man/Y.Rd
9e1594ff77dab78098bdef3ae3b48641 *man/elasticNetSML.Rd
842e3224e99cc0a3e43a9b44ecbfda59 *man/elasticNetSMLcv.Rd
06480c625ce4c106a10a28f029f5bd0c *man/elasticNetSMLpoint.Rd
28259514d7120e1e69aa63918a0d6432 *man/elasticNetSML.Rd
45c31997b1c0689a683b83929b79a32e *man/elasticNetSMLcv.Rd
4123592eff8b03878aad52e61326ef95 *man/elasticNetSMLpoint.Rd
64bb11823214b0162f2a55803bb0b834 *man/lassoSML.Rd
02d5e802a778d403cfe84adc2a93721a *man/sparseSEM-package.Rd
6302b4ccb9f7d35e86457a6729ac32a6 *man/sparseSEM-package.Rd
03cb2d11b8ddc5bd6b6f1f28edf771ee *src/Makevars
c265ecb513d3138a1b228acaf30e9f44 *src/lassoSEM.c
e508b1c76529602e9e7b66b0b970bb89 *src/lassoSEM.h
bedaf16b9268fb2d7d4627c0f0e85072 *src/lassoSMLv10beta.c
41df57cb9c1bc415555d2afbe90975c4 *src/lassoSMLv10beta.c
7c22b1b500cab25872924e218f1645f5 *src/registerDynamicSymbol.c
4e619e3576dcad79dd24601d0d7fe1bc *vignettes/Fig1_nCPU.png
eabdc3a9c03e6b931f78ddf5c1e23a11 *vignettes/Fig2_sigma01.png
Expand Down
4 changes: 2 additions & 2 deletions R/elasticNetSML.R
Expand Up @@ -2,7 +2,7 @@ elasticNetSML <-
function(Y,X,Missing,B,Verbose = 0){
M = nrow(Y);
N = ncol(Y);
if(Verbose>=0) cat("\telastic net SML version_1;",M, "Genes, ", N , "samples; Verbose: ", Verbose, "\n\n")
if(Verbose>=0) cat("\telastic net SML;",M, "Nodes, ", N , "samples; Verbose: ", Verbose, "\n\n")
f = matrix(1,M,1);
stat = rep(0,6);

Expand All @@ -28,7 +28,7 @@ function(Y,X,Missing,B,Verbose = 0){
Bout = matrix(output$B,nrow= M, ncol = M, byrow = F);
fout = matrix(output$f,nrow= M, ncol = 1, byrow = F);
stat = matrix(output$stat,nrow = 6,ncol = 1, byrow = F);
stat


SMLresult <- list(Bout,fout,stat,simTime[1]);
names(SMLresult) <-c("weight","F","statistics","simTime")
Expand Down
34 changes: 26 additions & 8 deletions R/elasticNetSMLcv.R
@@ -1,14 +1,25 @@

elasticNetSMLcv <- function(Y,X,Missing,B,alpha_factors = seq(1,0.05, -0.05), lambda_factors =10^seq(-0.2,-4,-0.2) ,Verbose = 0){
elasticNetSMLcv <- function(Y,X,Missing,B,alpha_factors = seq(1,0.05, -0.05), lambda_factors =10^seq(-0.2,-4,-0.2), kFold = 5, Verbose = 0){
M = nrow(Y);
N = ncol(Y);
if(Verbose>=0) cat("\telastic net SML version_1;",M, "Genes, ", N , "samples; Verbose: ", Verbose, "\n\n")
if(Verbose>=0) cat("\telastic net SML;",M, "Nodes, ", N , "samples; Verbose: ", Verbose, "\n\n")
f = matrix(1,M,1);
stat = rep(0,6);
#------------------------------------------------------R_package parameter
nAlpha = length(alpha_factors);
nLambda = length(lambda_factors);
mseStd = rep(0,nLambda*2);
mse = rep(0,nLambda*nAlpha);
mseSte = rep(0,nLambda*nAlpha);
mseStd = rep(0, nLambda*2);

parameters = matrix(0,0,2);
for (i in alpha_factors){
col1 = rep(i,nLambda);
col2 = lambda_factors;
para = cbind(col1,col2);
parameters = rbind(parameters,para)
}

#------------------------------------------------------R_package parameter

#dyn.load("elasticSMLv1.dll")
Expand All @@ -26,7 +37,10 @@ elasticNetSMLcv <- function(Y,X,Missing,B,alpha_factors = seq(1,0.05, -0.05), la
nAlpha = as.integer(nAlpha),
lambda = as.double(lambda_factors),
nLambda = as.integer(nLambda),
mse = as.double(mse),
mseSte = as.double(mseSte),
mseStd = as.double(mseStd),
kFold = as.integer(kFold),
verbose = as.integer(Verbose),
package = "sparseSEM");

Expand All @@ -39,14 +53,18 @@ elasticNetSMLcv <- function(Y,X,Missing,B,alpha_factors = seq(1,0.05, -0.05), la
fout = matrix(output$f,nrow= M, ncol = 1, byrow = F);
stat = matrix(output$stat,nrow = 6,ncol = 1, byrow = F);
#------------------------------------------------------R_package parameter
mseStd = matrix(output$mseStd,nrow= nLambda, ncol = 2, byrow = F);
mseStd = cbind(lambda_factors,mseStd);
colnames(mseStd)<-c("lambda","mean Error", "Std");
# mseStd = matrix(output$mseStd,nrow= nLambda, ncol = 2, byrow = F);
mse = matrix(output$mse,nrow = nAlpha*nLambda, ncol =1, byrow= F)
mseSte = matrix(output$mseSte,nrow = nAlpha*nLambda, ncol =1, byrow= F)


cvResults = cbind(parameters,mse,mseSte);
colnames(cvResults)<-c("alpha","lambda","mean Error", "Ste");
#------------------------------------------------------R_package parameter


SMLresult <- list(Bout,fout,stat,simTime[1],mseStd);
names(SMLresult) <-c("weight","F","statistics","simTime","residuals")
SMLresult <- list(Bout,fout,stat,simTime[1],cvResults);
names(SMLresult) <-c("weight","F","statistics","simTime","cv")
return(SMLresult)
}

2 changes: 1 addition & 1 deletion R/elasticNetSMLpoint.R
Expand Up @@ -2,7 +2,7 @@
elasticNetSMLpoint <- function(Y,X,Missing,B,alpha_factor = 1, lambda_factor =0.01 ,Verbose = 0){
M = nrow(Y);
N = ncol(Y);
if(Verbose>=0) cat("\telastic net SML version_1;",M, "Genes, ", N , "samples; Verbose: ", Verbose, "\n\n")
if(Verbose>=0) cat("\telastic net SML;",M, "Nodes, ", N , "samples; Verbose: ", Verbose, "\n\n")
f = matrix(1,M,1);
stat = rep(0,6);
#------------------------------------------------------R_package parameter
Expand Down
76 changes: 67 additions & 9 deletions man/elasticNetSML.Rd
Expand Up @@ -5,24 +5,63 @@
The Elastic Net penalty for SEM
}
\description{
For each alpha from 0.95 to 0.05 at a step of 0.05, the function perform 5 fold CV for
lambda_max to lambda_min in 20 step to determine the optimal alpha and lambda for the data. }
Fit the elastic Net penalized SEM model with input data X, Y: Y = BY + fX + e.

For users new to this package, elasticNetSML provides the simplified entry point:
the Missing matrix can be all 0 (none or unknown), and the B matrix can be all 0 (unknown connections in the network),
thus only Y and X are needed.

Internally, the function obtains the optimal hyperparameters (alpha, lambda) from
k-fold cross validation (CV) with fixed k = 5; the grid of candidate alphas and the number of
steps for candidate lambdas are also fixed. Specifically, for each alpha from 0.95 to 0.05 at
a step of 0.05, the function performs 5-fold CV from lambda_max to lambda_min in 20 steps
to determine the optimal alpha and lambda for the data.

Once users get the idea of what the package and this function do, the package also
provides functions with more flexibility that allow users to tune the parameters. Please
see the functions elasticNetSMLcv() and elasticNetSMLpoint().

Note that the function is computationally expensive. Generally, the following steps are followed: \cr
Step 1. SEM-ridge regression (L2 penalty) with k-fold CV: this step finds the optimal ridge hyperparameter rho; \cr
Step 2. fit the SEM-ridge regression model (L2 penalty) with rho from Step 1 to obtain the initial (non-sparse)
network structure (B_ridge); \cr
Step 3. SEM-elastic net regression with k-fold CV: this step finds the optimal hyperparameters (alpha, lambda); \cr
Step 4. fit the SEM-elastic net model with (alpha, lambda) from Step 3; \cr
Step 5. compute the PD and FDR statistics, and provide the output.


For large scale network inference, a standalone C/C++ software with openMPI for
parallel computation is also available upon request.

If you are interested in implementing the algorithm to integrate with Spark, please reach out to me.
}
\usage{
elasticNetSML(Y, X, Missing, B, Verbose = 0)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{Y}{
gene expression M by N matrix
The observed node response data with dimension of M by N. Y is normalized inside the function.
}
\item{X}{
cis_eQTL M by N matrix
The network node attribute matrix with dimension M by N, with M being the number of nodes,
and N being the number of samples. Theoretically, X can be an L by N matrix, with L being the total
number of node attributes. However, in the current implementation, each node allows one and only one attribute.
If you have more than one attribute for some nodes, please consider selecting the top one by either
correlation or principal component methods.
X is normalized inside the function.
}
\item{Missing}{
missing data in Y
M by N matrix corresponding to elements of Y. 0 denotes no missing, while 1 denotes missing.
If a node j in sample i has a missing label (Missing[j,i] = 1), the node response Y[j,i] is set to 0.
}
\item{B}{
true network topology if available
For a network with M nodes, B is the M by M adjacency matrix.
If the data is simulated/has a known true network topology (i.e., a known adjacency matrix), the Power
of detection (PD) and False Discovery Rate (FDR) are computed in the output parameter 'statistics'.

If the true network topology is unknown, B is optional, and the PD/FDR in output parameter
'statistics' should be ignored.
}
\item{Verbose}{
describe the information output from 0 - 10, larger number means more output
Expand All @@ -33,9 +72,28 @@ elasticNetSML(Y, X, Missing, B, Verbose = 0)
}
\value{

\item{Bout}{the matrix B from sparseSEM}
\item{fout}{f: the weight for matrix X}
\item{stat}{compute the power and FDR statistics if the ture topology is provided}
\item{Bout}{
the computed weights for the network topology. B[i,j] = 0 means there is no edge between nodes i and j;
B[i,j] != 0 denotes an (undirected) edge between nodes i and j.
}
\item{fout}{
f is a 1 by M array keeping the weights for X (in the SEM: Y = BY + FX + e). Theoretically, F can be an M by L matrix,
with M being the number of nodes, and L being the total number of node attributes. However, in the current implementation,
each node only allows one and only one attribute.
If you have more than one attributes for some nodes, please consider selecting the top one by either
correlation or principal component methods.

}
\item{stat}{
statistics is a 1 x 6 array keeping record of:
1. correct positive
2. total positive
3. false positive
4. positive detected
5. Power of detection (PD) = correct positive/total positive
6. False Discovery Rate (FDR) = false positive/positive detected

}
\item{simTime}{computational time}

}
Expand Down
64 changes: 49 additions & 15 deletions man/elasticNetSMLcv.Rd
Expand Up @@ -6,47 +6,80 @@
}
\description{
While elasticNetSML function has a set of default (alpha, lambda) and the optimal
one is chosen by 5 fold cv, elasticNetSMLcv tests the combination of a set of
one is chosen by k = 5 fold CV, elasticNetSMLcv tests the combinations of a set of
alphas and lambdas, and chooses one as the optimal pair of parameters.
elasticNetSMLcv should be combined with elasticNetSMLpoint to obtain the network
inference.
For each alpha from the set of alphas provided, the function perform 5 fold CV for

For each alpha from the set of alphas provided, the function performs k-fold CV for
each user-supplied lambda to determine the optimal alpha and lambda for the data. }
\usage{
elasticNetSMLcv(Y, X, Missing, B, alpha_factors,lambda_factors, Verbose)
elasticNetSMLcv(Y, X, Missing, B, alpha_factors,lambda_factors,kFold, Verbose)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{Y}{
gene expression M by N matrix
The observed node response data with dimension of M by N. Y is normalized inside the function.
}
\item{X}{
cis_eQTL M by N matrix
The network node attribute matrix with dimension M by N, with M being the number of nodes,
and N being the number of samples. Theoretically, X can be an L by N matrix, with L being the total
number of node attributes. However, in the current implementation, each node allows one and only one attribute.
If you have more than one attribute for some nodes, please consider selecting the top one by either
correlation or principal component methods.
X is normalized inside the function.
}
\item{Missing}{
missing data in Y
M by N matrix corresponding to elements of Y. 0 denotes no missing, while 1 denotes missing.
If a node j in sample i has a missing label (Missing[j,i] = 1), the node response Y[j,i] is set to 0.
}
\item{B}{
true network topology if available
For a network with M nodes, B is the M by M adjacency matrix.
If the data is simulated/has a known true network topology (i.e., a known adjacency matrix), the Power
of detection (PD) and False Discovery Rate (FDR) are computed in the output parameter 'statistics'.

If the true network topology is unknown, B is optional, and the PD/FDR in output parameter
'statistics' should be ignored.
}
\item{alpha_factors}{
alpha_factors: the set of alphas to be tested, and is in range of (0, 1);
The set of candidate alpha values. Default is seq(from = 1, to = 0.05, by = -0.05)
}
\item{lambda_factors}{
penalty lambda_factor: the set of lambda to be tested, and is in range of (0, 1);
The set of candidate lambda values. Default is 10^seq(from = -0.2, to = -4, by = -0.2)
}
\item{kFold}{
k-fold cross validation, default k=5
}
\item{Verbose}{
describe the information output from 0 - 10, larger number means more output
}

}
\details{
the function performs CV and parameter inference, and calculates the power and FDR
}
\value{

\item{Bout}{the matrix B from sparseSEM}
\item{fout}{f: the weight for matrix X}
\item{stat}{compute the power and FDR statistics if the ture topology is provided}
\item{Bout}{
the computed weights for the network topology. B[i,j] = 0 means there is no edge between nodes i and j;
B[i,j] != 0 denotes an (undirected) edge between nodes i and j.
}
\item{fout}{
f is a 1 by M array keeping the weights for X (in the SEM: Y = BY + FX + e). Theoretically, F can be an M by L matrix,
with M being the number of nodes, and L being the total number of node attributes. However, in the current implementation,
each node only allows one and only one attribute.
If you have more than one attributes for some nodes, please consider selecting the top one by either
correlation or principal component methods.

}
\item{stat}{
statistics is a 1 x 6 array keeping record of:
1. correct positive
2. total positive
3. false positive
4. positive detected
5. Power of detection (PD) = correct positive/total positive
6. False Discovery Rate (FDR) = false positive/positive detected

}
\item{simTime}{computational time}
\item{residual}{only meaningful for 1 alpha: \cr
col1: lambdas; \cr
Expand All @@ -64,6 +97,7 @@ elasticNetSMLcv(Y, X, Missing, B, alpha_factors,lambda_factors, Verbose)
2) elasticNetSMLcv: user supplied alphas (one or more), lambdas; compute the optimal parameters and network parameters \cr
3) elasticNetSMLpoint: user supplied one alpha and one lambda, compute the network parameters

The user is responsible for setting the random seed to guarantee repeatable results.
}

%% ~Make other sections like Warning with \section{Warning }{....} ~
Expand All @@ -75,7 +109,7 @@ elasticNetSMLcv(Y, X, Missing, B, alpha_factors,lambda_factors, Verbose)
data(X);
data(Missing);
\dontrun{OUT <- elasticNetSMLcv(Y, X, Missing, B, alpha_factors = c(0.75, 0.5, 0.25),
lambda_factors=c(0.1, 0.01, 0.001), Verbose = 1);
lambda_factors=c(0.1, 0.01, 0.001), kFold = 5, Verbose = 1);
}
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
Expand Down

0 comments on commit 0e17322

Please sign in to comment.