diff --git a/DESCRIPTION b/DESCRIPTION index de2e3b9..4e76bfd 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,20 @@ Package: fastclime Type: Package -Title: A Fast Solver for Parameterized Linear Programming Problems and - Constrained L1 Minimization Approach to Sparse Precision Matrix - Estimation -Version: 1.2.5 -Date: 2015-04-26 +Title: A Fast Solver for Parameterized LP Problems, Constrained L1 + Minimization Approach to Sparse Precision Matrix Estimation and + Dantzig Selector +Version: 1.4 +Date: 2016-04-18 Author: Haotian Pang, Han Liu and Robert Vanderbei Maintainer: Haotian Pang Depends: R (>= 2.15.0), lattice, igraph, MASS, Matrix -Description: An efficient method of recovering precision matrices - by applying the parametric simplex method is provided in this package. - The computation is based on a linear optimization solver. - It also contains a generic Linear Programming solver and a - parameterized Linear Programming solver based on the parametric simplex method. +Description: Provides a method of recovering the precision matrix efficiently + and solving for the dantzig selector by applying the parametric + simplex method. The computation is based on a linear optimization + solver. It also contains a generic LP solver and a parameterized LP + solver using parametric simplex method. License: GPL-2 Repository: CRAN -Packaged: 2015-04-28 15:09:28 UTC; Haotian +Packaged: 2016-04-18 20:42:46 UTC; diqi NeedsCompilation: yes -Date/Publication: 2015-04-29 01:00:45 +Date/Publication: 2016-04-19 01:03:54 diff --git a/MD5 b/MD5 index 9423061..fa7ee3b 100644 --- a/MD5 +++ b/MD5 @@ -1,20 +1,26 @@ -14f64da498203e5dfa0d0c38e060a879 *DESCRIPTION -91550c718c980cdc013d446f08e2ab43 *NAMESPACE -9e4bf14ea92b98a590d7fa1eac42849f *R/fastclime.R +bbd18b1d4fd1567705b80f2ed105166d *DESCRIPTION +06ad4ba0325b6b4cd1db0527d7d29c84 *NAMESPACE +d73d03f0823c3525f51e536fd2239dbb *R/dantzig.R +160d9f4b55ec7785b4b072462b85e098 *R/dantzig.generator.R +abe951f5b691616b98195f3d79ac2f6f *R/dantzig.selector.R +1f0a64344d4a8e359a343593d6bfccb5 *R/fastclime.R 72e007d2cb34233ade78ee6e2f1f8b76 *R/fastclime.generator.R -8ef3ee31b65f547317d1462094958e44 *R/fastclime.lambda.R b3963588ea4d8a6a2cd76260cc1d1e4d *R/fastclime.plot.R +47a616773f0890d252a050e8c6fcc8e1 *R/fastclime.selector.R 5c3828535b08a0c1788ed23c83861df0 *R/fastlp.R 536660f46b0c6dc2521f61f909c17c25 *R/paralp.R -76c9cdb8b2cceb78acb824b3638b1b33 *build/vignette.rds +63e519b5961c72e0c7658351fb3768d7 *build/vignette.rds ec90576375b24e35502847bdd1d85b6e *data/stockdata.rda 9cf23fc8036ec97c902be03c40a08490 *inst/doc/vignette.Rnw f98003a926cc707a4151fca574f18949 *inst/doc/vignette.pdf -fecc41afd3b960765e9bb14249608699 *man/fastclime-package.Rd -0de78a40947a13893cc690823524fd3b *man/fastclime.Rd -05bb9eae8a1e4a458d4c9630fd907b8d *man/fastclime.generator.Rd -324d7b637a23e7d94cd740ae71269204 *man/fastclime.lambda.Rd +39d6af5a5f954649c45630c36af872bb *man/dantzig.Rd +a548bf97e2058993638ff0b83fee209e *man/dantzig.generator.Rd +c682ad82b538860d45104db2d6548115 *man/dantzig.selector.Rd +2b4e4a37efb71e88c8c028047a614fb3 *man/fastclime-package.Rd +be21f97b1fe5ae2bfd866060d75e305d *man/fastclime.Rd +abbc954825a01b4f8fa520024d267864 *man/fastclime.generator.Rd 64307ab02e80b23986e1f4446aa316c6 *man/fastclime.plot.Rd +a0184c8f7fd1815026131718920df8f6 *man/fastclime.selector.Rd 35a34d4b383d8d9798b754ca6ebd808b *man/fastlp.Rd e45d01c047c1c70cd41af8adb55ec220 *man/paralp.Rd 4fede98f1e2ad5f3ee859ea58504be85 *man/plot.fastclime.Rd @@ -22,17 +28,18 @@ e45d01c047c1c70cd41af8adb55ec220 *man/paralp.Rd b196c00bb421f420ddd428008f97458b *man/print.fastclime.Rd fbb7265b9ba8567834e3a326c7b67247 *man/print.sim.Rd f584effdd8752b5956fea4ac449e7e33 *man/stockdata.Rd -35a63cab361acbbe913b1ed670d0c13f *src/fastlp.c +5a43dc28eb8a09075c5635f6d1f059b4 *src/dantzig.c +727636e2dbade4df49b5b850fdd8ec4f *src/fastlp.c 3d21bc5d49dee6d81d869301b5adbcb0 *src/heap.c ad5e9b747ce1895e811d2d2e8052eddc *src/heap.h -c5217e444b843a5a358b905e6cb1c975 *src/linalg.c +73f4a0339e3200c93191fb0ca0fd7ec8 *src/linalg.c 73fc5b2f3cf1fd12dab5c3066e43f6d7 *src/linalg.h 7f31e3adcb7ccc095b1276de9a0b528f *src/lu.c af36d5dada6396e15a3010c505432213 *src/lu.h abc0a80ca8d0260b12e96ba9f9df608f *src/macros.h e1be8277ecf906d37bc71e80fb591490 *src/myalloc.h -f71b04cab340901ada6b838edac84ff9 *src/paralp.c -38e3e602e5d4ef2881e1d209050bf5d2 *src/parametric.c +700fdd78e2957c90d1e691643322f8e9 *src/paralp.c +e43295a6e453c3bebe1f5f6e81e9d1c0 *src/parametric.c 38e9ff9c3d2f5b3b832010d120069caf *src/tree.c ebd8db35929bc9a80f444621e0e0f909 *src/tree.h 0dcd4cce00eb3ef52ec58251b0c62ea4 *vignettes/fastclime.pdf diff --git a/NAMESPACE b/NAMESPACE index ff5e559..f563559 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,19 @@ useDynLib("fastclime") import("Matrix","igraph","lattice","MASS") +importFrom("grDevices", "dev.off", "gray.colors", "postscript") +importFrom("graphics", "par", "plot") +importFrom("stats", "cor", "cov", "rbinom", "rnorm", "runif") export(fastclime, fastclime.generator, fastclime.plot, - fastclime.lambda, + fastclime.selector, print.fastclime,plot.fastclime, print.sim,plot.sim, fastlp, - paralp + paralp, + dantzig, + dantzig.generator, + dantzig.selector ) S3method("print","sim") diff --git a/R/dantzig.R b/R/dantzig.R new file mode 100755 index 0000000..514abe8 --- /dev/null +++ b/R/dantzig.R @@ -0,0 +1,54 @@ +#-------------------------------------------------------------------------------# +# Package: fastclime # +# dantzig(): Dantzig Selector Function # +# Authors: Haotian Pang, Han Liu and Robert Vanderbei # +# Emails: , and # +# Date: April 25th 2014 # +# Version: 1.2.4 # +#-------------------------------------------------------------------------------# +dantzig <- function(X, y, lambda = 0.01, nlambda = 50) +{ + n0<-nrow(X) + d0<-ncol(X) + BETA0<-matrix(0,d0,nlambda) + lambdalist<-matrix(0,nlambda,1) + + cat("compute X^TX and X^y \n") + + X2=t(X)%*%X + Xy=t(X)%*%y + + cat("start recovering \n") + + # start.time <- Sys.time() + str=.C("dantzig", as.double(X2), as.double(Xy), as.double(BETA0), + as.integer(d0), as.double(lambda), as.integer(nlambda), as.double(lambdalist), PACKAGE="fastclime") + # end.time <- Sys.time() + # t0 <- end.time - start.time + + # ptm <- proc.time() + # cat("prepare the solution path \n") + # proc.time() - ptm + # print(ptm) + + rm(X2,Xy) + BETA0<-matrix(unlist(str[3]),d0,nlambda) + lambdalist<-unlist(str[7]) + + validn<-sum(lambdalist>0) + + BETA0<-BETA0[,1:validn] + + final_lambda<-lambdalist[validn] + cat("lambdamin is ", final_lambda,"\n") + + lambdalist<-lambdalist[1:validn] + result<-list("X" = X, "y"=y, "BETA0"=BETA0, "n0"=n0, "d0"=d0, "validn"=validn, "lambdalist"=lambdalist) + + gc() + class(result) = "dantzig" + cat("Done! \n") + + return(result) + +} diff --git a/R/dantzig.generator.R b/R/dantzig.generator.R new file mode 100755 index 0000000..9fbbdd7 --- /dev/null +++ b/R/dantzig.generator.R @@ -0,0 +1,47 @@ + +#---------------------------------------------------------------------------------# +# Package: fastclime # +# Dantzig.generator: Generates sparse linear regression model for testing dantzig # +# Authors: Haotian Pang, Han Liu, Robert Vanderbei and Di Qi # +# Emails: , and # +# Date: April 17th 2016 # +# Version: 1.4.2 # +#---------------------------------------------------------------------------------# + +dantzig.generator <- function(n = 50, d = 100, sparsity = 0.1, sigma0=1) +{ + + if(sparsity<1){ + s<-floor(d*sparsity) + } + else{ + s<-sparsity + } + + + BETA<-rep(0,d) + pos<-rep(0,s) + + for (i in 1:s) + { + + a<-rnorm(1, mean=0, sd=1) + si<-2*(rbinom(1,1,0.5)-0.5) + n1<-floor(runif(1,min=1,max=d+1)) + BETA[n1]=si*(1+a) + pos[i]=n1 + } + + sigma<-rnorm(n, mean=0, sd=sigma0) + + + X0<-matrix(rnorm(n*d, mean = 0, sd = 1), n,d) + y<-X0%*%BETA+sigma + + gc() + + sim = list(X0 = X0 , y = y, BETA=BETA, s=s, pos=pos) + class(sim) = "sim" + return(sim) +} + diff --git a/R/dantzig.selector.R b/R/dantzig.selector.R new file mode 100644 index 0000000..b80aee9 --- /dev/null +++ b/R/dantzig.selector.R @@ -0,0 +1,9 @@ +dantzig.selector <- function(lambdalist, BETA0, lambda){ + for (i in 1:length(lambdalist)) { + if (lambdalist[i] < lambda) { + break; + } + } + beta0<-BETA0[,i] + return(beta0) +} \ No newline at end of file diff --git a/R/fastclime.R b/R/fastclime.R index 0fc996f..bf65c34 100755 --- a/R/fastclime.R +++ b/R/fastclime.R @@ -12,14 +12,15 @@ fastclime <- function(x, lambda.min=0.1, nlambda = 50) gcinfo(FALSE) cov.input<-1 SigmaInput<-x + d<-dim(SigmaInput)[2] if(!isSymmetric(x)) { - SigmaInput<-cor(x) + SigmaInput<-cov(x)*(1-1/d) cov.input<-0 } - d<-dim(SigmaInput)[2] + cat("Allocating memory \n") maxnlambda=0 diff --git a/R/fastclime.lambda.R b/R/fastclime.selector.R similarity index 73% rename from R/fastclime.lambda.R rename to R/fastclime.selector.R index 98ed6eb..85d6bb1 100755 --- a/R/fastclime.lambda.R +++ b/R/fastclime.selector.R @@ -5,14 +5,14 @@ # Version: 1.2.4 # #------------------------------------------------------------------------------------------# -fastclime.lambda <- function(lambdamtx, icovlist, lambda) +fastclime.selector <- function(lambdamtx, icovlist, lambda) { gcinfo(FALSE) d<-dim(icovlist[[1]])[2] maxnlambda<-dim(lambdamtx)[1] icov<-matrix(0,d,d) - path<-matrix(0,d,d) + adaj<-matrix(0,d,d) seq<-rep(0,d) threshold<-1e-5 status<-0 @@ -34,13 +34,16 @@ fastclime.lambda <- function(lambdamtx, icovlist, lambda) } - icov<-(icov+t(icov))/2 + icov <- icov*(abs(icov)<= abs(t(icov)))+ t(icov)*(abs(icov)> abs(t(icov))) + cat("changed to min") + #icov<-(icov+t(icov))/2 tmpicov<-icov diag(tmpicov)<-0 - path<-Matrix(tmpicov>threshold, sparse=TRUE)*1 + adaj<-Matrix(tmpicov>threshold, sparse=TRUE)*1 + - sparsity<-(sum(path))/(d^2-d) + sparsity<-(sum(adaj))/(d^2-d) if(status==1) { @@ -50,8 +53,8 @@ fastclime.lambda <- function(lambdamtx, icovlist, lambda) rm(temp_lambda,seq,d,threshold) gc() - result<-list("icov"=icov, "path"=path,"sparsity"=sparsity) - class(result)="fastclime.lambda" + result<-list("icov"=icov, "adaj"=adaj,"sparsity"=sparsity) + class(result)="fastclime.selector" return(result) diff --git a/build/vignette.rds b/build/vignette.rds index d37171c..cae52ce 100644 Binary files a/build/vignette.rds and b/build/vignette.rds differ diff --git a/man/dantzig.Rd b/man/dantzig.Rd new file mode 100644 index 0000000..5188e39 --- /dev/null +++ b/man/dantzig.Rd @@ -0,0 +1,85 @@ +\name{dantzig} +\alias{dantzig} + +\title{ +A solver for the Dantzig selector estimator +} + +\description{ +Implementation of the Primal Dual (i.e. Self Dual) Simplex Method on Dantzig selector +} + +\usage{ +dantzig(X, y, lambda = 0.01, nlambda = 50) +} + +\arguments{ + \item{X}{ +\code{x} is an \code{n} by \code{d} data matrix +} + \item{y}{ +\code{y} is a length \code{n} response vector +} + \item{lambda}{ +The parametric simplex method will stop when the calculated parameter is smaller than lambda. The default value is \code{0.01}. +} + \item{nlambda}{ + This is the number of the maximum path length one would like to achieve. The default length is 50. +} +} + +\details{ +This program applies the parametric simplex linear programming method to the Dantzig selector to solve for the regression coefficient vector. The solution path of the problem corresponds to the parameter in the parametric simplex method. +} + +\note{ +The program will stop when either the maximum number of iterations for each column \code{nlambda} is achieved or when the required \code{lambda} is achieved for each column. Note if d is large and nlambda is also large, it is possible that the program will fail to allocate memory for the path. +} + + + +\value{ +An object with S3 class \code{"dantzig"} is returned: + \item{X}{ +\code{X} is the \code{n} by \code{d} data matrix. +} + \item{y}{ +\code{y} is a length \code{n} response vector. +} + \item{BETA0}{ +\code{BETA0} is a \code{d} by \code{validn} matrix where each column has an estimated regression coefficient vector given a lambda interval. +} + + \item{n0}{ +\code{n0} is the number of rows in the \code{n} by \code{d} data matrix. +} + \item{d0}{ +\code{d0} is the number of columns in the \code{n} by \code{d} data matrix. +} + \item{validn}{ +\code{validn} is the number of solutions along the solution path. The maximum is \code{nlambda}. +} + \item{lambdalist}{ +\code{lambdalist} is the decrementing path of the lambda solution values. +} + +} + +\author{ +Haotian Pang, Han Liu, Robert Vanderbei and Di Qi \cr +Maintainer: Haotian Pang +} + +\seealso{ +\code{\link{dantzig.selector}} +} + +\examples{ + +#generate data +a = dantzig.generator(n = 200, d = 100, sparsity = 0.1) + +#regression coefficient estimation +b = dantzig(a$X0, a$y, lambda = 0.1, nlambda = 100) + +} diff --git a/man/dantzig.generator.Rd b/man/dantzig.generator.Rd new file mode 100644 index 0000000..b7801fe --- /dev/null +++ b/man/dantzig.generator.Rd @@ -0,0 +1,68 @@ +\name{dantzig.generator} +\alias{dantzig.generator} + +\title{ +Dantzig data generator +} + +\description{ +Generates sparse linear regression model for testing \code{dantzig} function +} + +\usage{ +dantzig.generator(n = 50, d = 100, sparsity = 0.1, sigma0=1) +} + +\arguments{ + \item{n}{ +The number of observations (sample size). The default value is \code{50}. +} + \item{d}{ +The number of variables (dimension). The default value is \code{100}. +} + \item{sparsity}{ +\code{d} is either the number of nonzero entries out of \code{d} or the proportion of nonzero entries in \code{BETA} +} + \item{sigma0}{ +\code{sigma0} is the standard deviation of the noise vector +} +} + +\details{ +Generates sparse linear regression model for testing \code{dantzig} function +} + +\value{ +An object with S3 class "dantzig.generator" is returned: +\item{X0}{ +\code{X0} is the \code{n} by \code{d} matrix for the generated data +} +\item{y}{ +A \code{y} is a \code{n} response vector for the generated data +} + \item{BETA}{ +\code{BETA} is a length \code{d} regression coefficient vector +} + \item{s}{ +\code{s} is the number of nonzero entries out of \code{d} +} + \item{pos}{ +A vector containing the indices of the nonzero entries (may contain repeats) +} +} + +\author{ +Haotian Pang, Han Liu, Robert Vanderbei and Di Qi \cr +Maintainer: Di Qi +} + +\seealso{ +\code{\link{dantzig}} +} + +\examples{ + +## +L = dantzig.generator(n = 50, d = 100, sparsity = 0.1) + +} diff --git a/man/dantzig.selector.Rd b/man/dantzig.selector.Rd new file mode 100644 index 0000000..73d58c4 --- /dev/null +++ b/man/dantzig.selector.Rd @@ -0,0 +1,58 @@ +\name{dantzig.selector} +\alias{dantzig.selector} + +\title{ +Dantzig selector +} + +\description{ +Function used to select the solution path for a given lambda +} + +\usage{ +dantzig.selector(lambdalist, BETA0, lambda) +} + +\arguments{ + \item{lambdalist}{ +\code{lambdalist} is the decrementing path of the lambda solution values. It is obtained from the \code{dantzig} function. +} + \item{BETA0}{ +\code{BETA0} is a \code{d} by \code{validn} matrix where each column has an estimated regression coefficient vector given a given lambda interval. It is obtained from the \code{dantzig} function. +} + \item{lambda}{ +\code{lambda} is the lambda solution value the user wishes to estimate a regression coefficient vector with. +} +} + +\details{ +Finds the estimated regression coefficient vector associated with a given \code{lambda} +} + +\value{ +\item{beta0}{ +\code{beta0} is the estimated regression coefficient vector for the given \code{lambda}. +} +} + +\author{ +Di Qi, Haotian Pang \cr +Maintainer: Di Qi +} + +\seealso{ +\code{\link{dantzig}} +} + +\examples{ + +# generate data +a = dantzig.generator(n = 200, d = 100, sparsity = 0.1) + +# regression coefficient estimation +b = dantzig(a$X0, a$y, lambda = 0.1, nlambda = 100) + +# estimated regression coefficient vector +c = dantzig.selector(b$lambdalist, b$BETA0, 15) + +} diff --git a/man/fastclime-package.Rd b/man/fastclime-package.Rd index 3f92d10..fa6c577 100755 --- a/man/fastclime-package.Rd +++ b/man/fastclime-package.Rd @@ -16,11 +16,12 @@ Date: \tab 2014-04-25\cr License: \tab GPL-2\cr LazyLoad: \tab yes\cr } -The package "fastclime" provides 4 main functions:\cr +The package "fastclime" provides 5 main functions:\cr (1) the data generator creates random samples from multivariate normal distributions with different graph structures. Please refer to \code{\link{fastclime.generator}}.\cr (2) The parametric simplex solver for constrainted l1 minimization approach to sparse precision matrix estimation. Please refer to \code{\link{fastclime}}.\cr -(3) The path selector function gives the path and precision matrix for a given parameter in CLIME. Please refer to \code{\link{fastclime.lambda}}.\cr +(3) The path selector function gives the path and precision matrix for a given parameter in CLIME. Please refer to \code{\link{fastclime.selector}}.\cr (4) A generic linear programming solver and a parameterized linear programming solver. Please refer to \code{\link{fastlp}} and \code{\link{paralp}}.\cr +(5) An implementation of the Primal Dual (i.e. Self Dual) Simplex Method on the Dantzig selector. Please refer to \code{\link{dantzig}}, \code{\link{dantzig.selector}} and \code{\link{dantzig.generator}}.\cr } \author{ Haotian Pang, Han Liu and Robert Vanderbei \cr @@ -28,5 +29,5 @@ Maintainer: Haotan Pang; } \seealso{ -\code{\link{fastclime.generator}}, \code{\link{fastclime}}, \code{\link{fastclime.plot}}, \code{\link{fastclime.lambda}}, \code{\link{fastlp}} and\code{\link{paralp}} +\code{\link{fastclime.generator}}, \code{\link{fastclime}}, \code{\link{fastclime.plot}}, \code{\link{fastclime.selector}}, \code{\link{fastlp}}, \code{\link{paralp}}, \code{\link{dantzig}}, \code{\link{dantzig.selector}}, and \code{\link{dantzig.generator}} } diff --git a/man/fastclime.Rd b/man/fastclime.Rd index 8e73db4..3ea2613 100755 --- a/man/fastclime.Rd +++ b/man/fastclime.Rd @@ -19,7 +19,7 @@ There are 2 options: (1) \code{x} is an \code{n} by \code{d} data matrix (2) a \ } \item{lambda.min}{ -This is the smallest value of lambda you would like the solver to explorer. The default value is \code{0.1}. If \code{nlambda} is large enough, the precision matrix selector function \code{\link{fastclime.lambda}} will be able to find all precision matrix corresponding to all lambda values ranging from \code{1} to \code{lambda.min}. +This is the smallest value of lambda you would like the solver to explorer. The default value is \code{0.1}. If \code{nlambda} is large enough, the precision matrix selector function \code{\link{fastclime.selector}} will be able to find all precision matrix corresponding to all lambda values ranging from \code{1} to \code{lambda.min}. } \item{nlambda}{ @@ -32,7 +32,7 @@ This program uses parametric simplex linear programming method to solve CLIME (C } \note{ -The program will stop when either the maximum number of iteration for each column \code{nlambda} is achieved or when the required \code{lambda.min} is achieved for each column. When the dimension is huge, make sure \code{nlambda} is small so that there are enough memory to allocate the solution path. \code{lambdamtx} and \code{icovlist} will be used in \code{\link{fastclime.lambda}}. +The program will stop when either the maximum number of iteration for each column \code{nlambda} is achieved or when the required \code{lambda.min} is achieved for each column. When the dimension is huge, make sure \code{nlambda} is small so that there are enough memory to allocate the solution path. \code{lambdamtx} and \code{icovlist} will be used in \code{\link{fastclime.selector}}. } \value{ @@ -50,10 +50,10 @@ The empirical covariance of the data. If cov.inpu is TRUE, sigmahat = data The length of the path. If the program finds \code{lambda.min} in less than \code{nlambda} iterations for all columns, then the acutal maximum lenth for all columns will be returned. Otherwise it equals \code{nlambda}. } \item{lambdamtx}{ -The sequence of regularization parameters for each column, it is a \code{nlambda} by \code{d} matrix. It will be filled with 0 when the program finds the required \code{lambda.min} value for that column. This parameter is required for \code{\link{fastclime.lambda}}. +The sequence of regularization parameters for each column, it is a \code{nlambda} by \code{d} matrix. It will be filled with 0 when the program finds the required \code{lambda.min} value for that column. This parameter is required for \code{\link{fastclime.selector}}. } - \item{icovlist}{ -A \code{nlambda} list of \code{d} by \code{d} precision matrices as an alternative graph path (numerical path) corresponding to \code{lambdamtx}. This parameter is also required for \code{\link{fastclime.lambda}}. + \item{icovlist}{ +A \code{nlambda} list of \code{d} by \code{d} precision matrices as an alternative graph path (numerical path) corresponding to \code{lambdamtx}. This parameter is also required for \code{\link{fastclime.selector}}. } } @@ -64,7 +64,7 @@ Maintainer: Haotan Pang } \seealso{ -\code{\link{fastclime.generator}}, \code{\link{fastclime.plot}}, \code{\link{fastclime.lambda}} and \code{\link{fastclime-package}}. +\code{\link{fastclime.generator}}, \code{\link{fastclime.plot}}, \code{\link{fastclime.selector}} and \code{\link{fastclime-package}}. } \examples{ @@ -73,11 +73,11 @@ L = fastclime.generator(n = 100, d = 20) #graph path estimation out1 = fastclime(L$data,0.1) -O = fastclime.lambda(out1$lambdamtx, out1$icovlist,0.2) -fastclime.plot(O$path) +out2 = fastclime.selector(out1$lambdamtx, out1$icovlist,0.2) +fastclime.plot(out2$adaj) #graph path estimation using the sample covariance matrix as the input. out1 = fastclime(cor(L$data),0.1) -O = fastclime.lambda(out1$lambdamtx, out1$icovlist,0.2) -fastclime.plot(O$path) +out2 = fastclime.selector(out1$lambdamtx, out1$icovlist,0.2) +fastclime.plot(out2$adaj) } diff --git a/man/fastclime.generator.Rd b/man/fastclime.generator.Rd index 5f77996..5ac56ed 100755 --- a/man/fastclime.generator.Rd +++ b/man/fastclime.generator.Rd @@ -49,7 +49,7 @@ Given the adjacency matrix \code{theta}, the graph patterns are generated as bel (I) \code{"random"}: Each pair of off-diagonal elements are randomly set \code{theta[i,j]=theta[j,i]=1} for \code{i!=j} with probability \code{prob}, and \code{0} other wise. It results in about \code{d*(d-1)*prob/2} edges in the graph.\cr\cr (II)\code{"hub"}:The row/columns are evenly partitioned into \code{g} disjoint groups. Each group is associated with a "center" row \code{i} in that group. Each pair of off-diagonal elements are set \code{theta[i,j]=theta[j,i]=1} for \code{i!=j} if \code{j} also belongs to the same group as \code{i} and \code{0} otherwise. It results in \code{d - g} edges in the graph.\cr\cr (III)\code{"cluster"}:The row/columns are evenly partitioned into \code{g} disjoint groups. Each pair of off-diagonal elements are set \code{theta[i,j]=theta[j,i]=1} for \code{i!=j} with the probability \code{prob}if both \code{i} and \code{j} belong to the same group, and \code{0} other wise. It results in about \code{g*(d/g)*(d/g-1)*prob/2} edges in the graph.\cr\cr -(IV)\code{"band"}: The off-diagonal elements are set to be \code{theta[i,j]=1} if \code{1<=|i-j|<=g} and \code{0} other wise. It results in \code{(2d-1-g)*g/2} edges in the graph.\cr\cr +(IV)\code{"band"}: The off-diagonal elements are set to be \code{theta[i,j]=1} if \code{1<=|i-j|<=g} and \code{0} other wise. It results in \code{(2d-1-g)*g/2} edges in the graph. The adjacency matrix \code{theta} has all diagonal elements equal to \code{0}. To obtain a positive definite precision matrix, the smallest eigenvalue of \code{theta*v} (denoted by \code{e}) is computed. Then we set the precision matrix equal to \code{theta*v+(|e|+0.1+u)I}. The covariance matrix is then computed to generate multivariate normal data. } diff --git a/man/fastclime.lambda.Rd b/man/fastclime.selector.Rd similarity index 84% rename from man/fastclime.lambda.Rd rename to man/fastclime.selector.Rd index f09c310..e04f525 100755 --- a/man/fastclime.lambda.Rd +++ b/man/fastclime.selector.Rd @@ -1,5 +1,5 @@ -\name{fastclime.lambda} -\alias{fastclime.lambda} +\name{fastclime.selector} +\alias{fastclime.selector} \title{ A precision matrix and path selector function for fastclime @@ -10,7 +10,7 @@ Select the precision matrix and solution path for a given parameter lambda } \usage{ -fastclime.lambda(lambdamtx, icovlist, lambda) +fastclime.selector(lambdamtx, icovlist, lambda) } \arguments{ @@ -34,15 +34,15 @@ The function is able to estimate the precision matrices corresponding to all \co } \value{ -An object with S3 class "fastclime.lambda" is returned: +An object with S3 class "fastclime.selector" is returned: \item{icov}{ The estimated precision matrix corresponding to \code{lambda}. } - \item{path}{ + \item{adaj}{ The estimated graph path corresponding to \code{lambda}. } \item{sparsity}{ -The sparsity level of this estimated graph path. +The sparsity level of this estimated graph for this value of lambda. } } @@ -62,6 +62,6 @@ L = fastclime.generator(n = 100, d = 20) #graph path estimation out1 = fastclime(L$data,0.1) -O = fastclime.lambda(out1$lambdamtx, out1$icovlist,0.2) -fastclime.plot(O$path) +out2 = fastclime.selector(out1$lambdamtx, out1$icovlist,0.2) +fastclime.plot(out2$adaj) } diff --git a/src/dantzig.c b/src/dantzig.c new file mode 100755 index 0000000..48b3301 --- /dev/null +++ b/src/dantzig.c @@ -0,0 +1,424 @@ +/***************************************************************************** + + Implementation of the + Primal Dual (i.e. Self Dual) Simplex Method on Dantzig Selector + R. Vanderbei & H. Pang, Novermeber 2012 + +******************************************************************************/ +#include +#include +#include +#include + +#include "myalloc.h" +#include "lu.h" +#include "linalg.h" +#include "macros.h" + +#define EPS1 1.0e-8 +#define EPS2 1.0e-12 +#define EPS3 1.0e-5 + + +int ratio_test2( + double *dy, + int *idy, + int ndy, + double *y, + double mu +); + + +void dantzig(double *X2, double *Xy, double *BETA0, int *d0, + double *lambda, int *nlambda, double *lambdalist) +{ + + + int m; /* number of constraints */ + int n; /* number of variables */ + int nz; /* number of nonzeros in sparse constraint matrix */ + int *ia; /* array row indices */ + int *ka; /* array of indices into ia and a */ + double *a; /* array of nonzeros in the constraint matrix */ +// double **LMATRIX; + + + int *basics; + int *nonbasics; + int *basicflag; + double *x_B; /* primal basics */ + double *y_N; /* dual nonbasics */ + double *xbar_B; /* primal basic perturbation */ + double *dy_N; /* dual basics step direction - values (sparse) */ + int *idy_N; /* dual basics step direction - row indices */ + int ndy_N=0; /* number of nonz in dy_N */ + double *dx_B; /* primal basics step direction - values (sparse) */ + int *idx_B; /* primal basics step direction - row indices */ + int ndx_B; /* number of nonz in dx_B */ + double *at; /* sparse data structure for a^t */ + int *iat; + int *kat; + int col_in; /* entering column; index in 'nonbasics' */ + int col_out; /* leaving column; index in 'basics' */ + int iter = 0; /* number of iterations */ + int i,j,k,v=0; + double s, t, tbar, mu=HUGE_VAL; + double *vec; + int *ivec; + int nvec; + int N; + double *output_vec; + int d; + //double temp_sum; + //double *temp_vec; + + + d=*d0; + m=2*d; + n=2*d; + nz=m*n; + + MALLOC( a, nz+m, double ); + MALLOC( ia, nz+m, int ); + MALLOC( ka, n+m+1, int ); + + + // MALLOC(LMATRIX, d, double*); + // for (i=0; i=d) { + a[k] = -X2[i*d+j-d]; + } + else if(i>=d && j EPS2) { + if ( mu < -x_B[i]/xbar_B[i] ) { + mu = -x_B[i]/xbar_B[i]; + col_out = i; + col_in = -1; + } + } + } + + + lambdalist[iter]=mu; + //printf("mu=%e \n",mu); + //printf("lambdalist[%d]=%e \n", iter, lambdalist[iter]); + + for (i=0; iEPS3){ + //printf("output_vec[%d]= %e \n",i, output_vec[i]); + //printf("output_vec[%d]= %e \n",i+m/2, output_vec[i+m/2]); + BETA0[m/2*iter+i]=output_vec[i]-output_vec[i+m/2]; + } + + } + + // for (i = 0; i < d; i++) { + // temp_sum = 0.0; + // // requires a single access to the i-th row + // for (j = 0; j < d; j++) { + // temp_sum += LMATRIX[i][j] *(output_vec[j]-output_vec[j+m/2]); + // } + // temp_vec[i]=temp_sum;// requires a single access of element c[i] + // printf("result[%d]= %e \n",i, temp_sum-Xy[i]); + + // } + // printf("lambda= %e \n",mu); + + + if(mu <= *lambda){ /*Find the solution corresponding to the required lambda*/ + break; + } + + /************************************************************* + * -1 t * + * step 2: compute dy = -(b n) e * + * n i * + * where i = col_out * + *************************************************************/ + + vec[0] = -1.0; + ivec[0] = col_out; + nvec = 1; + btsolve( m, vec, ivec, &nvec ); + Nt_times_y( N, at, iat, kat, basicflag, vec, ivec, nvec, + dy_N, idy_N, &ndy_N ); + + /************************************************************* + * step 3: ratio test to find entering column * + *************************************************************/ + col_in = ratio_test2( dy_N, idy_N, ndy_N, y_N, mu ); + if (col_in == -1) { /* infeasible */ + break; + } + + /************************************************************* + * -1 * + * step 4: compute dx = b n e * + * b j * + * * + *************************************************************/ + + j = nonbasics[col_in]; + for (i=0, k=ka[j]; k EPS1 ) { + j = idy[k]; + if ( y[j]/dy[k] < min ) { + min = y[j]/dy[k]; + jj = j; + } + } + } + + return jj; +} + + + + + + + + + + + diff --git a/src/fastlp.c b/src/fastlp.c index 027cea4..8e6922a 100755 --- a/src/fastlp.c +++ b/src/fastlp.c @@ -110,7 +110,7 @@ void fastlp(double *obj, double *mat, double *rhs, int *m0 , int *n0, double *op } } - ka[n]=nz; + ka[n]=k; solver20(m,n,nz,ia,ka,a,b,c); *status=status0; diff --git a/src/linalg.c b/src/linalg.c index 2128cca..fffb414 100755 --- a/src/linalg.c +++ b/src/linalg.c @@ -74,15 +74,20 @@ void atnum( int m, int n, int *ka, int *ia, double *a, CALLOC( iwork, m, int ); + for (k=0; kmax_row_iter[j/m0]){ - icov_mtx[i][j]=icov_mtx[i-1][j]; - } - - iicov[j*lambda+i]=icov_mtx[i][j]; - } - } - - *maxnlambda=maxiter; + + for(j = 0; j < m0*m0; j++){ + for(i = 1; i < lambda; i++){ + + if(i > max_row_iter[j/m0]){ + iicov[j*lambda+i] = iicov[j*lambda+i-1]; + } + + } + } - FREE(a); - FREE(ia); - FREE(ka); - FREE(c); - for(i=0;imaxiter){ - maxiter=iter; - } + for (iter = 0; iter < lambda; iter++) { + + CALLOC( output_vec, N, double ); + if(iter > *maxnlambda){ + *maxnlambda = iter; + } /************************************************************* * step 1: find mu * *************************************************************/ - mu = -HUGE_VAL; - col_in = -1; - col_out = -1; - for (i=0; i EPS2) { - if ( mu < -x_B[i]/xbar_B[i] ) { - mu = -x_B[i]/xbar_B[i]; - col_out = i; - col_in = -1; - } - } - } + mu = -HUGE_VAL; + col_in = -1; + col_out = -1; + for (i=0; i EPS2) { + if ( mu < -x_B[i]/xbar_B[i] ) { + mu = -x_B[i]/xbar_B[i]; + col_out = i; + col_in = -1; + } + } + } - mu_mtx[iter][ColNum]=mu; - - for (i=0; iEPS3){ - // count++; - icov_mtx[iter][m/2*ColNum+i]=output_vec[i]-output_vec[i+m/2]; - } - } - + mu_input[lambda*ColNum+iter] = mu; + + /*Find the current basic solution and store it to output_vec*/ + for (i=0; iEPS3){ + iicov[ColNum * lambda * (m/2) + i * lambda + iter] = output_vec[i]-output_vec[i+m/2]; } + } + - if ( mu <= EPS3 ) { /* optimal, we only need primal feasible */ - status=3; - break; + if(mu <= *lambdamin){ /*mu becomes smaller than the required lambdamin, stop*/ + status = 0; + break; + + } - } + if ( mu <= EPS3 ) { /* optimal, we only need primal feasible */ + status = 1; + break; + } /************************************************************* * -1 t * @@ -337,21 +321,22 @@ void solver2( * where i = col_out * *************************************************************/ - vec[0] = -1.0; - ivec[0] = col_out; - nvec = 1; - btsolve( m, vec, ivec, &nvec ); - Nt_times_y( N, at, iat, kat, basicflag, vec, ivec, nvec, + vec[0] = -1.0; + ivec[0] = col_out; + nvec = 1; + btsolve( m, vec, ivec, &nvec ); + Nt_times_y( N, at, iat, kat, basicflag, vec, ivec, nvec, dy_N, idy_N, &ndy_N ); /************************************************************* * step 3: ratio test to find entering column * *************************************************************/ - col_in = ratio_test( dy_N, idy_N, ndy_N, y_N, mu ); - if (col_in == -1) { /* infeasible */ + col_in = ratio_test( dy_N, idy_N, ndy_N, y_N, mu ); + if (col_in == -1) { /* infeasible for the current value of mu, stop*/ + status = 2; break; - } + } /************************************************************* * -1 * @@ -360,13 +345,13 @@ void solver2( * * *************************************************************/ - j = nonbasics[col_in]; - for (i=0, k=ka[j]; k