Skip to content

Commit

Permalink
version 1.3
Browse files Browse the repository at this point in the history
  • Loading branch information
ChongWu-Biostat authored and cran-robot committed Dec 13, 2016
1 parent aad627e commit c79deb1
Show file tree
Hide file tree
Showing 14 changed files with 546 additions and 830 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
@@ -1,16 +1,16 @@
Package: prclust
Type: Package
Title: Penalized Regression-Based Clustering Method
Version: 1.2
Date: 2016-7-18
Version: 1.3
Date: 2016-12-12
Depends: R (>= 3.1.1)
Author: Chong Wu, Wei Pan
Maintainer: Chong Wu <wuxx0845@umn.edu>
Description: Clustering is unsupervised and exploratory in nature. Yet, it can be performed through penalized regression with grouping pursuit. In this package, we provide two algorithms for fitting the penalized regression-based clustering (PRclust). One algorithm is based on quadratic penalty and difference convex method. Another algorithm is based on difference convex and ADMM, called DC-ADD, which is more efficient. Generalized cross validation was provided to select the tuning parameters. Rand index, adjusted Rand index and Jaccard index were provided to estimate the agreement between estimated cluster memberships and the truth.
Description: Clustering is unsupervised and exploratory in nature. Yet, it can be performed through penalized regression with grouping pursuit. In this package, we provide two algorithms for fitting the penalized regression-based clustering (PRclust) with non-convex grouping penalties, such as group truncated lasso, MCP and SCAD. One algorithm is based on quadratic penalty and difference convex method. Another algorithm is based on difference convex and ADMM, called DC-ADD, which is more efficient. Generalized cross validation and stability based method were provided to select the tuning parameters. Rand index, adjusted Rand index and Jaccard index were provided to estimate the agreement between estimated cluster memberships and the truth.
License: GPL-2 | GPL-3
Imports: Rcpp (>= 0.12.1), parallel
LinkingTo: Rcpp
NeedsCompilation: yes
Packaged: 2016-07-19 21:37:32 UTC; chong
Packaged: 2016-12-12 23:43:15 UTC; chong
Repository: CRAN
Date/Publication: 2016-07-20 00:38:43
Date/Publication: 2016-12-13 07:57:15
24 changes: 12 additions & 12 deletions MD5
@@ -1,12 +1,12 @@
6db3b95a865daaa3952688feae2b2366 *DESCRIPTION
23717a118e400defc3359d248d59b0ae *NAMESPACE
d2b7ad3a71003e78798bacc0e3b580d2 *R/GCVOrignial.R
87b9fa36139a24e660e9c7ea00e4d035 *R/RcppExports.R
5207126dc79490c10bf38108f2e4a02f *R/stability.R
c3f45da3654bf1594bd7984fd646cbcb *man/GCV.Rd
76db639c0fe42f3cc819fd8ab084d5fe *man/PRclust.Rd
cb659b9a59cccdbe8ea9c7cef74d1baa *man/clustStat.Rd
dc5fff66ed74926441e4b49a865ce8a1 *man/prclust-package.Rd
2c5100a6875dc49e3c343eafc27e853e *man/stability.Rd
f305d37e6fbe296e31377ad3c907a8b7 *src/ADMM_OrignialPRclust2.cpp
1d23aab33faec57bc91de37055a33688 *src/RcppExports.cpp
416664f1e8f4606492848eaf0fd890af *DESCRIPTION
4bae90771c584324443f7fa9e3dc80bf *NAMESPACE
1d555ce770083c50087c9735ae0857ea *R/GCVOrignial.R
4cde00ae53b0a0cdd60eb02500ddc7e1 *R/RcppExports.R
5e43ffdde617ed029a6934d3efd2ffa3 *R/stability.R
4ab6e62430b4c78bc8ad158e98aa5a7e *man/GCV.Rd
a1147a7b8edbf214ce1ffeb1b239149a *man/PRclust.Rd
faf0adb9def45c38e91c15119e7ade40 *man/clusterStat.Rd
49408a5b726d44a39e357fbeced79a2c *man/prclust-package.Rd
3c20d69acc00b96244c9bd31c3313262 *man/stability.Rd
a4ce9555f924e405457fd1c3b89ebbb4 *src/ADMM_OrignialPRclust2.cpp
8879c86ea6d72a7dce25ca35a174a261 *src/RcppExports.cpp
7 changes: 3 additions & 4 deletions NAMESPACE
@@ -1,8 +1,7 @@
useDynLib(prclust)
export(PRclust,GCV, clustStat,stability)
importFrom("stats", "coefficients", "lm", "rnorm")
importFrom("stats", "dist")
export(PRclust,GCV,clusterStat, stability)
importFrom("stats", "coefficients", "lm", "rnorm", "dist")
import(parallel)
importFrom(Rcpp, evalCpp)
S3method(print, prclust)
S3method(print, clustStat)
S3method(print, clusterStat)
45 changes: 23 additions & 22 deletions R/GCVOrignial.R
Expand Up @@ -2,14 +2,15 @@
## general degree of freedom and GCV
###################################

GDF.Step23 <- function(seed,data,lambda1,lambda2,tau,mumethods, methods,sigma,algorithm,abs_res,rel_res)
GDF.Step23 <- function(seed,data,lambda1,lambda2,tau,mumethods, methods,sigma,algorithm,epsilon)
{
set.seed(seed) ## Set the random seed for this simulation
# peturbation of data
deltaB = matrix(rnorm(dim(data)[1]*dim(data)[2],0,sigma),dim(data)[1],dim(data)[2])
data1 = data + deltaB
if (algorithm == 1){
a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data1, lambda1, lambda2, tau, abs_res, rel_res, mumethods, methods)
rho = lambda1
a = .Call('prclust_PRclustADMM', PACKAGE = 'prclust', data1, rho, lambda2, tau, mumethods, methods,epsilon)
} else {
a = .Call('prclust_PRclustOriginal', PACKAGE = 'prclust', data1, lambda1, lambda2, tau,mumethods, methods)

Expand All @@ -25,29 +26,27 @@ GDF.Step23 <- function(seed,data,lambda1,lambda2,tau,mumethods, methods,sigma,al

##########################################

GCV <- function(data,rho,lambda,tau,sigma,B=100,loss.function = c("quadratic","L1"),grouping.penalty = c("gtlp","tlp"), algorithm = c("DCADMM","Quadratic"),abs_res = 0.5,rel_res = 0.5)
{
## judge for different situation
mumethods = switch(match.arg(loss.function), `quadratic` = 0,L1 = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,tlp = 1)
nalgorithm = switch(match.arg(algorithm), `DCADMM` = 1,Quadratic = 2)
lambda1 = rho
lambda2 = lambda
GCV <- function(data,lambda1,lambda2,tau,sigma,B=100,loss.method = c("quadratic","lasso"),grouping.penalty = c("gtlp","L1","SCAD","MCP"), algorithm = c("ADMM","Quadratic"), epsilon = 0.001)
{ ## judge for different situation
mumethods = switch(match.arg(loss.method), `quadratic` = 0,lasso = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,L1 = 1, MCP = 2, SCAD = 3)
nalgorithm = switch(match.arg(algorithm), `ADMM` = 1,Quadratic = 2)

if(is.character(lambda1))
stop("lambda1 must be a number")
if(is.character(sigma))
stop("sigma must be a postive number")
stop("sigma must be a number")
if(is.character(B))
stop("B must be a postive number")
stop("B must be a number")
if(is.character(lambda2))
stop("lambda must be a number")
stop("lambda2 must be a number")
if(is.character(tau))
stop("tau must be a number")

if(lambda1<0 | is.na(lambda1))
stop("rho must be a postive number.")
if(lambda<0 | is.na(lambda2))
stop("lambda must be a postive number.")
stop("lambda1 must be a postive number.")
if(lambda2<0 | is.na(lambda2))
stop("lambda2 must be a postive number.")
if(tau<0 | is.na(tau))
stop("tau must be a postive number.")
if(sigma<0 | is.na(sigma))
Expand All @@ -58,17 +57,18 @@ GCV <- function(data,rho,lambda,tau,sigma,B=100,loss.function = c("quadratic","L
B = as.integer(B)
data = as.matrix(data)
if(sum(is.na(data)))
stop("Clustering data contains NA or character value. The current version does not support missing data.")
stop("Clustering data contains NA or character value. The current version does not support missing data situation.")

##require("multicore")
if( nalgorithm ==2) {
if (mumethods!= 0) {
if (mumethods!= 0 || methods >=2) {
stop("Quadtraic penalty based algorithm cannot deal with the selected objective function. You can try ADMM instead.")
}
}


res = mclapply(1:B,GDF.Step23,data = data,lambda1 = lambda1,lambda2 = lambda2,
tau = tau, mumethods = mumethods, methods = methods,sigma = sigma,algorithm = nalgorithm,abs_res = abs_res,rel_res = rel_res)
tau = tau, mumethods = mumethods, methods = methods,sigma = sigma,algorithm = nalgorithm,epsilon = epsilon)
nrows = dim(data)[1]
ncols = dim(data)[2]
num = nrows * ncols
Expand All @@ -92,11 +92,12 @@ GCV <- function(data,rho,lambda,tau,sigma,B=100,loss.function = c("quadratic","L
}

GDF = sum(slope)

## calculate the original mu
if (nalgorithm == 1){
rho = lambda1
a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data, rho, lambda2, tau, abs_res, rel_res, mumethods, methods)
} else {
a = .Call('prclust_PRclustADMM', PACKAGE = 'prclust', data, rho, lambda2, tau,mumethods, methods,epsilon)
} else {
a = .Call('prclust_PRclustOriginal', PACKAGE = 'prclust', data, lambda1, lambda2, tau, mumethods, methods)

}
Expand All @@ -110,4 +111,4 @@ GCV <- function(data,rho,lambda,tau,sigma,B=100,loss.function = c("quadratic","L
out = t(as.matrix(c(GDF,GCV,groupNum,estSigma)))
colnames(out) = c("GDF","GCV","groupNum","estSigmaSquare")
out
}
}
103 changes: 75 additions & 28 deletions R/RcppExports.R
@@ -1,4 +1,53 @@
clustStat <- function(trueGroup, group) {
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

distance_2 <- function(data, ndim, j) {
.Call('prclust_distance_2', PACKAGE = 'prclust', data, ndim, j)
}

distance_umu <- function(u, data, ndim, i, j, uj) {
.Call('prclust_distance_umu', PACKAGE = 'prclust', u, data, ndim, i, j, uj)
}

residual_mu <- function(mu1, mu, ndim, numbers) {
.Call('prclust_residual_mu', PACKAGE = 'prclust', mu1, mu, ndim, numbers)
}

is_zero_theta <- function(theta, j, ndim) {
.Call('prclust_is_zero_theta', PACKAGE = 'prclust', theta, j, ndim)
}

stopping_criteria <- function(mu, mu1, ndim, numbers, count) {
.Call('prclust_stopping_criteria', PACKAGE = 'prclust', mu, mu1, ndim, numbers, count)
}

PRclustADMM <- function(data, rho, lambda2, tau, mumethod = 0L, methods = 0L) {
.Call('prclust_PRclustADMM', PACKAGE = 'prclust', data, rho, lambda2, tau, mumethod, methods)
}

clusterStat <- function(trueGroup, group) {
.Call('prclust_clusterStat', PACKAGE = 'prclust', trueGroup, group)
}

distance_mu <- function(data, ndim, i, j) {
.Call('prclust_distance_mu', PACKAGE = 'prclust', data, ndim, i, j)
}

cal_S <- function(data, mu, theta, lambda1, lambda2, tau, ndim, numbers, methods) {
.Call('prclust_cal_S', PACKAGE = 'prclust', data, mu, theta, lambda1, lambda2, tau, ndim, numbers, methods)
}

judge_iteration <- function(data, mu, theta, mu1, theta1, lambda1, lambda2, tau, ndim, numbers, count, methods) {
.Call('prclust_judge_iteration', PACKAGE = 'prclust', data, mu, theta, mu1, theta1, lambda1, lambda2, tau, ndim, numbers, count, methods)
}

PRclustOriginal <- function(data, lambda1, lambda2, tau, mumethod = 0L, methods = 0L) {
.Call('prclust_PRclustOriginal', PACKAGE = 'prclust', data, lambda1, lambda2, tau, mumethod, methods)
}



clusterStat <- function(trueGroup, group) {
x = as.vector(trueGroup)
y = as.vector(group)
if (length(x) != length(y))
Expand All @@ -24,54 +73,52 @@ clustStat <- function(trueGroup, group) {
out["Rand"] <- RAND
out["AdjustedRand"] <- ARI
out["Jaccard"] <- Jaccard
class(out) <- "clustStat"
class(out) <- "clusterStat"
out
}

PRclust <- function(data, rho, lambda, tau, loss.function = c("quadratic","L1"),grouping.penalty = c("gtlp","tlp"), algorithm = c("DCADMM","Quadratic"),abs_res = 0.5,rel_res = 0.5) {



PRclust <- function(data, lambda1, lambda2, tau, loss.method = c("quadratic","lasso"),grouping.penalty = c("gtlp","L1","SCAD","MCP"), algorithm = c("ADMM","Quadratic"),epsilon = 0.001) {
## judge for different situation
mumethod = switch(match.arg(loss.function), `quadratic` = 0,L1 = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,tlp = 1)
nalgorithm = switch(match.arg(algorithm), `DCADMM` = 1,Quadratic = 2)
lambda1 = rho
mumethods = switch(match.arg(loss.method), `quadratic` = 0,lasso = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,L1 = 1, MCP = 2, SCAD = 3)
nalgorithm = switch(match.arg(algorithm), `ADMM` = 1,Quadratic = 2)

if(is.character(lambda1))
stop("rho must be a postive number, you can use either GCV or stability based method to choose good tunning parameters.")
if(is.character(lambda))
stop("lambda must be a postive number,you can use either GCV or stability based method to choose good tunning parameters.")
stop("lambda1 must be a number")
if(is.character(lambda2))
stop("lambda2 must be a number")
if(is.character(tau))
stop("tau must be a postive number,you can use either GCV or stability based method to choose good tunning parameters.")

stop("tau must be a number")
if(lambda1<0 | is.na(lambda1))
stop("rho must be a postive number, you can use GCV to choose the 'best' tunning parameter.")
if(lambda<0 | is.na(lambda))
stop("lambda must be a postive number, you can use GCV to choose the 'best' tunning parameter.")
stop("lambda1 must be a postive number, you can use GCV to choose the 'best' tunning parameter.")
if(lambda2<0 | is.na(lambda2))
stop("lambda2 must be a postive number, you can use GCV to choose the 'best' tunning parameter.")
if(tau<0 | is.na(tau))
stop("tau must be a postive number,you can use either GCV or stability based method to choose good tunning parameters.")

stop("tau must be a postive number, you can use GCV to choose the 'best' tunning parameter.")
data = as.matrix(data)
if(sum(is.na(data)))
stop("Clustering data contains NA or character value. The current version does not support missing data.")
stop("Clustering data contains NA or character value. The current version does not support missing data situation.")

if( nalgorithm ==1){
res = .Call('prclust_DCADMM', PACKAGE = 'prclust', data, rho, lambda, tau, abs_res , rel_res,mumethod, methods )
final.count = sum(res$count2)
rho = lambda1
res = .Call('prclust_PRclustADMM', PACKAGE = 'prclust', data, rho, lambda2, tau,mumethods, methods,epsilon)
} else {
if (mumethod!= 0) {
if (mumethods!= 0 || methods >=2)
{
stop("Quadtraic penalty based algorithm cannot deal with the selected objective function. You can try ADMM instead.")
}

res = .Call('prclust_PRclustOriginal', PACKAGE = 'prclust', data, lambda1, lambda, tau, mumethod,methods)
final.count = res$count
res = .Call('prclust_PRclustOriginal', PACKAGE = 'prclust', data, lambda1, lambda2, tau, mumethods,methods)
}

res = list(mu = res$mu,count = final.count,group = res$group,
theta = res$theta,rho = lambda1, lambda = lambda,tau = tau, method = methods, algorithm = nalgorithm)
out = list(mu = res$mu,count = res$count,group = res$group,
theta = res$theta,lambda1 = lambda1, lambda2 = lambda2,tau = tau, method = methods, algorithm = nalgorithm)
class(res) = "prclust"
res
}

print.clustStat <-function(x, ...) {
print.clusterStat <-function(x, ...) {
cat("External evaluation of cluster results:\n")
cat(paste("The Rand index: ",x$Rand,"\n",sep = ""))
cat(paste("The adjusted rand index: ",x$AdjustedRand,"\n",sep = ""))
Expand Down
14 changes: 7 additions & 7 deletions R/stability.R
@@ -1,12 +1,10 @@
stability<- function(data, rho,lambda, tau,loss.function = c("quadratic","L1"),grouping.penalty = c("gtlp","tlp"), algorithm = c("DCADMM","Quadratic"), abs_res = 0.5,rel_res = 0.5,n.times = 10) {
stability<- function(data, rho,lambda, tau,loss.function = c("quadratic","L1","MCP","SCAD"),grouping.penalty = c("gtlp","tlp"), algorithm = c("DCADMM","Quadratic"), epsilon = 0.001,n.times = 10) {


lambda1 = rho
lambda2 = lambda

## judge for different situation
mumethod = switch(match.arg(loss.function), `quadratic` = 0,L1 = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,tlp = 1)
methods = switch(match.arg(grouping.penalty), `gtlp` = 0,L1 = 1, MCP = 2, SCAD = 3)
nalgorithm = switch(match.arg(algorithm), `DCADMM` = 1,Quadratic = 2)
lambda1 = rho
if(is.character(lambda1))
Expand Down Expand Up @@ -50,11 +48,13 @@ stability<- function(data, rho,lambda, tau,loss.function = c("quadratic","L1"),g
})

if( nalgorithm ==1){
a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data.tr, rho, lambda, tau, abs_res , rel_res,mumethod, methods )
a = .Call('prclust_PRclustADMM', PACKAGE = 'prclust', data.tr, rho, lambda, tau,mumethod, methods,epsilon)
#a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data.tr, rho, lambda, tau, abs_res , rel_res,mumethod, methods )
tr.res = as.matrix(a$group)
tmp = tr.res[testtr.index,]

a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data.test, rho, lambda, tau, abs_res , rel_res,mumethod, methods )
tmp = tr.res[testtr.index,]
a = .Call('prclust_PRclustADMM', PACKAGE = 'prclust', data.test, rho, lambda, tau,mumethod, methods,epsilon)
#a = .Call('prclust_DCADMM', PACKAGE = 'prclust', data.test, rho, lambda, tau, abs_res , rel_res,mumethod, methods )
} else {
if (mumethod!= 0) {
stop("Quadtraic penalty based algorithm cannot deal with the selected objective function. You can try ADMM instead.")
Expand Down

0 comments on commit c79deb1

Please sign in to comment.