-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4b89345
Showing
13 changed files
with
578 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Package: vscc | ||
Type: Package | ||
Title: Variable selection for clustering and classification | ||
Version: 0.2 | ||
Date: 2013-11-16 | ||
Author: Jeffrey L. Andrews, Paul D. McNicholas | ||
Maintainer: Jeffrey L. Andrews <jeffrey.andrews@macewan.ca> | ||
Description: Performs variable selection/feature reduction under a clustering or classification framework. In particular, it can be used in an automated fashion using mixture model-based methods (tEIGEN and MCLUST are currently supported). | ||
License: GPL (>= 2) | ||
Imports: teigen, mclust | ||
Packaged: 2013-11-16 23:30:12 UTC; jeff | ||
NeedsCompilation: no | ||
Repository: CRAN | ||
Date/Publication: 2013-11-17 08:24:55 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
b9560548ea2a7012b37e6e05bf7469e5 *DESCRIPTION | ||
af3e0ec837f755bdaf405c90111c7d2a *NAMESPACE | ||
a89264140a524e5a23b352409b4d7bf0 *R/plot.vscc.R | ||
972aa6ba1490610d319b494faf438267 *R/print.vscc.R | ||
047e0a9d46d96515e37bd0cee7a531a2 *R/summary.vscc.R | ||
9d87a2e8d515659e5792f882e82a67db *R/vscc.R | ||
ff5d205b6ca0116c0726df42ccd25dd2 *inst/CITATION | ||
b0665b5c92ace46daf9f87b783c26f44 *man/plot.vscc.Rd | ||
8d7217e814ed31023162a7024dd46cdb *man/print.vscc.Rd | ||
2fcf41c0a6df54128bb0471ca6376b72 *man/summary.vscc.Rd | ||
8afbad6abe93013da02de31968c9fed6 *man/vscc-package.Rd | ||
b8d0fcd61884096ca8bfb602f973f6e0 *man/vscc.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
export(vscc) | ||
import(stats, utils, teigen, mclust) | ||
S3method(plot, vscc) | ||
S3method(summary, vscc) | ||
S3method(print, vscc) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
plot.vscc <- | ||
function(x, ...){ | ||
classcolours <- rainbow(length(unique(x$bestmod$class))) | ||
pairs(x$top, col=classcolours[x$best$class]) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
print.vscc <- | ||
function(x, ...){ | ||
summary(x) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
summary.vscc <- function(object, ...){ | ||
x <- object | ||
cat("---------- Summary for VSCC ----------", "\n\n") | ||
cat(" ---- RESULTS ---- ", "\n") | ||
cat("# Vars: ", ncol(x$top), "\n") | ||
cat("Relation: ", x$chosen, "\n") | ||
cat("BIC: ", x$bestmod$bic, "\n") | ||
cat("Model: ", x$bestmod$model, "\n") | ||
cat("Family: ", x$family, "\n") | ||
cat("# Groups: ", x$bestmod$G, "\n") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,275 @@ | ||
####Fix the arguments passed along in the post-analysis! | ||
|
||
vscc <-function(x, G=1:9, automate="mclust", initial=NULL, train=NULL, forcereduction=FALSE){ | ||
origx <- x | ||
origG <- G | ||
x <- scale(x) | ||
p <- ncol(x) | ||
if(is.null(train)){ | ||
if(automate=="teigen"){ | ||
# require("teigen") | ||
if(packageVersion("teigen")<1.9){ | ||
warning(paste("The 'vscc' package requires 'teigen' version 1.9 or higher, version", packageVersion("teigen"), "is currently installed: issues may arise.")) | ||
} | ||
if(is.null(initial)){ | ||
mclinit1 <- hc("VVV",x) | ||
mclinit2 <- hclass(mclinit1, G) | ||
mclist <- list() | ||
for(g in length(G)){ | ||
mclist[[G[g]]] <- mclinit2[,g] | ||
} | ||
initrun <- teigen(x, G, init=mclist, training=train, verbose=FALSE) | ||
if(initrun$G==1){stop("teigen initialization gives G=1 solution...please use an initialization where G>1")} | ||
initial <- initrun$classification | ||
initunc <- sum(1-apply(initrun$fuzzy,1,max)) | ||
} | ||
G <- length(unique(initial)) | ||
n <- nrow(x) | ||
zmat <- matrix(0,n,G) | ||
for(i in 1:G){ | ||
zmat[initial==i, i]<-1 | ||
} | ||
} | ||
else{ | ||
if(automate=="mclust"){ | ||
# require("mclust") | ||
if(packageVersion("mclust")<4.0){ | ||
warning(paste("VSCC requires 'mclust' version 4.0 or higher, version", packageVersion("mclust"), "is currently installed: issues may arise.")) | ||
} | ||
if(is.null(initial)){ | ||
initrun <- Mclust(x, G) | ||
if(initrun$G==1){stop("mclust initialization gives G=1 solution...please use an initialization where G>1")} | ||
initial <- initrun$classification | ||
initunc <- sum(initrun$unc) | ||
} | ||
G <- length(unique(initial)) | ||
n <- nrow(x) | ||
zmat <- matrix(0,n,G) | ||
for(i in 1:G){ | ||
zmat[initial==i, i]<-1 | ||
} | ||
} | ||
else{ | ||
if(is.null(initial)){stop("If an initial clustering vector is not supplied, automate='teigen' or 'mclust' must be specified")} | ||
} | ||
G <- length(unique(initial)) | ||
n <- nrow(x) | ||
zmat <- matrix(0,n,G) | ||
for(i in 1:G){ | ||
zmat[initial==i, i]<-1 | ||
} | ||
} | ||
} | ||
else{ | ||
if(is.null(initial)){ | ||
stop("If using 'train', 'initial' vector must also be given") | ||
} | ||
origx <- x | ||
x <- x[train,] | ||
G <- length(unique(initial[train])) | ||
n <- nrow(x) | ||
zmat <- matrix(0,n,G) | ||
for(i in 1:G){ | ||
zmat[initial[train]==i, i]<-1 | ||
} | ||
} | ||
|
||
if(is.null(colnames(x))){ | ||
colnames(x) <- 1:p | ||
colnames(origx) <- 1:p | ||
} | ||
ng <- colSums(zmat) | ||
mug <- matrix(0,G,p) | ||
for(g in 1:G){ | ||
mug[g,] <- colSums(zmat[,g]*x)/ng[g] | ||
} | ||
# mug <- muginit(G,p,x,zmat,ng) | ||
|
||
mugarr <- array(0,dim=c(n,p,G)) | ||
for(g in 1:G){ | ||
mugarr[,,g] <- t(mug[g,] * t(matrix(1,n,p))) | ||
} | ||
mugmat <- matrix(0,n,p) | ||
for(g in 1:G){ | ||
mugmat <- mugmat + zmat[,g] * mugarr[,,g] | ||
} | ||
xminusmug <- x - mugmat | ||
ss <- xminusmug * xminusmug | ||
ssbyvar <- colSums(ss)/n | ||
|
||
# bssmugmat <- array(0,dim=c(n,p,G)) | ||
# for(g in 1:G){ | ||
# bssmugmat[,,g] <- bssmugmat[,,g] + (1-zmat[,g]) * mugarr[,,g] | ||
# } | ||
# bssxminusmug <- array(0,dim=c(n,p,G)) | ||
# for(g in 1:G){ | ||
# bssxminusmug[,,g] <- (x-bssmugmat[,,g])^2/(n-ng[g]) | ||
# } | ||
# bssbyvar <- rep(0,p) | ||
# for(g in 1:G){ | ||
# bssbyvar <- bssbyvar + colSums(bssxminusmug[,,g]) | ||
# } | ||
# sortbss <- sort(bssbyvar) | ||
sorted <- t(as.matrix(sort(ssbyvar))) | ||
select <- list() | ||
useselect <- list() | ||
varnames <- list() | ||
trun <- list() | ||
numvars <- NA | ||
for(i in 1:5){ | ||
select[[i]] <- matrix(data=origx[,colnames(sorted)[1]]) | ||
useselect[[i]] <- matrix(data=x[,colnames(sorted)[1]]) | ||
varnames[[i]] <- colnames(sorted)[1] | ||
} | ||
counts <- rep(2,5) | ||
for(k in 2:p){ | ||
curname <- colnames(sorted)[k] | ||
for(i in 1:5){ | ||
curcor <- cor(cbind(x[,curname],useselect[[i]])) | ||
if(all(abs(curcor[upper.tri(curcor)])<=(1-sorted[1,k]^i))){ | ||
select[[i]] <- cbind(select[[i]],origx[,curname]) | ||
useselect[[i]] <- cbind(useselect[[i]],x[,curname]) | ||
varnames[[i]][counts[i]] <- curname | ||
counts[i] <- counts[i]+1 | ||
} | ||
} | ||
} | ||
for(i in 1:5){ | ||
colnames(select[[i]]) <- varnames[[i]] | ||
} | ||
if(!is.null(automate)){ | ||
tuncs <- Inf | ||
numvars <- counts-1 | ||
counttab <- table(counts-1) | ||
runteig <- rep(TRUE,5) | ||
if(any(counttab>1)){ | ||
# dubvars <- as.numeric(names(which(counttab>1))) | ||
# for(j in 1:length(dubvars)){ | ||
# relneedcheck <- which(numvars==dubvars[j]) | ||
# k <- 1 | ||
# while(k < length(relneedcheck)){ | ||
# for(i in (k+1):length(relneedcheck)){ | ||
# if(all(varnames[[relneedcheck[k]]] %in% varnames[[relneedcheck[i]]])){ | ||
# runteig[relneedcheck[i]] <- FALSE | ||
# } | ||
# } | ||
# k <- k+1 | ||
# } | ||
# } | ||
#This could be improved | ||
for(i in 1:4){ | ||
for(j in (i+1):5){ | ||
if(length(varnames[[i]])==length(varnames[[j]])){ | ||
if(all(varnames[[i]] %in% varnames[[j]])){ | ||
runteig[j] <- FALSE | ||
} | ||
} | ||
} | ||
} | ||
} | ||
if(automate=="teigen"){ | ||
for(i in 1:5){ | ||
if(runteig[i]){ | ||
G <- origG | ||
mclinit1 <- hc("VVV",x) | ||
mclinit2 <- hclass(mclinit1, G) | ||
mclist <- list() | ||
for(g in length(G)){ | ||
mclist[[G[g]]] <- mclinit2[,g] | ||
} | ||
trun[[i]] <- teigen(select[[i]], G, training=train, init=mclist, verbose=FALSE) | ||
if(trun[[i]]$G>1){ | ||
tuncs[i] <- sum(1-apply(trun[[i]]$fuzzy,1,max)) | ||
} | ||
else{ | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
else{ | ||
trun[[i]] <- "Same as simpler relation" | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
} | ||
else{ | ||
if(is.null(train)){ | ||
for(i in 1:5){ | ||
if(runteig[i]){ | ||
G <- origG | ||
trun[[i]] <- Mclust(scale(select[[i]]), G) | ||
if(trun[[i]]$G>1){ | ||
tuncs[i] <- sum(trun[[i]]$unc) | ||
} | ||
else{ | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
else{ | ||
trun[[i]] <- "Same as simpler relation" | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
} | ||
else{ | ||
for(i in 1:5){ | ||
if(runteig[i]){ | ||
G <- origG | ||
trun[[i]] <- teigen(select[[i]], G, models="mclust", training=train, init="uniform", verbose=FALSE, known=initial) | ||
if(trun[[i]]$G>1){ | ||
tuncs[i] <- sum(1-apply(trun[[i]]$fuzzy,1,max)) | ||
} | ||
else{ | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
else{ | ||
trun[[i]] <- "Same as simpler relation" | ||
tuncs[i] <- Inf | ||
} | ||
} | ||
} | ||
} | ||
|
||
} | ||
store <- list() | ||
store[["selected"]] <- select | ||
if(!is.null(automate)){ | ||
# if(is.null(initial)){ | ||
if(is.null(train)){ | ||
store[["initialrun"]] <- initrun | ||
} | ||
else{ | ||
initunc <- Inf | ||
} | ||
if(forcereduction){ | ||
store[["bestmodel"]] <- trun[[which.min(tuncs)]] | ||
store[["chosenrelation"]] <- which.min(tuncs) | ||
} | ||
else{ | ||
if(min(tuncs)<initunc){ | ||
store[["bestmodel"]] <- trun[[which.min(tuncs)]] | ||
store[["chosenrelation"]] <- which.min(tuncs) | ||
store[["topselected"]] <- select[[which.min(tuncs)]] | ||
store[["uncertainty"]] <- min(tuncs) | ||
} | ||
else{ | ||
store[["bestmodel"]] <- initrun | ||
store[["chosenrelation"]] <- "Full dataset" | ||
store[["topselected"]] <- origx | ||
store[["uncertainty"]] <- initunc | ||
} | ||
} | ||
# } | ||
# else{ | ||
# store[["bestmodel"]] <- trun[[which.min(tuncs)]] | ||
# store[["chosenrelation"]] <- which.min(tuncs) | ||
# store[["topselected"]] <- select[[which.min(tuncs)]] | ||
# store[["uncertainty"]] <- min(tuncs) | ||
# } | ||
store[["allmodelfit"]] <- trun | ||
} | ||
store[["family"]] <- automate | ||
store[["wss"]] <- sorted | ||
class(store) <- "vscc" | ||
store | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
citHeader("To cite package 'vscc' in publications use:") | ||
|
||
citEntry(entry="Manual", | ||
title = "vscc: Variable selection for clustering and classification", | ||
author = personList(person(given=c("Jeffrey", "L."), family="Andrews"),person(given=c("Paul", "D."), family="McNicholas")), | ||
year = "2013", | ||
note = "R package version 1", | ||
textVersion = "Andrews, J. L. & McNicholas, P. D. (2013). vscc: Variable selection for clustering and classification. R package version 1." | ||
) | ||
|
||
|
||
citEntry(entry="Article", | ||
title=" Variable Selection for Clustering and Classification", | ||
author=personList(person(given=c("Jeffrey", "L."), family="Andrews"),person(given=c("Paul", "D."), family="McNicholas")), | ||
journal="arXiv preprint", | ||
year="2013", | ||
note = "arXiv:1303.5294", | ||
textVersion = "Andrews, J. L. & McNicholas, P. D. (2013). Variable selection for clustering and classification. arXiv preprint arXiv:1303.5294." | ||
) | ||
|
||
citFooter("For BibTeX entries, call: toBibtex(citation('vscc'))") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
\name{plot.vscc} | ||
\alias{plot.vscc} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
Plotting for vscc objects | ||
} | ||
\description{ | ||
Dedicated plot function for objects of class vscc. | ||
} | ||
\usage{ | ||
\method{plot}{vscc}(x, ...) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{x}{ | ||
An object of class vscc. | ||
} | ||
\item{\dots}{ | ||
Further arguments to be passed on | ||
} | ||
} | ||
\details{ | ||
Provides a scatterplot matrix of the selected variables with colours corresponding to each group. | ||
} | ||
\author{ | ||
Jeffrey L. Andrews | ||
} | ||
|
||
\seealso{ | ||
\code{\link{vscc}} | ||
} | ||
\examples{ | ||
require("mclust") | ||
data(banknote) | ||
bankrun <- vscc(banknote[,-1]) | ||
plot(bankrun) | ||
} | ||
\keyword{ ~kwd1 } |
Oops, something went wrong.