-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3804341
Showing
15 changed files
with
375 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Package: ClusBoot | ||
Type: Package | ||
Title: Bootstrap Clustering | ||
Version: 1.0 | ||
Date: 2019-11-11 | ||
Author: Sugnet Lubbe | ||
Maintainer: Sugnet Lubbe <slubbe@sun.ac.za> | ||
Description: Clustering algorithms are designed to cluster objects into a number of clusters. Any clustering algorithm provides the 'best' | ||
grouping of objects according to some criterion. | ||
This does not guarantee a 'good' clustering solution in the sense that some allocations were not simply the result of chance. | ||
This package allows the user to apply any clustering algorithm to a data set. The cluster allocations are subjected to a | ||
bootstrap analysis | ||
to determine the extent to which the clustering structure is stable and fundamental to the data set. For more information | ||
see <https://slubbe.wixsite.com/academic-cv/conference-presentations>. | ||
License: AGPL-3 | ||
NeedsCompilation: no | ||
Packaged: 2019-12-05 14:52:51 UTC; filz | ||
Depends: R (>= 3.5.0) | ||
Repository: CRAN | ||
Date/Publication: 2019-12-13 15:20:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
e8f79ccd1acced4e8d05ec4398093a42 *DESCRIPTION | ||
f3e08a3f89c576b59ade251eafa5d7a3 *NAMESPACE | ||
8e95bd3634dbb044a89e174303fa09cc *R/boot.silhouette.R | ||
e2ae8b677d47dadb9b3c338dcd6ac3d4 *R/clusboot.R | ||
838928313918fa1218f7f15e9d31f137 *R/complete.linkage.R | ||
9750c10672a4551764bf4e7380900e47 *R/plot.clusboot.R | ||
7daae5384cc743d046c837092961f449 *build/partial.rdb | ||
0f01608300356264e1620e627ecbf81a *data/case_study_psychiatrist.rda | ||
602fed71fcdbd3dead709ead7bcfa933 *man/ClusBoot-package.Rd | ||
e7c8d822754be59587f0f44f98193e7d *man/boot.silhouette.Rd | ||
ee9aed8c28f04c09b2590a84c127d12e *man/case_study_psychiatrist.Rd | ||
21be27b22db9ba5482101b7cfa63d1c3 *man/clusboot.Rd | ||
27f2896c166a5065d7d91164ba0929d4 *man/complete.linkage.Rd | ||
730f2de0200dccb5c8aa44f7a3292cca *man/plot.clusboot.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom("grDevices", "rainbow") | ||
importFrom("graphics", "barplot", "plot", "points") | ||
importFrom("stats", "as.dist", "cmdscale", "cutree", "dist", "hclust","optim") | ||
S3method(plot,clusboot) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Silhouette-style stability summary of a bootstrapped clustering.
#
# For every cluster, the mean pairwise co-clustering proportion among its own
# members is compared with the largest mean co-clustering proportion between
# its members and the members of any other cluster (never taken below 0).
# The differences are drawn as a horizontal bar plot and returned.
#
# Arguments:
#   clusboot.out  list whose first element is the square matrix of
#                 co-clustering proportions and whose second element is the
#                 vector of cluster allocations, in the same object order.
#   ...           further arguments passed on to barplot().
#
# Value: numeric vector with one value per cluster, ordered as
#   levels(factor(<cluster allocations>)). A cluster with a single member has
#   no within-cluster pairs, so its value is NaN.
boot.silhouette <-
function(clusboot.out, ...)
{
  Pmat <- clusboot.out[[1]]
  raw.clusters <- clusboot.out[[2]]

  # Use the labels that are actually present instead of assuming they are
  # the integers 1..k; for labels 1..k the order is unchanged.
  cluster.labels <- levels(factor(raw.clusters))
  cluster.vec <- as.character(raw.clusters)
  k <- length(cluster.labels)
  members <- lapply(cluster.labels, function(lab) which(cluster.vec == lab))

  sil <- rep(NA_real_, k)
  for (i in seq_len(k))
  {
    own.idx <- members[[i]]
    # drop = FALSE keeps a 1 x 1 matrix for singleton clusters
    own.mat <- Pmat[own.idx, own.idx, drop = FALSE]
    own.p <- mean(own.mat[lower.tri(own.mat)])

    # tightness to the 'nearest' alternative cluster, floored at 0
    other.p <- 0
    for (j in seq_len(k)[-i])
    {
      other.mean <- mean(Pmat[own.idx, members[[j]]])
      if (other.mean > other.p) other.p <- other.mean
    }
    sil[i] <- own.p - other.p
  }
  barplot(sil, names.arg = cluster.labels, horiz = TRUE, xlim = c(0, 1), ...)
  sil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Bootstrap a cluster analysis.
#
# B bootstrap samples of the rows of datmat are drawn with replacement and
# clustering.func is applied to each. For every pair of objects the function
# counts how often the two were drawn together (totD) and how often they were
# drawn together AND placed in the same cluster (clusD); the ratio is the
# co-clustering proportion.
#
# Arguments:
#   datmat           matrix or data frame with one row per object; rows are
#                    resampled via datmat[x, , drop = FALSE].
#   B                number of bootstrap replicates.
#   clustering.func  function(data, ...) returning one cluster label per row.
#   ...              further arguments passed on to clustering.func.
#
# Value: list of class "clusboot" with components
#   proportions  n x n matrix of co-clustering proportions, rows and columns
#                ordered by the cluster allocation of the full data; a pair
#                that never occurred together in a bootstrap sample is NaN.
#   clustering   cluster allocation of the full data, in the same order.
clusboot <-
function (datmat, B=1000, clustering.func=complete.linkage, ...)
{
  n <- nrow(datmat)
  if (is.null(n) || n < 1)
    stop("'datmat' must be a matrix or data frame with at least one row",
         call. = FALSE)
  if (is.null(rownames(datmat))) rownames(datmat) <- seq_len(n)
  obj.names <- rownames(datmat)

  # one bootstrap sample of row indices per column
  boot.samples <- matrix(sample(seq_len(n), size = n * B, replace = TRUE),
                         ncol = B)

  # Running totals over the replicates; avoids holding a (2 * n^2) x B
  # matrix in memory just to sum its rows afterwards.
  clusD <- totD <- matrix(0, nrow = n, ncol = n)

  for (b in seq_len(B))
  {
    x <- boot.samples[, b]
    # drop = FALSE keeps a one-column datmat as a matrix
    boot.clus <- clustering.func(datmat[x, , drop = FALSE], ...)
    # use the labels actually returned rather than assuming 1..k
    clus.labels <- levels(factor(boot.clus))
    boot.clus <- as.character(boot.clus)

    # (i,j) = (times i was drawn) * (times j was drawn) in this sample
    totD <- totD + tcrossprod(tabulate(x, nbins = n))

    sampleD <- matrix(0, nrow = n, ncol = n)
    for (lab in clus.labels)
    {
      clus.count <- tabulate(x[boot.clus == lab], nbins = n)
      in.clus <- which(clus.count > 0)
      sampleD[in.clus, in.clus] <- tcrossprod(clus.count[in.clus])
    }
    clusD <- clusD + sampleD
  }

  clusD <- clusD/totD
  dimnames(clusD) <- list(obj.names, obj.names)

  # clustering of the original data fixes the ordering of the output
  out <- clustering.func(datmat, ...)
  ord <- order(out)
  output <- list(proportions = clusD[ord, ord], clustering = out[ord])
  class(output) <- "clusboot"
  output
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Cluster allocation from hierarchical clustering with hclust() defaults.
#
# Arguments:
#   X  data passed to dist()
#   k  passed to cutree() as its second argument
#
# Value: the result of cutree(), one cluster label per object.
complete.linkage <-
function (X, k)
{
  distances <- dist(X)
  tree <- hclust(distances)
  cutree(tree, k)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Multi-dimensional scaling plot of a "clusboot" object.
#
# The dissimilarity between two objects is 1 minus their co-clustering
# proportion. A two-dimensional classical scaling solution (cmdscale) is used
# as the starting configuration for optim(), which minimises the stress
# defined below. Points are drawn cluster by cluster.
#
# Arguments:
#   x    list with components 'proportions' (square matrix) and 'clustering'
#        (cluster label per object, in the same order).
#   col  optional vector of colours, one per cluster; recycled when shorter
#        than the number of clusters. NULL (the default) uses rainbow(k).
#   ...  further arguments passed on to points().
#
# Value: two-column matrix of the plotted coordinates.
plot.clusboot <-
function (x, col=NULL, ...)
{
  Dmat <- 1 - x$proportions

  # squared differences between configuration distances and target
  # dissimilarities, relative to the squared configuration distances
  stress.func <- function (y, delta)
  {
    dd <- dist(matrix(y, ncol = 2))
    sum((dd - delta)^2) / sum(dd^2)
  }
  Y <- cmdscale(Dmat)
  y <- optim(as.vector(Y), stress.func, delta = as.dist(Dmat))$par
  Y <- matrix(y, ncol = 2)

  # empty frame; the points are added per cluster below
  plot(Y[, 1], Y[, 2], asp = 1, type = "n", xaxt = "n", yaxt = "n",
       xlab = "", ylab = "")

  # use the labels actually present rather than assuming 1..k
  cluster.labels <- levels(factor(x$clustering))
  cluster.vec <- as.character(x$clustering)
  k <- length(cluster.labels)

  # is.null() also covers an explicit col = NULL, which missing() does not
  if (is.null(col)) col <- rainbow(k)
  if (length(col) < k) col <- rep(col, length.out = k)

  for (i in seq_len(k))
  {
    in.clus <- cluster.vec == cluster.labels[i]
    points(Y[in.clus, 1], Y[in.clus, 2], col = col[i], ...)
  }

  Y
}
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
\name{ClusBoot-package} | ||
\alias{ClusBoot-package} | ||
\alias{ClusBoot} | ||
\docType{package} | ||
\title{ | ||
Performs bootstrap on a cluster analysis output | ||
} | ||
\description{ | ||
Any clustering output is subjected to a bootstrap procedure to determine the stability of the clustering solution. The results are displayed | ||
in the form of a Multi-dimensional scaling plot and a silhouette plot. | ||
} | ||
\details{ | ||
|
||
The DESCRIPTION file: | ||
\packageDESCRIPTION{ClusBoot} | ||
\packageIndices{ClusBoot} | ||
A package to perform bootstrap on any cluster analysis output and visualise the results. | ||
For more information see \url{https://slubbe.wixsite.com/academic-cv/conference-presentations} | ||
} | ||
\author{ | ||
Sugnet Lubbe slubbe@sun.ac.za | ||
|
||
Maintainer: Sugnet Lubbe slubbe@sun.ac.za | ||
} | ||
\references{ | ||
Lubbe, S. Visualisations associated with bootstrapping cluster analysis. Data Science, Statistics and Visualisation conference, July 2017, Lisbon, Portugal. | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
\name{boot.silhouette} | ||
\alias{boot.silhouette} | ||
\title{ | ||
Construct silhouette plot from bootstrap replicates | ||
} | ||
\description{ | ||
A silhouette plot is constructed, indicating the proportion of times the cluster members cluster together in the same cluster. | ||
} | ||
\usage{ | ||
boot.silhouette(clusboot.out, ...) | ||
} | ||
\arguments{ | ||
\item{clusboot.out}{ | ||
an object of class clusboot, usually, a result of a call to clusboot | ||
} | ||
\item{\dots}{ | ||
more plotting parameters, e.g. col | ||
} | ||
} | ||
\details{ | ||
The clustering tightness is computed for each cluster and compared to that of the 'nearest' alternative cluster. The cluster tightness is computed | ||
as the mean of the proportions of times each pair of objects in the cluster is clustered together in the same cluster. The tightness to another cluster | ||
is computed as the mean of the proportions of times an item of this cluster and an item of the other cluster are clustered together. The 'nearest' | ||
alternative cluster is defined as the alternative cluster with the numerically largest tightness to the current cluster. | ||
} | ||
\value{ | ||
a vector with number of components equal to the number of classes, providing the silhouette value for each class. | ||
} | ||
\references{ | ||
Lubbe, S. Visualisations associated with bootstrapping cluster analysis. Data Science, Statistics and Visualisation conference, July 2017, Lisbon, Portugal. | ||
} | ||
\author{ | ||
Sugnet Lubbe slubbe@sun.ac.za | ||
} | ||
\seealso{ | ||
\code{\link{clusboot}} for performing bootstrap on a cluster analysis output and \code{\link{plot.clusboot}} to visually represent the bootstrap replications | ||
} | ||
\examples{ | ||
data(case_study_psychiatrist) | ||
boot.out <- clusboot (scale(case_study_psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
plot(boot.out) | ||
boot.silhouette (boot.out) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
\name{case_study_psychiatrist} | ||
\alias{case_study_psychiatrist} | ||
\docType{data} | ||
\title{ | ||
Patient by psychiatric symptom data | ||
} | ||
\description{ | ||
Abstract: Presence/absence ratings of 24 psychiatric symptoms in 30 psychiatric inpatients made by an individual psychiatrist. | ||
Subject matter background: The data have been collected in a case study of an individual psychiatrist to identify his implicit taxonomy. | ||
Data structure: object x variables data matrix | ||
} | ||
\usage{data(case_study_psychiatrist)} | ||
\format{ | ||
A data frame with 30 observations on the following 28 variables. | ||
\describe{ | ||
\item{\code{V1}}{inappropriate affect, appearance or behavior; binary vector} | ||
\item{\code{V2}}{interview belligerence - negativism; binary vector} | ||
\item{\code{V3}}{agitation - excitement; binary vector} | ||
\item{\code{V4}}{retardation; binary vector} | ||
\item{\code{V5}}{lack of emotions; binary vector} | ||
\item{\code{V6}}{speech disorganization; binary vector} | ||
\item{\code{V7}}{grandiosity; binary vector} | ||
\item{\code{V8}}{suspicion - ideas of persecution; binary vector} | ||
\item{\code{V9}}{hallucinations - delusions; binary vector} | ||
\item{\code{V10}}{overt anger; binary vector} | ||
\item{\code{V11}}{depression; binary vector} | ||
\item{\code{V12}}{anxiety; binary vector} | ||
\item{\code{V13}}{obsession - compulsion; binary vector} | ||
\item{\code{V14}}{suicide; binary vector} | ||
\item{\code{V15}}{self injury; binary vector} | ||
\item{\code{V16}}{somatic concerns; binary vector} | ||
\item{\code{V17}}{social isolation; binary vector} | ||
\item{\code{V18}}{daily routine impairment; binary vector} | ||
\item{\code{V19}}{leisure time impairment; binary vector} | ||
\item{\code{V20}}{antisocial impulses or acts; binary vector} | ||
\item{\code{V21}}{alcohol abuse; binary vector} | ||
\item{\code{V22}}{drug abuse; binary vector} | ||
\item{\code{V23}}{disorientation; binary vector} | ||
\item{\code{V24}}{memory impairment; binary vector} | ||
\item{\code{V25}}{rating on Global Assessment Scale, a 101-point scale for overall severity of psychiatric disturbance; a numeric vector} | ||
\item{\code{V26}}{Affective (Affective Disorder or Anxiety Disorder); binary vector} | ||
\item{\code{V27}}{Psychotic (Schizophrenic Disorder or Paranoid Disorder); binary vector} | ||
\item{\code{V28}}{Substance abuse (Substance Use Disorder or Substance-Induced Disorder); binary vector} | ||
} | ||
} | ||
\details{ | ||
The data set forms part of the International Federation of Classification Societies Cluster Benchmark Data Repository | ||
} | ||
\source{ | ||
Van Mechelen, I., & De Boeck, P. (1989). Implicit taxonomy in psychiatric diagnosis: A case study. Journal of Social and Clinical Psychology, 8, 276-287. } | ||
\references{ | ||
\url{http://ifcs.boku.ac.at/repository/data/case_study_psychiatrist/index.html}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
\name{clusboot} | ||
\alias{clusboot} | ||
\title{ | ||
Performs bootstrap on a cluster analysis output} | ||
\description{ | ||
B bootstrap samples are drawn with replacement from the data and cluster analysis is performed on the bootstrap samples. | ||
} | ||
\usage{ | ||
clusboot(datmat, B = 1000, clustering.func = complete.linkage, ...) | ||
} | ||
\arguments{ | ||
\item{datmat}{ | ||
a samples by variables data matrix or a distance object, whatever input is required for clustering.func | ||
} | ||
\item{B}{ | ||
number of bootstrap replicates | ||
} | ||
\item{clustering.func}{ | ||
any function that performs cluster analysis which returns a single vector of cluster allocations as output | ||
} | ||
\item{\dots}{ | ||
any other arguments to be sent to clustering.func | ||
} | ||
} | ||
\value{ | ||
\item{proportions}{an n x n matrix whose (i,j)-th element is the proportion of times objects i and j clustered in the same cluster} | ||
\item{clustering}{vector of cluster allocations of the input data} | ||
} | ||
\references{ | ||
Lubbe, S. Visualisations associated with bootstrapping cluster analysis. Data Science, Statistics and Visualisation conference, July 2017, Lisbon, Portugal. | ||
} | ||
\author{ | ||
Sugnet Lubbe slubbe@sun.ac.za | ||
} | ||
\seealso{ | ||
\code{\link{plot.clusboot}} to visually represent the bootstrap replications and \code{\link{boot.silhouette}} for a silhouette summary of the bootstrap replicates as well as \code{\link{kmeans}} | ||
} | ||
\examples{ | ||
data(case_study_psychiatrist) | ||
clusboot (scale(case_study_psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
\name{complete.linkage} | ||
\alias{complete.linkage} | ||
\title{ | ||
Wrapper function to return only the clustering allocation. | ||
} | ||
\description{ | ||
Wrapper function to return only the clustering allocation from the hclust function with default method = "complete". | ||
} | ||
\usage{ | ||
complete.linkage(X, k) | ||
} | ||
\arguments{ | ||
\item{X}{ | ||
numeric matrix of data, or an object that can be coerced to such a matrix (such as a numeric vector or a data frame with all numeric columns). | ||
} | ||
\item{k}{ | ||
the desired number of clusters; passed as the k argument to cutree. | ||
} | ||
} | ||
\value{ | ||
a single vector of cluster allocations, the output of cutree(hclust(dist(X)), k) | ||
} | ||
\references{ | ||
hclust() | ||
} | ||
\author{ | ||
Sugnet Lubbe slubbe@sun.ac.za | ||
} | ||
\examples{ | ||
data(case_study_psychiatrist) | ||
complete.linkage(scale(case_study_psychiatrist), k=6) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
\name{plot.clusboot} | ||
\alias{plot.clusboot} | ||
\title{ | ||
Constructs a Multi-dimensional scaling plot to represent the bootstrap replications. | ||
} | ||
\description{ | ||
Constructs a Multi-dimensional scaling plot to represent the bootstrap replications. | ||
} | ||
\usage{ | ||
\method{plot}{clusboot}(x, col = NULL, ...) | ||
} | ||
\arguments{ | ||
\item{x}{ | ||
an object of class clusboot, usually, a result of a call to clusboot | ||
} | ||
\item{col}{ | ||
an optional vector equal in length to the number of clusters with colours to represent different clusters | ||
} | ||
\item{\dots}{ | ||
more plotting parameters | ||
} | ||
} | ||
\details{ | ||
Constructs a Multi-dimensional scaling plot to represent the bootstrap replications. | ||
} | ||
\value{ | ||
a two-column matrix with rows containing the MDS coordinates of the samples | ||
} | ||
\references{ | ||
Lubbe, S. Visualisations associated with bootstrapping cluster analysis. Data Science, Statistics and Visualisation conference, July 2017, Lisbon, Portugal. | ||
} | ||
\author{ | ||
Sugnet Lubbe slubbe@sun.ac.za | ||
} | ||
\seealso{ | ||
\code{\link{clusboot}} for performing bootstrap on a cluster analysis output and \code{\link{boot.silhouette}} for a silhouette summary of the bootstrap replicates | ||
} | ||
\examples{ | ||
data(case_study_psychiatrist) | ||
boot.out <- clusboot (scale(case_study_psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
plot(boot.out) | ||
} |