-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3804341
commit 482bc28
Showing
21 changed files
with
411 additions
and
372 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,22 @@ | ||
Package: ClusBoot | ||
Type: Package | ||
Title: Bootstrap Clustering | ||
Version: 1.0 | ||
Date: 2019-11-11 | ||
Author: Sugnet Lubbe | ||
Maintainer: Sugnet Lubbe <slubbe@sun.ac.za> | ||
Description: Clustering algorithms are designed to cluster objects into a number of clusters. Any clustering algorithm provides the 'best' | ||
grouping of objects according to some criterion. | ||
This does not guarantee a 'good' clustering solution in the sense that some allocations were not simply the result of chance. | ||
This package allows the user to apply any clustering algorithm to a data set. The cluster allocations are subjected to a | ||
bootstrap analysis | ||
to determine the extent to which the clustering structure is stable and fundamental to the data set. For more information | ||
see <https://slubbe.wixsite.com/academic-cv/conference-presentations>. | ||
License: AGPL-3 | ||
Title: Bootstrap a Clustering Solution to Establish the Stability of | ||
the Clusters | ||
Version: 1.0.1 | ||
Authors@R: | ||
person("Sugnet", "Lubbe", , "slubbe@sun.ac.za", role = c("aut", "cre", "cph"), | ||
comment = c(ORCID = "0000-0003-2762-9944")) | ||
Description: Providing a cluster allocation for n samples, either with an $n \times p$ data matrix or an $n \times n$ distance | ||
matrix, a bootstrap procedure is performed. The proportion of bootstrap replicates where a pair of samples | ||
cluster in the same cluster indicates who tightly the samples in a particular cluster clusters together. | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
Depends: R (>= 2.10) | ||
LazyData: true | ||
Imports: graphics, grDevices, stats | ||
NeedsCompilation: no | ||
Packaged: 2019-12-05 14:52:51 UTC; filz | ||
Depends: R (>= 3.5.0) | ||
Packaged: 2023-05-30 14:49:07 UTC; slubbe | ||
Author: Sugnet Lubbe [aut, cre, cph] (<https://orcid.org/0000-0003-2762-9944>) | ||
Maintainer: Sugnet Lubbe <slubbe@sun.ac.za> | ||
Repository: CRAN | ||
Date/Publication: 2019-12-13 15:20:02 UTC | ||
Date/Publication: 2023-05-30 15:20:02 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: ClusBoot authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,12 @@ | ||
e8f79ccd1acced4e8d05ec4398093a42 *DESCRIPTION | ||
f3e08a3f89c576b59ade251eafa5d7a3 *NAMESPACE | ||
8e95bd3634dbb044a89e174303fa09cc *R/boot.silhouette.R | ||
e2ae8b677d47dadb9b3c338dcd6ac3d4 *R/clusboot.R | ||
838928313918fa1218f7f15e9d31f137 *R/complete.linkage.R | ||
9750c10672a4551764bf4e7380900e47 *R/plot.clusboot.R | ||
7daae5384cc743d046c837092961f449 *build/partial.rdb | ||
0f01608300356264e1620e627ecbf81a *data/case_study_psychiatrist.rda | ||
602fed71fcdbd3dead709ead7bcfa933 *man/ClusBoot-package.Rd | ||
e7c8d822754be59587f0f44f98193e7d *man/boot.silhouette.Rd | ||
ee9aed8c28f04c09b2590a84c127d12e *man/case_study_psychiatrist.Rd | ||
21be27b22db9ba5482101b7cfa63d1c3 *man/clusboot.Rd | ||
27f2896c166a5065d7d91164ba0929d4 *man/complete.linkage.Rd | ||
730f2de0200dccb5c8aa44f7a3292cca *man/plot.clusboot.Rd | ||
3ae7a1f3fbecf4d7e9bf11241165725d *DESCRIPTION | ||
1e56d0aff1225cea0aa97c60ce2c3e2f *LICENSE | ||
b093fd22e95a70beb8fe5c7cd6e49ea2 *NAMESPACE | ||
b3e161ad905271694302b83b3e1a844c *R/data.R | ||
77fd48d6b7f90661916d887e2f26e2dc *R/funcs.R | ||
b4698063ca49ab35f1e22782d377d78c *data/case.study.psychiatrist.rda | ||
f208ca5353d10afb03b770f1bb241a9e *man/boot.silhouette.Rd | ||
7689190f22b577a7ee0e930d9ff080dd *man/case.study.psychiatrist.Rd | ||
1877fdeded8608f80f0dfb7373ba14a5 *man/clusboot.Rd | ||
214b6f46eac17a1c085b98d5d0c4ee08 *man/complete.linkage.Rd | ||
502a5f8dd6ba90060101596366cb7d58 *man/figures/README-pressure-1.png | ||
4d82fb6eed47b6179f1211a7ea97e97f *man/plot.clusboot.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom("grDevices", "rainbow") | ||
importFrom("graphics", "barplot", "plot", "points") | ||
importFrom("stats", "as.dist", "cmdscale", "cutree", "dist", "hclust","optim") | ||
S3method(plot,clusboot) | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(plot,clusboot) | ||
export(boot.silhouette) | ||
export(clusboot) | ||
export(complete.linkage) |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#' Patient by psychiatric symptom data | ||
#' | ||
#' Presence/absence ratings of 24 psychiatric symptoms in 30 psychiatric inpatients made by an individual psychiatrist. | ||
#' The data have been collected in a case study of an individual psychiatrist to identify his implicit taxonomy. | ||
#' | ||
#' @format ## `case.study.psychiatrist` | ||
#' A data frame with 30 observations on the following 28 variables: | ||
#' \describe{ | ||
#' \item{\code{V1}}{inappropriate affect, appearance or behavior; binary vector} | ||
#' \item{\code{V2}}{interview belligerence - negativism; binary vector} | ||
#' \item{\code{V3}}{agitation - excitement; binary vector} | ||
#' \item{\code{V4}}{retardation; binary vector} | ||
#' \item{\code{V5}}{lack of emotions; binary vector} | ||
#' \item{\code{V6}}{speech disorganization; binary vector} | ||
#' \item{\code{V7}}{grandiosity; binary vector} | ||
#' \item{\code{V8}}{suspicion - ideas of persecution; binary vector} | ||
#' \item{\code{V9}}{hallucinations - delusions; binary vector} | ||
#' \item{\code{V10}}{overt anger; binary vector} | ||
#' \item{\code{V11}}{depression; binary vector} | ||
#' \item{\code{V12}}{anxiety; binary vector} | ||
#' \item{\code{V13}}{obsession - compulsion; binary vector} | ||
#' \item{\code{V14}}{suicide; binary vector} | ||
#' \item{\code{V15}}{self injury; binary vector} | ||
#' \item{\code{V16}}{somatic concerns; binary vector} | ||
#' \item{\code{V17}}{social isolation; binary vector} | ||
#' \item{\code{V18}}{daily routine impairment; binary vector} | ||
#' \item{\code{V19}}{leisure time impairment; binary vector} | ||
#' \item{\code{V20}}{antisocial impulses or acts; binary vector} | ||
#' \item{\code{V21}}{alcohol abuse; binary vector} | ||
#' \item{\code{V22}}{drug abuse; binary vector} | ||
#' \item{\code{V23}}{disorientation; binary vector} | ||
#' \item{\code{V24}}{memory impairment; binary vector} | ||
#' \item{\code{V25}}{rating on Global Assessment Scale, a 101-point scale for overall severity of psychiatric disturbance; a numeric vector} | ||
#' \item{\code{V26}}{Affective (Affective Disorder or Anxiety Disorder); binary vector} | ||
#' \item{\code{V27}}{Psychotic (Schizophrenic Disorder or Paranoid Disorder); binary vector} | ||
#' \item{\code{V28}}{Substance abuse (Substance Use Disorder or Substance-Induced Disorder); binary vector} | ||
#' } | ||
#' @details { | ||
#' The data set forms part of the International Federation of Classification Societies Cluster Benchmark Data Repository | ||
#' } | ||
#' @source Van Mechelen, I., & De Boeck, P. (1989). Implicit taxonomy in psychiatric diagnosis: A case study. Journal of Social and Clinical Psychology, 8, 276-287. | ||
#' <https://ifcs.boku.ac.at/repository/data/case_study_psychiatrist/index.html> | ||
|
||
|
||
"case.study.psychiatrist" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
# Sugnet Lubbe | ||
# MuViSU (Centre for Multi-Dimensional Data Visualisation) | ||
# Department of Statistics and Actuarial Science | ||
# Stellenbosch University, June 2022 | ||
# ============================================================ | ||
|
||
# --------------------------------------------------------------------------------- | ||
# clusboot | ||
# --------------------------------------------------------------------------------- | ||
|
||
#' Performs bootstrap on a cluster analysis output | ||
#' | ||
#' @param datmat a data matrix or distance object which will be the input to the clustering function | ||
#' @param B number of bootstrap replicates | ||
#' @param clustering.func the function which will perform the clustering and output a vector of cluster memberships | ||
#' @param ... more arguments to be passed to the clustering function | ||
#' | ||
#' @return an object of type clusboot | ||
#' @export | ||
#' | ||
#' @examples | ||
#' clusboot (scale(case.study.psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
|
||
clusboot <- function(datmat, B=1000, clustering.func=complete.linkage, ...) | ||
{ | ||
obj <- datmat | ||
if (data.class(datmat) != "matrix") datmat <- as.matrix(datmat) | ||
n <- nrow(datmat) | ||
if (is.null(rownames(datmat))) sample.names <- 1:n | ||
else sample.names <- rownames(datmat) | ||
boot.samples <- matrix(sample (1:n, size=n*B, replace=T), ncol=B) | ||
|
||
boot.out <- apply(boot.samples,2,function(x) | ||
{ if (data.class(obj) == "dist") out <- clustering.func(stats::as.dist(datmat[x,x]), ...) | ||
else out <- clustering.func(datmat[x,], ...) | ||
clusD <- totD <- matrix (0, nrow=n, ncol=n, dimnames=list(sample.names,sample.names)) | ||
|
||
boot.sample <- table(x) | ||
boot.names <- as.numeric(names(boot.sample)) | ||
Dmat <- matrix (boot.sample,ncol=1) %*% matrix (boot.sample, nrow=1) | ||
totD[boot.names,boot.names] <- Dmat | ||
|
||
kk <- nlevels(factor(out)) | ||
for (i in 1:kk) | ||
{ | ||
clus.count <- table(x[out==i]) | ||
clus.names <- as.numeric(names(clus.count)) | ||
Dmat <- matrix (clus.count,ncol=1) %*% matrix (clus.count,nrow=1) | ||
clusD[clus.names,clus.names] <- Dmat | ||
} | ||
|
||
cbind(clusD, totD) | ||
}) | ||
# boot.out is a matrix with B columns | ||
# the first nrow/2 is the n*n elements of clusD | ||
# the remaining nrow/2 is the n*n elements of totD | ||
|
||
boot.out <- apply (boot.out, 1, sum) | ||
clusD <- matrix (boot.out[1:(n*n)], nrow=n, ncol=n) | ||
totD <- matrix (boot.out[-(1:(n*n))], nrow=n, ncol=n) | ||
|
||
clusD <- clusD/totD | ||
dimnames(clusD) <- list(sample.names,sample.names) | ||
out <- clustering.func(obj, ...) | ||
output <- list(proportions=clusD[order(out),order(out)], clustering=out[order(out)]) | ||
class(output) <- "clusboot" | ||
output | ||
} | ||
|
||
# --------------------------------------------------------------------------------- | ||
# plot.clusboot | ||
# --------------------------------------------------------------------------------- | ||
|
||
#' MDS plot of similarities given by the proportion of bootstrap replicates where objects cluster together | ||
#' | ||
#' @param x an object of class clusboot | ||
#' @param col single colour or a vector specifying a colour for each object | ||
#' @param ... more arguments to be passed to `plot()` | ||
#' | ||
#' @return matrix of similarities (proportions) | ||
#' @export | ||
#' | ||
#' @examples | ||
#' out <- clusboot (scale(case.study.psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
#' plot(out) | ||
|
||
plot.clusboot <- function(x, col, ...) | ||
{ | ||
Dmat <- 1-x$proportions | ||
|
||
stress.func <- function (y, delta) | ||
{ | ||
Y <- matrix (y, ncol=2) | ||
dd <- stats::dist(Y) | ||
sum((dd-delta)^2)/sum(dd^2) | ||
} | ||
Y <- stats::cmdscale(Dmat) | ||
y <- stats::optim(as.vector(Y), stress.func, delta=stats::as.dist(Dmat))$par | ||
Y <- matrix(y, ncol=2) | ||
|
||
plot (Y[,1], Y[,2], asp=1, type="n", xaxt="n", yaxt="n", xlab="", ylab="") | ||
|
||
cluster.vec <- x$clustering | ||
k <- nlevels(factor(cluster.vec)) | ||
if (missing(col)) col <- grDevices::rainbow(k) | ||
if (length(col)<k) col <- rep(col,k) | ||
|
||
for (i in 1:k) | ||
graphics::points(Y[cluster.vec==i,1], Y[cluster.vec==i,2], col=col[i], ...) | ||
|
||
Y | ||
} | ||
|
||
# --------------------------------------------------------------------------------- | ||
# boot.silhouette | ||
# --------------------------------------------------------------------------------- | ||
|
||
#' Constructs a silhouette plot based on proportion of times items cluster together | ||
#' | ||
#' @param clusboot.out an object of class clusboot | ||
#' @param ... more arguments to be passed to `plot()` | ||
#' | ||
#' @return vector of silhouette widths for each of the clusters | ||
#' @export | ||
#' | ||
#' @examples | ||
#' out <- clusboot (scale(case.study.psychiatrist), B=100, k=6, clustering.func=complete.linkage) | ||
#' boot.silhouette(out) | ||
|
||
boot.silhouette <- function(clusboot.out, ...) | ||
{ | ||
cluster.vec <- clusboot.out$clustering | ||
k <- nlevels(factor(cluster.vec)) | ||
Pmat <- clusboot.out$proportions | ||
|
||
sil <- rep(NA,k) | ||
for (i in 1:k) | ||
{ | ||
current.clus <- (1:length(cluster.vec))[cluster.vec==i] | ||
current.p <- Pmat[current.clus, current.clus] | ||
own.p <- mean(current.p[lower.tri(current.p)]) | ||
other.p <- 0 | ||
for (j in (1:k)[-i]) | ||
{ other.clus <- (1:length(cluster.vec))[cluster.vec==j] | ||
other.mat <- Pmat[current.clus,other.clus] | ||
other.mean <- mean(other.mat) | ||
if (other.mean>other.p) other.p <- other.mean | ||
} | ||
sil[i] <- own.p-other.p | ||
} | ||
names(sil) <- levels(factor(cluster.vec)) | ||
graphics::barplot (sil, names.arg=1:k, horiz=T, xlim=c(0,1), ...) | ||
sil | ||
} | ||
|
||
# --------------------------------------------------------------------------------- | ||
# complete.linkage | ||
# --------------------------------------------------------------------------------- | ||
|
||
#' Wrapper function for performing complete linkage clustering | ||
#' | ||
#' @param X samples x variables data matrix | ||
#' @param k number of clusters | ||
#' | ||
#' @return vector of cluster memberships | ||
#' @export | ||
#' | ||
#' @examples | ||
#' complete.linkage(scale(case.study.psychiatrist), k=6) | ||
|
||
complete.linkage <- function (X, k) | ||
{ stats::cutree(stats::hclust(stats::dist(X)), k) } |
Oops, something went wrong.