Skip to content

Commit

Permalink
Working on vignettes, bug in hdbscan
Browse files Browse the repository at this point in the history
  • Loading branch information
elbamos committed Apr 15, 2017
1 parent 908e9d5 commit f7a7c2c
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 42 deletions.
11 changes: 7 additions & 4 deletions R/dbscan.R
Expand Up @@ -21,16 +21,19 @@ lv_dbscan <- function(edges,
eps = Inf,
minPts = nrow(neighbors - 1),
verbose = getOption("verbose", TRUE)) {
if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
stop("edges must be either an edgematrix or a largeVis object")
}
if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
if (ncol(neighbors) != ncol(edges)) neighbors <- t(neighbors)
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be provided.")
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")

clusters <- dbscan_cpp(edges, neighbors, as.double(eps), as.integer(minPts), as.logical(verbose))

Expand Down
10 changes: 6 additions & 4 deletions R/hdbscan.R
Expand Up @@ -95,13 +95,15 @@ hdbscan <- function(edges, neighbors = NULL, minPts = 20, K = 5,
threads = NULL,
verbose = getOption("verbose", TRUE)) {

if (inherits(edges, "edgematrix")) edges <- t(toMatrix(edges))
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
if (is.null(neighbors)) stop("Neighbors must be specified unless a largeVis object is given.")
stop("edges must be either an edgematrix or a largeVis object")
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")

if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
Expand Down
11 changes: 7 additions & 4 deletions R/optics.R
Expand Up @@ -36,16 +36,19 @@ lv_optics <- function(edges,
xi,
useQueue = TRUE,
verbose = getOption("verbose", TRUE)) {
if (inherits(edges, "edgematrix")) edges <- toMatrix(edges)
if (inherits(edges, "largeVis")) {
if (inherits(edges, "edgematrix")) {
edges <- t(toMatrix(edges))
} else if (inherits(edges, "largeVis")) {
if (missing(neighbors)) neighbors <- edges$knns
edges <- toMatrix(edges$edges)
edges <- t(toMatrix(edges$edges))
} else {
stop("edges must be either an edgematrix or a largeVis object")
}
if (!is.null(neighbors)) {
neighbors[is.na(neighbors)] <- -1
if (ncol(neighbors) != ncol(edges)) neighbors <- t(neighbors)
}
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified.")
if (is.null(edges) || is.null(neighbors)) stop("Both edges and neighbors must be specified (or use a largeVis object)")
ret <- optics_cpp(edges = edges,
neighbors = neighbors,
eps = as.double(eps),
Expand Down
8 changes: 4 additions & 4 deletions inst/doc/largeVis.html
Expand Up @@ -4,15 +4,15 @@

<head>

<meta charset="utf-8">
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

<meta name="viewport" content="width=device-width, initial-scale=1">

<meta name="author" content="Amos Elberg" />

<meta name="date" content="2017-02-27" />
<meta name="date" content="2017-04-15" />

<title>largeVis: An Implementation of the LargeVis Algorithm</title>

Expand Down Expand Up @@ -70,7 +70,7 @@

<h1 class="title toc-ignore">largeVis: An Implementation of the LargeVis Algorithm</h1>
<h4 class="author"><em>Amos Elberg</em></h4>
<h4 class="date"><em>2017-02-27</em></h4>
<h4 class="date"><em>2017-04-15</em></h4>



Expand Down Expand Up @@ -221,7 +221,7 @@ <h2>References</h2>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
Expand Down
14 changes: 6 additions & 8 deletions inst/doc/momentumandusedata.R
Expand Up @@ -122,11 +122,10 @@ grid.raster(img)
## ----dbscan,fig.width=6,fig.height=6-------------------------------------
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand All @@ -147,9 +146,8 @@ ggplot(data = set, aes(x = x, y = y, color = cluster)) +
ggtitle("Effect of eps and minPts on DBSCAN results")

## ----optics,fig.width=5,message=FALSE,warning=FALSE----------------------
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -163,7 +161,7 @@ ggplot(data.frame(

## ----opticsvsdbscan,fig.width=2,fig.width=6------------------------------
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -182,7 +180,7 @@ ggplot(data = opticsPoints, aes(x = x, y = y, color = cluster)) +
## ----hdbscan,fig.width=6,fig.height=6------------------------------------
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down
14 changes: 6 additions & 8 deletions inst/doc/momentumandusedata.Rmd
Expand Up @@ -189,11 +189,10 @@ The following chart illustrates the effect of the $\epsilon$ and `minPts` parame
```{r dbscan,fig.width=6,fig.height=6}
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand Down Expand Up @@ -224,9 +223,8 @@ points in denser regions of the space as the seeds for new clusters.
This is illustrated in the following `reachability plots` for the spiral dataset:

```{r optics,fig.width=5,message=FALSE,warning=FALSE}
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -243,7 +241,7 @@ ggplot(data.frame(

```{r opticsvsdbscan,fig.width=2,fig.width=6}
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -269,7 +267,7 @@ The `dbscan` package has other functions for cutting and visualizing `OPTICS` cl
```{r hdbscan,fig.width=6,fig.height=6}
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down
4 changes: 2 additions & 2 deletions inst/doc/momentumandusedata.html
Expand Up @@ -4,7 +4,7 @@

<head>

<meta charset="utf-8">
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />

Expand Down Expand Up @@ -113,7 +113,7 @@ <h2>References</h2>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
Expand Down
14 changes: 6 additions & 8 deletions vignettes/momentumandusedata.Rmd
Expand Up @@ -189,11 +189,10 @@ The following chart illustrates the effect of the $\epsilon$ and `minPts` parame
```{r dbscan,fig.width=6,fig.height=6}
load(system.file(package = "largeVis", "vignettedata/spiral.Rda"))
dat <- spiral
neighbors <- randomProjectionTreeSearch(t(dat), K = 20)
edges <- buildEdgeMatrix(t(dat), neighbors = neighbors)
vis <- largeVis(t(dat), K = 20, save_edges = TRUE, save_neighbors = TRUE, sgd_batches = 1)
set <- rbind(Map(f = function(y) {
rbind(Map(f = function(x) {
clust = lv_dbscan(edges = edges, neighbors = neighbors, eps = x, minPts = y)$cluster
clust = lv_dbscan(vis, eps = x, minPts = y)$cluster
data.frame(cluster = clust, eps = x, minPts = y)
}, c(1, 3, 5)))
}, c(5, 10, 20)))
Expand Down Expand Up @@ -224,9 +223,8 @@ points in denser regions of the space as the seeds for new clusters.
This is illustrated in the following `reachability plots` for the spiral dataset:

```{r optics,fig.width=5,message=FALSE,warning=FALSE}
library(dbscan, quietly = TRUE)
optClust <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(edges = edges, neighbors = neighbors, eps = 5, useQueue = TRUE, minPts = 5)
optClust <- lv_optics(vis, eps = 5, useQueue = FALSE, minPts = 5)
optClust2 <- lv_optics(vis, eps = 5, useQueue = TRUE, minPts = 5)
ggplot(data.frame(
o = c(optClust$order, optClust2$order),
d = c(optClust$reachdist, optClust2$reachdist),
Expand All @@ -243,7 +241,7 @@ ggplot(data.frame(

```{r opticsvsdbscan,fig.width=2,fig.width=6}
suppressWarnings(opticsPoints <- do.call(rbind, Map(f = function(x) {
clust = thiscut <- extractDBSCAN(optClust, x)$cluster
clust = thiscut <- dbscan::extractDBSCAN(optClust, x)$cluster
data.frame(cluster = clust, eps = x)
}, c(1, 3, 5))))
opticsPoints$cluster <- factor(opticsPoints$cluster)
Expand All @@ -269,7 +267,7 @@ The `dbscan` package has other functions for cutting and visualizing `OPTICS` cl
```{r hdbscan,fig.width=6,fig.height=6}
suppressWarnings(set <- do.call(rbind, Map(f = function(y) {
rbind(Map(f = function(x) {
hdclust <- hdbscan(edges = edges, neighbors = neighbors, K = y, minPts = x)$cluster
hdclust <- largeVis::hdbscan(vis, K = y, minPts = x)$cluster
data.frame(cluster = as.numeric(hdclust), K = x, minPts = y)
}, c(6, 10, 20)))
}, c(2, 6, 12))))
Expand Down

0 comments on commit f7a7c2c

Please sign in to comment.