Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Move into sub dir

  • Loading branch information...
commit 6be05a74a9c3ef8af744370c0a9e43206cfa4b09 0 parents
@hadley authored
12 DESCRIPTION
@@ -0,0 +1,12 @@
+Package: localmds
+Type: Package
+Title: Local MDS
+Version: 0.1.0
+Date: 2006-09-23
+Author: Lisha Chen <lishachen2006@gmail.com>, Hadley Wickham <h.wickham@gmail.com>
+Maintainer: Hadley Wickham <h.wickham@gmail.com>
+Description:
+Depends: R (>= 2.3), reshape, rggobi (>= 2.1.4), ggplot
+Suggests: graph, gWidgets
+LazyData: yes
+License: GPL
2  NAMESPACE
@@ -0,0 +1,2 @@
+exportPattern("^[^\\.]")
+useDynLib(localmds)
95 R/animation.r
@@ -0,0 +1,95 @@
+# Animate convergence of local MDS
+# This function uses GGobi to produce an animation of local mds progress
+#
+# Points are coloured according to their \code{\link{lcmeta}} criterion
+# value. White points are poorly represented, dark blue points are well
+# represented. Lines joins local points.
+#
+# Each update is rescaled to range [0, 1] to ensure that the scale
+# remains constant over time.
+#
+# @arguments All arguments passed on to \code{\link{localmds}}
+# @arguments Number of iterations between update
+# @argument Number of neighbours to use for \code{\link{lcmeta}}
+# @keyword hplot
+#X \dontrun{
+#X localmds.animate(spiral, threshold=180)
+#X localmds.animate(spiral)
+#X localmds.animate(spiral, threshold=180, initial=pc(spiral))
+#X s2 <- localmds(spiral, threshold=180, tau=1)
+#X localmds.animate(spiral, initial=s2, threshold=180)
+#X localmds.animate(frey)
+#X }
+localmds.animate <- function(..., initial=NULL, stepsize=10, k=3) {
+ if (!require("rggobi")) stop("You must have the rggobi package installed to use the GGobi tools.")
+
+ last <- localmds(..., initial=initial, maxiter=stepsize, savedist=TRUE, k=k)
+ dlast <- apply(last, 2, function(x) (x - min(x)) / diff(range(last)))
+
+ do <- attr(last, "dist")
+
+ g <- ggobi(last)
+ d <- g[1]
+
+ while(TRUE) {
+ last <- localmds(..., initial=last, maxiter=stepsize)
+ dlast <- scale(last)
+ d[[1]] <- dlast[,1]
+ d[[2]] <- dlast[,2]
+
+ lc <- as.vector(lcmeta(new=last, k=3, do=do, pointwise=TRUE, adjust=FALSE))
+ glyph_colour(d) <- floor(lc * 9) + 1
+ while(gtkEventsPending()) gtkMainIteration()
+ }
+}
+
+# Open a localmds object in GGobi.
+#
+# If \code{\link{localmds}} has been run with \code{savedist=TRUE}
+# this function will automatically add an edge set for all local
+# distances.
+#
+# The plot is also scaled between 0 and 1 to avoid having to
+# change the scale over time.
+#
+# @arguments localmds object
+# @arguments other unused arguments
+# @keyword hplot
+#X sp <- localmds(spiral, threshold=180, savedist=TRUE)
+#X ggobi(sp)
+#X ggobi(localmds(spiral, savedist=TRUE))
+ggobi.localmds <- function(data, ...) {
+ g <- ggobi(scale(data))
+ colorscheme(g) <- "Blues 9"
+ d <- g[1]
+
+ e <- attr(data,"dist")
+ if (!is.null(e)) {
+ lc <- as.vector(lcmeta(new=data, k=attr(data, "k"), do=e, pointwise=TRUE, adjust=FALSE))
+ #d$lc <- lc
+ glyph_colour(d) <- floor(lc * 9) + 1
+
+ a <- which(lower.tri(e), arr.ind=TRUE)
+ src <- row.names(d)[a[,2]]
+ dest <- row.names(d)[a[,1]]
+ lengths <- as.vector(as.dist(e))
+ local <- lengths != .myInf
+
+ g$edges <- data.frame(lengths[local])
+ edges(g$edges) <- cbind(src[local], dest[local])
+
+ d <- displays(g)[[1]]
+ edges(d) <- g$edges
+
+ }
+
+ invisible(g)
+}
+
+# Scale localmds object
+# Scales a localmds object to common scale [0, 1] across all dimensions.
+#
+# @keyword internal
+scale.localmds <- function(x, ...) {
+ apply(x, 2, function(col) (col - min(x)) / diff(range(x)))
+}
47 R/boxcox-stress.r
@@ -0,0 +1,47 @@
+# Use 1e36 consistently to subsitute non-local distances
+.myInf <- 1e36
+
+# rcboxcox
+# Basic wrapper to C function
+#
+# @returns the configuration generated by B-C stress function.
+# @arguments intial configuration, n x d matrix
+# @arguments dissimilarity matrix. Keeps local distances and use .myInf to subsitute non-local distances. Saves both local distances and neighborhood information.
+# @arguments
+# @arguments
+# @arguments
+# @arguments
+# @arguments maximum number of interations
+# @keyword internal
+rcboxcox <- function(X1, D0, lam, mu, nu, tau, niter) {
+ sel <- D0 != 0 & D0 != .myInf
+
+ k <- sum(sel)/nrow(D0) # Average size of neighhood
+
+ energy <- 0
+ c <- k/nrow(D0) * (median(D0[sel])) ^ (1/lam+nu) * tau # Relative weight of repulsion
+
+ tmp<-.C("boxcox",
+ X1 = as.double(t(X1)),
+ D0 = as.double(D0),
+ n = as.integer(nrow(X1)),
+ d = as.integer(ncol(X1)),
+ lam = as.double(lam),
+ mu = as.double(mu),
+ nu = as.double(nu),
+ c = as.double(c),
+ energy = as.double(energy),
+ niter = as.integer(niter)
+ )
+
+ proj <- matrix(tmp$X1, ncol = ncol(X1), byrow = TRUE)
+ attr(proj, "energy") <- tmp$energy
+ proj
+}
+
+
+# Energy
+# Extract energy of configuration
+#
+# @keyword manip
+energy <- function(x) attr(x, "energy")
22 R/gui.r
@@ -0,0 +1,22 @@
+#
+# if (!require("gWidgets")) stop("gWidgets required for GUI")
+# if (!require("cairoDevice")) stop("cairoDevice required for GUI")
+#
+# win <- gwindow("localmds")
+# group <- ggroup(container=win)
+#
+# table <- glayout(container=group)
+# table[1,1] <- glabel("k")
+# sl1 <- gslider(1, 10)
+# table[1,2] <- sl1
+# table[2,1] <- glabel("mu")
+# table[2,2] <- gslider(0, 10)
+# table[3,1] <- glabel("tau")
+# table[3,2] <- gslider(0, 10)
+# table[4,1] <- glabel("lambda")
+# table[4,2] <- gslider(0, 10)
+# table[5,1] <- glabel("nu")
+# table[5,2] <- gslider(0, 10)
+# visible(table) <- TRUE
+#
+# ggraphics(container=group)
81 R/localmds.r
@@ -0,0 +1,81 @@
+# Local MDS
+#
+# @keyword internal
+# @seealso \code{\link{localmds.animate}}, \code{\link{localmds.multistart}}
+localmds <- function(data, ...) UseMethod("localmds", data)
+
+# Local MDS on a distance matrix.
+#
+# Information about how localmds works here.
+#
+# @arguments maximum number of interations
+# @arguments starting configuration, defaults to random locations. See also \code{\link{pc}} and \code{\link{localmds.multistart}}
+# @arguments dimensionality of reduced space
+# @arguments
+# @arguments
+# @arguments
+# @arguments amount of static repulsive force, see also \code{\link{tausearch}}
+# @arguments maximum number of iterations to perform
+# @arguments metric threshold to use, if not specified neighbour will be based on \code{k} nearest neighbours, see \code{\link{threshold}} for more details
+# @arguments should the computed (local) distance matrix be saved as an attribute on the result?
+# @arguments number of neighbours to use to determine locality
+# @keyword multivariate
+#X localmds(eurodist)
+#X ggobi(localmds(eurodist, savedist=TRUE))
+localmds.dist <- function(data, initial=NULL, d=2, lambda=1, mu=1, nu=1, tau=0.1, maxiter=1000, threshold=NULL, savedist=FALSE,k=4, ...) {
+ data <- as.matrix(data)
+ data <- threshold(data, k=k, thresholds=threshold)
+
+ if (is.null(initial)) initial <- matrix(rnorm(d * nrow(data)),ncol=d)
+
+ res <- rcboxcox(initial, data, lam=lambda, mu=mu, nu=nu, tau=tau, niter=maxiter)
+ rownames(res) <- rownames(data)
+ if (savedist) attr(res, "dist") <- data
+ if (!missing(threshold)) attr(res,"threshold") <- threshold
+ attr(res, "k") <- k
+ class(res) <- c("localmds", class(res))
+
+ res
+}
+
+# Local MDS for a graph
+# Use localmds as a graph layout technique
+#
+# @keyword internal
+#X# localmds.graph(business)
+localmds.graph <- function(data, ...) {
+ if (!require("graph")) stop("graph package required.")
+ data <- ftM2adjM(t(edgeMatrix(data)))
+ localmds(data, ...)
+}
+
+# Local MDS on a data frame
+# Provides a convenient method for converting a data.frame to a distance matrix and then applying localmds.
+#
+# @arguments data frame
+# @arguments dimensionality of projection
+# @arguments distance metric to use, see \code{\link{dist}} for options
+# @arguments other arguments passed to \code{\link{localmds.dist}}
+# @keyword multivariate
+#X plot(cmdscale(dist(swiss)))
+#X plot(localmds(swiss, k=5))
+localmds.data.frame <- function(data, d=2, metric="euclidean", ...) {
+ localmds.matrix(data.matrix(data), d=d, metric=metric, ...)
+}
+
+# Local MDS on a matrix
+# Provides a convenient method for converting a matrix to a distance matrix and then applying localmds.
+#
+# @arguments matrix of data points
+# @arguments dimensionality of projection
+# @arguments distance metric to use, see \code{\link{dist}} for options
+# @arguments other arguments passed to \code{\link{localmds.dist}}
+# @keyword multivariate
+#X s2 <- localmds(spiral)
+#X plot(s2)
+#X ggobi(s2)
+#X s2a <- localmds(spiral, initial=s2, tau=1e-2)
+localmds.matrix <- function(data, d=2, metric="euclidean", ...) {
+ D0 <- as.matrix(stats::dist(data, method=metric))
+ localmds.dist(D0, d=d, ...)
+}
62 R/metacriterion.r
@@ -0,0 +1,62 @@
+# Compute lc meta criterion
+# Compute the lc meta criterion which is a
+#
+# The lc metacriterion computes for each point the
+# proportion of the closest k neighbours in the original
+# space that are still among the closest k neighbours in the
+# new reduced space.
+#
+# This can be optional adjusted by subtracting k/(n-1)
+# to account for the fact that by chance alone as the neighbourhood
+# size increases, the number of points that remain neighbours
+# increases.
+#
+# @arguments Original (full-dimensional) data set
+# @arguments New (lower-dimensioanl) data set
+# @arguments Numbers of neighbours to use (numeric vector)
+# @arguments Distance metric to use
+# @arguments Should adjustment for large k be used?
+# @arguments Return pointwise or average lcmeta?
+# @arguments Original distance matrix, computed from original data set if not specified
+# @keyword multivariate
+#X s2 <- localmds(spiral)
+#X lcmeta(spiral, s2)
+#X lcmeta(spiral, s2, 1:40)
+#X lcmeta(spiral, s2, adjust=FALSE)
+#X lcmeta(spiral, s2, pointwise=TRUE, adjust=FALSE)
+#X lcmeta(spiral, s2, pointwise=TRUE)
+lcmeta <- function(original, new, k=2:4, metric="euclidean", adjust=TRUE, pointwise=FALSE, do = as.matrix(dist(original, method=metric))) {
+ n <- nrow(new)
+ dn <- as.matrix(dist(new, method=metric))
+
+ do.nn <- t(apply(do, 1, order)[-1, , drop=FALSE])
+ dn.nn <- t(apply(dn, 1, order)[-1, , drop=FALSE])
+
+ m <- do.call(cbind, lapply(k, function(i) {
+ neighbours <- cbind(do.nn[, 1:i], dn.nn[, 1:i])
+ apply(neighbours, 1, function(x) sum(duplicated(x)))
+ }))
+
+ if (pointwise) {
+ colnames(m) <- k
+ m <- m / k
+ } else {
+ names(m) <- k
+ m <- colMeans(m) / k
+ }
+ if (adjust) m <- m - k / (n-1)
+
+ m
+}
+
+# Find closest neighbour
+# Used to check that lcmeta is working correctly
+#
+# @keyword internal
+#X s2 <- localmds(spiral)
+#X man <- mean(closest(spiral) == closest(s2))
+#X lc1 <- lcmeta(spiral, s2, 1, adjust=FALSE)
+#X stopifnot(all.equal(man, lc1))
+closest <- function(x) {
+ apply(as.matrix(dist(x)), 1, function(x) order(x)[2])
+}
37 R/multistart.r
@@ -0,0 +1,37 @@
+# Multistart local MDS
+# Try many random starts in order to find the best configuration.
+#
+# @arguments All arguments passed on to \code{\link{localmds}}
+# @arguments Number of times to try
+# @arguments Display running progress?
+# @arguments Default criterion function
+# @keyword multivariate
+#X localmds.multistart(spiral)
+#X lc1 <- function (x) -lcmeta(spiral, x, k=2)
+#X localmds.multistart(spiral, criterion=lc1)
+localmds.multistart <- function(..., times=10, output=TRUE, criterion=energy) {
+ best <- localmds(...)
+ bestc <- criterion(best)
+
+ if (output) .running(1, bestc)
+ for (i in 1:(times - 1)) {
+ new <- localmds(...)
+ newc <- criterion(new)
+ better <- newc < bestc
+ if (better) {
+ best <- new
+ bestc <- newc
+ }
+ if (output) .running(i+1, newc, better)
+ }
+
+ best
+}
+
+# Status messages
+# Output status messages for localmds.multistart
+# @keyword internal
+.running <- function(i, value, better=FALSE) {
+ nb <- if(better) " New best!" else ""
+ cat("Iteration: ", i, " Criterion: ", value, nb, "\n",sep="")
+}
14 R/pca.r
@@ -0,0 +1,14 @@
+# Calculate first n principal components
+# Convenience function for computing first n principal components
+#
+# @arguments data frame or matrix
+# @arguments number of pcs to return
+# @seealso \code{\link{prcomp}} for real machinery
+# @keyword multivariate
+#X pc(spiral)
+#X
+#X f <- frey[,apply(frey, 2,var) > 0.001]
+#X localmds(f, initial=pc(f))
+pc <- function(data, n=2) {
+ predict(prcomp(data, scale.=TRUE))[, 1:n]
+}
31 R/tausearch.r
@@ -0,0 +1,31 @@
+# Try different values of tau for best representation
+# Better configurations may be obtained starting with large values of tau and getting smaller. This function explores which values of tau are optimal.
+#
+# This involves running localmds many times, so may be slow.
+# @arguments arguments passed on to localmds
+# @arguments values to use for tau. Automatically sorted in decreasing order
+# @arguments neighbour to use for localmds and calculating meta criterion
+# @arguments plot results?
+# @value matrix of tau and lcmeta values
+# @keyword multivariate
+#X tausearch(spiral)
+#X ks <- 4:10
+#X diffk <- do.call(rbind, lapply(ks, function(k) tausearch(spiral, k=k, maxiter=100)))
+#X qplot(-log(tau), lcmeta, data=diffk, id=k, colour=factor(k), type="line")
+tausearch <- function(..., tau=10^(-(0:7)), k=4, plot=TRUE) {
+ tau <- sort(tau, decreasing=TRUE)
+
+ n <- length(tau)
+ results <- numeric(n)
+ config <- NULL
+ for(i in 1:n) {
+ config <- localmds(..., initial=config, k=k, tau=tau[i], savedist=TRUE)
+
+ results[i] <- lcmeta(new=config, k=k, do=attr(config,"dist"))
+ #cat(i, ": ", results[i], "\n", sep="")
+ }
+ res <- data.frame(tau=tau, lcmeta=results, k=k)
+
+ if (plot) qplot(-log(tau), lcmeta, res, type=c("line", "point"))
+ res
+}
13 R/threshold.r
@@ -0,0 +1,13 @@
+# Compute thresholds for local distances
+# This determines the cut off between local and non-local distances.
+#
+# @arguments distance matrix
+# @arguments number of neighbours to keep
+# @arguments metric threshold to apply. If \code{NULL} will compute threshold based on \code{k}
+# @keyword multivariate
+threshold <- function(d, k=4, thresholds=NULL) {
+ if (is.null(thresholds)) thresholds <- apply(d,2,sort)[k+1,]
+ d <- ifelse(d > thresholds, .myInf, d)
+ pmin(d, t(d)) # make symmetric
+}
+
5 TODO
@@ -0,0 +1,5 @@
+* sample data sets
+
+* grid search for tau
+
+* get data from http://www.cs.toronto.edu/~roweis/data.html
BIN  data/frey.rda
Binary file not shown
BIN  data/spiral.rda
Binary file not shown
4 load.r
@@ -0,0 +1,4 @@
+library(localmds)
+lapply(dir("~/documents/localmds/R", full.name=T), source)
+
+
11 man/frey.rd
@@ -0,0 +1,11 @@
+\name{Frey face data}
+\docType{data}
+\alias{frey}
+\title{Sample of data from Frey faces dataset}
+\description{
+
+}
+\usage{data(frey)}
+\format{}
+\references{}
+\keyword{datasets}
11 man/spiral.rd
@@ -0,0 +1,11 @@
+\name{Spiral data}
+\docType{data}
+\alias{spiral}
+\title{3D spiral}
+\description{
+
+}
+\usage{data(spiral)}
+\format{}
+\references{}
+\keyword{datasets}
196 src/minimise.c
@@ -0,0 +1,196 @@
+#include <R.h>
+#include <Rinternals.h>
+#include <R_ext/Rdynload.h>
+#include <stdio.h>
+#include <math.h>
+
+void dist(double *, int, int, double *);
+double fnorm(double *, int);
+void grad(double *, double *, int, int, double *);
+void get_M(double *, double *, int, double, double, double, double, double *);
+double Energy(double *, double *, int, double, double, double, double);
+void XsubsY(double *, double *, int, double *);
+void aX(double, double *, int, double *);
+void equal(double *, int, double *);
+
+const double myInf = 1e36;
+
+void
+doubledist(double *x, int *n, int *d, double *ddist)
+{
+ int i;
+ double *dis = (double *) malloc(*n * *n * sizeof(double));
+ dist(x, *n, *d, dis);
+ for (i = 0; i < *n * *n; i++)
+ ddist[i] = 2 * dis[i];
+}
+
+void
+boxcox(double *X1, double *D0, int *n, int *d,
+ double *lam, double *mu, double *nu, double *c, double *energy,
+ int *niter)
+{
+ double minsize = 1e-4;
+ double tmp;
+ double stepsize = 0.1, s0 = 10, s1 = s1 - 1;
+ int i = 0, j, count = 0, count1 = 0, count2 = 0;
+ double *X0 = (double *) malloc(*n * *d * sizeof(double));
+ double *tmpX = (double *) malloc(*n * *d * sizeof(double));
+ double *D1 = (double *) malloc(*n * *n * sizeof(double));
+ double *M = (double *) malloc(*n * *n * sizeof(double));
+ double *Grad = (double *) malloc(*n * *d * sizeof(double));
+ double *normGrad = (double *) malloc(*n * *d * sizeof(double));
+
+ dist(X1, *n, *d, D1);
+
+ while (stepsize > minsize && count < *niter) {
+ if (s1 > s0 && count > 1) {
+ stepsize = 0.5 * stepsize;
+ aX(stepsize, normGrad, *n * *d, tmpX);
+ XsubsY(X0, tmpX, *n * *d, X1);
+ } else {
+ stepsize = 1.05 * stepsize;
+ equal(X1, *n * *d, X0);
+ get_M(D0, D1, *n, *lam, *mu, *nu, *c, M);
+ grad(X0, M, *n, *d, Grad);
+ aX(fnorm(X0, *n * *d) / fnorm(Grad, *n * *d), Grad, *n * *d, normGrad);
+ aX(stepsize, normGrad, *n * *d, tmpX);
+ XsubsY(X0, tmpX, *n * *d, X1);
+ }
+ count++;
+ s0 = s1;
+ dist(X1, *n, *d, D1);
+ s1 = Energy(D0, D1, *n, *lam, *mu, *nu, *c);
+// if (count % 50 == 0) {
+// printf("Inter: %d\n", count);
+// printf("Energy: %f\n", s1);
+// }
+ }
+ *energy = s1;
+}
+
+/* ------------------ Functions ------------------- */
+
+
+void
+dist(double *x, int n, int d, double *dis)
+{
+ int i, j, k;
+ double s, r;
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < i; j++) {
+ s = 0.0;
+ for (k = 0; k < d; k++) {
+ r = x[k + i * d] - x[k + j * d];
+ s += r * r;
+ }
+ dis[i + j * n] = dis[j + i * n] = sqrt(s);
+ }
+}
+
+double
+fnorm(double *x, int len)
+{
+ int i;
+ double s = 0;
+
+ for (i = 0; i < len; i++)
+ s += pow(x[i], 2);
+ return sqrt(s);
+}
+
+void
+grad(double *X0, double *M, int n, int d, double *Grad)
+{
+ int i, j, k;
+ double tmp;
+
+ for (k = 0; k < d; k++)
+ for (i = 0; i < n; i++) {
+ tmp = 0.0;
+ for (j = 0; j < n; j++)
+ tmp += (X0[k + i * d] - X0[k + j * d]) * M[i + j * n];
+ Grad[k + i * d] = tmp;
+ }
+}
+
+void
+get_M(double *D0, double *D1,
+ int n, double lam, double mu, double nu, double c,
+ double *M)
+{
+ int i, j;
+ double s, d0, d1;
+ /* Diagnal element is set to be 0 */
+ for (i = 0; i < n; i++)
+ M[i + i * n] = 0;
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < i; j++) {
+ d0 = D0[i + j * n];
+ d1 = D1[i + j * n];
+
+ if (d0 < myInf)
+ s = pow(d0, nu) * pow(d1, mu + 1 / lam - 2) - pow(d0, nu + 1 / lam) * pow(d1, mu - 2);
+ else
+ s = -c * pow(d1, mu - 2);
+
+ M[i + j * n] = M[j + i * n] = s;
+ }
+}
+
+double
+Energy(double *D0, double *D1, int n, double lam, double mu, double nu, double c)
+{
+ int i, j, count;
+ double d0, d1, bc1, bc2, s, en = 0;
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < i; j++) {
+ d0 = D0[i + j * n];
+ d1 = D1[i + j * n];
+
+ if (mu + 1 / lam == 0)
+ bc1 = log(d1);
+ else
+ bc1 = (pow(d1, mu + 1 / lam) - 1) / (mu + 1 / lam);
+
+ if (mu == 0)
+ bc2 = log(d1);
+ else
+ bc2 = (pow(d1, mu) - 1) / mu;
+
+ if (d0 < myInf)
+ s = pow(d0, nu) * bc1 - pow(d0, nu + 1 / lam) * bc2;
+ else
+ s = -c * bc2;
+
+ en = en + 2 * s;
+ }
+ return en;
+}
+
+void
+XsubsY(double *X, double *Y, int len, double *Z)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ Z[i] = X[i] - Y[i];
+}
+
+void
+aX(double a, double *X, int len, double *Z)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ Z[i] = a * X[i];
+}
+
+void
+equal(double *X, int len, double *Y)
+{
+ int i = 0;
+ for (i = 0; i < len; i++)
+ Y[i] = X[i];
+}
Please sign in to comment.
Something went wrong with that request. Please try again.