Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 19c743e
Showing
7 changed files
with
311 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Package: tsne | ||
Type: Package | ||
Title: T-distributed Stochastic Neighbor Embedding for R (t-SNE) | ||
Version: 0.1-1 | ||
Date: 2010-02-19 | ||
Author: Justin Donaldson | ||
Maintainer: <jdonaldson@gmail.com> | ||
Description: A "pure R" implementation of the t-SNE algorithm. | ||
License: GPL | ||
LazyLoad: yes | ||
Packaged: 2010-02-20 17:13:07 UTC; jjdonald | ||
Repository: CRAN | ||
Date/Publication: 2010-02-21 18:43:46 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
exportPattern("^[[:alpha:]]+") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* version 0.1 - Initial release. main tsne function included. | ||
* version 0.1-1 - fixed misc. documentation omissions. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Compute the Shannon entropy H and the conditional probability row P
# for a vector of (squared) distances D at precision beta (the inverse
# bandwidth of the Gaussian kernel).
#
# Arguments:
#   D    - numeric vector of distances from one point to all others
#   beta - precision (1 / (2 * sigma^2)) of the Gaussian kernel
#
# Returns a list with:
#   H - entropy of the resulting conditional distribution
#   P - normalized probabilities (sums to 1; NaN if all weights underflow,
#       matching the original behavior)
.Hbeta <-
function(D, beta){
  P <- exp(-D * beta)
  sumP <- sum(P)
  H <- log(sumP) + beta * sum(D * P) / sumP
  # Build the result as a proper list rather than growing an empty `{}`.
  list(H = H, P = P / sumP)
}
|
||
# Convert a data matrix (or "dist" object) X into an n x n matrix of
# conditional probabilities P, performing a per-point binary search for
# the precision beta that achieves the requested perplexity.
#
# Arguments:
#   X          - data matrix (rows = observations) or a "dist" object
#   perplexity - target perplexity (effective number of neighbors)
#   tol        - tolerance on |H - log(perplexity)| for the search
#
# Returns a list with:
#   P    - n x n matrix of conditional probabilities (zero diagonal)
#   beta - fitted precision for each point
.x2p <-
function(X, perplexity = 15, tol = 1e-5){
  # inherits() is the robust class test; `class(X) == 'dist'` breaks when
  # an object carries more than one class.
  if (inherits(X, 'dist')) {
    D <- X
  } else {
    D <- dist(X)^2 # remove the square once this is resolved
  }

  n <- attr(D, 'Size')
  D <- as.matrix(D)
  P <- matrix(0, n, n)
  beta <- rep(1, n)
  logU <- log(perplexity)

  for (i in seq_len(n)) {
    betamin <- -Inf
    betamax <- Inf
    Di <- D[i, -i]
    hbeta <- .Hbeta(Di, beta[i])
    H <- hbeta$H
    thisP <- hbeta$P
    Hdiff <- H - logU
    tries <- 0

    # Binary search on beta[i] until the entropy matches log(perplexity)
    # (capped at 50 iterations, as in the reference implementation).
    while (abs(Hdiff) > tol && tries < 50) {
      if (Hdiff > 0) {
        # Entropy too high: increase precision (narrow the kernel).
        betamin <- beta[i]
        if (is.infinite(betamax)) beta[i] <- beta[i] * 2
        else beta[i] <- (beta[i] + betamax) / 2
      } else {
        # Entropy too low: decrease precision (widen the kernel).
        betamax <- beta[i]
        if (is.infinite(betamin)) beta[i] <- beta[i] / 2
        else beta[i] <- (beta[i] + betamin) / 2
      }

      hbeta <- .Hbeta(Di, beta[i])
      H <- hbeta$H
      thisP <- hbeta$P
      Hdiff <- H - logU
      tries <- tries + 1
    }
    P[i, -i] <- thisP
  }

  r <- list(P = P, beta = beta)
  sigma <- sqrt(1 / beta)

  message('sigma summary: ', paste(names(summary(sigma)),':',summary(sigma),'|',collapse=''))

  r
}
|
||
# PCA-whiten the data matrix X: center it (optionally row-normalize),
# rotate onto the principal directions of its covariance, and rescale so
# the leading n.comp components are uncorrelated with unit variance.
# Returns an nrow(X) x n.comp matrix.
.whiten <-
function(X, row.norm=FALSE, verbose=FALSE, n.comp=ncol(X))
{
  # The default n.comp depends on X, so it must be evaluated before X is
  # overwritten below.
  force(n.comp)
  if (verbose) message("Centering")
  num_obs <- nrow(X)
  num_vars <- ncol(X)
  centered <- scale(X, scale = FALSE)
  # Work with variables in rows (p x n) from here on.
  Xt <- if (row.norm) {
    t(scale(centered, scale = row.norm))
  } else {
    t(centered)
  }

  if (verbose) message("Whitening")
  cov_mat <- Xt %*% t(Xt) / num_obs
  decomp <- La.svd(cov_mat)
  rescale <- diag(c(1 / sqrt(decomp$d)))
  proj <- rescale %*% t(decomp$u)
  proj <- matrix(proj[seq_len(n.comp), ], n.comp, num_vars)
  t(proj %*% Xt)
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# t-SNE: embed the rows of X (a matrix) or the items of a "dist" object
# into k dimensions by t-distributed stochastic neighbor embedding
# (van der Maaten & Hinton, 2008).
#
# Arguments:
#   X              - data matrix (rows = observations) or "dist" object
#   initial_config - optional matrix giving the starting embedding
#   k              - output dimensionality
#   initial_dims   - dimensions kept by the whitening preprocessing step
#   perplexity     - perplexity passed to .x2p
#   max_iter       - maximum number of gradient-descent iterations
#   min_cost       - stop early once the KL cost drops below this value
#   epoch_callback - optional function(ydata, P) called every `epoch` iters
#   whiten         - whiten the input matrix before embedding?
#   epoch          - number of iterations between progress reports
#
# Returns a list with the embedding matrix `ydata` and the input
# probability matrix `P`.
tsne <-
function(X,initial_config = NULL, k=2, initial_dims=30, perplexity=30, max_iter = 1000, min_cost=0, epoch_callback=NULL,whiten=TRUE, epoch=100 ){

  if (inherits(X, 'dist')) {
    n = attr(X,'Size')
    X = X/sum(X)
  }
  else {
    X = as.matrix(X)
    initial_dims = min(initial_dims,ncol(X))
    if (whiten) X<-.whiten(as.matrix(X),n.comp=initial_dims)
    n = dim(X)[1]
  }

  momentum = .5
  final_momentum = .8
  mom_switch_iter = 250    # iteration at which momentum is raised

  epsilon = 500            # learning rate
  min_gain = .01

  P = .x2p(X,perplexity, 1e-5)$P

  eps = 2^(-52) # typical machine precision
  P[is.nan(P)]<-eps
  P = .5 * (P + t(P))      # symmetrize the conditional probabilities
  P = P / sum(P)
  P[P < eps]<-eps
  P = P * 4                # early exaggeration, undone at iteration 100
  if (!is.null(initial_config)) {
    ydata = initial_config
  } else {
    # BUG FIX: use n, not nrow(X) — nrow() is NULL for a "dist" object,
    # which made random initialization fail for dist input.
    ydata = matrix(rnorm(k * n), n)
  }
  y_grads = matrix(0,dim(ydata)[1],dim(ydata)[2])
  y_incs = matrix(0,dim(ydata)[1],dim(ydata)[2])
  gains = matrix(1,dim(ydata)[1],dim(ydata)[2])

  for (iter in seq_len(max_iter)){
    # Student-t (1 d.o.f.) similarities between embedded points.
    sum_ydata = apply(ydata^2, 1, sum)
    num = 1/(1 + sum_ydata + sweep(-2 * ydata %*% t(ydata),2, -t(sum_ydata)))
    diag(num)=0
    Q = num / sum(num)
    if (any(is.nan(num))) message ('NaN in grad. descent')
    Q[Q < eps] = eps
    stiffnesses = 4 * (P-Q) * num
    for (i in seq_len(n)){
      y_grads[i,] = apply(sweep(-ydata, 2, -ydata[i,]) * stiffnesses[,i],2,sum)
    }

    # BUG FIX: both terms must form one expression. Previously the line
    # beginning with "+" parsed as a separate, discarded statement, so the
    # "gains * .8" term never applied and gains collapsed wherever the
    # gradient kept its sign.
    gains = ((gains + .2) * abs(sign(y_grads) != sign(y_incs)) +
             gains * .8 * abs(sign(y_grads) == sign(y_incs)))
    gains[gains < min_gain] = min_gain
    y_incs = momentum * y_incs - epsilon * (gains * y_grads)
    ydata = ydata + y_incs
    # BUG FIX: was assigned to "y_data" (typo), so the recentering of the
    # embedding was silently discarded.
    ydata = sweep(ydata,2,apply(ydata,2,mean))
    if (iter == mom_switch_iter) momentum = final_momentum

    if (iter == 100) P = P/4   # remove early exaggeration

    if (iter %% epoch == 0) { # epoch
      cost = sum(apply(P * log((P+eps)/(Q+eps)),1,sum))
      message("Epoch: Iteration #",iter," error is: ",cost)
      if (cost < min_cost) break
      if (!is.null(epoch_callback)) epoch_callback(ydata, P)
    }

  }
  r = list(ydata = ydata, P = P)
  r

}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
\name{tsne-package} | ||
\Rdversion{1.1} | ||
\alias{tsne-package} | ||
|
||
\docType{package} | ||
\title{The tsne-package for multidimensional scaling} | ||
\description{ | ||
This package contains one function called \link[tsne]{tsne} which contains all the functionality. | ||
} | ||
|
||
\details{ | ||
\tabular{ll}{ | ||
Package: \tab tsne\cr | ||
Type: \tab Package\cr | ||
Version: \tab 0.1\cr | ||
Date: \tab 2010-02-19\cr | ||
License: \tab GPL\cr | ||
LazyLoad: \tab yes\cr | ||
} | ||
|
||
} | ||
\author{ | ||
Justin Donaldson | ||
http://www.scwn.net | ||
|
||
Maintainer: Justin Donaldson (jdonaldson@gmail.com) | ||
} | ||
\references{ | ||
L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. | ||
|
||
L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. | ||
} | ||
|
||
\keyword{ package } | ||
% \seealso{ | ||
% ~~ Optional links to other man pages, e.g. ~~ | ||
% ~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~ | ||
% } | ||
% \examples{ | ||
% ~~ simple examples of the most important functions ~~ | ||
% } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
\name{tsne} | ||
\Rdversion{1.1} | ||
\alias{tsne} | ||
%- Also NEED an '\alias' for EACH other topic documented here. | ||
\title{ | ||
The t-SNE method for dimensionality reduction | ||
} | ||
\description{ | ||
Provides a simple function interface for specifying t-SNE dimensionality reduction on R matrices or "dist" objects. | ||
} | ||
\usage{ | ||
tsne(X, initial_config = NULL, k = 2, initial_dims = 30, perplexity = 30, max_iter = 1000, min_cost = 0, epoch_callback = NULL, whiten = TRUE, epoch=100) | ||
} | ||
%- maybe also 'usage' for other objects documented here. | ||
\arguments{ | ||
\item{X}{ | ||
The R matrix or "dist" object | ||
} | ||
\item{initial_config}{ | ||
an argument providing a matrix specifying the initial embedding for X. | ||
} | ||
\item{k}{ | ||
the dimension of the resulting embedding. | ||
} | ||
\item{initial_dims}{ | ||
The number of dimensions to use in reduction method. | ||
} | ||
\item{perplexity}{ | ||
Perplexity parameter. (optimal number of neighbors) | ||
} | ||
\item{max_iter}{ | ||
Maximum number of iterations to perform. | ||
} | ||
\item{min_cost}{ | ||
The minimum cost value (error) to halt iteration. | ||
} | ||
\item{epoch_callback}{ | ||
A callback function invoked after each epoch (every \code{epoch} iterations, 100 by default) | ||
} | ||
\item{whiten}{ | ||
A boolean value indicating whether the matrix data should be whitened by the function. | ||
} | ||
\item{epoch}{ | ||
The number of iterations in between update messages. | ||
} | ||
} | ||
%%\details{ | ||
%% ~~ If necessary, more details than the description above ~~ | ||
%%} | ||
\value{ | ||
An R object containing a \emph{ydata} embedding matrix, as well as the matrix of probabilities \emph{P} | ||
} | ||
\references{ | ||
L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. | ||
|
||
L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. | ||
} | ||
\author{ | ||
Justin Donaldson | ||
} | ||
%%\note{ | ||
%% ~~further notes~~ | ||
%%} | ||
|
||
%% ~Make other sections like Warning with \section{Warning }{....} ~ | ||
|
||
\seealso{ | ||
\link{dist} | ||
} | ||
\examples{\dontrun{ | ||
colors = rainbow(length(unique(iris$Species))) | ||
names(colors) = unique(iris$Species) | ||
ecb = function(x,y){ plot(x,t='n'); text(x,labels=iris$Species, col=colors[iris$Species]) } | ||
tsne_iris = tsne(iris[,1:4], epoch_callback = ecb, perplexity=50) | ||
|
||
# sometimes scaling instead of whitening produces superior results | ||
# tsne_iris = tsne(scale(iris[,1:4]), epoch_callback = ecb, perplexity=50,whiten=F) | ||
|
||
# compare to PCA | ||
dev.new() | ||
pca_iris = princomp(iris[,1:4])$scores[,1:2] | ||
plot(pca_iris, t='n') | ||
text(pca_iris, labels=iris$Species,col=colors[iris$Species]) | ||
} | ||
} | ||
% Add one or more standard keywords, see file 'KEYWORDS' in the | ||
% R documentation directory. | ||
% \keyword{ ~kwd1 } | ||
% \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line |