Skip to content

Commit

Permalink
Merge pull request #1 from KlugerLab/master
Browse files Browse the repository at this point in the history
update
  • Loading branch information
dkobak committed Dec 9, 2019
2 parents 9b213e2 + 2b9d908 commit 7c7c434
Show file tree
Hide file tree
Showing 5 changed files with 4,142 additions and 1,123 deletions.
3,102 changes: 2,836 additions & 266 deletions examples/test.ipynb

Large diffs are not rendered by default.

214 changes: 105 additions & 109 deletions fast_tsne.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
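# Note: this script should be sourced as: source('<path to file>', chdir = TRUE)
# (chdir = TRUE makes getwd() below return the directory containing this file,
# which is recorded as the FIt-SNE root directory.)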

FAST_TSNE_SCRIPT_DIR <<-getwd()

cat(sprintf("FIt-SNE R wrapper loading.\nFIt-SNE root directory was set to %s\n", FAST_TSNE_SCRIPT_DIR))
FAST_TSNE_SCRIPT_DIR <<- getwd()

message("FIt-SNE R wrapper loading.")
message("FIt-SNE root directory was set to ", FAST_TSNE_SCRIPT_DIR)

# Compute FIt-SNE of a dataset
# dims - dimensionality of the embedding. Default 2.
# perplexity - perplexity is used to determine the
Expand Down Expand Up @@ -68,40 +70,40 @@ cat(sprintf("FIt-SNE R wrapper loading.\nFIt-SNE root directory was set to %s\n"
# details. Default is 1.0
#
fftRtsne <- function(X,
dims=2, perplexity=30, theta=0.5,
check_duplicates=TRUE,
max_iter=1000,
dims = 2, perplexity = 30, theta = 0.5,
max_iter = 1000,
fft_not_bh = TRUE,
ann_not_vptree = TRUE,
stop_early_exag_iter=250,
exaggeration_factor=12.0, no_momentum_during_exag=FALSE,
start_late_exag_iter=-1.0,late_exag_coeff=1.0,
mom_switch_iter=250, momentum=.5, final_momentum=.8, learning_rate=200,
n_trees=50, search_k = -1,rand_seed=-1,
nterms=3, intervals_per_integer=1, min_num_intervals=50,
K=-1, sigma=-30, initialization=NULL,
data_path=NULL, result_path=NULL,
load_affinities=NULL,
fast_tsne_path=NULL, nthreads=0, perplexity_list = NULL,
get_costs = FALSE, df = 1.0,... ) {
version_number = '1.1.0'
stop_early_exag_iter = 250,
exaggeration_factor = 12.0, no_momentum_during_exag = FALSE,
start_late_exag_iter = -1.0, late_exag_coeff = 1.0,
mom_switch_iter = 250, momentum = 0.5, final_momentum = 0.8, learning_rate = 200,
n_trees = 50, search_k = -1, rand_seed = -1,
nterms = 3, intervals_per_integer = 1, min_num_intervals = 50,
K = -1, sigma = -30, initialization = NULL,
data_path = NULL, result_path = NULL,
load_affinities = NULL,
fast_tsne_path = NULL, nthreads = 0, perplexity_list = NULL,
get_costs = FALSE, df = 1.0) {

version_number <- '1.1.0'

if (is.null(fast_tsne_path)) {
if(.Platform$OS.type == "unix") {
fast_tsne_path = sprintf('%s/bin/fast_tsne', FAST_TSNE_SCRIPT_DIR )
if (.Platform$OS.type == "unix") {
fast_tsne_path <- file.path(FAST_TSNE_SCRIPT_DIR, "bin", "fast_tsne")
} else {
fast_tsne_path = sprintf('%s/bin/FItSNE.exe', FAST_TSNE_SCRIPT_DIR)
fast_tsne_path <- file.path(FAST_TSNE_SCRIPT_DIR, "bin", "FItSNE.exe")
}
}

if (is.null(data_path)) {
data_path <- tempfile(pattern='fftRtsne_data_', fileext='.dat')
data_path <- tempfile(pattern = 'fftRtsne_data_', fileext = '.dat')
}
if (is.null(result_path)) {
result_path <- tempfile(pattern='fftRtsne_result_', fileext='.dat')
result_path <- tempfile(pattern = 'fftRtsne_result_', fileext = '.dat')
}
if (is.null(fast_tsne_path)) {
fast_tsne_path <- system2('which', 'fast_tsne', stdout=TRUE)
fast_tsne_path <- system2('which', 'fast_tsne', stdout = TRUE)
}
fast_tsne_path <- normalizePath(fast_tsne_path)
if (!file_test('-x', fast_tsne_path)) {
Expand All @@ -110,115 +112,109 @@ fftRtsne <- function(X,

is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol

if (!is.numeric(theta) || (theta<0.0) || (theta>1.0) ) { stop("Incorrect theta.")}
if (!is.numeric(theta) || (theta < 0.0) || (theta > 1.0) ) { stop("Incorrect theta.")}
if (nrow(X) - 1 < 3 * perplexity) { stop("Perplexity is too large.")}
if (!is.matrix(X)) { stop("Input X is not a matrix")}
if (!(max_iter>0)) { stop("Incorrect number of iterations.")}
if (!is.wholenumber(stop_early_exag_iter) || stop_early_exag_iter<0) { stop("stop_early_exag_iter should be a positive integer")}
if (!(max_iter > 0)) { stop("Incorrect number of iterations.")}
if (!is.wholenumber(stop_early_exag_iter) || stop_early_exag_iter < 0) { stop("stop_early_exag_iter should be a positive integer")}
if (!is.numeric(exaggeration_factor)) { stop("exaggeration_factor should be numeric")}
if (!is.numeric(df)) { stop("df should be numeric")}
if (!is.wholenumber(dims) || dims<=0) { stop("Incorrect dimensionality.")}
if (!is.wholenumber(dims) || dims <= 0) { stop("Incorrect dimensionality.")}
if (search_k == -1) {
if (perplexity>0) {
search_k = n_trees*perplexity*3
} else if (perplexity==0) {
search_k = n_trees*max(perplexity_list)*3
} else {
search_k = n_trees*K
}
if (perplexity > 0) {
search_k <- n_trees * perplexity * 3
} else if (perplexity == 0) {
search_k <- n_trees * max(perplexity_list) * 3
} else {
search_k <- n_trees * K
}
}

if (fft_not_bh){
nbody_algo = 2;
}else{
nbody_algo = 1;
if (fft_not_bh) {
nbody_algo <- 2
} else {
nbody_algo <- 1
}

if (is.null(load_affinities)) {
load_affinities = 0;
load_affinities <- 0
} else {
if (load_affinities == 'load') {
load_affinities = 1;
load_affinities <- 1
} else if (load_affinities == 'save') {
load_affinities = 2;
load_affinities <- 2
} else {
load_affinities = 0;
load_affinities <- 0
}
}

if (ann_not_vptree){
knn_algo = 1;
}else{
knn_algo = 2;
if (ann_not_vptree) {
knn_algo <- 1
} else {
knn_algo <- 2
}
tX = c(t(X))
tX <- as.numeric(t(X))

f <- file(data_path, "wb")
n = nrow(X);
D = ncol(X);
writeBin(as.integer(n), f,size= 4)
writeBin( as.integer(D),f,size= 4)
writeBin( as.numeric(theta), f,size= 8) #theta
writeBin( as.numeric(perplexity), f,size= 8) #theta

if (perplexity == 0) {
writeBin( as.integer(length(perplexity_list)), f, size=4)
writeBin( perplexity_list, f)
}

writeBin( as.integer(dims), f,size=4) #theta
writeBin( as.integer(max_iter),f,size=4)
writeBin( as.integer(stop_early_exag_iter),f,size=4)
writeBin( as.integer(mom_switch_iter),f,size=4)
writeBin( as.numeric(momentum),f,size=8)
writeBin( as.numeric(final_momentum),f,size=8)
writeBin( as.numeric(learning_rate),f,size=8)
writeBin( as.integer(K),f,size=4) #K
writeBin( as.numeric(sigma), f,size=8) #sigma
writeBin( as.integer(nbody_algo), f,size=4) #not barnes hut
writeBin( as.integer(knn_algo), f,size=4)
writeBin( as.numeric(exaggeration_factor), f,size=8) #compexag
writeBin( as.integer(no_momentum_during_exag), f,size=4)
writeBin( as.integer(n_trees), f,size=4)
writeBin( as.integer(search_k), f,size=4)
writeBin( as.integer(start_late_exag_iter), f,size=4)
writeBin( as.numeric(late_exag_coeff), f,size=8)
n <- nrow(X)
D <- ncol(X)
writeBin(as.integer(n), f, size = 4)
writeBin(as.integer(D), f, size = 4)
writeBin(as.numeric(theta), f, size = 8) #theta
writeBin(as.numeric(perplexity), f, size = 8)

if (perplexity == 0) {
writeBin(as.integer(length(perplexity_list)), f, size = 4)
writeBin(perplexity_list, f)
}

writeBin(as.integer(dims), f, size = 4)
writeBin(as.integer(max_iter), f, size = 4)
writeBin(as.integer(stop_early_exag_iter), f, size = 4)
writeBin(as.integer(mom_switch_iter), f, size = 4)
writeBin(as.numeric(momentum), f, size = 8)
writeBin(as.numeric(final_momentum), f, size = 8)
writeBin(as.numeric(learning_rate), f, size = 8)
writeBin(as.integer(K), f, size = 4) #K
writeBin(as.numeric(sigma), f, size = 8) #sigma
writeBin(as.integer(nbody_algo), f, size = 4) #not barnes hut
writeBin(as.integer(knn_algo), f, size = 4)
writeBin(as.numeric(exaggeration_factor), f, size = 8) #compexag
writeBin(as.integer(no_momentum_during_exag), f, size = 4)
writeBin(as.integer(n_trees), f, size = 4)
writeBin(as.integer(search_k), f, size = 4)
writeBin(as.integer(start_late_exag_iter), f, size = 4)
writeBin(as.numeric(late_exag_coeff), f, size = 8)

writeBin( as.integer(nterms), f,size=4)
writeBin( as.numeric(intervals_per_integer), f,size=8)
writeBin( as.integer(min_num_intervals), f,size=4)
tX = c(t(X))
writeBin( tX, f)
writeBin( as.integer(rand_seed), f,size=4)
writeBin(as.numeric(df), f, size=8)
writeBin( as.integer(load_affinities), f,size=4)
if (! is.null(initialization)){ writeBin( c(t(initialization)), f) }
print(df)
writeBin(as.integer(nterms), f, size = 4)
writeBin(as.numeric(intervals_per_integer), f, size = 8)
writeBin(as.integer(min_num_intervals), f, size = 4)
writeBin(tX, f)
writeBin(as.integer(rand_seed), f, size = 4)
writeBin(as.numeric(df), f, size = 8)
writeBin(as.integer(load_affinities), f, size = 4)
if (!is.null(initialization)) { writeBin( c(t(initialization)), f) }
close(f)

flag= system2(command=fast_tsne_path, args=c(version_number,data_path, result_path, nthreads));
flag <- system2(command = fast_tsne_path,
args = c(version_number, data_path, result_path, nthreads))
if (flag != 0) {
stop('tsne call failed');
stop('tsne call failed')
}
f <- file(result_path, "rb")
n <- readBin(f, integer(), n=1, size=4);
d <- readBin(f, integer(), n=1, size=4);
Y <- readBin(f, numeric(), n=n*d);
Y <- t(matrix(Y, nrow=d));
if (get_costs ) {
tmp <- readBin(f, integer(), n=1, size=4);
costs <- readBin(f, numeric(), n=max_iter,size=8);
Yout <- list( Y=Y, costs=costs);
}else {
Yout <- Y;
}
close(f)
file.remove(data_path)
file.remove(result_path)
return(Yout)
n <- readBin(f, integer(), n = 1, size = 4)
d <- readBin(f, integer(), n = 1, size = 4)
Y <- readBin(f, numeric(), n = n * d)
Y <- t(matrix(Y, nrow = d))
if (get_costs) {
readBin(f, integer(), n = 1, size = 4)
costs <- readBin(f, numeric(), n = max_iter, size = 8)
Yout <- list(Y = Y, costs = costs)
} else {
Yout <- Y
}
close(f)
file.remove(data_path)
file.remove(result_path)
Yout
}





Loading

0 comments on commit 7c7c434

Please sign in to comment.