Skip to content

Commit

Permalink
Merge pull request #15 from elbamos/0.1.6
Browse files Browse the repository at this point in the history
0.1.6
  • Loading branch information
elbamos committed Aug 4, 2016
2 parents d87764b + 29f2850 commit 654da27
Show file tree
Hide file tree
Showing 96 changed files with 5,301 additions and 2,619 deletions.
31 changes: 29 additions & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,29 @@
^README\.Rmd$
^README\.md$
^README_files$
^NEWS\.md$
^revdep$
^.*\.RData
^ljc\.Rda
^stmmodel\.Rda
^wikiwWordVis\.Rda
^wij
^visscratc\.R
^wikiwordcoords\.Rda
^ng20wij\.Rda
^time\.Rda
^wordcoords\.Rda
^vignettedata/$
^vignettedata/.$
^stm\.Rda
^wikiwij\.Rda
^wikiwords\.Rda
^wij$
^visscratch\.R$
^ng20wij\.Rda$
^woordcoords\.Rda$
^vignettedata$
^mnistCoords\.wij$
^unprojectable20ngwij\.Rda$
^appveyor\.yml$
^vignettes/largeVis\.pdf$
^vignettes/largeVis\.md$
Expand All @@ -23,7 +43,7 @@
^data/train\.RData$
^libs$
^doc$
^Rplots\.pdf$
^Rplots\.pdf$
^.*\.RData
^m./.Rda$
^./.bin$
^inst/samples.Rda$
Expand All @@ -32,3 +52,10 @@
^f.*\.Rda$
^vignettes/.*\.Rda$
^vignettes/.*\.Rda$
^Examples/.Rmd
^Examples/.html
^faceshighres.png$
^poliblog/.Rda
^log4j/.spark/.log
^mnist$
^derby/.log
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
.Rhistory
.RData
.Ruserdata
inst/doc
Examples.html
53 changes: 51 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,56 @@
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r

language: R
language: r
matrix:
include:
- os: linux
dist: trusty
r: release
- os: osx
osx_image: xcode7.4
r: release
allow_failures:
- os: osx
osx_image: xcode6.4
r: release
- os: linux
dist: trusty
r: devel
- os: osx
osx_image: xcode7.4
r: devel
- os: linux
dist: trusty
r: oldrel
fast_finish: true

sudo: false
cache: packages

r_github_packages:
- jimhester/covr

r_packages:
- Rcpp
- RcppArmadillo
- devtools

addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.9
- g++-4.9
- gfortran-4.9

before_install: |
mkdir ~/.R
cat <<EOF > ~/.R/Makevars
CXX1X=g++-4.9
FC=gfortran-4.9
CXX1XSTD=-std=c++11
after_success:
- Rscript -e 'covr::codecov()'
- Rscript -e 'covr::codecov(branch="reference")'
20 changes: 11 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,33 +1,35 @@
Package: largeVis
Type: Package
Title: High-Quality Visualizations of Large, High-Dimensional Datasets
Version: 0.1.5
Version: 0.1.6
Author: Amos B. Elberg
Maintainer: Amos Elberg <amos.elberg@gmail.com>
Description: Implements the largeVis algorithm for visualizing very large high-dimensional datasets. Also very fast search for approximate nearest neighbors.
License: GPL-3
LazyData: TRUE
RoxygenNote: 5.0.1
Depends:
R (>= 2.10),
Matrix,
RcppProgress (>= 0.2.1),
RcppArmadillo (>= 0.7.100.3.0)
R (>= 3.0.2),
Matrix
Imports:
parallel,
Rcpp (>= 0.12.4),
abind
LinkingTo: Rcpp,RcppProgress,RcppArmadillo
abind,
ggplot2 (>= 0.9.2.1),
dbscan
LinkingTo: Rcpp,RcppProgress (>= 0.2.1),RcppArmadillo (>= 0.7.100.3.0),testthat(>= 1.0.2)
Suggests: testthat,
covr,
knitr,
rmarkdown,
ggplot2,
wesanderson,
RColorBrewer
RColorBrewer,
dplyr,
magrittr
URL: https://github.com/elbamos/largeVis
BugReports: https://github.com/elbamos/largeVis/issues
NeedsCompilation: yes
OS_type: unix, windows
BuildVignettes: FALSE
VignetteBuilder: knitr
SystemRequirements: C++11
25 changes: 15 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,28 +1,33 @@
# Generated by roxygen2: do not edit by hand

S3method(buildEdgeMatrix,CsparseMatrix)
S3method(buildEdgeMatrix,TsparseMatrix)
S3method(buildEdgeMatrix,default)
S3method(buildWijMatrix,CsparseMatrix)
S3method(buildWijMatrix,TsparseMatrix)
S3method(distance,CsparseMatrix)
S3method(distance,TsparseMatrix)
S3method(distance,matrix)
S3method(randomProjectionTreeSearch,CsparseMatrix)
S3method(randomProjectionTreeSearch,TsparseMatrix)
S3method(randomProjectionTreeSearch,matrix)
export(buildEdgeMatrix)
export(buildWijMatrix)
export(distance)
export(ggManifoldMap)
export(largeVis)
export(manifoldMap)
export(manifoldMapStretch)
export(neighborsToVectors)
export(projectKNNs)
export(randomProjectionTreeSearch)
export(vis)
importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,TsparseMatrix)
importFrom(Matrix,sparseMatrix)
importFrom(Rcpp,sourceCpp)
importFrom(dbscan,opticsXi)
importFrom(dbscan,optics_cut)
importFrom(ggplot2,aes)
importFrom(ggplot2,annotation_raster)
importFrom(ggplot2,geom_blank)
importFrom(ggplot2,ggplot)
importFrom(grDevices,as.raster)
importFrom(graphics,rasterImage)
importFrom(stats,optimize)
importFrom(stats,rnorm)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(stats,aggregate)
importFrom(stats,runif)
useDynLib(largeVis)
44 changes: 44 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
### largeVis 0.1.6

* Revisions for CRAN release, including verifying correctness by reproducing paper examples, and timing tests/benchmarks
+ Tested against the paper authors' wiki-doc and wiki-word datasets
+ Tested with up to 2.5m rows, 100m edges (processed in 12 hours).
* Neighbor search:
+ Dense search is much, much faster and more efficient
+ Tree search for cosine distances uses normalized vectors
* projectKNNs
+ Should be 10x faster for small datasets
+ Replaced binary search ( O(n log n) ) with the alias algorithm for weighted sampling ( O(1) )
+ Clips and smooths gradients, per discussion with paper authors
+ Optimized implementation for alpha == 1
+ Removed option for mixing weights into loss function - doesn't make sense if gradients are being clipped.
+ Fixed OpenMP-related bug which caused visualizations to be "fuzzy"
* Vignettes:
+ Reuse initialization matrices and neighbors, to make it easier to see the effect of hyperparameters
+ Benchmarks now a separate vignette, more detailed
+ Examples removed from vignettes and moved to readme
+ Added examples of manifold map with color faces using OpenFace vectors
* Sigmas, P_ij matrix, w_ij matrix
+ Replaced C++ code entirely with new code based on reference implementation
+ Refactored R code into `buildEdgeMatrix()` and `buildWijMatrix()`, which are simpler.
* Visualization
+ Color manifold maps work
+ Ported Karpathy's function for non-overlapping embeddings (experimental)
+ Removed transparency parameter
+ Added ggManifoldMap function for adding a manifold map to a ggplot2 plot
* vis
+ Whether to return neighbors and sigmas is now controlled by adjustable parameters, for memory reasons
+ Runs gc() periodically
* Data
+ Removed most data and extdata that had been included before; this is to reduce size for CRAN submission
* Dependencies & Build
+ Many misc changes to simplify dependencies for CRAN
+ Re-added ARMA_64BIT_WORD; otherwise, could exceed the limitation on size of an arma sparse matrix with moderately sized datasets (~ 1 M rows, K = 100)
+ Now depends on R >= 3.0.2, so RcppProgress and RcppArmadillo could be moved from the Depends section of the DESCRIPTION file
+ Will now compile on systems that lack OpenMP (e.g., OS X systems with old versions of xcode).
* Correctness and Testing
+ Tests are separated by subject
+ Additional, more extensive tests with greater code coverage
+ Added travis testing against OSX
* Clustering
+ Very preliminary support for dbscan and optics added

### largeVis 0.1.5

Expand Down
48 changes: 36 additions & 12 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,36 @@
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

sgd <- function(coords, is, js, ps, ws, gamma, rho, minRho, useWeights, nBatches, M, alpha, verbose) {
.Call('largeVis_sgd', PACKAGE = 'largeVis', coords, is, js, ps, ws, gamma, rho, minRho, useWeights, nBatches, M, alpha, verbose)
# Generated wrapper: passes edges, eps, minPts and verbose unchanged to the
# native routine 'largeVis_dbscan_e' in package 'largeVis' and returns its result.
dbscan_e <- function(edges, eps, minPts, verbose) {
.Call('largeVis_dbscan_e', PACKAGE = 'largeVis', edges, eps, minPts, verbose)
}

searchTrees <- function(threshold, n_trees, K, max_recursion_degree, maxIter, data, distMethod, verbose) {
.Call('largeVis_searchTrees', PACKAGE = 'largeVis', threshold, n_trees, K, max_recursion_degree, maxIter, data, distMethod, verbose)
# Generated wrapper: passes edges, data, eps, minPts and verbose unchanged to the
# native routine 'largeVis_dbscan_ed' in package 'largeVis' and returns its result.
dbscan_ed <- function(edges, data, eps, minPts, verbose) {
.Call('largeVis_dbscan_ed', PACKAGE = 'largeVis', edges, data, eps, minPts, verbose)
}

# Generated wrapper: passes neighbors, data, eps, minPts and verbose unchanged to
# the native routine 'largeVis_dbscan_nd' in package 'largeVis' and returns its result.
dbscan_nd <- function(neighbors, data, eps, minPts, verbose) {
.Call('largeVis_dbscan_nd', PACKAGE = 'largeVis', neighbors, data, eps, minPts, verbose)
}

# Generated wrapper: passes edges, eps, minPts and verbose unchanged to the
# native routine 'largeVis_optics_e' in package 'largeVis' and returns its result.
optics_e <- function(edges, eps, minPts, verbose) {
.Call('largeVis_optics_e', PACKAGE = 'largeVis', edges, eps, minPts, verbose)
}

# Generated wrapper: passes edges, data, eps, minPts and verbose unchanged to the
# native routine 'largeVis_optics_ed' in package 'largeVis' and returns its result.
optics_ed <- function(edges, data, eps, minPts, verbose) {
.Call('largeVis_optics_ed', PACKAGE = 'largeVis', edges, data, eps, minPts, verbose)
}

# Generated wrapper: passes neighbors, data, eps, minPts and verbose unchanged to
# the native routine 'largeVis_optics_nd' in package 'largeVis' and returns its result.
optics_nd <- function(neighbors, data, eps, minPts, verbose) {
.Call('largeVis_optics_nd', PACKAGE = 'largeVis', neighbors, data, eps, minPts, verbose)
}

# Generated wrapper: passes edges and sil unchanged to the native routine
# 'largeVis_silhouetteDbscan' in package 'largeVis'. The result is wrapped in
# invisible(), so it is not auto-printed at the console.
silhouetteDbscan <- function(edges, sil) {
invisible(.Call('largeVis_silhouetteDbscan', PACKAGE = 'largeVis', edges, sil))
}

# Generated wrapper: passes threshold, n_trees, K, maxIter, data, distMethod and
# verbose unchanged to the native routine 'largeVis_searchTrees' in package
# 'largeVis' and returns its result.
searchTrees <- function(threshold, n_trees, K, maxIter, data, distMethod, verbose) {
.Call('largeVis_searchTrees', PACKAGE = 'largeVis', threshold, n_trees, K, maxIter, data, distMethod, verbose)
}

fastDistance <- function(is, js, data, distMethod, verbose) {
Expand All @@ -21,19 +45,19 @@ fastSDistance <- function(is, js, i_locations, j_locations, x, distMethod, verbo
.Call('largeVis_fastSDistance', PACKAGE = 'largeVis', is, js, i_locations, j_locations, x, distMethod, verbose)
}

distMatrixTowij <- function(is, js, xs, sigmas, N, verbose) {
.Call('largeVis_distMatrixTowij', PACKAGE = 'largeVis', is, js, xs, sigmas, N, verbose)
# Generated wrapper: passes i, j, d and perplexity unchanged to the native
# routine 'largeVis_referenceWij' in package 'largeVis' and returns its result.
referenceWij <- function(i, j, d, perplexity) {
.Call('largeVis_referenceWij', PACKAGE = 'largeVis', i, j, d, perplexity)
}

sigFunc <- function(sigma, x_i, perplexity) {
.Call('largeVis_sigFunc', PACKAGE = 'largeVis', sigma, x_i, perplexity)
# Generated wrapper: passes coords, targets_i, sources_j, ps, weights, gamma, rho,
# n_samples, M, alpha and verbose unchanged to the native routine 'largeVis_sgd'
# in package 'largeVis' and returns its result.
sgd <- function(coords, targets_i, sources_j, ps, weights, gamma, rho, n_samples, M, alpha, verbose) {
.Call('largeVis_sgd', PACKAGE = 'largeVis', coords, targets_i, sources_j, ps, weights, gamma, rho, n_samples, M, alpha, verbose)
}

searchTreesCSparse <- function(threshold, n_trees, K, max_recursion_degree, maxIter, i, p, x, distMethod, verbose) {
.Call('largeVis_searchTreesCSparse', PACKAGE = 'largeVis', threshold, n_trees, K, max_recursion_degree, maxIter, i, p, x, distMethod, verbose)
# Generated wrapper: passes threshold, n_trees, K, maxIter, i, p, x, distMethod
# and verbose unchanged to the native routine 'largeVis_searchTreesCSparse' in
# package 'largeVis' and returns its result.
# NOTE(review): i, p, x presumably are the slots of a CsparseMatrix — confirm against the caller.
searchTreesCSparse <- function(threshold, n_trees, K, maxIter, i, p, x, distMethod, verbose) {
.Call('largeVis_searchTreesCSparse', PACKAGE = 'largeVis', threshold, n_trees, K, maxIter, i, p, x, distMethod, verbose)
}

searchTreesTSparse <- function(threshold, n_trees, K, max_recursion_degree, maxIter, i, j, x, distMethod, verbose) {
.Call('largeVis_searchTreesTSparse', PACKAGE = 'largeVis', threshold, n_trees, K, max_recursion_degree, maxIter, i, j, x, distMethod, verbose)
# Generated wrapper: passes threshold, n_trees, K, maxIter, i, j, x, distMethod
# and verbose unchanged to the native routine 'largeVis_searchTreesTSparse' in
# package 'largeVis' and returns its result.
# NOTE(review): i, j, x presumably are the slots of a TsparseMatrix — confirm against the caller.
searchTreesTSparse <- function(threshold, n_trees, K, maxIter, i, j, x, distMethod, verbose) {
.Call('largeVis_searchTreesTSparse', PACKAGE = 'largeVis', threshold, n_trees, K, maxIter, i, j, x, distMethod, verbose)
}

Loading

0 comments on commit 654da27

Please sign in to comment.