Skip to content

Commit

Permalink
Refactor SVD method name and add svd_method option to R package
Browse files Browse the repository at this point in the history
  • Loading branch information
anqi committed Sep 3, 2015
1 parent 524d94d commit af75976
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 15 deletions.
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/schemas/SVDV99.java
Expand Up @@ -29,7 +29,7 @@ public static final class SVDParametersV99 extends ModelParametersSchema<SVDMode
@API(help = "Transformation of training data", values = { "NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE" }) // TODO: pull out of enum class
public DataInfo.TransformType transform;

@API(help = "Method for computing SVD (Caution: Power and Probablistic are currently experimental and unstable)", values = { "GramSVD", "Power", "Probabilistic" }) // TODO: pull out of enum class
@API(help = "Method for computing SVD (Caution: Power and Probablistic are currently experimental and unstable)", values = { "GramSVD", "Power", "Randomized" }) // TODO: pull out of enum class
public SVDParameters.Method svd_method;

@API(help = "Number of right singular vectors")
Expand Down
7 changes: 3 additions & 4 deletions h2o-algos/src/main/java/hex/svd/SVD.java
@@ -1,6 +1,5 @@
package hex.svd;

import Jama.CholeskyDecomposition;
import Jama.Matrix;
import Jama.QRDecomposition;
import Jama.SingularValueDecomposition;
Expand All @@ -27,7 +26,7 @@
* Singular Value Decomposition
* <a href = "http://www.cs.yale.edu/homes/el327/datamining2013aFiles/07_singular_value_decomposition.pdf">SVD via Power Method Algorithm</a>
* <a href = "https://www.cs.cmu.edu/~venkatg/teaching/CStheory-infoage/book-chapter-4.pdf">Proof of Convergence for Power Method</a>
* <a href = "http://arxiv.org/pdf/0909.4061.pdf">Probabilistic Algorithms for Matrix Approximation</a>
* <a href = "http://arxiv.org/pdf/0909.4061.pdf">Randomized Algorithms for Matrix Approximation</a>
* @author anqi_fu
*/
public class SVD extends ModelBuilder<SVDModel,SVDModel.SVDParameters,SVDModel.SVDOutput> {
Expand Down Expand Up @@ -55,7 +54,7 @@ public long progressUnits() {
return 2;
case Power:
return 1 + _parms._nv;
case Probabilistic:
case Randomized:
return 5 + _parms._max_iterations;
default: return _parms._nv;
}
Expand Down Expand Up @@ -486,7 +485,7 @@ private Frame directSVD(DataInfo dinfo, Frame qfrm, SVDModel model) {
DivideU utsk = new DivideU(model._output._d);
utsk.doAll(u);
}
} else if(_parms._svd_method == SVDParameters.Method.Probabilistic) {
} else if(_parms._svd_method == SVDParameters.Method.Randomized) {
qfrm = randSubIter(dinfo, _parms._max_iterations, _parms._seed);
u = directSVD(dinfo, qfrm, model);
} else
Expand Down
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/svd/SVDModel.java
Expand Up @@ -24,7 +24,7 @@ public static class SVDParameters extends Model.Parameters {
public boolean _impute_missing = false; // Should missing numeric values be imputed with the column mean?

public enum Method {
GramSVD, Power, Probabilistic
GramSVD, Power, Randomized
}
}

Expand Down
13 changes: 4 additions & 9 deletions h2o-algos/src/test/java/hex/svd/SVDTest.java
Expand Up @@ -2,10 +2,7 @@

import hex.DataInfo;
import hex.SplitFrame;
import hex.gram.Gram;
import hex.svd.SVDModel.SVDParameters;
import org.apache.commons.math3.analysis.function.Pow;
import org.apache.commons.math3.analysis.function.Power;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
Expand All @@ -15,8 +12,6 @@
import water.Scope;
import water.TestUtil;
import water.fvec.Frame;
import water.rapids.Exec;
import water.util.ArrayUtils;
import water.util.FrameUtils;
import water.util.Log;

Expand Down Expand Up @@ -227,7 +222,7 @@ public class SVDTest extends TestUtil {
parms._nv = 4;
parms._keep_u = true;
parms._transform = DataInfo.TransformType.STANDARDIZE;
parms._svd_method = SVDParameters.Method.Probabilistic;
parms._svd_method = SVDParameters.Method.Randomized;
parms._max_iterations = 4;

SVD job = new SVD(parms);
Expand Down Expand Up @@ -422,7 +417,7 @@ public class SVDTest extends TestUtil {
parms._use_all_factor_levels = true;
parms._keep_u = false;
parms._transform = DataInfo.TransformType.NONE;
parms._svd_method = SVDParameters.Method.Probabilistic;
parms._svd_method = SVDParameters.Method.Randomized;
parms._max_iterations = 7;

SVD job = new SVD(parms);
Expand Down Expand Up @@ -464,7 +459,7 @@ public class SVDTest extends TestUtil {
parms._nv = 5;
parms._keep_u = true;
parms._transform = DataInfo.TransformType.DEMEAN;
parms._svd_method = SVDParameters.Method.Probabilistic;
parms._svd_method = SVDParameters.Method.Randomized;
parms._impute_missing = true;
parms._max_iterations = 20;

Expand Down Expand Up @@ -513,7 +508,7 @@ public class SVDTest extends TestUtil {
parms._nv = 8;
parms._only_v = false;
parms._keep_u = true;
parms._svd_method = SVDParameters.Method.Probabilistic;
parms._svd_method = SVDParameters.Method.Randomized;
parms._impute_missing = true;
parms._max_iterations = 20;

Expand Down
9 changes: 9 additions & 0 deletions h2o-r/h2o-package/R/svd.R
Expand Up @@ -19,13 +19,19 @@
#' column; "DESCALE" for dividing by the standard deviation of each
#' column; "STANDARDIZE" for demeaning and descaling; and "NORMALIZE"
#' for demeaning and dividing each column by its range (max - min).
#' @param svd_method A character string that indicates how SVD should be calculated.
#' Possible values are "GramSVD" for distributed computation of the Gram matrix
#' followed by a local SVD using the JAMA package; "Power" for computation of
#' the SVD using the power iteration method; and "Randomized" for approximate SVD
#' by projecting onto a random subspace (see references).
#' @param seed (Optional) Random seed used to initialize the right singular vectors
#' at the beginning of each power method iteration.
#' @param use_all_factor_levels (Optional) A logical value indicating whether all
#' factor levels should be included in each categorical column expansion.
#' If FALSE, the indicator column corresponding to the first factor level
#' of every categorical variable will be dropped. Defaults to TRUE.
#' @return Returns an object of class \linkS4class{H2ODimReductionModel}.
#' @references N. Halko, P.G. Martinsson, J.A. Tropp. \href{http://arxiv.org/abs/0909.4061}{Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions}. SIAM Rev., Survey and Review section, Vol. 53, No. 2, pp. 217-288, June 2011.
#' @examples
#' library(h2o)
#' localH2O <- h2o.init()
Expand All @@ -38,6 +44,7 @@ h2o.svd <- function(training_frame, x, nv,
destination_key, # h2o generates its own default parameters
max_iterations = 1000,
transform = "NONE",
svd_method = c("GramSVD", "Power", "Randomized"),
seed,
use_all_factor_levels)
{
Expand Down Expand Up @@ -69,6 +76,8 @@ h2o.svd <- function(training_frame, x, nv,
parms$max_iterations <- max_iterations
if(!missing(transform))
parms$transform <- transform
if(!missing(svd_method))
parms$svd_method <- svd_method
if(!missing(seed))
parms$seed <- seed
if(!missing(use_all_factor_levels))
Expand Down

0 comments on commit af75976

Please sign in to comment.