diff --git a/h2o-algos/src/main/java/hex/schemas/SVDV99.java b/h2o-algos/src/main/java/hex/schemas/SVDV99.java index e9fd3326420a..e8c75973eea2 100644 --- a/h2o-algos/src/main/java/hex/schemas/SVDV99.java +++ b/h2o-algos/src/main/java/hex/schemas/SVDV99.java @@ -29,7 +29,7 @@ public static final class SVDParametersV99 extends ModelParametersSchemaSVD via Power Method Algorithm * Proof of Convergence for Power Method - * Probabilistic Algorithms for Matrix Approximation + * Randomized Algorithms for Matrix Approximation * @author anqi_fu */ public class SVD extends ModelBuilder { @@ -55,7 +54,7 @@ public long progressUnits() { return 2; case Power: return 1 + _parms._nv; - case Probabilistic: + case Randomized: return 5 + _parms._max_iterations; default: return _parms._nv; } @@ -486,7 +485,7 @@ private Frame directSVD(DataInfo dinfo, Frame qfrm, SVDModel model) { DivideU utsk = new DivideU(model._output._d); utsk.doAll(u); } - } else if(_parms._svd_method == SVDParameters.Method.Probabilistic) { + } else if(_parms._svd_method == SVDParameters.Method.Randomized) { qfrm = randSubIter(dinfo, _parms._max_iterations, _parms._seed); u = directSVD(dinfo, qfrm, model); } else diff --git a/h2o-algos/src/main/java/hex/svd/SVDModel.java b/h2o-algos/src/main/java/hex/svd/SVDModel.java index 63dcb9857626..0feb0d609a32 100644 --- a/h2o-algos/src/main/java/hex/svd/SVDModel.java +++ b/h2o-algos/src/main/java/hex/svd/SVDModel.java @@ -24,7 +24,7 @@ public static class SVDParameters extends Model.Parameters { public boolean _impute_missing = false; // Should missing numeric values be imputed with the column mean? public enum Method { - GramSVD, Power, Probabilistic + GramSVD, Power, Randomized } } diff --git a/h2o-algos/src/test/java/hex/svd/SVDTest.java b/h2o-algos/src/test/java/hex/svd/SVDTest.java index eef98f56e125..c15769fb60e3 100644 --- a/h2o-algos/src/test/java/hex/svd/SVDTest.java +++ b/h2o-algos/src/test/java/hex/svd/SVDTest.java @@ -2,10 +2,7 @@ import hex.DataInfo; import hex.SplitFrame; -import hex.gram.Gram; import hex.svd.SVDModel.SVDParameters; -import org.apache.commons.math3.analysis.function.Pow; -import org.apache.commons.math3.analysis.function.Power; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -15,8 +12,6 @@ import water.Scope; import water.TestUtil; import water.fvec.Frame; -import water.rapids.Exec; -import water.util.ArrayUtils; import water.util.FrameUtils; import water.util.Log; @@ -227,7 +222,7 @@ public class SVDTest extends TestUtil { parms._nv = 4; parms._keep_u = true; parms._transform = DataInfo.TransformType.STANDARDIZE; - parms._svd_method = SVDParameters.Method.Probabilistic; + parms._svd_method = SVDParameters.Method.Randomized; parms._max_iterations = 4; SVD job = new SVD(parms); @@ -422,7 +417,7 @@ public class SVDTest extends TestUtil { parms._use_all_factor_levels = true; parms._keep_u = false; parms._transform = DataInfo.TransformType.NONE; - parms._svd_method = SVDParameters.Method.Probabilistic; + parms._svd_method = SVDParameters.Method.Randomized; parms._max_iterations = 7; SVD job = new SVD(parms); @@ -464,7 +459,7 @@ public class SVDTest extends TestUtil { parms._nv = 5; parms._keep_u = true; parms._transform = DataInfo.TransformType.DEMEAN; - parms._svd_method = SVDParameters.Method.Probabilistic; + parms._svd_method = SVDParameters.Method.Randomized; parms._impute_missing = true; parms._max_iterations = 20; @@ -513,7 +508,7 @@ public class SVDTest extends TestUtil { parms._nv = 8; parms._only_v = false; parms._keep_u = true; - parms._svd_method = SVDParameters.Method.Probabilistic; + parms._svd_method = SVDParameters.Method.Randomized; parms._impute_missing = true; parms._max_iterations = 20; diff --git a/h2o-r/h2o-package/R/svd.R b/h2o-r/h2o-package/R/svd.R index fc077d67d4cb..5594bcb2abe5 100644 --- a/h2o-r/h2o-package/R/svd.R +++ b/h2o-r/h2o-package/R/svd.R @@ -19,6 +19,11 @@ #' column; "DESCALE" for dividing by the standard deviation of each #' column; "STANDARDIZE" for demeaning and descaling; and "NORMALIZE" #' for demeaning and dividing each column by its range (max - min). +#' @param svd_method A character string that indicates how SVD should be calculated. +#' Possible values are "GramSVD": distributed computation of the Gram matrix +#' followed by a local SVD using the JAMA package, "Power": computation of +#' the SVD using the power iteration method, "Randomized": approximate SVD +#' by projecting onto a random subspace (see references). #' @param seed (Optional) Random seed used to initialize the right singular vectors #' at the beginning of each power method iteration. #' @param use_all_factor_levels (Optional) A logical value indicating whether all @@ -26,6 +31,7 @@ #' If FALSE, the indicator column corresponding to the first factor level #' of every categorical variable will be dropped. Defaults to TRUE. #' @return Returns an object of class \linkS4class{H2ODimReductionModel}. +#' @references N. Halko, P.G. Martinsson, J.A. Tropp. {Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions}[http://arxiv.org/abs/0909.4061]. SIAM Rev., Survey and Review section, Vol. 53, num. 2, pp. 217-288, June 2011. #' @examples #' library(h2o) #' localH2O <- h2o.init() @@ -38,6 +44,7 @@ h2o.svd <- function(training_frame, x, nv, destination_key, # h2o generates its own default parameters max_iterations = 1000, transform = "NONE", + svd_method = c("GramSVD", "Power", "Randomized"), seed, use_all_factor_levels) { @@ -69,6 +76,8 @@ h2o.svd <- function(training_frame, x, nv, parms$max_iterations <- max_iterations if(!missing(transform)) parms$transform <- transform + if(!missing(svd_method)) + parms$svd_method <- svd_method if(!missing(seed)) parms$seed <- seed if(!missing(use_all_factor_levels))