[SW-1364] Rename SVM to SparkSVM #1284

Merged · 3 commits · Jun 19, 2019
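At a glance, downstream code only needs its imports and class references updated. A minimal sketch of the renamed API surface, assembled from the hunks below (`sc` and `trainFrame` are assumed to exist, as in SparkSVMDemo):

    import org.apache.spark.h2o.H2OContext
    import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}

    // Before this PR: import ...svm.{SVM, SVMParameters}
    val h2oContext = H2OContext.getOrCreate(sc) // sc: an existing SparkContext

    val parms = new SparkSVMParameters          // was: new SVMParameters
    parms._train = trainFrame.key               // trainFrame: an existing H2OFrame
    parms._response_column = "label"

    val svm = new SparkSVM(parms, h2oContext)   // was: new SVM(parms, h2oContext)
    val model = svm.trainModel.get              // blocks until training finishes
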
@@ -86,8 +86,6 @@ object AmazonFineFood extends SparkContextSupport with SparkSessionSupport with
// Cleanup
reviews.delete()

-// RUN GLM or SVM
-
// Create a predictor function
println("DONE")
}
@@ -20,15 +20,15 @@ import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.{SVM, SVMParameters}
+import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}
import org.apache.spark.sql.SparkSession
import water.fvec.H2OFrame
import water.support.SparkContextSupport

object SparkSVMDemo extends SparkContextSupport {

def main(args: Array[String]) {
-val conf = configure("Sparkling Water: Spark SVM demo.")
+val conf = configure("Sparkling Water: SparkSVM demo.")
val sc = new SparkContext(conf)

val h2oContext = H2OContext.getOrCreate(sc)
@@ -43,11 +43,11 @@ object SparkSVMDemo extends SparkContextSupport {
breastCancerData.update()

// Configure Deep Learning algorithm
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = breastCancerData.key
parms._response_column = "label"

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

val svmModel = svm.trainModel.get

@@ -28,12 +28,12 @@
import water.fvec.Frame;

// Seems like this has to be in Java since H2O's frameworks uses reflection's getFields...
-// I probably could mix Java and Scala here, leave SVMParametersV3 with fields as Java
-// and then make the same Scala class SVMParametersV3 which extends it but not sure if it's worth it...
-public class SVMV3 extends ModelBuilderSchema<SVM, SVMV3, SVMV3.SVMParametersV3> {
+// I probably could mix Java and Scala here, leave SparkSVMParametersV3 with fields as Java
+// and then make the same Scala class SparkSVMParametersV3 which extends it but not sure if it's worth it...
+public class SparkSVMV3 extends ModelBuilderSchema<SparkSVM, SparkSVMV3, SparkSVMV3.SparkSVMParametersV3> {

-public static final class SVMParametersV3 extends
-    ModelParametersSchemaV3<SVMParameters, SVMParametersV3> {
+public static final class SparkSVMParametersV3 extends
+    ModelParametersSchemaV3<SparkSVMParameters, SparkSVMParametersV3> {
public static String[] fields = new String[]{
"model_id",
"training_frame",
@@ -90,7 +90,7 @@ public static final class SVMParametersV3 extends
public MissingValuesHandling missing_values_handling;

@Override
-public SVMParametersV3 fillFromImpl(SVMParameters impl) {
+public SparkSVMParametersV3 fillFromImpl(SparkSVMParameters impl) {
super.fillFromImpl(impl);

if (null != impl._initial_weights) {
@@ -104,7 +104,7 @@ public SVMParametersV3 fillFromImpl(SVMParameters impl) {
}

@Override
-public SVMParameters fillImpl(SVMParameters impl) {
+public SparkSVMParameters fillImpl(SparkSVMParameters impl) {
super.fillImpl(impl);
impl._initial_weights =
null == this.initial_weights_frame ? null : Key.<Frame>make(this.initial_weights_frame.name);
@@ -3,7 +3,7 @@ package org.apache.spark.ml.spark.models.svm
import hex.ModelMojoWriter
import org.apache.spark.ml.spark.models.MissingValuesHandling

-class SVMMojoWriter(svmModel: SVMModel) extends ModelMojoWriter[SVMModel, SVMParameters, SVMModel.SVMOutput](svmModel) {
+class SVMMojoWriter(svmModel: SparkSVMModel) extends ModelMojoWriter[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](svmModel) {

def this() {
this(null)
@@ -23,7 +23,7 @@
import org.apache.spark.ml.spark.ProgressListener;
import org.apache.spark.ml.FrameMLUtils;
import org.apache.spark.ml.spark.models.MissingValuesHandling;
-import org.apache.spark.ml.spark.models.svm.SVMModel.SVMOutput;
+import org.apache.spark.ml.spark.models.svm.SparkSVMModel.SparkSVMOutput;
import org.apache.spark.mllib.classification.SVMWithSGD;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
@@ -45,16 +45,16 @@

import static scala.collection.JavaConversions.*;

-public class SVM extends ModelBuilder<SVMModel, SVMParameters, SVMOutput> {
+public class SparkSVM extends ModelBuilder<SparkSVMModel, SparkSVMParameters, SparkSVMOutput> {

transient private final H2OContext hc;

-public SVM(boolean startup_once, H2OContext hc) {
-  super(new SVMParameters(), startup_once);
+public SparkSVM(boolean startup_once, H2OContext hc) {
+  super(new SparkSVMParameters(), startup_once);
this.hc = hc;
}

-public SVM(SVMParameters parms, H2OContext hc) {
+public SparkSVM(SparkSVMParameters parms, H2OContext hc) {
super(parms);
init(false);
this.hc = hc;
@@ -115,7 +115,7 @@ public void init(boolean expensive) {
for (int i = 0; i < _train.vecs().length; i++) {
Vec vec = _train.vec(i);
if (!ignoredCols.contains(_train.name(i)) && !(vec.isNumeric() || vec.isCategorical())) {
error("_train", "SVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
error("_train", "SparkSVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
}
}

@@ -127,19 +127,19 @@ public void init(boolean expensive) {
String[] responseDomains = responseDomains();
if (null == responseDomains) {
if (!(Double.isNaN(_parms._threshold))) {
error("_threshold", "Threshold cannot be set for regression SVM. Set the threshold to NaN or modify the response column to an enum.");
error("_threshold", "Threshold cannot be set for regression SparkSVM. Set the threshold to NaN or modify the response column to an enum.");
}

if (!_train.vec(_parms._response_column).isNumeric()) {
error("_response_column", "Regression SVM requires the response column type to be numeric.");
error("_response_column", "Regression SparkSVM requires the response column type to be numeric.");
}
} else {
if (Double.isNaN(_parms._threshold)) {
error("_threshold", "Threshold has to be set for binomial SVM. Set the threshold to a numeric value or change the response column type.");
error("_threshold", "Threshold has to be set for binomial SparkSVM. Set the threshold to a numeric value or change the response column type.");
}

if (responseDomains.length != 2) {
error("_response_column", "SVM requires the response column's domain to be of size 2.");
error("_response_column", "SparkSVM requires the response column's domain to be of size 2.");
}
}
}
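The two branches above pin down the only valid threshold configurations. A minimal sketch, using the parameter fields referenced in this hunk (the column names are hypothetical):

    import org.apache.spark.ml.spark.models.svm.SparkSVMParameters

    // Binomial: two-level categorical response and an explicit threshold.
    val binomial = new SparkSVMParameters
    binomial._response_column = "label"    // enum with exactly two levels
    binomial._threshold = 0.0              // any numeric value, not NaN

    // Regression: numeric response and the threshold left at NaN.
    val regression = new SparkSVMParameters
    regression._response_column = "target" // numeric column (hypothetical name)
    regression._threshold = Double.NaN
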
@@ -172,7 +172,7 @@ public void computeImpl() {
init(true);

// The model to be built
-SVMModel model = new SVMModel(dest(), _parms, new SVMModel.SVMOutput(SVM.this));
+SparkSVMModel model = new SparkSVMModel(dest(), _parms, new SparkSVMModel.SparkSVMOutput(SparkSVM.this));
try {
model.delete_and_lock(_job);

@@ -5,15 +5,15 @@
import water.Key;
import water.fvec.Frame;

-public class SVMParameters extends Model.Parameters {
+public class SparkSVMParameters extends Model.Parameters {
@Override
-public String algoName() { return "SVM"; }
+public String algoName() { return "SparkSVM"; }

@Override
public String fullName() { return "Support Vector Machine (*Spark*)"; }

@Override
-public String javaName() { return SVMModel.class.getName(); }
+public String javaName() { return SparkSVMModel.class.getName(); }

@Override
public long progressUnits() { return _max_iterations; }
@@ -38,7 +38,7 @@ public final Frame initialWeights() {
public Key<Frame> _initial_weights = null;
public MissingValuesHandling _missing_values_handling = MissingValuesHandling.MeanImputation;

-public void validate(SVM svm) {
+public void validate(SparkSVM svm) {
if (_max_iterations < 0 || _max_iterations > 1e6) {
svm.error("_max_iterations", " max_iterations must be between 0 and 1e6");
}
8 changes: 4 additions & 4 deletions ml/src/main/resources/META-INF/services/water.api.Schema
@@ -1,4 +1,4 @@
-hex.schemas.SVMModelV3
-hex.schemas.SVMModelV3$SVMModelOutputV3
-hex.schemas.SVMV3
-hex.schemas.SVMV3$SVMParametersV3
+hex.schemas.SparkSVMModelV3
+hex.schemas.SparkSVMModelV3$SparkSVMModelOutputV3
+hex.schemas.SparkSVMV3
+hex.schemas.SparkSVMV3$SparkSVMParametersV3
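This services file is a standard java.util.ServiceLoader registry, which is why the rename must be mirrored here for the new schema classes to be discoverable. A hedged sketch of enumerating such a registry (assuming the plain ServiceLoader convention; H2O may wrap this in its own classpath scanning):

    import java.util.ServiceLoader
    import scala.collection.JavaConverters._

    // Each line of META-INF/services/water.api.Schema names a schema class;
    // ServiceLoader instantiates them all via their no-arg constructors.
    val schemas = ServiceLoader.load(classOf[water.api.Schema[_, _]])
    schemas.asScala.foreach(s => println(s.getClass.getName))
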
6 changes: 3 additions & 3 deletions ml/src/main/scala/hex/SVMModelRestAPI.scala
@@ -18,15 +18,15 @@
package hex

import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.SVM
+import org.apache.spark.ml.spark.models.svm.SparkSVM
import water.api._

class SVMModelRestAPI extends RestApi {

override def name: String = "SVM Model REST API"
override def name: String = "SparkSVM Model REST API"

override def registerEndpoints(hc: H2OContext, context: RestApiContext): Unit = {
-val models = Seq(new SVM(true, hc))
+val models = Seq(new SparkSVM(true, hc))

for (algo <- models) {
val base: String = algo.getClass.getSimpleName
@@ -17,27 +17,27 @@

package hex.schemas

-import SVMV3.SVMParametersV3
-import hex.schemas.SVMModelV3.SVMModelOutputV3
-import org.apache.spark.ml.spark.models.svm.{SVMModel, SVMParameters}
+import SparkSVMV3.SparkSVMParametersV3
+import hex.schemas.SparkSVMModelV3.SparkSVMModelOutputV3
+import org.apache.spark.ml.spark.models.svm.{SparkSVMModel, SparkSVMParameters}
import water.api.schemas3.{ModelOutputSchemaV3, ModelSchemaV3}
import water.api.API

-class SVMModelV3 extends ModelSchemaV3[SVMModel,
-  SVMModelV3,
-  SVMParameters,
-  SVMParametersV3,
-  SVMModel.SVMOutput,
-  SVMModelV3.SVMModelOutputV3] {
+class SparkSVMModelV3 extends ModelSchemaV3[SparkSVMModel,
+  SparkSVMModelV3,
+  SparkSVMParameters,
+  SparkSVMParametersV3,
+  SparkSVMModel.SparkSVMOutput,
+  SparkSVMModelV3.SparkSVMModelOutputV3] {

-override def createParametersSchema(): SVMParametersV3 = { new SVMParametersV3() }
-override def createOutputSchema(): SVMModelOutputV3 = { new SVMModelOutputV3() }
+override def createParametersSchema(): SparkSVMParametersV3 = { new SparkSVMParametersV3() }
+override def createOutputSchema(): SparkSVMModelOutputV3 = { new SparkSVMModelOutputV3() }

}

-object SVMModelV3 {
+object SparkSVMModelV3 {

-final class SVMModelOutputV3 extends ModelOutputSchemaV3[SVMModel.SVMOutput, SVMModelOutputV3] {
+final class SparkSVMModelOutputV3 extends ModelOutputSchemaV3[SparkSVMModel.SparkSVMOutput, SparkSVMModelOutputV3] {
// Output fields
@API(help = "Iterations executed") var iterations: Int = 0
@API(help = "Interceptor") var interceptor: Double = 0
@@ -26,9 +26,9 @@ import water.codegen.CodeGeneratorPipeline
import water.util.{JCodeGen, SBPrintStream}
import water.{H2O, Key}

-object SVMModel {
+object SparkSVMModel {

-class SVMOutput(val b: SVM) extends Model.Output(b) {
+class SparkSVMOutput(val b: SparkSVM) extends Model.Output(b) {
var interceptor: Double = .0
var iterations: Int = 0
var weights: Array[Double] = _
@@ -37,10 +37,10 @@ object SVMModel {

}

-class SVMModel private[svm](val selfKey: Key[SVMModel],
-  val parms: SVMParameters,
-  val output: SVMModel.SVMOutput)
-  extends Model[SVMModel, SVMParameters, SVMModel.SVMOutput](selfKey, parms, output) {
+class SparkSVMModel private[svm](val selfKey: Key[SparkSVMModel],
+  val parms: SparkSVMParameters,
+  val output: SparkSVMModel.SparkSVMOutput)
+  extends Model[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](selfKey, parms, output) {

override protected def toJavaCheckTooBig: Boolean = output.weights.length > 10000

@@ -74,13 +74,13 @@ class SVMModel private[svm](val selfKey: Key[SVMModel],
preds(0) = pred
} else { // Binomial
if (pred > _parms._threshold) {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 or respectively to 1
preds(2) = 1
preds(1) = 0
preds(0) = 1 // final class, either 1 or 0
} else {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 or respectively to 1
preds(2) = 0
preds(1) = 1
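As the comments above note, the scorer emits hard 0/1 pseudo-probabilities rather than calibrated ones. A small sketch of reading the output array under that layout (both helper names are hypothetical):

    // Layout of the scorer's output array for the binomial case:
    //   preds(0)           - final class (0 or 1)
    //   preds(1), preds(2) - per-class "probabilities", pinned to 0.0 or 1.0
    def predictedLabel(preds: Array[Double]): Int = preds(0).toInt
    def classProbability(preds: Array[Double], cls: Int): Double = preds(cls + 1)
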
@@ -29,7 +29,7 @@ import water.support.H2OFrameSupport
import scala.util.Random

@RunWith(classOf[JUnitRunner])
-class SVMModelTest extends FunSuite with SharedH2OTestContext {
+class SparkSVMModelTest extends FunSuite with SharedH2OTestContext {

override def createSparkContext: SparkContext = new SparkContext("local[*]", "test-local", conf = defaultSparkConf)

@@ -56,15 +56,15 @@ class SVMModelTest extends FunSuite with SharedH2OTestContext {
val weightsFrame = hc.asH2OFrame(weightsDF, "weights")

// Learning parameters
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = trainFrame
parms._response_column = "Label"
parms._initial_weights = weightsFrame

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

// Train model
-val h2oSVMModel: SVMModel = svm.trainModel.get
+val h2oSVMModel: SparkSVMModel = svm.trainModel.get

val sparkSVMModel = new classification.SVMModel(
Vectors.dense(h2oSVMModel.output.weights),
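The test then rebuilds Spark MLlib's own SVMModel from the trained weights; the diff is truncated here, so the following completion is only a hedged sketch, assuming MLlib's (weights, intercept) constructor and that output.interceptor carries the intercept:

    import org.apache.spark.mllib.classification
    import org.apache.spark.mllib.linalg.Vectors

    val sparkSVMModel = new classification.SVMModel(
      Vectors.dense(h2oSVMModel.output.weights),
      h2oSVMModel.output.interceptor)

    // Both models should agree on the raw decision for any row.
    val row = Vectors.dense(Array.fill(h2oSVMModel.output.weights.length)(0.5))
    println(sparkSVMModel.predict(row)) // compare with the H2O model's prediction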