[SW-1364] Rename SVM to SparkSVM (#1284)
jakubhava committed Jun 19, 2019
1 parent 3d6705b commit b1fa901
Showing 11 changed files with 60 additions and 62 deletions.
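For downstream callers the rename is mechanical: the SVM/SVMParameters pair becomes SparkSVM/SparkSVMParameters. A minimal Scala usage sketch mirroring the SparkSVMDemo change below (an illustration only; `h2oContext` and a training H2OFrame named `trainFrame` are assumed to already exist):

import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}

// Post-rename usage; h2oContext and trainFrame are assumed, not part of this commit.
val parms = new SparkSVMParameters
parms._train = trainFrame.key
parms._response_column = "label"

val model = new SparkSVM(parms, h2oContext).trainModel.get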
@@ -86,8 +86,6 @@ object AmazonFineFood extends SparkContextSupport with SparkSessionSupport with
// Cleanup
reviews.delete()

-// RUN GLM or SVM
-
// Create a predictor function
println("DONE")
}
@@ -20,15 +20,15 @@ import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.{SVM, SVMParameters}
+import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}
import org.apache.spark.sql.SparkSession
import water.fvec.H2OFrame
import water.support.SparkContextSupport

object SparkSVMDemo extends SparkContextSupport {

def main(args: Array[String]) {
-val conf = configure("Sparkling Water: Spark SVM demo.")
+val conf = configure("Sparkling Water: SparkSVM demo.")
val sc = new SparkContext(conf)

val h2oContext = H2OContext.getOrCreate(sc)
@@ -43,11 +43,11 @@ object SparkSVMDemo extends SparkContextSupport {
breastCancerData.update()

// Configure the SparkSVM algorithm
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = breastCancerData.key
parms._response_column = "label"

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

val svmModel = svm.trainModel.get

@@ -28,12 +28,12 @@
import water.fvec.Frame;

// Seems like this has to be in Java since H2O's framework uses reflection's getFields...
-// I probably could mix Java and Scala here, leave SVMParametersV3 with fields as Java
-// and then make the same Scala class SVMParametersV3 which extends it but not sure if it's worth it...
-public class SVMV3 extends ModelBuilderSchema<SVM, SVMV3, SVMV3.SVMParametersV3> {
+// I probably could mix Java and Scala here, leave SparkSVMParametersV3 with fields as Java
+// and then make the same Scala class SparkSVMParametersV3 which extends it but not sure if it's worth it...
+public class SparkSVMV3 extends ModelBuilderSchema<SparkSVM, SparkSVMV3, SparkSVMV3.SparkSVMParametersV3> {

-public static final class SVMParametersV3 extends
-ModelParametersSchemaV3<SVMParameters, SVMParametersV3> {
+public static final class SparkSVMParametersV3 extends
+ModelParametersSchemaV3<SparkSVMParameters, SparkSVMParametersV3> {
public static String[] fields = new String[]{
"model_id",
"training_frame",
@@ -90,7 +90,7 @@ public static final class SVMParametersV3 extends
public MissingValuesHandling missing_values_handling;

@Override
-public SVMParametersV3 fillFromImpl(SVMParameters impl) {
+public SparkSVMParametersV3 fillFromImpl(SparkSVMParameters impl) {
super.fillFromImpl(impl);

if (null != impl._initial_weights) {
@@ -104,7 +104,7 @@ public SVMParametersV3 fillFromImpl(SVMParameters impl) {
}

@Override
-public SVMParameters fillImpl(SVMParameters impl) {
+public SparkSVMParameters fillImpl(SparkSVMParameters impl) {
super.fillImpl(impl);
impl._initial_weights =
null == this.initial_weights_frame ? null : Key.<Frame>make(this.initial_weights_frame.name);
@@ -3,7 +3,7 @@ package org.apache.spark.ml.spark.models.svm
import hex.ModelMojoWriter
import org.apache.spark.ml.spark.models.MissingValuesHandling

-class SVMMojoWriter(svmModel: SVMModel) extends ModelMojoWriter[SVMModel, SVMParameters, SVMModel.SVMOutput](svmModel) {
+class SVMMojoWriter(svmModel: SparkSVMModel) extends ModelMojoWriter[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](svmModel) {

def this() {
this(null)
@@ -23,7 +23,7 @@
import org.apache.spark.ml.spark.ProgressListener;
import org.apache.spark.ml.FrameMLUtils;
import org.apache.spark.ml.spark.models.MissingValuesHandling;
-import org.apache.spark.ml.spark.models.svm.SVMModel.SVMOutput;
+import org.apache.spark.ml.spark.models.svm.SparkSVMModel.SparkSVMOutput;
import org.apache.spark.mllib.classification.SVMWithSGD;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
@@ -45,16 +45,16 @@

import static scala.collection.JavaConversions.*;

-public class SVM extends ModelBuilder<SVMModel, SVMParameters, SVMOutput> {
+public class SparkSVM extends ModelBuilder<SparkSVMModel, SparkSVMParameters, SparkSVMOutput> {

transient private final H2OContext hc;

-public SVM(boolean startup_once, H2OContext hc) {
-super(new SVMParameters(), startup_once);
+public SparkSVM(boolean startup_once, H2OContext hc) {
+super(new SparkSVMParameters(), startup_once);
this.hc = hc;
}

-public SVM(SVMParameters parms, H2OContext hc) {
+public SparkSVM(SparkSVMParameters parms, H2OContext hc) {
super(parms);
init(false);
this.hc = hc;
@@ -115,7 +115,7 @@ public void init(boolean expensive) {
for (int i = 0; i < _train.vecs().length; i++) {
Vec vec = _train.vec(i);
if (!ignoredCols.contains(_train.name(i)) && !(vec.isNumeric() || vec.isCategorical())) {
error("_train", "SVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
error("_train", "SparkSVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
}
}

@@ -127,19 +127,19 @@ public void init(boolean expensive) {
String[] responseDomains = responseDomains();
if (null == responseDomains) {
if (!(Double.isNaN(_parms._threshold))) {
error("_threshold", "Threshold cannot be set for regression SVM. Set the threshold to NaN or modify the response column to an enum.");
error("_threshold", "Threshold cannot be set for regression SparkSVM. Set the threshold to NaN or modify the response column to an enum.");
}

if (!_train.vec(_parms._response_column).isNumeric()) {
error("_response_column", "Regression SVM requires the response column type to be numeric.");
error("_response_column", "Regression SparkSVM requires the response column type to be numeric.");
}
} else {
if (Double.isNaN(_parms._threshold)) {
error("_threshold", "Threshold has to be set for binomial SVM. Set the threshold to a numeric value or change the response column type.");
error("_threshold", "Threshold has to be set for binomial SparkSVM. Set the threshold to a numeric value or change the response column type.");
}

if (responseDomains.length != 2) {
error("_response_column", "SVM requires the response column's domain to be of size 2.");
error("_response_column", "SparkSVM requires the response column's domain to be of size 2.");
}
}
}
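The two validation branches above admit exactly two configurations. A minimal Scala sketch of each, assuming the parameter fields are set directly as in the demo code (column names are illustrative only):

// Regression: response column is numeric, threshold must stay NaN.
val regression = new SparkSVMParameters
regression._response_column = "price" // hypothetical numeric column
regression._threshold = Double.NaN

// Binomial: response domain has exactly two levels, threshold must be set.
val binomial = new SparkSVMParameters
binomial._response_column = "label" // hypothetical two-level enum column
binomial._threshold = 0.0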
@@ -172,7 +172,7 @@ public void computeImpl() {
init(true);

// The model to be built
-SVMModel model = new SVMModel(dest(), _parms, new SVMModel.SVMOutput(SVM.this));
+SparkSVMModel model = new SparkSVMModel(dest(), _parms, new SparkSVMModel.SparkSVMOutput(SparkSVM.this));
try {
model.delete_and_lock(_job);

@@ -5,15 +5,15 @@
import water.Key;
import water.fvec.Frame;

-public class SVMParameters extends Model.Parameters {
+public class SparkSVMParameters extends Model.Parameters {
@Override
-public String algoName() { return "SVM"; }
+public String algoName() { return "SparkSVM"; }

@Override
public String fullName() { return "Support Vector Machine (*Spark*)"; }

@Override
-public String javaName() { return SVMModel.class.getName(); }
+public String javaName() { return SparkSVMModel.class.getName(); }

@Override
public long progressUnits() { return _max_iterations; }
@@ -38,7 +38,7 @@ public final Frame initialWeights() {
public Key<Frame> _initial_weights = null;
public MissingValuesHandling _missing_values_handling = MissingValuesHandling.MeanImputation;

-public void validate(SVM svm) {
+public void validate(SparkSVM svm) {
if (_max_iterations < 0 || _max_iterations > 1e6) {
svm.error("_max_iterations", " max_iterations must be between 0 and 1e6");
}
8 changes: 4 additions & 4 deletions ml/src/main/resources/META-INF/services/water.api.Schema
@@ -1,4 +1,4 @@
-hex.schemas.SVMModelV3
-hex.schemas.SVMModelV3$SVMModelOutputV3
-hex.schemas.SVMV3
-hex.schemas.SVMV3$SVMParametersV3
+hex.schemas.SparkSVMModelV3
+hex.schemas.SparkSVMModelV3$SparkSVMModelOutputV3
+hex.schemas.SparkSVMV3
+hex.schemas.SparkSVMV3$SparkSVMParametersV3
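These entries are plain service-registration lines: each names a concrete water.api.Schema subclass, so they must track the Java class renames exactly (note the $ separator for the nested parameter/output schemas). Assuming H2O resolves them through the standard java.util.ServiceLoader mechanism (an assumption about H2O internals, not shown in this diff), the registered schemas could be enumerated like this:

import java.util.ServiceLoader
import scala.collection.JavaConverters._

// List every Schema implementation declared in
// META-INF/services/water.api.Schema files on the classpath.
ServiceLoader.load(classOf[water.api.Schema[_, _]]).asScala
  .foreach(s => println(s.getClass.getName))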
6 changes: 3 additions & 3 deletions ml/src/main/scala/hex/SVMModelRestAPI.scala
@@ -18,15 +18,15 @@
package hex

import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.SVM
+import org.apache.spark.ml.spark.models.svm.SparkSVM
import water.api._

class SVMModelRestAPI extends RestApi {

-override def name: String = "SVM Model REST API"
+override def name: String = "SparkSVM Model REST API"

override def registerEndpoints(hc: H2OContext, context: RestApiContext): Unit = {
-val models = Seq(new SVM(true, hc))
+val models = Seq(new SparkSVM(true, hc))

for (algo <- models) {
val base: String = algo.getClass.getSimpleName
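Because registerEndpoints derives the endpoint base from the builder's runtime class, the rename is also user-visible over REST. A small illustration (hypothetical snippet, not part of the commit; `hc` is an existing H2OContext):

// getSimpleName now yields "SparkSVM" instead of "SVM", so routes
// registered from `base` pick up the new algorithm name.
val base: String = new SparkSVM(true, hc).getClass.getSimpleName
assert(base == "SparkSVM")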
@@ -17,27 +17,27 @@

package hex.schemas

-import SVMV3.SVMParametersV3
-import hex.schemas.SVMModelV3.SVMModelOutputV3
-import org.apache.spark.ml.spark.models.svm.{SVMModel, SVMParameters}
+import SparkSVMV3.SparkSVMParametersV3
+import hex.schemas.SparkSVMModelV3.SparkSVMModelOutputV3
+import org.apache.spark.ml.spark.models.svm.{SparkSVMModel, SparkSVMParameters}
import water.api.schemas3.{ModelOutputSchemaV3, ModelSchemaV3}
import water.api.API

-class SVMModelV3 extends ModelSchemaV3[SVMModel,
-SVMModelV3,
-SVMParameters,
-SVMParametersV3,
-SVMModel.SVMOutput,
-SVMModelV3.SVMModelOutputV3] {
+class SparkSVMModelV3 extends ModelSchemaV3[SparkSVMModel,
+SparkSVMModelV3,
+SparkSVMParameters,
+SparkSVMParametersV3,
+SparkSVMModel.SparkSVMOutput,
+SparkSVMModelV3.SparkSVMModelOutputV3] {

+override def createParametersSchema(): SparkSVMParametersV3 = { new SparkSVMParametersV3() }
+override def createOutputSchema(): SparkSVMModelOutputV3 = { new SparkSVMModelOutputV3() }

-override def createParametersSchema(): SVMParametersV3 = { new SVMParametersV3() }
-override def createOutputSchema(): SVMModelOutputV3 = { new SVMModelOutputV3() }

}

-object SVMModelV3 {
+object SparkSVMModelV3 {

-final class SVMModelOutputV3 extends ModelOutputSchemaV3[SVMModel.SVMOutput, SVMModelOutputV3] {
+final class SparkSVMModelOutputV3 extends ModelOutputSchemaV3[SparkSVMModel.SparkSVMOutput, SparkSVMModelOutputV3] {
// Output fields
@API(help = "Iterations executed") var iterations: Int = 0
@API(help = "Interceptor") var interceptor: Double = 0
@@ -26,9 +26,9 @@ import water.codegen.CodeGeneratorPipeline
import water.util.{JCodeGen, SBPrintStream}
import water.{H2O, Key}

-object SVMModel {
+object SparkSVMModel {

-class SVMOutput(val b: SVM) extends Model.Output(b) {
+class SparkSVMOutput(val b: SparkSVM) extends Model.Output(b) {
var interceptor: Double = .0
var iterations: Int = 0
var weights: Array[Double] = _
@@ -37,10 +37,10 @@ object SVMModel {

}

-class SVMModel private[svm](val selfKey: Key[SVMModel],
-val parms: SVMParameters,
-val output: SVMModel.SVMOutput)
-extends Model[SVMModel, SVMParameters, SVMModel.SVMOutput](selfKey, parms, output) {
+class SparkSVMModel private[svm](val selfKey: Key[SparkSVMModel],
+val parms: SparkSVMParameters,
+val output: SparkSVMModel.SparkSVMOutput)
+extends Model[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](selfKey, parms, output) {

override protected def toJavaCheckTooBig: Boolean = output.weights.length > 10000

@@ -74,13 +74,13 @@ class SVMModel private[svm](val selfKey: Key[SVMModel],
preds(0) = pred
} else { // Binomial
if (pred > _parms._threshold) {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 and 1, respectively
preds(2) = 1
preds(1) = 0
preds(0) = 1 // final class, either 1 or 0
} else {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 and 1, respectively
preds(2) = 0
preds(1) = 1
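For binomial models the scorer fills H2O's three-slot prediction array: slot 0 is the predicted class, and slots 1 and 2 are hard 0/1 "probabilities" for the two classes, since the raw SVM margin carries no calibrated probability. A minimal sketch of that convention (hypothetical helper; the final class assignment in the else branch is cut off by the diff and assumed to be 0):

// preds = [predicted class, P(class 0), P(class 1)]
def scoreBinomial(pred: Double, threshold: Double): Array[Double] =
  if (pred > threshold) Array(1.0, 0.0, 1.0) // class 1
  else Array(0.0, 1.0, 0.0) // class 0 (assumed)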
@@ -29,7 +29,7 @@ import water.support.H2OFrameSupport
import scala.util.Random

@RunWith(classOf[JUnitRunner])
-class SVMModelTest extends FunSuite with SharedH2OTestContext {
+class SparkSVMModelTest extends FunSuite with SharedH2OTestContext {

override def createSparkContext: SparkContext = new SparkContext("local[*]", "test-local", conf = defaultSparkConf)

@@ -56,15 +56,15 @@ class SVMModelTest extends FunSuite with SharedH2OTestContext {
val weightsFrame = hc.asH2OFrame(weightsDF, "weights")

// Learning parameters
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = trainFrame
parms._response_column = "Label"
parms._initial_weights = weightsFrame

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

// Train model
-val h2oSVMModel: SVMModel = svm.trainModel.get
+val h2oSVMModel: SparkSVMModel = svm.trainModel.get

val sparkSVMModel = new classification.SVMModel(
Vectors.dense(h2oSVMModel.output.weights),
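The test cross-checks the H2O-trained model against Spark MLlib's own classification.SVMModel built from the same coefficients. The constructor call is truncated here; a plausible completion, assuming MLlib's two-argument SVMModel(weights, intercept) constructor and that output.interceptor holds the intercept:

// Hedged completion of the truncated call above (not verbatim from the commit).
val sparkSVMModel = new classification.SVMModel(
  Vectors.dense(h2oSVMModel.output.weights),
  h2oSVMModel.output.interceptor)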
