[SW-1364] Rename SVM to SparkSVM #1284

Merged · 3 commits · Jun 19, 2019
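At a glance, downstream code only needs its imports and class references updated. A minimal sketch of the renamed API surface, assembled from the hunks below (`sc` and `trainFrame` are assumed to exist, as in SparkSVMDemo):

    import org.apache.spark.h2o.H2OContext
    import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}

    // Before this PR: import ...svm.{SVM, SVMParameters}
    val h2oContext = H2OContext.getOrCreate(sc) // sc: an existing SparkContext

    val parms = new SparkSVMParameters          // was: new SVMParameters
    parms._train = trainFrame.key               // trainFrame: an existing H2OFrame
    parms._response_column = "label"

    val svm = new SparkSVM(parms, h2oContext)   // was: new SVM(parms, h2oContext)
    val model = svm.trainModel.get              // blocks until training finishes
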
@@ -86,8 +86,6 @@ object AmazonFineFood extends SparkContextSupport with SparkSessionSupport with
// Cleanup
reviews.delete()

-// RUN GLM or SVM
-
// Create a predictor function
println("DONE")
}
@@ -20,15 +20,15 @@ import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.{SVM, SVMParameters}
+import org.apache.spark.ml.spark.models.svm.{SparkSVM, SparkSVMParameters}
import org.apache.spark.sql.SparkSession
import water.fvec.H2OFrame
import water.support.SparkContextSupport

object SparkSVMDemo extends SparkContextSupport {

def main(args: Array[String]) {
-val conf = configure("Sparkling Water: Spark SVM demo.")
+val conf = configure("Sparkling Water: SparkSVM demo.")
val sc = new SparkContext(conf)

val h2oContext = H2OContext.getOrCreate(sc)
@@ -43,11 +43,11 @@ object SparkSVMDemo extends SparkContextSupport {
breastCancerData.update()

// Configure Deep Learning algorithm
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = breastCancerData.key
parms._response_column = "label"

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

val svmModel = svm.trainModel.get

@@ -28,12 +28,12 @@
import water.fvec.Frame;

// Seems like this has to be in Java since H2O's frameworks uses reflection's getFields...
-// I probably could mix Java and Scala here, leave SVMParametersV3 with fields as Java
-// and then make the same Scala class SVMParametersV3 which extends it but not sure if it's worth it...
-public class SVMV3 extends ModelBuilderSchema<SVM, SVMV3, SVMV3.SVMParametersV3> {
+// I probably could mix Java and Scala here, leave SparkSVMParametersV3 with fields as Java
+// and then make the same Scala class SparkSVMParametersV3 which extends it but not sure if it's worth it...
+public class SparkSVMV3 extends ModelBuilderSchema<SparkSVM, SparkSVMV3, SparkSVMV3.SparkSVMParametersV3> {

-public static final class SVMParametersV3 extends
-    ModelParametersSchemaV3<SVMParameters, SVMParametersV3> {
+public static final class SparkSVMParametersV3 extends
+    ModelParametersSchemaV3<SparkSVMParameters, SparkSVMParametersV3> {
public static String[] fields = new String[]{
"model_id",
"training_frame",
@@ -90,7 +90,7 @@ public static final class SVMParametersV3 extends
public MissingValuesHandling missing_values_handling;

@Override
-public SVMParametersV3 fillFromImpl(SVMParameters impl) {
+public SparkSVMParametersV3 fillFromImpl(SparkSVMParameters impl) {
super.fillFromImpl(impl);

if (null != impl._initial_weights) {
@@ -104,7 +104,7 @@ public SVMParametersV3 fillFromImpl(SVMParameters impl) {
}

@Override
-public SVMParameters fillImpl(SVMParameters impl) {
+public SparkSVMParameters fillImpl(SparkSVMParameters impl) {
super.fillImpl(impl);
impl._initial_weights =
null == this.initial_weights_frame ? null : Key.<Frame>make(this.initial_weights_frame.name);
@@ -3,7 +3,7 @@ package org.apache.spark.ml.spark.models.svm
import hex.ModelMojoWriter
import org.apache.spark.ml.spark.models.MissingValuesHandling

-class SVMMojoWriter(svmModel: SVMModel) extends ModelMojoWriter[SVMModel, SVMParameters, SVMModel.SVMOutput](svmModel) {
+class SVMMojoWriter(svmModel: SparkSVMModel) extends ModelMojoWriter[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](svmModel) {

def this() {
this(null)
@@ -23,7 +23,7 @@
import org.apache.spark.ml.spark.ProgressListener;
import org.apache.spark.ml.FrameMLUtils;
import org.apache.spark.ml.spark.models.MissingValuesHandling;
-import org.apache.spark.ml.spark.models.svm.SVMModel.SVMOutput;
+import org.apache.spark.ml.spark.models.svm.SparkSVMModel.SparkSVMOutput;
import org.apache.spark.mllib.classification.SVMWithSGD;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
@@ -45,16 +45,16 @@

import static scala.collection.JavaConversions.*;

-public class SVM extends ModelBuilder<SVMModel, SVMParameters, SVMOutput> {
+public class SparkSVM extends ModelBuilder<SparkSVMModel, SparkSVMParameters, SparkSVMOutput> {

transient private final H2OContext hc;

-public SVM(boolean startup_once, H2OContext hc) {
-  super(new SVMParameters(), startup_once);
+public SparkSVM(boolean startup_once, H2OContext hc) {
+  super(new SparkSVMParameters(), startup_once);
this.hc = hc;
}

-public SVM(SVMParameters parms, H2OContext hc) {
+public SparkSVM(SparkSVMParameters parms, H2OContext hc) {
super(parms);
init(false);
this.hc = hc;
@@ -115,7 +115,7 @@ public void init(boolean expensive) {
for (int i = 0; i < _train.vecs().length; i++) {
Vec vec = _train.vec(i);
if (!ignoredCols.contains(_train.name(i)) && !(vec.isNumeric() || vec.isCategorical())) {
error("_train", "SVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
error("_train", "SparkSVM supports only frames with numeric/categorical values (except for result column). But a " + vec.get_type_str() + " was found.");
}
}

@@ -127,19 +127,19 @@ public void init(boolean expensive) {
String[] responseDomains = responseDomains();
if (null == responseDomains) {
if (!(Double.isNaN(_parms._threshold))) {
error("_threshold", "Threshold cannot be set for regression SVM. Set the threshold to NaN or modify the response column to an enum.");
error("_threshold", "Threshold cannot be set for regression SparkSVM. Set the threshold to NaN or modify the response column to an enum.");
}

if (!_train.vec(_parms._response_column).isNumeric()) {
error("_response_column", "Regression SVM requires the response column type to be numeric.");
error("_response_column", "Regression SparkSVM requires the response column type to be numeric.");
}
} else {
if (Double.isNaN(_parms._threshold)) {
error("_threshold", "Threshold has to be set for binomial SVM. Set the threshold to a numeric value or change the response column type.");
error("_threshold", "Threshold has to be set for binomial SparkSVM. Set the threshold to a numeric value or change the response column type.");
}

if (responseDomains.length != 2) {
error("_response_column", "SVM requires the response column's domain to be of size 2.");
error("_response_column", "SparkSVM requires the response column's domain to be of size 2.");
}
}
}
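The two branches above pin down the only valid threshold configurations. A minimal sketch, using the parameter fields referenced in this hunk (the column names are hypothetical):

    import org.apache.spark.ml.spark.models.svm.SparkSVMParameters

    // Binomial: two-level categorical response and an explicit threshold.
    val binomial = new SparkSVMParameters
    binomial._response_column = "label"    // enum with exactly two levels
    binomial._threshold = 0.0              // any numeric value, not NaN

    // Regression: numeric response and the threshold left at NaN.
    val regression = new SparkSVMParameters
    regression._response_column = "target" // numeric column (hypothetical name)
    regression._threshold = Double.NaN
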
@@ -172,7 +172,7 @@ public void computeImpl() {
init(true);

// The model to be built
-SVMModel model = new SVMModel(dest(), _parms, new SVMModel.SVMOutput(SVM.this));
+SparkSVMModel model = new SparkSVMModel(dest(), _parms, new SparkSVMModel.SparkSVMOutput(SparkSVM.this));
try {
model.delete_and_lock(_job);

@@ -5,15 +5,15 @@
import water.Key;
import water.fvec.Frame;

-public class SVMParameters extends Model.Parameters {
+public class SparkSVMParameters extends Model.Parameters {
@Override
-public String algoName() { return "SVM"; }
+public String algoName() { return "SparkSVM"; }

@Override
public String fullName() { return "Support Vector Machine (*Spark*)"; }

@Override
-public String javaName() { return SVMModel.class.getName(); }
+public String javaName() { return SparkSVMModel.class.getName(); }

@Override
public long progressUnits() { return _max_iterations; }
@@ -38,7 +38,7 @@ public final Frame initialWeights() {
public Key<Frame> _initial_weights = null;
public MissingValuesHandling _missing_values_handling = MissingValuesHandling.MeanImputation;

-public void validate(SVM svm) {
+public void validate(SparkSVM svm) {
if (_max_iterations < 0 || _max_iterations > 1e6) {
svm.error("_max_iterations", " max_iterations must be between 0 and 1e6");
}
8 changes: 4 additions & 4 deletions ml/src/main/resources/META-INF/services/water.api.Schema
@@ -1,4 +1,4 @@
-hex.schemas.SVMModelV3
-hex.schemas.SVMModelV3$SVMModelOutputV3
-hex.schemas.SVMV3
-hex.schemas.SVMV3$SVMParametersV3
+hex.schemas.SparkSVMModelV3
+hex.schemas.SparkSVMModelV3$SparkSVMModelOutputV3
+hex.schemas.SparkSVMV3
+hex.schemas.SparkSVMV3$SparkSVMParametersV3
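This services file is a standard java.util.ServiceLoader registry, which is why the rename must be mirrored here for the new schema classes to be discoverable. A hedged sketch of enumerating such a registry (assuming the plain ServiceLoader convention; H2O may wrap this in its own classpath scanning):

    import java.util.ServiceLoader
    import scala.collection.JavaConverters._

    // Each line of META-INF/services/water.api.Schema names a schema class;
    // ServiceLoader instantiates them all via their no-arg constructors.
    val schemas = ServiceLoader.load(classOf[water.api.Schema[_, _]])
    schemas.asScala.foreach(s => println(s.getClass.getName))
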
6 changes: 3 additions & 3 deletions ml/src/main/scala/hex/SVMModelRestAPI.scala
@@ -18,15 +18,15 @@
package hex

import org.apache.spark.h2o.H2OContext
-import org.apache.spark.ml.spark.models.svm.SVM
+import org.apache.spark.ml.spark.models.svm.SparkSVM
import water.api._

class SVMModelRestAPI extends RestApi {

override def name: String = "SVM Model REST API"
override def name: String = "SparkSVM Model REST API"

override def registerEndpoints(hc: H2OContext, context: RestApiContext): Unit = {
-val models = Seq(new SVM(true, hc))
+val models = Seq(new SparkSVM(true, hc))

for (algo <- models) {
val base: String = algo.getClass.getSimpleName
@@ -17,27 +17,27 @@

package hex.schemas

-import SVMV3.SVMParametersV3
-import hex.schemas.SVMModelV3.SVMModelOutputV3
-import org.apache.spark.ml.spark.models.svm.{SVMModel, SVMParameters}
+import SparkSVMV3.SparkSVMParametersV3
+import hex.schemas.SparkSVMModelV3.SparkSVMModelOutputV3
+import org.apache.spark.ml.spark.models.svm.{SparkSVMModel, SparkSVMParameters}
import water.api.schemas3.{ModelOutputSchemaV3, ModelSchemaV3}
import water.api.API

-class SVMModelV3 extends ModelSchemaV3[SVMModel,
-  SVMModelV3,
-  SVMParameters,
-  SVMParametersV3,
-  SVMModel.SVMOutput,
-  SVMModelV3.SVMModelOutputV3] {
+class SparkSVMModelV3 extends ModelSchemaV3[SparkSVMModel,
+  SparkSVMModelV3,
+  SparkSVMParameters,
+  SparkSVMParametersV3,
+  SparkSVMModel.SparkSVMOutput,
+  SparkSVMModelV3.SparkSVMModelOutputV3] {

-override def createParametersSchema(): SVMParametersV3 = { new SVMParametersV3() }
-override def createOutputSchema(): SVMModelOutputV3 = { new SVMModelOutputV3() }
+override def createParametersSchema(): SparkSVMParametersV3 = { new SparkSVMParametersV3() }
+override def createOutputSchema(): SparkSVMModelOutputV3 = { new SparkSVMModelOutputV3() }

}

-object SVMModelV3 {
+object SparkSVMModelV3 {

-final class SVMModelOutputV3 extends ModelOutputSchemaV3[SVMModel.SVMOutput, SVMModelOutputV3] {
+final class SparkSVMModelOutputV3 extends ModelOutputSchemaV3[SparkSVMModel.SparkSVMOutput, SparkSVMModelOutputV3] {
// Output fields
@API(help = "Iterations executed") var iterations: Int = 0
@API(help = "Interceptor") var interceptor: Double = 0
@@ -26,9 +26,9 @@ import water.codegen.CodeGeneratorPipeline
import water.util.{JCodeGen, SBPrintStream}
import water.{H2O, Key}

-object SVMModel {
+object SparkSVMModel {

-class SVMOutput(val b: SVM) extends Model.Output(b) {
+class SparkSVMOutput(val b: SparkSVM) extends Model.Output(b) {
var interceptor: Double = .0
var iterations: Int = 0
var weights: Array[Double] = _
@@ -37,10 +37,10 @@ object SVMModel {

}

-class SVMModel private[svm](val selfKey: Key[SVMModel],
-  val parms: SVMParameters,
-  val output: SVMModel.SVMOutput)
-  extends Model[SVMModel, SVMParameters, SVMModel.SVMOutput](selfKey, parms, output) {
+class SparkSVMModel private[svm](val selfKey: Key[SparkSVMModel],
+  val parms: SparkSVMParameters,
+  val output: SparkSVMModel.SparkSVMOutput)
+  extends Model[SparkSVMModel, SparkSVMParameters, SparkSVMModel.SparkSVMOutput](selfKey, parms, output) {

override protected def toJavaCheckTooBig: Boolean = output.weights.length > 10000

@@ -74,13 +74,13 @@ class SVMModel private[svm](val selfKey: Key[SVMModel],
preds(0) = pred
} else { // Binomial
if (pred > _parms._threshold) {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 or respectively to 1
preds(2) = 1
preds(1) = 0
preds(0) = 1 // final class, either 1 or 0
} else {
-// the probability of first and second class, since SVM does not give us probabilities, we assign
+// the probability of first and second class, since SparkSVM does not give us probabilities, we assign
// the probabilities to 0 or respectively to 1
preds(2) = 0
preds(1) = 1
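As the comments above note, the scorer emits hard 0/1 pseudo-probabilities rather than calibrated ones. A small sketch of reading the output array under that layout (both helper names are hypothetical):

    // Layout of the scorer's output array for the binomial case:
    //   preds(0)           - final class (0 or 1)
    //   preds(1), preds(2) - per-class "probabilities", pinned to 0.0 or 1.0
    def predictedLabel(preds: Array[Double]): Int = preds(0).toInt
    def classProbability(preds: Array[Double], cls: Int): Double = preds(cls + 1)
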
@@ -29,7 +29,7 @@ import water.support.H2OFrameSupport
import scala.util.Random

@RunWith(classOf[JUnitRunner])
-class SVMModelTest extends FunSuite with SharedH2OTestContext {
+class SparkSVMModelTest extends FunSuite with SharedH2OTestContext {

override def createSparkContext: SparkContext = new SparkContext("local[*]", "test-local", conf = defaultSparkConf)

@@ -56,15 +56,15 @@ class SVMModelTest extends FunSuite with SharedH2OTestContext {
val weightsFrame = hc.asH2OFrame(weightsDF, "weights")

// Learning parameters
-val parms = new SVMParameters
+val parms = new SparkSVMParameters
parms._train = trainFrame
parms._response_column = "Label"
parms._initial_weights = weightsFrame

-val svm = new SVM(parms, h2oContext)
+val svm = new SparkSVM(parms, h2oContext)

// Train model
-val h2oSVMModel: SVMModel = svm.trainModel.get
+val h2oSVMModel: SparkSVMModel = svm.trainModel.get

val sparkSVMModel = new classification.SVMModel(
Vectors.dense(h2oSVMModel.output.weights),
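The test then rebuilds Spark MLlib's own SVMModel from the trained weights; the diff is truncated here, so the following completion is only a hedged sketch, assuming MLlib's (weights, intercept) constructor and that output.interceptor carries the intercept:

    import org.apache.spark.mllib.classification
    import org.apache.spark.mllib.linalg.Vectors

    val sparkSVMModel = new classification.SVMModel(
      Vectors.dense(h2oSVMModel.output.weights),
      h2oSVMModel.output.interceptor)

    // Both models should agree on the raw decision for any row.
    val row = Vectors.dense(Array.fill(h2oSVMModel.output.weights.length)(0.5))
    println(sparkSVMModel.predict(row)) // compare with the H2O model's prediction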