Skip to content

Commit

Permalink
[7.x][ML] Add num_top_feature_importance_values param to regression a…
Browse files Browse the repository at this point in the history
…nd classi… (#50914) (#50976)

Adds a new parameter, num_top_feature_importance_values, to regression and classification
that enables computation of importance for the most important features. The computation of
importance is based on the SHAP (SHapley Additive exPlanations) method.

Backport of #50914
  • Loading branch information
dimitris-athanasiou committed Jan 14, 2020
1 parent 0178c7c commit 1d8cb3c
Show file tree
Hide file tree
Showing 19 changed files with 266 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public static Builder builder(String dependentVariable) {
static final ParseField ETA = new ParseField("eta");
static final ParseField MAXIMUM_NUMBER_TREES = new ParseField("maximum_number_trees");
static final ParseField FEATURE_BAG_FRACTION = new ParseField("feature_bag_fraction");
static final ParseField NUM_TOP_FEATURE_IMPORTANCE_VALUES = new ParseField("num_top_feature_importance_values");
static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name");
static final ParseField TRAINING_PERCENT = new ParseField("training_percent");
static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes");
Expand All @@ -62,10 +63,11 @@ public static Builder builder(String dependentVariable) {
(Double) a[3],
(Integer) a[4],
(Double) a[5],
(String) a[6],
(Double) a[7],
(Integer) a[8],
(Long) a[9]));
(Integer) a[6],
(String) a[7],
(Double) a[8],
(Integer) a[9],
(Long) a[10]));

static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE);
Expand All @@ -74,6 +76,7 @@ public static Builder builder(String dependentVariable) {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), ETA);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), MAXIMUM_NUMBER_TREES);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), FEATURE_BAG_FRACTION);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), NUM_TOP_FEATURE_IMPORTANCE_VALUES);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), NUM_TOP_CLASSES);
Expand All @@ -86,20 +89,23 @@ public static Builder builder(String dependentVariable) {
private final Double eta;
private final Integer maximumNumberTrees;
private final Double featureBagFraction;
private final Integer numTopFeatureImportanceValues;
private final String predictionFieldName;
private final Double trainingPercent;
private final Integer numTopClasses;
private final Long randomizeSeed;

private Classification(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta,
@Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName,
@Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction,
@Nullable Integer numTopFeatureImportanceValues, @Nullable String predictionFieldName,
@Nullable Double trainingPercent, @Nullable Integer numTopClasses, @Nullable Long randomizeSeed) {
this.dependentVariable = Objects.requireNonNull(dependentVariable);
this.lambda = lambda;
this.gamma = gamma;
this.eta = eta;
this.maximumNumberTrees = maximumNumberTrees;
this.featureBagFraction = featureBagFraction;
this.numTopFeatureImportanceValues = numTopFeatureImportanceValues;
this.predictionFieldName = predictionFieldName;
this.trainingPercent = trainingPercent;
this.numTopClasses = numTopClasses;
Expand Down Expand Up @@ -135,6 +141,10 @@ public Double getFeatureBagFraction() {
return featureBagFraction;
}

/**
 * Returns the number of top feature importance values configured for this classification.
 *
 * @return the configured value, or {@code null} if none was set
 */
public Integer getNumTopFeatureImportanceValues() {
    return this.numTopFeatureImportanceValues;
}

/**
 * Returns the prediction field name configured for this classification.
 *
 * @return the configured name, or {@code null} if none was set
 */
public String getPredictionFieldName() {
    return this.predictionFieldName;
}
Expand Down Expand Up @@ -170,6 +180,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (featureBagFraction != null) {
builder.field(FEATURE_BAG_FRACTION.getPreferredName(), featureBagFraction);
}
if (numTopFeatureImportanceValues != null) {
builder.field(NUM_TOP_FEATURE_IMPORTANCE_VALUES.getPreferredName(), numTopFeatureImportanceValues);
}
if (predictionFieldName != null) {
builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName);
}
Expand All @@ -188,8 +201,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws

@Override
public int hashCode() {
return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName,
trainingPercent, randomizeSeed, numTopClasses);
return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, numTopFeatureImportanceValues,
predictionFieldName, trainingPercent, randomizeSeed, numTopClasses);
}

@Override
Expand All @@ -203,6 +216,7 @@ public boolean equals(Object o) {
&& Objects.equals(eta, that.eta)
&& Objects.equals(maximumNumberTrees, that.maximumNumberTrees)
&& Objects.equals(featureBagFraction, that.featureBagFraction)
&& Objects.equals(numTopFeatureImportanceValues, that.numTopFeatureImportanceValues)
&& Objects.equals(predictionFieldName, that.predictionFieldName)
&& Objects.equals(trainingPercent, that.trainingPercent)
&& Objects.equals(randomizeSeed, that.randomizeSeed)
Expand All @@ -221,6 +235,7 @@ public static class Builder {
private Double eta;
private Integer maximumNumberTrees;
private Double featureBagFraction;
private Integer numTopFeatureImportanceValues;
private String predictionFieldName;
private Double trainingPercent;
private Integer numTopClasses;
Expand Down Expand Up @@ -255,6 +270,11 @@ public Builder setFeatureBagFraction(Double featureBagFraction) {
return this;
}

/**
 * Sets the number of top feature importance values that {@code build()} passes to the
 * {@code Classification} it creates.
 *
 * @param numTopFeatureImportanceValues the value to use; may be {@code null} to leave it unset
 * @return this builder, so calls can be chained
 */
public Builder setNumTopFeatureImportanceValues(Integer numTopFeatureImportanceValues) {
this.numTopFeatureImportanceValues = numTopFeatureImportanceValues;
return this;
}

public Builder setPredictionFieldName(String predictionFieldName) {
this.predictionFieldName = predictionFieldName;
return this;
Expand All @@ -276,8 +296,8 @@ public Builder setNumTopClasses(Integer numTopClasses) {
}

public Classification build() {
return new Classification(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName,
trainingPercent, numTopClasses, randomizeSeed);
return new Classification(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction,
numTopFeatureImportanceValues, predictionFieldName, trainingPercent, numTopClasses, randomizeSeed);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public static Builder builder(String dependentVariable) {
static final ParseField ETA = new ParseField("eta");
static final ParseField MAXIMUM_NUMBER_TREES = new ParseField("maximum_number_trees");
static final ParseField FEATURE_BAG_FRACTION = new ParseField("feature_bag_fraction");
static final ParseField NUM_TOP_FEATURE_IMPORTANCE_VALUES = new ParseField("num_top_feature_importance_values");
static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name");
static final ParseField TRAINING_PERCENT = new ParseField("training_percent");
static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed");
Expand All @@ -61,9 +62,10 @@ public static Builder builder(String dependentVariable) {
(Double) a[3],
(Integer) a[4],
(Double) a[5],
(String) a[6],
(Double) a[7],
(Long) a[8]));
(Integer) a[6],
(String) a[7],
(Double) a[8],
(Long) a[9]));

static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE);
Expand All @@ -72,6 +74,7 @@ public static Builder builder(String dependentVariable) {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), ETA);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), MAXIMUM_NUMBER_TREES);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), FEATURE_BAG_FRACTION);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), NUM_TOP_FEATURE_IMPORTANCE_VALUES);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT);
PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED);
Expand All @@ -83,19 +86,22 @@ public static Builder builder(String dependentVariable) {
private final Double eta;
private final Integer maximumNumberTrees;
private final Double featureBagFraction;
private final Integer numTopFeatureImportanceValues;
private final String predictionFieldName;
private final Double trainingPercent;
private final Long randomizeSeed;

private Regression(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta,
@Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName,
private Regression(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta,
@Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction,
@Nullable Integer numTopFeatureImportanceValues, @Nullable String predictionFieldName,
@Nullable Double trainingPercent, @Nullable Long randomizeSeed) {
this.dependentVariable = Objects.requireNonNull(dependentVariable);
this.lambda = lambda;
this.gamma = gamma;
this.eta = eta;
this.maximumNumberTrees = maximumNumberTrees;
this.featureBagFraction = featureBagFraction;
this.numTopFeatureImportanceValues = numTopFeatureImportanceValues;
this.predictionFieldName = predictionFieldName;
this.trainingPercent = trainingPercent;
this.randomizeSeed = randomizeSeed;
Expand Down Expand Up @@ -130,6 +136,10 @@ public Double getFeatureBagFraction() {
return featureBagFraction;
}

/**
 * Returns the number of top feature importance values configured for this regression.
 *
 * @return the configured value, or {@code null} if none was set
 */
public Integer getNumTopFeatureImportanceValues() {
    return this.numTopFeatureImportanceValues;
}

/**
 * Returns the prediction field name configured for this regression.
 *
 * @return the configured name, or {@code null} if none was set
 */
public String getPredictionFieldName() {
    return this.predictionFieldName;
}
Expand Down Expand Up @@ -161,6 +171,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (featureBagFraction != null) {
builder.field(FEATURE_BAG_FRACTION.getPreferredName(), featureBagFraction);
}
if (numTopFeatureImportanceValues != null) {
builder.field(NUM_TOP_FEATURE_IMPORTANCE_VALUES.getPreferredName(), numTopFeatureImportanceValues);
}
if (predictionFieldName != null) {
builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName);
}
Expand All @@ -176,8 +189,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws

@Override
public int hashCode() {
return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName,
trainingPercent, randomizeSeed);
return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, numTopFeatureImportanceValues,
predictionFieldName, trainingPercent, randomizeSeed);
}

@Override
Expand All @@ -191,6 +204,7 @@ public boolean equals(Object o) {
&& Objects.equals(eta, that.eta)
&& Objects.equals(maximumNumberTrees, that.maximumNumberTrees)
&& Objects.equals(featureBagFraction, that.featureBagFraction)
&& Objects.equals(numTopFeatureImportanceValues, that.numTopFeatureImportanceValues)
&& Objects.equals(predictionFieldName, that.predictionFieldName)
&& Objects.equals(trainingPercent, that.trainingPercent)
&& Objects.equals(randomizeSeed, that.randomizeSeed);
Expand All @@ -208,6 +222,7 @@ public static class Builder {
private Double eta;
private Integer maximumNumberTrees;
private Double featureBagFraction;
private Integer numTopFeatureImportanceValues;
private String predictionFieldName;
private Double trainingPercent;
private Long randomizeSeed;
Expand Down Expand Up @@ -241,6 +256,11 @@ public Builder setFeatureBagFraction(Double featureBagFraction) {
return this;
}

/**
 * Sets the number of top feature importance values that {@code build()} passes to the
 * {@code Regression} it creates.
 *
 * @param numTopFeatureImportanceValues the value to use; may be {@code null} to leave it unset
 * @return this builder, so calls can be chained
 */
public Builder setNumTopFeatureImportanceValues(Integer numTopFeatureImportanceValues) {
this.numTopFeatureImportanceValues = numTopFeatureImportanceValues;
return this;
}

public Builder setPredictionFieldName(String predictionFieldName) {
this.predictionFieldName = predictionFieldName;
return this;
Expand All @@ -257,8 +277,8 @@ public Builder setRandomizeSeed(Long randomizeSeed) {
}

public Regression build() {
return new Regression(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName,
trainingPercent, randomizeSeed);
return new Regression(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction,
numTopFeatureImportanceValues, predictionFieldName, trainingPercent, randomizeSeed);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,12 @@ public void testPutDataFrameAnalyticsConfig_GivenRegression() throws Exception {
.setPredictionFieldName("my_dependent_variable_prediction")
.setTrainingPercent(80.0)
.setRandomizeSeed(42L)
.setLambda(1.0)
.setGamma(1.0)
.setEta(1.0)
.setMaximumNumberTrees(10)
.setFeatureBagFraction(0.5)
.setNumTopFeatureImportanceValues(3)
.build())
.setDescription("this is a regression")
.build();
Expand Down Expand Up @@ -1361,6 +1367,12 @@ public void testPutDataFrameAnalyticsConfig_GivenClassification() throws Excepti
.setTrainingPercent(80.0)
.setRandomizeSeed(42L)
.setNumTopClasses(1)
.setLambda(1.0)
.setGamma(1.0)
.setEta(1.0)
.setMaximumNumberTrees(10)
.setFeatureBagFraction(0.5)
.setNumTopFeatureImportanceValues(3)
.build())
.setDescription("this is a classification")
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2975,10 +2975,11 @@ public void testPutDataFrameAnalytics() throws Exception {
.setEta(5.5) // <4>
.setMaximumNumberTrees(50) // <5>
.setFeatureBagFraction(0.4) // <6>
.setPredictionFieldName("my_prediction_field_name") // <7>
.setTrainingPercent(50.0) // <8>
.setRandomizeSeed(1234L) // <9>
.setNumTopClasses(1) // <10>
.setNumTopFeatureImportanceValues(3) // <7>
.setPredictionFieldName("my_prediction_field_name") // <8>
.setTrainingPercent(50.0) // <9>
.setRandomizeSeed(1234L) // <10>
.setNumTopClasses(1) // <11>
.build();
// end::put-data-frame-analytics-classification

Expand All @@ -2989,9 +2990,10 @@ public void testPutDataFrameAnalytics() throws Exception {
.setEta(5.5) // <4>
.setMaximumNumberTrees(50) // <5>
.setFeatureBagFraction(0.4) // <6>
.setPredictionFieldName("my_prediction_field_name") // <7>
.setTrainingPercent(50.0) // <8>
.setRandomizeSeed(1234L) // <9>
.setNumTopFeatureImportanceValues(3) // <7>
.setPredictionFieldName("my_prediction_field_name") // <8>
.setTrainingPercent(50.0) // <9>
.setRandomizeSeed(1234L) // <10>
.build();
// end::put-data-frame-analytics-regression

Expand Down Expand Up @@ -3670,7 +3672,7 @@ public void testPutTrainedModel() throws Exception {
}
{
PutTrainedModelRequest request = new PutTrainedModelRequest(trainedModelConfig);

// tag::put-trained-model-execute-listener
ActionListener<PutTrainedModelResponse> listener = new ActionListener<PutTrainedModelResponse>() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public static Classification randomClassification() {
.setEta(randomBoolean() ? null : randomDoubleBetween(0.001, 1.0, true))
.setMaximumNumberTrees(randomBoolean() ? null : randomIntBetween(1, 2000))
.setFeatureBagFraction(randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, false))
.setNumTopFeatureImportanceValues(randomBoolean() ? null : randomIntBetween(0, Integer.MAX_VALUE))
.setPredictionFieldName(randomBoolean() ? null : randomAlphaOfLength(10))
.setTrainingPercent(randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true))
.setRandomizeSeed(randomBoolean() ? null : randomLong())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public static Regression randomRegression() {
.setEta(randomBoolean() ? null : randomDoubleBetween(0.001, 1.0, true))
.setMaximumNumberTrees(randomBoolean() ? null : randomIntBetween(1, 2000))
.setFeatureBagFraction(randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, false))
.setNumTopFeatureImportanceValues(randomBoolean() ? null : randomIntBetween(0, Integer.MAX_VALUE))
.setPredictionFieldName(randomBoolean() ? null : randomAlphaOfLength(10))
.setTrainingPercent(randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true))
.build();
Expand Down

0 comments on commit 1d8cb3c

Please sign in to comment.