Skip to content

Commit

Permalink
PUDBEV-4940 implement review suggestion, add make_metrics runit test,…
Browse files Browse the repository at this point in the history
… remove unused parameter
  • Loading branch information
maurever committed Nov 29, 2021
1 parent 3ae93d3 commit 52186b7
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 38 deletions.
1 change: 0 additions & 1 deletion h2o-algos/src/main/java/hex/schemas/UpliftDRFV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ public static final class UpliftDRFParametersV3 extends SharedTreeV3.SharedTreeP
"custom_metric_func",
"export_checkpoints_dir",
"check_constant_response",
"gainslift_bins",
"treatment_column",
"uplift_metric",
"auuc_type",
Expand Down
7 changes: 7 additions & 0 deletions h2o-algos/src/main/java/hex/tree/DHistogram.java
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,13 @@ void updateHisto(double[] ws, double[] resp, Object cs, double[] ys, double[] pr
updateHisto(ws, resp, (double[]) cs, ys, preds, rows, hi, lo, treatment);
}

/**
 * Updates the histogram from a chunk of rows when no trailing (treatment) array is supplied.
 *
 * Dispatches on the {@code _intOpt} flag: when it is set, {@code cs} is cast to {@code int[]} and
 * handed to {@code updateHistoInt}; otherwise {@code cs} is cast to {@code double[]} and forwarded
 * to the nine-argument {@code updateHisto} overload with {@code null} as its last argument.
 *
 * @param ws    forwarded unchanged to the selected update routine
 * @param resp  forwarded only on the {@code double[]} path
 * @param cs    column data; must be an {@code int[]} when {@code _intOpt} is set, a {@code double[]} otherwise
 * @param ys    forwarded unchanged to the selected update routine
 * @param preds forwarded only on the {@code double[]} path
 * @param rows  forwarded unchanged to the selected update routine
 * @param hi    forwarded unchanged to the selected update routine
 * @param lo    forwarded unchanged to the selected update routine
 */
void updateHisto(double[] ws, double[] resp, Object cs, double[] ys, double[] preds, int[] rows, int hi, int lo) {
  if (!_intOpt) {
    // General path: the final null stands in for the argument the nine-argument overload accepts last.
    updateHisto(ws, resp, (double[]) cs, ys, preds, rows, hi, lo, null);
    return;
  }
  // Integer-optimized path: resp and preds are not passed on.
  updateHistoInt(ws, (int[]) cs, ys, rows, hi, lo);
}

/**
* Update counts in appropriate bins. Not thread safe, assumed to have private copy.
*
Expand Down
4 changes: 2 additions & 2 deletions h2o-algos/src/test/java/hex/tree/DHistogramTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,14 @@ public void testUpdateHistoWithIntOpt() {
SharedTreeModel.SharedTreeParameters.HistogramType.UniformAdaptive, 42L, null, null, false);
histoOpt.init();

histoOpt.updateHisto(weights, null, dataInt, ys, null, rows, N, 0, null);
histoOpt.updateHisto(weights, null, dataInt, ys, null, rows, N, 0);

// optimization OFF
DHistogram histo = new DHistogram("intOpt-off", 1000, 1024, (byte) 1, 0, 1000, false, false, -0.001,
SharedTreeModel.SharedTreeParameters.HistogramType.UniformAdaptive, 42L, null, null, false);
histo.init();

histo.updateHisto(weights, null, data, ys, null, rows, N, 0, null);
histo.updateHisto(weights, null, data, ys, null, rows, N, 0);

assertEquals(histo._min2, histoOpt._min2, 0);
assertEquals(histo._maxIn, histoOpt._maxIn, 0);
Expand Down
2 changes: 1 addition & 1 deletion h2o-core/src/main/java/water/api/ModelMetricsHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ public ModelMetricsMakerSchemaV3 make(int version, ModelMetricsMakerSchemaV3 s)
ModelMetricsRegression mm = ModelMetricsRegression.make(pred.anyVec(), act.anyVec(), weights, s.distribution);
s.model_metrics = new ModelMetricsRegressionV3().fillFromImpl(mm);
} else if (s.domain.length==2) {
if(treatment != null){
if (treatment != null) {
ModelMetricsBinomialUplift mm = ModelMetricsBinomialUplift.make(pred.anyVec(), act.anyVec(), treatment, s.domain, s.auuc_type, s.auuc_nbins);
s.model_metrics = new ModelMetricsBinomialUpliftV3().fillFromImpl(mm);
} else {
Expand Down
9 changes: 5 additions & 4 deletions h2o-core/src/main/java/water/util/ArrayUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -2222,14 +2222,15 @@ public static double[] uniformDistFromArray(double[][] array, long seed) {

/*
* Linearly interpolates the Double.NaN values in the array.
* The interpolation always starts from 0.
* The interpolation always starts from the first item of the array.
* The last element of array cannot be Double.NaN.
*
* @param array input array with Double.NaN values
*/
public static void interpolateLinear(double[] array){
assert array.length > 0 && !Double.isNaN(array[array.length-1]);
if(array.length == 1){
assert array.length > 0 && !Double.isNaN(array[array.length-1]):
"Input array length should be > 0 and the first item should not be NaN";
if (array.length == 1){
return;
}
List<Integer> nonNullIdx = new ArrayList<>();
Expand All @@ -2250,7 +2251,7 @@ public static void interpolateLinear(double[] array){
if(nonNullIdx.size() == 0) return;
double start = Double.NaN, end = Double.NaN, step = Double.NaN, mean = Double.NaN;
for (int i=0; i<array.length; i++) {
// begin always with 0
// always begin with 0
if(i == 0 && Double.isNaN(array[i])) {
start = 0;
end = array[nonNullIdx.get(0)];
Expand Down
20 changes: 0 additions & 20 deletions h2o-py/h2o/estimators/uplift_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def __init__(self,
custom_metric_func=None, # type: Optional[str]
export_checkpoints_dir=None, # type: Optional[str]
check_constant_response=True, # type: bool
gainslift_bins=-1, # type: int
treatment_column="treatment", # type: str
uplift_metric="auto", # type: Literal["auto", "kl", "euclidean", "chi_squared"]
auuc_type="auto", # type: Literal["auto", "qini", "lift", "gain"]
Expand Down Expand Up @@ -175,10 +174,6 @@ def __init__(self,
column being a constant value or not.
Defaults to ``True``.
:type check_constant_response: bool
:param gainslift_bins: Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic
binning.
Defaults to ``-1``.
:type gainslift_bins: int
:param treatment_column: Define the column which will be used for computing uplift gain to select the best split for a
tree. The column has to divide the dataset into treatment (value 1) and control (value 0) groups.
Defaults to ``"treatment"``.
Expand Down Expand Up @@ -228,7 +223,6 @@ def __init__(self,
self.custom_metric_func = custom_metric_func
self.export_checkpoints_dir = export_checkpoints_dir
self.check_constant_response = check_constant_response
self.gainslift_bins = gainslift_bins
self.treatment_column = treatment_column
self.uplift_metric = uplift_metric
self.auuc_type = auuc_type
Expand Down Expand Up @@ -690,20 +684,6 @@ def check_constant_response(self, check_constant_response):
assert_is_type(check_constant_response, None, bool)
self._parms["check_constant_response"] = check_constant_response

@property
def gainslift_bins(self):
"""
Gains/Lift table number of bins. 0 means disabled. Default value -1 means automatic binning.
Type: ``int``, defaults to ``-1``.
"""
# Reads the value back from the estimator's parameter dictionary.
return self._parms.get("gainslift_bins")

@gainslift_bins.setter
def gainslift_bins(self, gainslift_bins):
# Type-check the value (the check is given None and int) before storing it in the parameter dictionary.
assert_is_type(gainslift_bins, None, int)
self._parms["gainslift_bins"] = gainslift_bins

@property
def treatment_column(self):
"""
Expand Down
7 changes: 4 additions & 3 deletions h2o-r/h2o-package/R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -1192,9 +1192,10 @@ h2o.auc <- function(object, train=FALSE, valid=FALSE, xval=FALSE) {
invisible(NULL)
}

#' Retrieve the AUUC
#' Retrieve the default AUUC
#'
#' Retrieves the AUUC value from an \linkS4class{H2OBinomialUpliftMetrics}.
#' Retrieves the AUUC value from an \linkS4class{H2OBinomialUpliftMetrics}. The type of AUUC depends on the auuc_type
#' that was set before training. If you need a specific AUUC type, see the h2o.auuc_table function.
#' If "train" and "valid" parameters are FALSE (default), then the training AUUC value is returned. If more
#' than one parameter is set to TRUE, then a named vector of AUUCs are returned, where the names are "train", "valid".
#'
Expand All @@ -1218,7 +1219,7 @@ h2o.auc <- function(object, train=FALSE, valid=FALSE, xval=FALSE) {
#' h2o.auuc(perf)
#' }
#' @export
h2o.auuc <- function(object, train=FALSE, valid=FALSE, metric=None) {
h2o.auuc <- function(object, train=FALSE, valid=FALSE) {
if( is(object, "H2OModelMetrics") ) return( object@metrics$AUUC )
if( is(object, "H2OModel") ) {
model.parts <- .model.parts(object)
Expand Down
7 changes: 0 additions & 7 deletions h2o-r/h2o-package/R/upliftrandomforest.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
#' @param check_constant_response \code{Logical}. Check if response column is constant. If enabled, then an exception is thrown if the response
#' column is a constant value. If disabled, then model will train regardless of the response column being a
#' constant value or not. Defaults to TRUE.
#' @param gainslift_bins Gains/Lift table number of bins. 0 means disabled.. Default value -1 means automatic binning. Defaults to -1.
#' @param uplift_metric Divergence metric used to find best split when building an uplift tree. Must be one of: "AUTO", "KL",
#' "Euclidean", "ChiSquared". Defaults to AUTO.
#' @param auuc_type AUUC metric used to calculate Area under Uplift. Must be one of: "AUTO", "Qini", "Lift", "Gain". Defaults to
Expand Down Expand Up @@ -103,7 +102,6 @@ h2o.upliftRandomForest <- function(x,
custom_metric_func = NULL,
export_checkpoints_dir = NULL,
check_constant_response = TRUE,
gainslift_bins = -1,
uplift_metric = c("AUTO", "KL", "Euclidean", "ChiSquared"),
auuc_type = c("AUTO", "Qini", "Lift", "Gain"),
auuc_nbins = -1,
Expand Down Expand Up @@ -193,8 +191,6 @@ h2o.upliftRandomForest <- function(x,
parms$export_checkpoints_dir <- export_checkpoints_dir
if (!missing(check_constant_response))
parms$check_constant_response <- check_constant_response
if (!missing(gainslift_bins))
parms$gainslift_bins <- gainslift_bins
if (!missing(uplift_metric))
parms$uplift_metric <- uplift_metric
if (!missing(auuc_type))
Expand Down Expand Up @@ -244,7 +240,6 @@ h2o.upliftRandomForest <- function(x,
custom_metric_func = NULL,
export_checkpoints_dir = NULL,
check_constant_response = TRUE,
gainslift_bins = -1,
uplift_metric = c("AUTO", "KL", "Euclidean", "ChiSquared"),
auuc_type = c("AUTO", "Qini", "Lift", "Gain"),
auuc_nbins = -1,
Expand Down Expand Up @@ -338,8 +333,6 @@ h2o.upliftRandomForest <- function(x,
parms$export_checkpoints_dir <- export_checkpoints_dir
if (!missing(check_constant_response))
parms$check_constant_response <- check_constant_response
if (!missing(gainslift_bins))
parms$gainslift_bins <- gainslift_bins
if (!missing(uplift_metric))
parms$uplift_metric <- uplift_metric
if (!missing(auuc_type))
Expand Down
49 changes: 49 additions & 0 deletions h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Switch the working directory to the one containing this script (taken from the "f" command-line
# argument) so that the relative path in the source() call below resolves.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Load the H2O R test setup script.
source("../../scripts/h2o-r-test-setup.R")

# Checks that uplift binomial metrics built directly from predictions with h2o.make_metrics()
# match the metrics h2o.performance() reports for the same model on the same frame.
test.make_metrics_uplift_binomial <- function() {
    response <- "outcome"
    treatment <- "treatment"
    train <- h2o.importFile(locate("smalldata/uplift/upliftml_train.csv"))
    train$treatment <- as.factor(train$treatment)
    train$outcome <- as.factor(train$outcome)

    # NOTE: seq(0:11) is equivalent to seq_along(0:11), i.e. 1..12, so the predictors are
    # feature_1 .. feature_12 (not feature_0 .. feature_11).
    predictors <- sprintf("feature_%s",seq(0:11))

    model <- h2o.upliftRandomForest(training_frame=train,
                                    x=predictors,
                                    y=response,
                                    ntrees=5,
                                    max_depth=5,
                                    treatment_column=treatment,
                                    min_rows=10,
                                    nbins=100,
                                    seed=1234)
    print(model)

    # Materialize the three single-column frames that h2o.make_metrics() consumes.
    pred <- h2o.assign(h2o.predict(model,train)[,1],"pred")
    actual <- h2o.assign(train[,response],"act")
    treat <- h2o.assign(train[,treatment],"treatment")

    # Pass the treatment frame itself; the original passed the column-name string `treatment`
    # and left `treat` unused.
    m0 <- h2o.make_metrics(pred, actual, treatment=treat)
    print(m0)
    m1 <- h2o.performance(model, train)
    print(m1)

    auuc0 <- h2o.auuc(m0)
    auuc1 <- h2o.auuc(m1)

    auuc_table0 <- h2o.auuc_table(m0)
    auuc_table1 <- h2o.auuc_table(m1)

    expect_true(is.data.frame(auuc_table0))
    expect_true(is.data.frame(auuc_table1))

    # Both ways of computing the metrics must agree.
    expect_equal(auuc0, auuc1)
    expect_equal(auuc_table0, auuc_table1)
}

# Register the test function under a descriptive suite name and run it.
# doSuite/makeSuite are presumably provided by the sourced test setup script - confirm.
doSuite("Check making uplift binomial model metrics.", makeSuite(
test.make_metrics_uplift_binomial
))

0 comments on commit 52186b7

Please sign in to comment.