From cd7fbe6ee16d2ba04378f3709197a42ee01dc67f Mon Sep 17 00:00:00 2001 From: maurever Date: Fri, 11 Feb 2022 15:06:58 +0100 Subject: [PATCH] PUBDEV-8485 fix python examples, fix typo --- h2o-py/h2o/model/metrics_base.py | 59 +++++++++++++++----------------- h2o-py/h2o/model/model_base.py | 23 ++++++++++++- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 92660ab9746e..37cd7c374cdf 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -1808,9 +1808,8 @@ def auuc(self, metric=None): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) @@ -1842,9 +1841,8 @@ def qini(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) @@ -1873,13 +1871,13 @@ def aecu(self, metric="qini"): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.aecu() + >>> perf = uplift_model.model_performance() + >>> perf.aecu() """ assert metric in ['qini', 'lift', 'gain'], \ "AECU metric "+metric+" should be 'qini','lift' or 'gain'." 
@@ -1906,13 +1904,13 @@ def uplift(self, metric="AUTO"): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.uplift() + >>> perf = uplift_model.model_performance() + >>> perf.uplift() """ assert metric in ['AUTO', 'qini', 'lift', 'gain'] @@ -1941,13 +1939,13 @@ def uplift_random(self, metric="AUTO"): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.uplift() + >>> perf = uplift_model.model_performance() + >>> perf.uplift_random() """ assert metric in ['AUTO', 'qini', 'lift', 'gain'] @@ -1974,13 +1972,13 @@ def n(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.n() + >>> perf = uplift_model.model_performance() + >>> perf.n() """ return self._metric_json["thresholds_and_metric_scores"]["n"] @@ -2003,13 +2001,13 @@ def thresholds(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... 
auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.thresholds() + >>> perf = uplift_model.model_performance() + >>> perf.thresholds() """ return self._metric_json["thresholds_and_metric_scores"]["thresholds"] @@ -2032,13 +2030,13 @@ def thresholds_and_metric_scores(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.thresholds_and_metric_scores() + >>> perf = uplift_model.model_performance() + >>> perf.thresholds_and_metric_scores() """ return self._metric_json["thresholds_and_metric_scores"] @@ -2061,17 +2059,17 @@ def auuc_table(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.auuc_table() + >>> perf = uplift_model.model_performance() + >>> perf.auuc_table() """ return self._metric_json["auuc_table"] - def eacu_table(self): + def aecu_table(self): """ Retrieve all types of AECU values in a table. @@ -2090,13 +2088,13 @@ def eacu_table(self): >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... 
auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) - >>> uplift_model.aecu_table() + >>> perf = uplift_model.model_performance() + >>> perf.aecu_table() """ return self._metric_json["aecu_table"] @@ -2123,9 +2121,8 @@ def plot_uplift(self, server=False, save_to_file=None, plot=True, metric="AUTO") >>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, ... max_depth=5, ... treatment_column=treatment_column, - ... uplift_metric="qini", + ... uplift_metric="kl", ... distribution="bernoulli", - ... gainslift_bins=10, ... min_rows=10, ... auuc_type="gain") >>> uplift_model.train(y=response_column, x=predictors, training_frame=train) diff --git a/h2o-py/h2o/model/model_base.py b/h2o-py/h2o/model/model_base.py index d2795e2d0c75..09f7414812f4 100644 --- a/h2o-py/h2o/model/model_base.py +++ b/h2o-py/h2o/model/model_base.py @@ -956,7 +956,28 @@ def auuc_table(self, train=False, valid=False): raise H2OValueError("auuc_table() is only available for Uplift Binomial classifiers.") m[k] = None if v is None else v.auuc_table() return list(m.values())[0] if len(m) == 1 else m - + + def qini(self, train=False, valid=False): + """ + Get the Qini value (Area Under Uplift Curve - Area Under Random Curve for Qini uplift). + + If all are False (default), then return the training metric value. + If more than one option is set to True, then return a dictionary of metrics where the keys are "train", + "valid". + + :param bool train: If train is True, then return the Qini value for the training data. + :param bool valid: If valid is True, then return the Qini value for the validation data. + + :returns: The Qini value. 
+ """ + tm = ModelBase._get_metrics(self, train, valid, False) + m = {} + for k, v in viewitems(tm): + if not(v is None) and not(is_type(v, h2o.model.metrics_base.H2OBinomialUpliftModelMetrics)): + raise H2OValueError("qini() is only available for Uplift Binomial classifiers.") + m[k] = None if v is None else v.qini() + return list(m.values())[0] if len(m) == 1 else m + def aic(self, train=False, valid=False, xval=False): """ Get the AIC (Akaike Information Criterium).