PUBDEV-2098 visualize scoring history in python and some corrections to models.R
ludi317 committed Oct 11, 2015
1 parent 15f380b commit 77b2710
Showing 6 changed files with 162 additions and 60 deletions.
25 changes: 13 additions & 12 deletions h2o-py/h2o/model/binomial.py
@@ -339,21 +339,22 @@ def metric(self, metric, thresholds=None, train=False, valid=False, xval=False):
for k,v in zip(tm.keys(),tm.values()): m[k] = None if v is None else v.metric(metric,thresholds)
return m.values()[0] if len(m) == 1 else m

def plot(self, type="roc", train=False, valid=False, xval=False, **kwargs):
def plot(self, timestep="AUTO", metric="AUTO", **kwargs):
"""
Produce the desired metric plot
If all are False (default), then return the training metric value.
Plots training set (and validation set if available) scoring history for an H2OBinomialModel. The timestep and metric
arguments are restricted to what is available in its scoring history.
:param type: the type of metric plot (currently, only ROC supported)
:param train: If train is True, then plot for training data.
:param valid: If valid is True, then plot for validation data.
:param xval: If xval is True, then plot for cross validation data.
:param show: if False, the plot is not shown. matplotlib show method is blocking.
:return: None
:param timestep: A unit of measurement for the x-axis.
:param metric: A unit of measurement for the y-axis.
:return: A scoring history plot.
"""
tm = ModelBase._get_metrics(self, train, valid, xval)
for k,v in zip(tm.keys(),tm.values()):
if v is not None: v.plot(type=type, **kwargs)

if self._model_json["algo"] in ("deeplearning", "drf", "gbm"):
if metric == "AUTO": metric = "logloss"
elif metric not in ("logloss","AUC","classification_error","MSE"):
raise ValueError("metric for H2OBinomialModel must be one of: AUTO, logloss, AUC, classification_error, MSE")

self._plot(timestep=timestep, metric=metric, **kwargs)
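A minimal usage sketch of the new signature (not the committed code): it assumes `model` is an already-trained binomial GBM, DRF, or deep learning model with a validation frame; training code is omitted and names are illustrative.

model.plot()                                   # AUTO defaults: logloss vs. number_of_trees (epochs for deep learning)
model.plot(timestep="duration", metric="AUC")  # training and validation AUC against elapsed time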

def roc(self, train=False, valid=False, xval=False):
"""
14 changes: 6 additions & 8 deletions h2o-py/h2o/model/metrics_base.py
@@ -79,7 +79,7 @@ def show(self):
print "Total Sum of Square Error to Grand Mean: " + str(self.totss())
print "Between Cluster Sum of Square Error: " + str(self.betweenss())
self._metric_json['centroid_stats'].show()

if metric_type in types_w_dim:
print "Sum of Squared Error (Numeric): " + str(self.num_err())
print "Misclassification Error (Categorical): " + str(self.cat_err())
@@ -388,15 +388,13 @@ def plot(self, type="roc", **kwargs):
return

# TODO: add more types (i.e. cutoffs)
if type not in ["roc"]: raise ValueError("type {0} is not supported".format(type))
if type not in ["roc"]: raise ValueError("type {} is not supported".format(type))
if type == "roc":
x_axis = self.fprs
y_axis = self.tprs
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curve')
plt.text(0.5, 0.5, r'AUC={0}'.format(self._metric_json["AUC"]))
plt.plot(x_axis, y_axis, 'b--')
plt.text(0.5, 0.5, r'AUC={0:.4f}'.format(self._metric_json["AUC"]))
plt.plot(self.fprs, self.tprs, 'b--')
plt.axis([0, 1, 0, 1])
if not ('server' in kwargs.keys() and kwargs['server']): plt.show()
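A usage sketch for the ROC path above, assuming `model` is a trained binomial model scored against a validation frame; the `server` keyword only suppresses the blocking show() call.

perf = model.model_performance(valid=True)
perf.plot(type="roc")   # ROC curve (FPR vs. TPR) with the AUC annotated; pass server=True to skip plt.show()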

@@ -523,15 +521,15 @@ def __init__(self, metric_json, on=None, algo=""):
class H2ODimReductionModelMetrics(MetricsBase):
def __init__(self, metric_json, on=None, algo=""):
super(H2ODimReductionModelMetrics, self).__init__(metric_json, on, algo)

def num_err(self):
"""
:return: the Sum of Squared Error over non-missing numeric entries, or None if not present.
"""
if ModelBase._has(self._metric_json, "numerr"):
return self._metric_json["numerr"]
return None

def cat_err(self):
"""
:return: the Number of Misclassified categories over non-missing categorical entries, or None if not present.
71 changes: 70 additions & 1 deletion h2o-py/h2o/model/model_base.py
@@ -5,6 +5,7 @@
import h2o
from . import H2OFrame
from . import H2OConnection
import imp


class ModelBase(object):
@@ -181,7 +182,7 @@ def score_history(self):
import pandas
pandas.options.display.max_rows = 20
return pandas.DataFrame(s.cell_values,columns=s.col_header)
return model["scoring_history"]
return s
else: print "No score history for this model"
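A usage sketch of the corrected return value, assuming pandas is installed and `model` has a scoring history; the column names below are illustrative and vary by algorithm.

sh = model.score_history()                         # pandas DataFrame when pandas is available
print sh.columns.values
print sh[["number_of_trees", "training_logloss"]]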


@@ -460,6 +461,74 @@ def _get_metrics(o, train, valid, xval):
# def __del__(self):
# h2o.remove(self._id)


def _plot(self, timestep, metric, **kwargs):

# check for matplotlib. exit if absent
try:
imp.find_module('matplotlib')
import matplotlib
if 'server' in kwargs.keys() and kwargs['server']: matplotlib.use('Agg', warn=False)
import matplotlib.pyplot as plt
except ImportError:
print "matplotlib is required for this function!"
return

scoring_history = self.score_history()
# Separate functionality for GLM since its output is different from other algos
if self._model_json["algo"] == "glm":
# GLM has only one timestep option, which is `iteration`
timestep = "iteration"
if metric == "AUTO": metric = "log_likelihood"
elif metric not in ("log_likelihood", "objective"):
raise ValueError("for GLM, metric must be one of: log_likelihood, objective")
plt.xlabel(timestep)
plt.ylabel(metric)
plt.title("Validation Scoring History")
plt.plot(scoring_history[timestep], scoring_history[metric])

elif self._model_json["algo"] in ("deeplearning", "drf", "gbm"):
# Set timestep
if self._model_json["algo"] in ("gbm", "drf"):
if timestep == "AUTO": timestep = "number_of_trees"
elif timestep not in ("duration","number_of_trees"):
raise ValueError("timestep for gbm or drf must be one of: duration, number_of_trees")
else: #self._model_json["algo"] == "deeplearning":
# Delete first row of DL scoring history since it contains NAs & NaNs
if scoring_history["samples"][0] == 0:
scoring_history = scoring_history.ix[1:]
if timestep == "AUTO": timestep = "epochs"
elif timestep not in ("epochs","samples","duration"):
raise ValueError("timestep for deeplearning must be one of: epochs, samples, duration")

training_metric = "training_{}".format(metric)
validation_metric = "validation_{}".format(metric)
if timestep == "duration":
dur_colname = "duration_{}".format(scoring_history["duration"][1].split()[1])
scoring_history[dur_colname] = map(lambda x: str(x).split()[0],scoring_history["duration"])
timestep = dur_colname
if validation_metric in scoring_history.columns.values: #Training and Validation scoring history
ylim = (scoring_history.ix[:,[training_metric, validation_metric]].min().min(), scoring_history.ix[:,[training_metric, validation_metric]].max().max())
plt.xlabel(timestep)
plt.ylabel(metric)
plt.title("Scoring History")
plt.ylim(ylim)
plt.plot(scoring_history[timestep], scoring_history[training_metric], label = "Training")
plt.plot(scoring_history[timestep], scoring_history[validation_metric], color = "orange", label = "Validation")
plt.legend()
else: #Training scoring history only
ylim = (scoring_history[training_metric].min(), scoring_history[training_metric].max())
plt.xlabel(timestep)
plt.ylabel(training_metric)
plt.title("Training Scoring History")
plt.ylim(ylim)
plt.plot(scoring_history[timestep], scoring_history[training_metric])

else: # algo is not glm, deeplearning, drf, gbm
raise ValueError("Plotting not implemented for this type of model")
if "server" not in kwargs.keys() or not kwargs["server"]: plt.show()


@staticmethod
def _has(dictionary, key):
return key in dictionary and dictionary[key] is not None
76 changes: 47 additions & 29 deletions h2o-py/h2o/model/multinomial.py
@@ -8,32 +8,50 @@


class H2OMultinomialModel(ModelBase):
def __init__(self, dest_key, model_json):
super(H2OMultinomialModel, self).__init__(dest_key, model_json,H2OMultinomialModelMetrics)

def confusion_matrix(self, data):
"""
Returns a confusion matrix based of H2O's default prediction threshold for a dataset
"""
if not isinstance(data, H2OFrame): raise ValueError("data argument must be of type H2OFrame, but got {0}"
.format(type(data)))
j = H2OConnection.post_json("Predictions/models/" + self._id + "/frames/" + data._id)
return j["model_metrics"][0]["cm"]["table"]

def hit_ratio_table(self, train=False, valid=False, xval=False):
"""
Retrieve the Hit Ratios
If all are False (default), then return the training metric value.
If more than one options is set to True, then return a dictionary of metrics where the keys are "train", "valid",
and "xval"
:param train: If train is True, then return the R^2 value for the training data.
:param valid: If valid is True, then return the R^2 value for the validation data.
:param xval: If xval is True, then return the R^2 value for the cross validation data.
:return: The R^2 for this regression model.
"""
tm = ModelBase._get_metrics(self, train, valid, xval)
m = {}
for k,v in zip(tm.keys(),tm.values()): m[k] = None if v is None else v.hit_ratio_table()
return m.values()[0] if len(m) == 1 else m
def __init__(self, dest_key, model_json):
super(H2OMultinomialModel, self).__init__(dest_key, model_json,H2OMultinomialModelMetrics)

def confusion_matrix(self, data):
"""
Returns a confusion matrix based of H2O's default prediction threshold for a dataset
"""
if not isinstance(data, H2OFrame): raise ValueError("data argument must be of type H2OFrame, but got {0}"
.format(type(data)))
j = H2OConnection.post_json("Predictions/models/" + self._id + "/frames/" + data._id)
return j["model_metrics"][0]["cm"]["table"]

def hit_ratio_table(self, train=False, valid=False, xval=False):
"""
Retrieve the Hit Ratios
If all are False (default), then return the training metric value.
If more than one options is set to True, then return a dictionary of metrics where the keys are "train", "valid",
and "xval"
:param train: If train is True, then return the R^2 value for the training data.
:param valid: If valid is True, then return the R^2 value for the validation data.
:param xval: If xval is True, then return the R^2 value for the cross validation data.
:return: The R^2 for this regression model.
"""
tm = ModelBase._get_metrics(self, train, valid, xval)
m = {}
for k,v in zip(tm.keys(),tm.values()): m[k] = None if v is None else v.hit_ratio_table()
return m.values()[0] if len(m) == 1 else m

def plot(self, timestep="AUTO", metric="AUTO", **kwargs):
"""
Plots training set (and validation set if available) scoring history for an H2OMultinomialModel. The timestep and metric
arguments are restricted to what is available in its scoring history.
:param timestep: A unit of measurement for the x-axis.
:param metric: A unit of measurement for the y-axis.
:return: A scoring history plot.
"""

if self._model_json["algo"] in ("deeplearning", "drf", "gbm"):
if metric == "AUTO": metric = "classification_error"
elif metric not in ("logloss","AUC","classification_error","MSE"):
raise ValueError("metric for H2OMultinomialModel must be one of: AUTO, logloss, AUC, classification_error, MSE")

self._plot(timestep=timestep, metric=metric, **kwargs)
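A headless usage sketch for the multinomial case, assuming `model` is a trained multinomial model and matplotlib is installed; with server=True the Agg backend is selected and show() is skipped, so the current figure can be saved instead (the output path is illustrative).

model.plot(metric="classification_error", server=True)
import matplotlib.pyplot as plt
plt.savefig("scoring_history.png")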

18 changes: 17 additions & 1 deletion h2o-py/h2o/model/regression.py
@@ -7,11 +7,27 @@

class H2ORegressionModel(ModelBase):
"""
Class for Regression models.
Class for Regression models.
"""
def __init__(self, dest_key, model_json):
super(H2ORegressionModel, self).__init__(dest_key, model_json,H2ORegressionModelMetrics)

def plot(self, timestep="AUTO", metric="AUTO", **kwargs):
"""
Plots training set (and validation set if available) scoring history for an H2ORegressionModel. The timestep and metric
arguments are restricted to what is available in its scoring history.
:param timestep: A unit of measurement for the x-axis.
:param metric: A unit of measurement for the y-axis.
:return: A scoring history plot.
"""

if self._model_json["algo"] in ("deeplearning", "drf", "gbm"):
if metric == "AUTO": metric = "MSE"
elif metric not in ("MSE","deviance", "r2"):
raise ValueError("metric for H2ORegressionModel must be one of: AUTO, MSE, deviance, r2")

self._plot(timestep=timestep, metric=metric, **kwargs)
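A usage sketch for the regression case, assuming `dl` is an already-trained deep learning regression model; AUTO picks MSE for the y-axis and, for deep learning, epochs for the x-axis.

dl.plot()                                        # AUTO defaults: MSE vs. epochs
dl.plot(timestep="samples", metric="deviance")   # deviance against training samples seen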

def _mean_var(frame, weights=None):
"""
18 changes: 9 additions & 9 deletions h2o-r/h2o-package/R/models.R
@@ -1814,20 +1814,20 @@ plot.H2OModel <- function(x, timestep = "AUTO", metric = "AUTO", ...) {
if (is(x, "H2OBinomialModel")) {
if (metric == "AUTO") {
metric <- "logloss"
} else if (!(metric %in% c("r2","logloss","AUC","classification_error","MSE"))) {
stop("metric for H2OBinomialModel must be one of: AUTO, r2, logloss, AUC, classification_error, MSE")
} else if (!(metric %in% c("logloss","AUC","classification_error","MSE"))) {
stop("metric for H2OBinomialModel must be one of: AUTO, logloss, AUC, classification_error, MSE")
}
} else if (is(x, "H2OMultinomialModel")) {
if (metric == "AUTO") {
metric <- "classification_error"
} else if (!(metric %in% c("r2","logloss","classification_error","MSE"))) {
stop("metric for H2OMultinomialModel must be one of: AUTO, r2, logloss, classification_error, MSE")
} else if (!(metric %in% c("logloss","AUC","classification_error","MSE"))) {
stop("metric for H2OMultinomialModel must be one of: AUTO, logloss, AUC, classification_error, MSE")
}
} else if (is(x, "H2ORegressionModel")) {
if (metric == "AUTO") {
metric <- "MSE"
} else if (!(metric %in% c("MSE","deviance"))) {
stop("metric for H2OMultinomialModel must be one of: MSE, deviance")
} else if (!(metric %in% c("MSE","deviance", "r2"))) {
stop("metric for H2ORegressionModel must be one of: AUTO, MSE, deviance, r2")
}
} else {
stop("Must be one of: H2OBinomialModel, H2OMultinomialModel or H2ORegressionModel")
@@ -1839,7 +1839,7 @@ plot.H2OModel <- function(x, timestep = "AUTO", metric = "AUTO", ...) {
} else if (!(timestep %in% c("duration","number_of_trees"))) {
stop("timestep for gbm or drf must be one of: duration, number_of_trees")
}
} else if (x@algorithm == "deeplearning") {
} else { # x@algorithm == "deeplearning"
# Delete first row of DL scoring history since it contains NAs & NaNs
if (df$samples[1] == 0) {
df <- df[-1,]
Expand All @@ -1849,8 +1849,6 @@ plot.H2OModel <- function(x, timestep = "AUTO", metric = "AUTO", ...) {
} else if (!(timestep %in% c("epochs","samples","duration"))) {
stop("timestep for deeplearning must be one of: epochs, samples, duration")
}
} else {
stop("Plotting not implemented for this type of model")
}
training_metric <- sprintf("training_%s", metric)
validation_metric <- sprintf("validation_%s", metric)
@@ -1874,6 +1872,8 @@ plot.H2OModel <- function(x, timestep = "AUTO", metric = "AUTO", ...) {
main = "Training Scoring History", col = "blue", ylim = ylim)

}
} else { # algo is not glm, deeplearning, drf, gbm
stop("Plotting not implemented for this type of model")
}
}

