preparing for 0.1.8

- renamed some files - the performance metric is now unified for binary, multi-label, and multi-class predictions - spear now accepts any reduction_metric - added pred_class(), which automatically detects the type of prediction challenge - cleaned up results
autonomio · May 11, 2018 · 4dcc65d · 4dcc65d
1 parent fa5bd3d
commit 4dcc65d
Show file tree

Hide file tree

Showing 8 changed files with 85 additions and 20 deletions.
diff --git a/hyperio/__init__.py b/hyperio/__init__.py
@@ -1,6 +1,6 @@
 from hyperio.scan import Hyperio
 from hyperio.reporting import Reporting
-from hyperio.metrics.binary_performance import BinaryPerformance 
+from hyperio.metrics.performance import Performance 
 from hyperio.data import datasets, models
 from hyperio.utils import save_load
 

diff --git a/hyperio/metrics/binary_performance.py b/hyperio/metrics/performance.py
@@ -1,37 +1,78 @@
-class BinaryPerformance():
+from keras.utils import to_categorical
+import numpy as np
 
-    def __init__(self, y_pred, y_val):
+
+class Performance():
+
+    def __init__(self, y_pred, y_val, shape):
 
         self.y_pred = y_pred
         self.y_val = y_val
+        self.shape = shape
+
+        self.classes = np.maximum(self.y_pred, self.y_val).max() + 1
+
+        if self.shape == 'binary_class':
+            self.binary_class()
+        elif self.shape == 'multi_class':
+            self.multi_class()
+        elif self.shape == 'multi_label':
+            self.multi_label()
 
         self.trues_and_falses()
         self.f1score()
         self.balance()
         self.one_rule()
         self.zero_rule()
 
+    def multi_class(self):
+
+        '''For one-hot encoded'''
+
+        self.y_pred = self.y_pred.flatten('F')
+        self.y_val = self.y_val.flatten('F')
+
+    def binary_class(self):
+
+        '''For single column, single label'''
+
+        return
+
+    def multi_label(self):
+
+        '''For many labels in a single column'''
+
+        self.y_pred = to_categorical(self.y_pred, num_classes=self.classes)
+        self.y_val = to_categorical(self.y_val, num_classes=self.classes)
+
+        self.multi_class()
+
     def f1score(self):
 
         '''Computes fscore when possible'''
 
         if sum(self.y_pred) == len(self.y_pred):
             if sum(self.y_val) != len(self.y_val):
-                self.result = '_all_ones_'
+                self.result = '_warning_all_ones_'
                 return
-
+        elif sum(self.y_pred) == 0:
+            if sum(self.y_val) != 0:
+                self.result = '_warning_all_zeros_'
+            elif sum(self.y_val) == 0:
+                self.result = 1
+            return
         try:
             self.precision = self.tp / (self.tp + self.fp)
             1 / self.precision
         except ZeroDivisionError:
-            self.result = '_all_zeros_'
+            self.result = '_warning_no_true_positive'
             return
 
         try:
             self.recall = self.tp / (self.tp + self.fn)
             1 / self.recall
         except ZeroDivisionError:
-            self.result = '_no_ones_'
+            self.result = '_warning_3'
             return
 
         try:
@@ -40,7 +81,6 @@ def f1score(self):
         except ZeroDivisionError:
             return
 
-
     def trues_and_falses(self):
 
         '''Returns tp, tn, fp, and fn values'''
@@ -52,7 +92,6 @@ def trues_and_falses(self):
 
         # then we iterate through the predictions
         for i in range(len(self.y_val)):
-
             if self.y_pred[i] == 1 and self.y_val[i] == 1:
                 self.tp += 1
             elif self.y_pred[i] == 1 and self.y_val[i] == 0:

diff --git a/hyperio/metrics/score_model.py b/hyperio/metrics/score_model.py
@@ -1,7 +1,7 @@
-from .binary_performance import BinaryPerformance
+from .performance import Performance
 
 
 def get_score(self):
 
     y_pred = self.keras_model.predict_classes(self.x_val)
-    return BinaryPerformance(y_pred, self.y_val).result
+    return Performance(y_pred, self.y_val, self.shape).result
diff --git a/hyperio/reducers/spear_reducer.py b/hyperio/reducers/spear_reducer.py
@@ -34,7 +34,7 @@ def spear(self, metric, neg_corr=True, treshold=-.1):
 
 def spear_reducer(self):
 
-    to_drop = spear(self, 'val_score')
+    to_drop = spear(self, self.reduction_metric)
 
     # if a value have been returned, proceed with dropping
     if to_drop != "_NULL":

diff --git a/hyperio/scan.py b/hyperio/scan.py
@@ -12,6 +12,7 @@
 from .parameters.permutations import param_grid
 from .utils.save_load import save_model
 from .metrics.score_model import get_score
+from .utils.pred_class import classify
 
 
 class Hyperio:
@@ -23,6 +24,7 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,
                  save_best_model=False,
                  reduction_method=None, reduction_interval=100,
                  reduction_window=None, grid_downsample=None,
+                 reduction_metric='val_acc',
                  hyperio_log_name='hyperio.log', debug=False):
 
         self.dataset_name = dataset_name
@@ -42,6 +44,7 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,
         self.reduction_method = reduction_method
         self.reduction_interval = reduction_interval
         self.reduction_window = reduction_window
+        self.reduction_metric = reduction_metric
         self.grid_downsample = grid_downsample
         self.val_split = val_split
         self.shuffle = shuffle
@@ -58,6 +61,7 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,
         self.x = x
         self.y = y
         self = validation_split(self)
+        self.shape = classify(self.y)
 
         self._data_len = len(self.x)
         self = prediction_type(self)

diff --git a/hyperio/utils/logging.py b/hyperio/utils/logging.py
@@ -1,5 +1,6 @@
 import re
 
+
 def clean_dict(self):
 
     '''this is operated from _write_tolog'''
@@ -38,12 +39,12 @@ def dict_tostr(self, d):
         s += ',' + str(round(self._val_score, 3))
     except TypeError:
         s += ',' + self._val_score
-    s += ',' + str(self._round_epochs)
-    s += ',' + self._y_type
-    s += ',' + str(self._y_range)
-    s += ',' + self._y_format
-    s += ',' + str(self.val_split)
-    s += ',' + self.dataset_name
+        s += ',' + str(self._round_epochs)
+        s += ',' + self.shape
+        s += ',' + str(self._y_range)
+        s += ',' + self._y_format
+        s += ',' + str(self.val_split)
+        s += ',' + self.dataset_name
 
     return s
 

diff --git a/hyperio/utils/pred_class.py b/hyperio/utils/pred_class.py
@@ -0,0 +1,21 @@
+def classify(y):
+
+    '''Detects if prediction is binary, multi-label or multi-class'''
+
+    shape = detect_shape(y)
+
+    if shape > 1:
+        return 'multi_class'
+
+    elif y.max() <= 1:
+        return 'binary_class'
+    else:
+        return 'multi_label'
+
+
+def detect_shape(y):
+
+    try:
+        return y.shape[1]
+    except IndexError:
+        return 1
diff --git a/hyperio/utils/results.py b/hyperio/utils/results.py
@@ -19,7 +19,7 @@ def run_round_results(self, out):
         _rr_out.append('round_epochs')
         _rr_out = list(out.history.keys())
         [_rr_out.append(key) for key in self.params.keys()]
-        return _rr_out
+        return ",".join(str(i) for i in _rr_out)
 
     # otherwise proceed to create the value row
     _rr_out.append(self._round_epochs)
@@ -32,7 +32,7 @@ def run_round_results(self, out):
     for key in self.params.keys():
         _rr_out.append(self.params[key])
 
-    return _rr_out
+    return ",".join(str(i) for i in _rr_out)
 
 
 def save_result(self):