more refactoring
- scan is now in its own submodule/folder
- reduction files are now following the new convention
- major refactor of reduction procedures
mikkokotila committed Aug 6, 2018
1 parent 86ed1bd · commit da26fe5
Showing 11 changed files with 107 additions and 97 deletions.
1 change: 1 addition & 0 deletions setup.py
@@ -78,6 +78,7 @@ def check_dependencies():
       download_url=DOWNLOAD_URL,
       install_requires=install_requires,
       packages=['talos',
+                'talos.scan',
                 'talos.examples',
                 'talos.utils',
                 'talos.model',
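Worth noting: setuptools only ships packages that are explicitly named, so the new talos/scan folder has to be added to packages=[...] here or it would be silently left out of the distribution.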
2 changes: 1 addition & 1 deletion talos/__init__.py
@@ -1,4 +1,4 @@
-from .scan import Scan
+from .scan.Scan import Scan
 from .reporting import Reporting
 from .metrics.performance import Performance
 from .examples import datasets, params
45 changes: 45 additions & 0 deletions talos/reducers/correlation.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+
+def correlation(self,
+                correlation='spearman',
+                corr_to_drop='neg'):
+
+    '''Correlation Reducers
+    Note that this set of reducers works only for the continuous
+    and stepped (e.g. batch size) hyperparameters.
+    '''
+
+    out = self.param_table.corr(correlation)[self.reduction_metric]
+    out = out.dropna()
+
+    if len(out) == 0:
+        self._reduce_keys = None
+        return self
+
+    out = out[1:].sort_values(ascending=False)
+    out = out.index[-1], out[-1]
+
+    if abs(out[1]) >= self.reduction_threshold:
+        dummy_cols = pd.get_dummies(self.param_table[out[0]])
+        dummy_cols.insert(0,
+                          self.reduction_metric,
+                          self.param_table[self.reduction_metric])
+
+    # case where the threshold is not met
+    else:
+        self._reduce_keys = None
+        return self
+
+    # all other cases continue
+    to_drop_temp = dummy_cols.corr(correlation)[self.reduction_metric]
+
+    # pick the drop method based on parameters
+    if corr_to_drop == 'neg':
+        self._reduce_keys = to_drop_temp.sort_values().index[0], out[0]
+    elif corr_to_drop == 'pos':
+        self._reduce_keys = to_drop_temp.sort_values().index[-2], out[0]
+
+    return self
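For orientation, a minimal standalone sketch (not part of this commit) of the idea behind correlation(): rank the hyperparameters by Spearman correlation against the reduction metric, then use dummy columns to single out the worst-performing value of the most negatively correlated one. The toy param_table, the 0.2 threshold, and the column names are all invented for illustration.

import pandas as pd

# toy experiment log: higher batch_size clearly hurts val_acc here
param_table = pd.DataFrame({
    'val_acc':    [0.82, 0.80, 0.62, 0.60, 0.81, 0.61],
    'batch_size': [8, 8, 32, 32, 8, 32],
    'dropout':    [0.0, 0.5, 0.0, 0.5, 0.25, 0.25]})

# correlate every hyperparameter with the metric; [1:] drops the
# metric's self-correlation, which is always 1.0
corr = param_table.corr('spearman')['val_acc'].dropna()
corr = corr[1:].sort_values(ascending=False)

# after a descending sort the last entry is the most negative correlation
name, strength = corr.index[-1], corr.iloc[-1]

if abs(strength) >= 0.2:  # stands in for self.reduction_threshold
    # one 0/1 column per observed value of the offending hyperparameter
    dummies = pd.get_dummies(param_table[name]).astype(int)
    dummies.insert(0, 'val_acc', param_table['val_acc'])

    # the dummy that correlates most negatively with the metric marks
    # the value to drop; (value, name) mirrors self._reduce_keys
    value = dummies.corr('spearman')['val_acc'].sort_values().index[0]
    print((value, name))  # -> (32, 'batch_size')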
10 changes: 5 additions & 5 deletions talos/reducers/reduce_drop.py → talos/reducers/reduce_finish.py
@@ -1,18 +1,18 @@
-def reduction_drop(self):
+def reduce_finish(self):
 
     '''Takes input from a Reducer in the form of a tuple:
     the value to drop and the hyperparameter name.
     Returns self with a modified param_log.'''
 
     # get the column index
-    to_remove_col = self.param_reference[self.out[1]]
+    to_remove_col = self.param_reference[self._reduce_keys[1]]
 
-    value_to_remove = self.out[0]
+    value_to_remove = self._reduce_keys[0]
 
     # pick the index numbers for dropping available permutations
     indexs_to_drop = self.param_grid[self.param_grid[:, to_remove_col] == value_to_remove][:, -1]
 
     # drop the index numbers
-    param_log = list(set(self.param_log).difference(set(indexs_to_drop)))
+    self.param_log = list(set(self.param_log).difference(set(indexs_to_drop)))
 
-    return param_log
+    return self
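A sketch of what reduce_finish() then does with that tuple; the grid layout is assumed from the code above: one row per permutation, hyperparameter columns first, the permutation id in the last column, and param_log holding the ids not yet run.

import numpy as np

param_grid = np.array([[8,  0.0, 0],
                       [8,  0.5, 1],
                       [32, 0.0, 2],
                       [32, 0.5, 3]])
param_reference = {'batch_size': 0, 'dropout': 1}
param_log = [0, 1, 2, 3]            # permutations still to run

reduce_keys = (32, 'batch_size')    # value to drop, hyperparameter name

# rows where the hyperparameter equals the value, then their ids
col = param_reference[reduce_keys[1]]
ids_to_drop = param_grid[param_grid[:, col] == reduce_keys[0]][:, -1]

# remove those ids from the remaining permutations
param_log = list(set(param_log).difference(set(ids_to_drop)))
print(param_log)  # -> [0, 1]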
20 changes: 20 additions & 0 deletions talos/reducers/reduce_prepare.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+from ..metrics.names import metric_names
+
+
+def reduce_prepare(self):
+
+    # load the data from the experiment log
+    self.data = pd.read_csv(self.experiment_name + '.csv')
+    self.names = metric_names()
+
+    # apply the lookback window
+    if self.reduction_window is not None:
+        self.data = self.data.tail(self.reduction_window)
+
+    self.param_columns = [col for col in self.data.columns if col not in self.names]
+    self.param_table = self.data[self.param_columns]
+    self.param_table.insert(0, self.reduction_metric, self.data[self.reduction_metric])
+
+    return self
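A runnable approximation of what reduce_prepare() builds, with an in-memory CSV standing in for the experiment log; the column names and the metric_cols list (standing in for metric_names()) are assumptions.

import io
import pandas as pd

csv = io.StringIO(
    "round_epochs,val_acc,batch_size,dropout\n"
    "10,0.60,8,0.0\n"
    "10,0.72,8,0.5\n"
    "10,0.80,32,0.0\n"
    "10,0.82,32,0.5\n")
metric_cols = ['round_epochs', 'val_acc']  # assumed metric names

data = pd.read_csv(csv)
data = data.tail(3)  # lookback window, i.e. reduction_window = 3

# everything that is not a metric is a hyperparameter column, and the
# reduction metric is re-inserted in front so .corr() sees it first
param_columns = [c for c in data.columns if c not in metric_cols]
param_table = data[param_columns].copy()  # .copy() avoids a pandas
param_table.insert(0, 'val_acc', data['val_acc'])  # SettingWithCopyWarning
print(param_table)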
29 changes: 8 additions & 21 deletions talos/reducers/reduce_run.py
@@ -1,29 +1,16 @@
-from .ReductionTable import ReductionTable
-from .Reducers import Reducers
-from .reduce_drop import reduction_drop
+from .reduce_prepare import reduce_prepare
+from .reduce_finish import reduce_finish
+from .correlation import correlation
 
 
 def reduce_run(self):
 
-    '''Takes in the Scan object, and returns a modified version
-    of the self.param_log.'''
-
-    self._filaname = self.experiment_name + '.csv'
-
-    # create the table for reduction
-    out = ReductionTable(self._filaname,
-                         self.reduction_metric,
-                         self.reduction_window,
-                         self.reduction_threshold)
-
-    # create the reducer object
-    out = Reducers(out)
+    self = reduce_prepare(self)
 
     # apply the reduction
     if self.reduction_method == 'correlation':
-        self.out = out.correlation()
+        self = correlation(self)
 
-    if self.out is None:
-        return self.param_log
+    if self._reduce_keys is None:
+        return self
     else:
-        return reduction_drop(self)
+        return reduce_finish(self)
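The new convention is visible here: each reduction step is a plain function that takes the Scan object and returns it, so steps chain and the pipeline can bail out early when there is nothing to drop. A self-contained sketch of that shape, with stubs standing in for the real functions:

class FakeScan:
    reduction_method = 'correlation'
    _reduce_keys = None

def reduce_prepare(self):       # stub: would load and window the log
    return self

def correlation(self):          # stub: would pick (value, name) to drop
    self._reduce_keys = (32, 'batch_size')
    return self

def reduce_finish(self):        # stub: would prune self.param_log
    print('dropping', self._reduce_keys)
    return self

def reduce_run(self):
    self = reduce_prepare(self)
    if self.reduction_method == 'correlation':
        self = correlation(self)
    if self._reduce_keys is None:
        return self
    return reduce_finish(self)

reduce_run(FakeScan())  # -> dropping (32, 'batch_size')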
51 changes: 0 additions & 51 deletions talos/reducers/spear_reducer.py

This file was deleted.

2 changes: 0 additions & 2 deletions talos/scan.py → talos/scan/Scan.py
@@ -1,5 +1,4 @@
 from .scan_prepare import scan_prepare
-from .utils.logging import debug_logging
 from .scan_run import scan_run


@@ -144,6 +143,5 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,

     def runtime(self):
 
-        self = debug_logging(self)
         self = scan_prepare(self)
         self = scan_run(self)
Empty file added talos/scan/__init__.py
24 changes: 17 additions & 7 deletions talos/scan_prepare.py → talos/scan/scan_prepare.py
@@ -1,20 +1,28 @@
-from .utils.validation_split import validation_split
-from .utils.detector import prediction_type
-from .parameters.ParamGrid import ParamGrid
-from .utils.pred_class import classify
-from .utils.last_neuron import last_neuron
+from ..utils.validation_split import validation_split
+from ..utils.detector import prediction_type
+from ..parameters.ParamGrid import ParamGrid
+from ..utils.pred_class import classify
+from ..utils.last_neuron import last_neuron
+from ..utils.logging import debug_logging
 
 
 TRAIN_VAL_RUNTIME_ERROR_MSG = """
-If setting a custom train/val split, both x_val and y_val must be input data
-and not None.
+If x_val or y_val is inputted, then the other must be inputted as well.
 """
 
 
 def scan_prepare(self):
 
     '''Includes all preparation procedures up until starting the first scan
     through scan_run()'''
 
+    # based on the 'debug' Scan() parameter
+    self = debug_logging(self)
+
     # create the name for the experiment
     self.experiment_name = self.dataset_name + '_' + self.experiment_no
 
+    # for the case where x_val or y_val is missing when the other is present
     self.custom_val_split = False
     if (self.x_val is not None and self.y_val is None) or \
        (self.x_val is None and self.y_val is not None):
@@ -29,6 +37,7 @@ def scan_prepare(self):
     self.param_grid = self.paramgrid_object.param_grid
     del self.paramgrid_object
 
+    # creates a reference dictionary mapping hyperparameter label to column number
     self.param_reference = {}
     for i, col in enumerate(self.params.keys()):
         self.param_reference[col] = i
@@ -38,6 +47,7 @@ def scan_prepare(self):
     self.epoch_entropy = []
     self.round_models = []
 
+    # create the data asset
     self.y_max = self.y.max()
     self = validation_split(self)
     self.shape = classify(self.y)
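The guard near the top of scan_prepare() (the diff is truncated before the raise) rejects the case where only one of x_val and y_val is given. A hedged sketch of that check as a standalone function; raising RuntimeError with the message is an assumption about how the constant is used.

TRAIN_VAL_RUNTIME_ERROR_MSG = """
If x_val or y_val is inputted, then the other must be inputted as well.
"""

def check_custom_split(x_val, y_val):
    # exactly one of the two provided is a user error
    if (x_val is not None and y_val is None) or \
       (x_val is None and y_val is not None):
        raise RuntimeError(TRAIN_VAL_RUNTIME_ERROR_MSG)
    # both provided means a custom split, neither means automatic
    return x_val is not None and y_val is not None

print(check_custom_split(None, None))    # False: automatic split
print(check_custom_split([[1]], [[0]]))  # True: custom split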
20 changes: 10 additions & 10 deletions talos/scan_run.py → talos/scan/scan_run.py
@@ -2,15 +2,15 @@

 from keras import backend as K
 
-from .utils.results import run_round_results, save_result
-from .parameters.round_params import round_params
-from .utils.results import create_header
-from .metrics.entropy import epoch_entropy
-from .model.ingest_model import ingest_model
-from .metrics.score_model import get_score
-from .utils.logging import write_log
-from .utils.results import result_todf, peak_epochs_todf
-from .reducers.reduce_run import reduce_run
+from ..utils.results import run_round_results, save_result
+from ..parameters.round_params import round_params
+from ..utils.results import create_header
+from ..metrics.entropy import epoch_entropy
+from ..model.ingest_model import ingest_model
+from ..metrics.score_model import get_score
+from ..utils.logging import write_log
+from ..utils.results import result_todf, peak_epochs_todf
+from ..reducers.reduce_run import reduce_run


def scan_run(self):
@@ -67,7 +67,7 @@ def rounds_run(self):
     if self.reduction_method is not None:
         if (self.round_counter + 1) % self.reduction_interval == 0:
             len_before_reduce = len(self.param_log)
-            self.param_log = reduce_run(self)
+            self = reduce_run(self)
             total_reduced = len_before_reduce - len(self.param_log)
             # update the progress bar
             self.pbar.update(total_reduced)
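One detail in the trigger above: round_counter is zero-based, so the (round_counter + 1) % reduction_interval check fires the first reduction after the reduction_interval-th completed round, not after the first. A quick sketch with invented numbers:

reduction_interval = 5
for round_counter in range(12):
    if (round_counter + 1) % reduction_interval == 0:
        print('reduce after round', round_counter + 1)
# -> reduce after round 5
# -> reduce after round 10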
