more refactoring
- scan is now in its own submodule/folder
- reduction files are now following the new convention
- major refactor of reduction procedures
mikkokotila committed Aug 6, 2018
1 parent 86ed1bd · commit da26fe5
Showing 11 changed files with 107 additions and 97 deletions.
1 change: 1 addition & 0 deletions setup.py
@@ -78,6 +78,7 @@ def check_dependencies():
       download_url=DOWNLOAD_URL,
       install_requires=install_requires,
       packages=['talos',
+                'talos.scan',
                 'talos.examples',
                 'talos.utils',
                 'talos.model',
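Worth noting: setuptools only ships packages that are explicitly named, so the new talos/scan folder has to be added to packages=[...] here or it would be silently left out of the distribution.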
2 changes: 1 addition & 1 deletion talos/__init__.py
@@ -1,4 +1,4 @@
-from .scan import Scan
+from .scan.Scan import Scan
 from .reporting import Reporting
 from .metrics.performance import Performance
 from .examples import datasets, params
45 changes: 45 additions & 0 deletions talos/reducers/correlation.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+
+def correlation(self,
+                correlation='spearman',
+                corr_to_drop='neg'):
+
+    '''Correlation Reducers
+    Note that this set of reducers works only for the continuous
+    and stepped (e.g. batch size) hyperparameters.
+    '''
+
+    out = self.param_table.corr(correlation)[self.reduction_metric]
+    out = out.dropna()
+
+    if len(out) == 0:
+        self._reduce_keys = None
+        return self
+
+    out = out[1:].sort_values(ascending=False)
+    out = out.index[-1], out[-1]
+
+    if abs(out[1]) >= self.reduction_threshold:
+        dummy_cols = pd.get_dummies(self.param_table[out[0]])
+        dummy_cols.insert(0,
+                          self.reduction_metric,
+                          self.param_table[self.reduction_metric])
+
+    # case where the threshold is not met
+    else:
+        self._reduce_keys = None
+        return self
+
+    # all other cases continue
+    to_drop_temp = dummy_cols.corr(correlation)[self.reduction_metric]
+
+    # pick the drop method based on parameters
+    if corr_to_drop == 'neg':
+        self._reduce_keys = to_drop_temp.sort_values().index[0], out[0]
+    elif corr_to_drop == 'pos':
+        self._reduce_keys = to_drop_temp.sort_values().index[-2], out[0]
+
+    return self
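For orientation, a minimal standalone sketch (not part of this commit) of the idea behind correlation(): rank the hyperparameters by Spearman correlation against the reduction metric, then use dummy columns to single out the worst-performing value of the most negatively correlated one. The toy param_table, the 0.2 threshold, and the column names are all invented for illustration.

import pandas as pd

# toy experiment log: higher batch_size clearly hurts val_acc here
param_table = pd.DataFrame({
    'val_acc':    [0.82, 0.80, 0.62, 0.60, 0.81, 0.61],
    'batch_size': [8, 8, 32, 32, 8, 32],
    'dropout':    [0.0, 0.5, 0.0, 0.5, 0.25, 0.25]})

# correlate every hyperparameter with the metric; [1:] drops the
# metric's self-correlation, which is always 1.0
corr = param_table.corr('spearman')['val_acc'].dropna()
corr = corr[1:].sort_values(ascending=False)

# after a descending sort the last entry is the most negative correlation
name, strength = corr.index[-1], corr.iloc[-1]

if abs(strength) >= 0.2:  # stands in for self.reduction_threshold
    # one 0/1 column per observed value of the offending hyperparameter
    dummies = pd.get_dummies(param_table[name]).astype(int)
    dummies.insert(0, 'val_acc', param_table['val_acc'])

    # the dummy that correlates most negatively with the metric marks
    # the value to drop; (value, name) mirrors self._reduce_keys
    value = dummies.corr('spearman')['val_acc'].sort_values().index[0]
    print((value, name))  # -> (32, 'batch_size')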
10 changes: 5 additions & 5 deletions talos/reducers/reduce_drop.py → talos/reducers/reduce_finish.py
@@ -1,18 +1,18 @@
-def reduction_drop(self):
+def reduce_finish(self):
 
     '''Takes input from a Reducer in the form of a tuple:
     the value to drop and the hyperparameter name.
     Returns self with a modified param_log.'''
 
     # get the column index
-    to_remove_col = self.param_reference[self.out[1]]
+    to_remove_col = self.param_reference[self._reduce_keys[1]]
 
-    value_to_remove = self.out[0]
+    value_to_remove = self._reduce_keys[0]
 
     # pick the index numbers for dropping available permutations
     indexs_to_drop = self.param_grid[self.param_grid[:, to_remove_col] == value_to_remove][:, -1]
 
     # drop the index numbers
-    param_log = list(set(self.param_log).difference(set(indexs_to_drop)))
+    self.param_log = list(set(self.param_log).difference(set(indexs_to_drop)))
 
-    return param_log
+    return self
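A sketch of what reduce_finish() then does with that tuple; the grid layout is assumed from the code above: one row per permutation, hyperparameter columns first, the permutation id in the last column, and param_log holding the ids not yet run.

import numpy as np

param_grid = np.array([[8,  0.0, 0],
                       [8,  0.5, 1],
                       [32, 0.0, 2],
                       [32, 0.5, 3]])
param_reference = {'batch_size': 0, 'dropout': 1}
param_log = [0, 1, 2, 3]            # permutations still to run

reduce_keys = (32, 'batch_size')    # value to drop, hyperparameter name

# rows where the hyperparameter equals the value, then their ids
col = param_reference[reduce_keys[1]]
ids_to_drop = param_grid[param_grid[:, col] == reduce_keys[0]][:, -1]

# remove those ids from the remaining permutations
param_log = list(set(param_log).difference(set(ids_to_drop)))
print(param_log)  # -> [0, 1]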
20 changes: 20 additions & 0 deletions talos/reducers/reduce_prepare.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+from ..metrics.names import metric_names
+
+
+def reduce_prepare(self):
+
+    # load the data from the experiment log
+    self.data = pd.read_csv(self.experiment_name + '.csv')
+    self.names = metric_names()
+
+    # apply the lookback window
+    if self.reduction_window is not None:
+        self.data = self.data.tail(self.reduction_window)
+
+    self.param_columns = [col for col in self.data.columns if col not in self.names]
+    self.param_table = self.data[self.param_columns]
+    self.param_table.insert(0, self.reduction_metric, self.data[self.reduction_metric])
+
+    return self
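A runnable approximation of what reduce_prepare() builds, with an in-memory CSV standing in for the experiment log; the column names and the metric_cols list (standing in for metric_names()) are assumptions.

import io
import pandas as pd

csv = io.StringIO(
    "round_epochs,val_acc,batch_size,dropout\n"
    "10,0.60,8,0.0\n"
    "10,0.72,8,0.5\n"
    "10,0.80,32,0.0\n"
    "10,0.82,32,0.5\n")
metric_cols = ['round_epochs', 'val_acc']  # assumed metric names

data = pd.read_csv(csv)
data = data.tail(3)  # lookback window, i.e. reduction_window = 3

# everything that is not a metric is a hyperparameter column, and the
# reduction metric is re-inserted in front so .corr() sees it first
param_columns = [c for c in data.columns if c not in metric_cols]
param_table = data[param_columns].copy()  # .copy() avoids a pandas
param_table.insert(0, 'val_acc', data['val_acc'])  # SettingWithCopyWarning
print(param_table)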
29 changes: 8 additions & 21 deletions talos/reducers/reduce_run.py
@@ -1,29 +1,16 @@
-from .ReductionTable import ReductionTable
-from .Reducers import Reducers
-from .reduce_drop import reduction_drop
+from .reduce_prepare import reduce_prepare
+from .reduce_finish import reduce_finish
+from .correlation import correlation
 
 
 def reduce_run(self):
 
-    '''Takes in the Scan object, and returns a modified version
-    of the self.param_log.'''
-
-    self._filaname = self.experiment_name + '.csv'
-
-    # create the table for reduction
-    out = ReductionTable(self._filaname,
-                         self.reduction_metric,
-                         self.reduction_window,
-                         self.reduction_threshold)
-
-    # create the reducer object
-    out = Reducers(out)
+    self = reduce_prepare(self)
 
     # apply the reduction
     if self.reduction_method == 'correlation':
-        self.out = out.correlation()
+        self = correlation(self)
 
-    if self.out is None:
-        return self.param_log
+    if self._reduce_keys is None:
+        return self
     else:
-        return reduction_drop(self)
+        return reduce_finish(self)
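The new convention is visible here: each reduction step is a plain function that takes the Scan object and returns it, so steps chain and the pipeline can bail out early when there is nothing to drop. A self-contained sketch of that shape, with stubs standing in for the real functions:

class FakeScan:
    reduction_method = 'correlation'
    _reduce_keys = None

def reduce_prepare(self):       # stub: would load and window the log
    return self

def correlation(self):          # stub: would pick (value, name) to drop
    self._reduce_keys = (32, 'batch_size')
    return self

def reduce_finish(self):        # stub: would prune self.param_log
    print('dropping', self._reduce_keys)
    return self

def reduce_run(self):
    self = reduce_prepare(self)
    if self.reduction_method == 'correlation':
        self = correlation(self)
    if self._reduce_keys is None:
        return self
    return reduce_finish(self)

reduce_run(FakeScan())  # -> dropping (32, 'batch_size')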
51 changes: 0 additions & 51 deletions talos/reducers/spear_reducer.py

This file was deleted.

2 changes: 0 additions & 2 deletions talos/scan.py → talos/scan/Scan.py
@@ -1,5 +1,4 @@
 from .scan_prepare import scan_prepare
-from .utils.logging import debug_logging
 from .scan_run import scan_run


@@ -144,6 +143,5 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,

     def runtime(self):
 
-        self = debug_logging(self)
         self = scan_prepare(self)
         self = scan_run(self)
Empty file added talos/scan/__init__.py
24 changes: 17 additions & 7 deletions talos/scan_prepare.py → talos/scan/scan_prepare.py
@@ -1,20 +1,28 @@
-from .utils.validation_split import validation_split
-from .utils.detector import prediction_type
-from .parameters.ParamGrid import ParamGrid
-from .utils.pred_class import classify
-from .utils.last_neuron import last_neuron
+from ..utils.validation_split import validation_split
+from ..utils.detector import prediction_type
+from ..parameters.ParamGrid import ParamGrid
+from ..utils.pred_class import classify
+from ..utils.last_neuron import last_neuron
+from ..utils.logging import debug_logging
 
 
 TRAIN_VAL_RUNTIME_ERROR_MSG = """
-If setting a custom train/val split, both x_val and y_val must be input data
-and not None.
+If x_val or y_val is inputted, then the other must be inputted as well.
 """
 
 
 def scan_prepare(self):
 
     '''Includes all preparation procedures up until starting the first scan
     through scan_run()'''
 
+    # based on the 'debug' Scan() parameter
+    self = debug_logging(self)
+
     # create the name for the experiment
     self.experiment_name = self.dataset_name + '_' + self.experiment_no
 
+    # for the case where x_val or y_val is missing when the other is present
     self.custom_val_split = False
     if (self.x_val is not None and self.y_val is None) or \
        (self.x_val is None and self.y_val is not None):
@@ -29,6 +37,7 @@ def scan_prepare(self):
     self.param_grid = self.paramgrid_object.param_grid
     del self.paramgrid_object
 
+    # creates a reference dictionary mapping hyperparameter label to column number
     self.param_reference = {}
     for i, col in enumerate(self.params.keys()):
         self.param_reference[col] = i
@@ -38,6 +47,7 @@ def scan_prepare(self):
     self.epoch_entropy = []
     self.round_models = []
 
+    # create the data asset
     self.y_max = self.y.max()
     self = validation_split(self)
     self.shape = classify(self.y)
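The guard near the top of scan_prepare() (the diff is truncated before the raise) rejects the case where only one of x_val and y_val is given. A hedged sketch of that check as a standalone function; raising RuntimeError with the message is an assumption about how the constant is used.

TRAIN_VAL_RUNTIME_ERROR_MSG = """
If x_val or y_val is inputted, then the other must be inputted as well.
"""

def check_custom_split(x_val, y_val):
    # exactly one of the two provided is a user error
    if (x_val is not None and y_val is None) or \
       (x_val is None and y_val is not None):
        raise RuntimeError(TRAIN_VAL_RUNTIME_ERROR_MSG)
    # both provided means a custom split, neither means automatic
    return x_val is not None and y_val is not None

print(check_custom_split(None, None))    # False: automatic split
print(check_custom_split([[1]], [[0]]))  # True: custom split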
20 changes: 10 additions & 10 deletions talos/scan_run.py → talos/scan/scan_run.py
@@ -2,15 +2,15 @@

 from keras import backend as K
 
-from .utils.results import run_round_results, save_result
-from .parameters.round_params import round_params
-from .utils.results import create_header
-from .metrics.entropy import epoch_entropy
-from .model.ingest_model import ingest_model
-from .metrics.score_model import get_score
-from .utils.logging import write_log
-from .utils.results import result_todf, peak_epochs_todf
-from .reducers.reduce_run import reduce_run
+from ..utils.results import run_round_results, save_result
+from ..parameters.round_params import round_params
+from ..utils.results import create_header
+from ..metrics.entropy import epoch_entropy
+from ..model.ingest_model import ingest_model
+from ..metrics.score_model import get_score
+from ..utils.logging import write_log
+from ..utils.results import result_todf, peak_epochs_todf
+from ..reducers.reduce_run import reduce_run


def scan_run(self):
@@ -67,7 +67,7 @@ def rounds_run(self):
     if self.reduction_method is not None:
         if (self.round_counter + 1) % self.reduction_interval == 0:
             len_before_reduce = len(self.param_log)
-            self.param_log = reduce_run(self)
+            self = reduce_run(self)
             total_reduced = len_before_reduce - len(self.param_log)
             # update the progress bar
             self.pbar.update(total_reduced)
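One detail in the trigger above: round_counter is zero-based, so the (round_counter + 1) % reduction_interval check fires the first reduction after the reduction_interval-th completed round, not after the first. A quick sketch with invented numbers:

reduction_interval = 5
for round_counter in range(12):
    if (round_counter + 1) % reduction_interval == 0:
        print('reduce after round', round_counter + 1)
# -> reduce after round 5
# -> reduce after round 10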
