Skip to content

Commit

Permalink
Merge pull request #23 from autonomio/master
Browse files Browse the repository at this point in the history
updating dev from master
  • Loading branch information
mikkokotila committed Jul 24, 2018
2 parents 95d6e9f + 502ccbd commit 5c1f0d0
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 26 deletions.
34 changes: 19 additions & 15 deletions talos/model/normalizers.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
from keras.optimizers import SGD, Adam, Adadelta, Adagrad, Adamax, RMSprop, Nadam
from keras.optimizers import SGD, Adam, Adadelta, Adagrad, Adamax, RMSprop
from keras.optimizers import Nadam


def lr_normalizer(lr, optimizer):
    """Rescale a learning rate so it is comparable across optimizers.

    Assuming a default learning rate 1, rescales the learning rate
    such that learning rates amongst different optimizers are more or less
    equivalent.

    Parameters
    ----------
    lr : float
        The learning rate.
    optimizer : keras optimizer
        The optimizer. For example, Adagrad, Adam, RMSprop. It is compared
        with ``==`` against the optimizer names imported by this module.

    Returns
    -------
    float
        The rescaled learning rate. Adadelta, and any optimizer that is not
        matched below, gets `lr` back unchanged.
    """

    # Adadelta is the reference point: its learning rate is left as is.
    if optimizer == SGD or optimizer == Adagrad:
        lr = lr / 100.0
    elif optimizer == Adam or optimizer == RMSprop:
        lr = lr / 1000.0
    elif optimizer == Adamax or optimizer == Nadam:
        lr = lr / 500.0

    return lr
24 changes: 19 additions & 5 deletions talos/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@


class Reporting:
"""Output table of the Scan execution. Takes as an argument a string
of the file name of the execution set during the call to Scan()."""

def __init__(self, filename):

Expand All @@ -17,13 +19,15 @@ def __init__(self, filename):
self.plots = astetik

def _load_data(self):
"""Loads the saved csv data file from the execution."""

data = pd.read_csv(self.filename)
# cleans up the function/class name artifacts
for col in data.columns:
try:
if data[col][0].startswith('<'):
data[col] = data[col].str.replace('keras.optimizers.','').str.replace("'|\.",' ')
data[col] = data[col].str.replace('keras.optimizers.', '')\
.str.replace("'|\.", ' ')
data[col] = [i[1] for i in data[col].str.split()]
except AttributeError:
pass
Expand All @@ -34,22 +38,32 @@ def _load_data(self):
return data

def _min_and_maxes(self, mode):
    """Get per-column minimum and maximum over the ten best rounds.

    The rows of `self.data` are sorted by 'val_acc', the ten rows with the
    highest validation accuracy are kept, and the column-wise minimum and
    maximum of those rows are returned side by side.

    Parameters
    ----------
    mode : any
        Not used by this method.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of `self.data`, with the columns
        'min' and 'max'. The first nine entries are dropped.
    """

    # TODO: validation accuracy may not be the best metric to use
    # add option to implement other metrics

    # sort once and reuse the same ten rows for both aggregates
    top_rows = self.data.sort_values('val_acc').tail(10)

    mins = pd.DataFrame(top_rows.min())
    maxs = pd.DataFrame(top_rows.max())

    # tail(-9) skips the first nine rows of the merged table
    # NOTE(review): presumably these are non-parameter columns - confirm
    min_max = pd.merge(mins, maxs, left_index=True,
                       right_index=True).tail(-9)
    min_max.columns = ['min', 'max']

    return min_max

def _print_report(self):
    """Print the report. Depending on the notebook being used, the format
    may be distorted, in which case pandas can be used directly.

    Displays two tables taken from `self.data`: the ten rows with the
    highest 'val_acc' and the ten rows with the lowest 'val_acc'. Each
    table is indexed by 'val_acc' and shows the remaining columns from
    position 6 onwards.
    """

    # TODO: implement the alternative printing method

    display(HTML('<h3>highest</h3>'))
    display(self.data.sort_values('val_acc', ascending=False)
            .head(10).set_index('val_acc').iloc[:, 6:])

    display(HTML('<h3>lowest</h3>'))
    display(self.data.sort_values('val_acc', ascending=True)
            .head(10).set_index('val_acc').iloc[:, 6:])

    print('\n NOTE: you have more options in the Reporting object.\n')
89 changes: 83 additions & 6 deletions talos/scan.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from keras import backend as K
from tensorflow import get_default_graph, Session

from .utils.validation_split import validation_split

from .utils.results import run_round_results, save_result, result_todf, peak_epochs_todf
from .utils.results import run_round_results, save_result, result_todf
from .utils.results import peak_epochs_todf
from .utils.logging import write_log
from .utils.detector import prediction_type
from .reducers.sample_reducer import sample_reducer
from .reducers.spear_reducer import spear_reducer
from .utils.estimators import time_estimator
from .parameters.handling import param_format, param_space, param_index, round_params
from .parameters.handling import param_format, param_space, param_index
from .parameters.handling import round_params
from .parameters.permutations import param_grid
from .metrics.score_model import get_score
from .utils.pred_class import classify
Expand All @@ -18,6 +19,78 @@


class Scan:
"""Suite of operations for training and evaluating Keras neural networks.
Inputs train/dev data and a set of parameters as a dictionary. The name and
experiment number must also be chosen since they define the output
filenames. The model must also be specified of the form
my_model(x_train, y_train, x_val, y_val, params),
and the dictionary
d = {
'fcc_layer_1_N': [50, 100, 200],
'fcc_layer_1_act': ['relu', 'tanh'],
'fcc_layer_1_dropout': (0, 0.1, 5) # 5 points between 0 and 0.1
}
The dictionary is parsed for every run and only one entry per parameter
is fed into the neural network at a time.
Parameters
----------
x : ndarray
1d or 2d array consisting of the training data. `x` should have the
shape (m, n), where m is the number of training examples and n is the
number of features. Extra dimensions can be added to account for the
channels entry in convolutional neural networks.
y : ndarray
The labels corresponding to the training data. `y` should have the
shape (m, c) where c is the number of classes. A binary classification
problem will have c=1.
params : python dictionary
Lists all permutations of hyperparameters, a subset of which will be
selected at random for training and evaluation.
dataset_name : str
References the name of the experiment. The dataset_name and
experiment_no will be concatenated to produce the file name for the
results saved in the local directory.
experiment_no : str
Indexes the user's choice of experiment number.
model : keras_model
A Keras style model which compiles and fits the data, and returns
the history and compiled model.
val_split : float, optional
The proportion of the input `x` which is set aside as the
cross-validation data. (Default is 0.3).
shuffle : bool, optional
If True, shuffle the data in x and y before splitting into the train
and cross-validation datasets. (Default is True).
search_method : {None, 'random', 'linear', 'reverse'}
Determines the random sampling of the dictionary. `random` picks one
hyperparameter point at random and removes it from the list, then
samples again. `linear` starts from the start of the grid and moves
forward, and `reverse` starts at the end of the grid and moves
backwards.
reduction_method : {None, 'spear'}
Method for honing in on the optimal hyperparameter subspace. (Default
is None).
reduction_interval : int
The number of reduction method rounds that will be performed. (Default
is None).
reduction_window : int
The number of rounds of the reduction method before observing the
results. (Default is None).
grid_downsample : int
The fraction of `params` that will be tested (Default is None).
reduction_metric : {'val_acc'}
Metric used to tune the reductions.
talos_log_name : str
The name of the saved Talos log. (Default is 'talos.log').
debug : bool
Implements debugging feedback. (Default is False).
"""

global self

Expand All @@ -32,7 +105,7 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,
self.experiment_no = experiment_no
self.experiment_name = dataset_name + '_' + experiment_no

if debug == True:
if debug:
self.logfile = open('talos.debug.log', 'a')
else:
self.logfile_name = talos_log_name
Expand Down Expand Up @@ -74,7 +147,7 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,

self.result = []

if self.round_limit != None:
if self.round_limit is not None:
for i in range(self.round_limit):
self._null = self._run()
else:
Expand All @@ -88,12 +161,16 @@ def __init__(self, x, y, params, dataset_name, experiment_no, model,

def _run(self):

# determine the parameters for the particular execution
round_params(self)

# _model() function should return both the result from training
# and the model itself
try:
_hr_out, self.keras_model = self._model()
except TypeError:
print('The model needs to have Return in format "return history, model"')
print('The model needs to have Return in format '
' "return history, model"')

self.epoch_entropy.append(epoch_entropy((_hr_out)))
_hr_out = run_round_results(self, _hr_out)
Expand Down

0 comments on commit 5c1f0d0

Please sign in to comment.