From eb79e254af359d49fca5f0fa06874ed4ff6a61c7 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Fri, 20 Nov 2015 19:20:15 +0100
Subject: [PATCH 1/4] Prototype of `callback` parameter to observe various stages of training.

---
 sknn/backend/lasagne/mlp.py |  2 ++
 sknn/mlp.py                 | 42 ++++++++++++++++++++++++++-----------
 sknn/nn.py                  |  2 ++
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py
index 1e1cfbe..2326535 100644
--- a/sknn/backend/lasagne/mlp.py
+++ b/sknn/backend/lasagne/mlp.py
@@ -243,12 +243,14 @@ def _batch_impl(self, X, y, processor, output, shuffle):
         progress, batches = 0, X.shape[0] / self.batch_size
         loss, count = 0.0, 0
         for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle):
+            self._do_callback('on_batch_start', locals())
             loss += processor(Xb, yb)
             count += 1
             while count / batches > progress / 60:
                 sys.stdout.write(output)
                 sys.stdout.flush()
                 progress += 1
+            self._do_callback('on_batch_finish', locals())
         sys.stdout.write('\r')
         return loss / count
 

diff --git a/sknn/mlp.py b/sknn/mlp.py
index 3473156..a17fbe0 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -118,6 +118,17 @@ def _reshape(self, X, y=None):
             X = X.reshape((X.shape[0], numpy.product(X.shape[1:])))
         return X, y
 
+    def _do_callback(self, event, variables):
+        if self.callback is None:
+            return
+
+        del variables['self']
+        if isinstance(self.callback, dict):
+            function = self.callback.get(event, None)
+            return function(**variables) if function else None
+        else:
+            return self.callback(event, **variables)
+
     def _train(self, X, y):
         assert self.n_iter or self.n_stable,\
             "Neither n_iter nor n_stable were specified; training would loop forever."
@@ -125,46 +136,51 @@ def _train(self, X, y):
         best_train_error, best_valid_error = float("inf"), float("inf")
         best_params = []
         n_stable = 0
+        self._do_callback('on_train_start', locals())
 
         for i in itertools.count(1):
-            start = time.time()
+            start_time = time.time()
+            self._do_callback('on_epoch_start', locals())
 
-            best_train = False
+            is_best_train = False
             avg_train_error = self._backend._train_impl(X, y)
             if avg_train_error is not None:
                 if math.isnan(avg_train_error):
                     raise RuntimeError("Training diverged and returned NaN.")
 
                 best_train_error = min(best_train_error, avg_train_error)
-                best_train = bool(avg_train_error < best_train_error * (1.0 + self.f_stable))
+                is_best_train = bool(avg_train_error < best_train_error * (1.0 + self.f_stable))
 
-            best_valid = False
+            is_best_valid = False
             avg_valid_error = None
             if self.valid_set is not None:
                 avg_valid_error = self._backend._valid_impl(*self.valid_set)
                 if avg_valid_error is not None:
                     best_valid_error = min(best_valid_error, avg_valid_error)
-                    best_valid = bool(avg_valid_error < best_valid_error * (1.0 + self.f_stable))
+                    is_best_valid = bool(avg_valid_error < best_valid_error * (1.0 + self.f_stable))
 
+            finish_time = time.time()
             log.debug("\r{:>5} {}{}{} {}{}{} {:>5.1f}s".format(
                 i,
-                ansi.BLUE if best_train else "",
+                ansi.BLUE if is_best_train else "",
                 "{0:>10.3e}".format(float(avg_train_error)) if (avg_train_error is not None) else "     N/A  ",
-                ansi.ENDC if best_train else "",
+                ansi.ENDC if is_best_train else "",
 
-                ansi.GREEN if best_valid else "",
+                ansi.GREEN if is_best_valid else "",
                 "{:>10.3e}".format(float(avg_valid_error)) if (avg_valid_error is not None) else "     N/A  ",
-                ansi.ENDC if best_valid else "",
+                ansi.ENDC if is_best_valid else "",
 
-                time.time() - start
+                finish_time - start_time
             ))
 
-            if best_valid or (self.valid_set is None and best_train):
+            if is_best_valid or (self.valid_set is None and best_train):
                 best_params = self._backend._mlp_to_array()
                 n_stable = 0
             else:
                 n_stable += 1
 
+            self._do_callback('on_epoch_finish', locals())
+
             if self.valid_set is not None and n_stable >= self.n_stable:
                 log.debug("")
                 log.info("Early termination condition fired at %i iterations.", i)
@@ -173,7 +189,8 @@ def _train(self, X, y):
                 log.debug("")
                 log.info("Terminating after specified %i total iterations.", i)
                 break
-
+
+        self._do_callback('on_train_finish', locals())
         self._backend._array_to_mlp(best_params, self._backend.mlp)
 
     def _fit(self, X, y):
@@ -362,6 +379,7 @@ def partial_fit(self, X, y, classes=None):
             self.label_binarizers = [LB() for _ in range(y.shape[1])]
             for lb, cls in zip(self.label_binarizers, classes):
                 lb.fit(cls)
+
         return self.fit(X, y)
 
     def predict_proba(self, X):

diff --git a/sknn/nn.py b/sknn/nn.py
index 39cec4b..5e9ea66 100644
--- a/sknn/nn.py
+++ b/sknn/nn.py
@@ -391,6 +391,7 @@ def __init__(
             mutator=None,
             debug=False,
             verbose=None,
+            callback=None,
             **params):
 
         assert warning is None,\
@@ -442,6 +443,7 @@ def __init__(
         self.mutator = mutator
         self.debug = debug
         self.verbose = verbose
+        self.callback = callback
         self._backend = None
 
         self._create_logger()

From d4c905d56c8e28b40770d99ca45f36aa4abb3b51 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Fri, 20 Nov 2015 19:41:02 +0100
Subject: [PATCH 2/4] Tests for the new callback function.

Variables are currently passed as-is from the inner loop; there are no
tests for those as they may change.
---
 sknn/mlp.py                 |  2 +-
 sknn/tests/test_callback.py | 65 +++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 sknn/tests/test_callback.py

diff --git a/sknn/mlp.py b/sknn/mlp.py
index a17fbe0..d840cbd 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -173,7 +173,7 @@ def _train(self, X, y):
                 finish_time - start_time
             ))
 
-            if is_best_valid or (self.valid_set is None and best_train):
+            if is_best_valid or (self.valid_set is None and is_best_train):
                 best_params = self._backend._mlp_to_array()
                 n_stable = 0
             else:

diff --git a/sknn/tests/test_callback.py b/sknn/tests/test_callback.py
new file mode 100644
index 0000000..99d206f
--- /dev/null
+++ b/sknn/tests/test_callback.py
@@ -0,0 +1,65 @@
+import unittest
+from nose.tools import (assert_in, assert_raises, assert_equals)
+
+import collections
+import numpy
+from sknn.mlp import MultiLayerPerceptron as MLP, Layer as L
+
+import sknn.mlp
+
+
+class TestSingleCallback(unittest.TestCase):
+
+    def setUp(self):
+        self.data = collections.defaultdict(list)
+
+    def _callback(self, event, **variables):
+        self.data[event].append(variables)
+
+    def test_TrainingCallbacks(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=4, callback=self._callback)
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data['on_train_start']), 1)
+        assert_equals(len(self.data['on_train_finish']), 1)
+
+    def test_EpochCallbacks(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=4, callback=self._callback)
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data['on_epoch_start']), 4)
+        assert_equals(len(self.data['on_epoch_finish']), 4)
+
+    def test_BatchCallbacks(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=1, batch_size=4, callback=self._callback)
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data['on_batch_start']), 2)
+        assert_equals(len(self.data['on_batch_finish']), 2)
+
+
+class TestSpecificCallback(unittest.TestCase):
+
+    def setUp(self):
+        self.data = []
+
+    def _callback(self, **variables):
+        self.data.append(variables)
+
+    def test_TrainingCallback(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=4, callback={'on_train_start': self._callback})
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data), 1)
+
+    def test_EpochCallback(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=4, callback={'on_epoch_start': self._callback})
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data), 4)
+
+    def test_BatchCallbacks(self):
+        a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
+        nn = MLP(layers=[L("Linear")], n_iter=1, batch_size=4, callback={'on_batch_start': self._callback})
+        nn._fit(a_in, a_out)
+        assert_equals(len(self.data), 2)

From 63aae4cb8e09f001098d2337e394136d4c7e231b Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Fri, 20 Nov 2015 20:22:40 +0100
Subject: [PATCH 3/4] Inline documentation for the callback.

---
 sknn/nn.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/sknn/nn.py b/sknn/nn.py
index 5e9ea66..1d78257 100644
--- a/sknn/nn.py
+++ b/sknn/nn.py
@@ -346,6 +346,23 @@ class NeuralNetwork(object):
         A function that takes a single training sample ``(X, y)`` at each epoch and returns
         a modified version. This is useful for dataset augmentation, e.g. mirroring input
         images or jittering.
+
+    callback: callable or dict, optional
+        An observer mechanism that exposes information about the inner training loop. This is
+        either a single function that is called as ``cb(event, **variables)``, or a
+        dictionary of functions indexed by an ``event`` string, each called as ``cb(**variables)``.
+
+        There are multiple events sent from the inner training loop:
+
+        * ``on_train_start`` — Called when the main training function is entered.
+        * ``on_epoch_start`` — Called the first thing when a new iteration starts.
+        * ``on_batch_start`` — Called before an individual batch is processed.
+        * ``on_batch_finish`` — Called after that individual batch is processed.
+        * ``on_epoch_finish`` — Called the very last thing when the iteration is done.
+        * ``on_train_finish`` — Called just before the training function exits.
+
+        For each function, the ``variables`` dictionary passed contains all local variables within
+        the training implementation.
 
     debug: bool, optional
         Should the underlying training algorithms perform validation on the data
@@ -389,9 +406,9 @@ def __init__(
             valid_size=0.0,
             loss_type=None,
             mutator=None,
+            callback=None,
             debug=False,
             verbose=None,
-            callback=None,
             **params):
 
         assert warning is None,\

From 3d527d35dbca6a86d5e0d396b1f34c99c504f672 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Fri, 20 Nov 2015 20:48:16 +0100
Subject: [PATCH 4/4] Removed the mutator as it can be replaced entirely with the callbacks.

Ported tests accordingly. Note, however, that repeatedly mutating data in
batches may have unintended side-effects.
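
For example, a former per-sample mutator can be ported to a batch-level
callback along these lines (a sketch only: the ``jitter`` helper is
hypothetical, and it assumes ``numpy`` is imported and ``nn`` is an
already-constructed estimator):

    def jitter(Xb=None, **_):
        # Mutate the batch in-place; the callback receives the local
        # variables of the inner training loop, including Xb and yb.
        Xb += numpy.random.uniform(-0.1, 0.1, Xb.shape)

    nn.callback = {'on_batch_start': jitter}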
---
 sknn/backend/lasagne/mlp.py | 10 ++++------
 sknn/nn.py                  |  7 -------
 sknn/tests/test_data.py     |  8 ++++----
 sknn/tests/test_types.py    |  6 +++---
 4 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py
index 2326535..7d90a9b 100644
--- a/sknn/backend/lasagne/mlp.py
+++ b/sknn/backend/lasagne/mlp.py
@@ -234,12 +234,10 @@ def cast(array):
         for start_idx in range(0, total_size - batch_size + 1, batch_size):
             excerpt = indices[start_idx:start_idx + batch_size]
             Xb, yb = cast(X[excerpt]), cast(y[excerpt])
-            if self.mutator is not None:
-                for x, _ in zip(Xb, yb):
-                    self.mutator(x)
+
             yield Xb, yb
 
-    def _batch_impl(self, X, y, processor, output, shuffle):
+    def _batch_impl(self, X, y, processor, mode, output, shuffle):
         progress, batches = 0, X.shape[0] / self.batch_size
         loss, count = 0.0, 0
         for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle):
@@ -255,10 +253,10 @@ def _batch_impl(self, X, y, processor, output, shuffle):
         return loss / count
 
     def _train_impl(self, X, y):
-        return self._batch_impl(X, y, self.trainer, output='.', shuffle=True)
+        return self._batch_impl(X, y, self.trainer, mode='train', output='.', shuffle=True)
 
     def _valid_impl(self, X, y):
-        return self._batch_impl(X, y, self.validator, output=' ', shuffle=False)
+        return self._batch_impl(X, y, self.validator, mode='valid', output=' ', shuffle=False)
 
     @property
     def is_initialized(self):

diff --git a/sknn/nn.py b/sknn/nn.py
index 1d78257..015cd41 100644
--- a/sknn/nn.py
+++ b/sknn/nn.py
@@ -342,11 +342,6 @@ class NeuralNetwork(object):
         only be applied to layers of type ``Linear`` or ``Gaussian`` and they must be used
         as the output layer (PyLearn2 only).
 
-    mutator: callable, optional
-        A function that takes a single training sample ``(X, y)`` at each epoch and returns
-        a modified version. This is useful for dataset augmentation, e.g. mirroring input
-        images or jittering.
-
     callback: callable or dict, optional
         An observer mechanism that exposes information about the inner training loop. This is
         either a single function that is called as ``cb(event, **variables)``, or a
@@ -405,7 +400,6 @@ def __init__(
             valid_set=None,
             valid_size=0.0,
             loss_type=None,
-            mutator=None,
             callback=None,
             debug=False,
             verbose=None,
@@ -457,7 +451,6 @@ def __init__(
         self.valid_set = valid_set
         self.valid_size = valid_size
         self.loss_type = loss_type
-        self.mutator = mutator
         self.debug = debug
         self.verbose = verbose
         self.callback = callback

diff --git a/sknn/tests/test_data.py b/sknn/tests/test_data.py
index 26de705..b132f6e 100644
--- a/sknn/tests/test_data.py
+++ b/sknn/tests/test_data.py
@@ -17,12 +17,12 @@ def setUp(self):
         self.nn = MLPR(
             layers=[L("Linear")],
             n_iter=1,
-            batch_size=2,
-            mutator=self._mutate_fn)
+            batch_size=1,
+            callback={'on_batch_start': self._mutate_fn})
 
-    def _mutate_fn(self, sample):
+    def _mutate_fn(self, Xb, **_):
         self.called += 1
-        sample[sample == 0.0] = self.value
+        Xb[Xb == 0.0] = self.value
 
     def test_TestCalledOK(self):
         a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))

diff --git a/sknn/tests/test_types.py b/sknn/tests/test_types.py
index 77da0ea..6b38f86 100644
--- a/sknn/tests/test_types.py
+++ b/sknn/tests/test_types.py
@@ -42,10 +42,10 @@ def test_FitHybrid(self):
         self.nn._fit(X, y)
 
     def test_FitMutator(self):
-        def mutate(x):
+        def mutate(Xb, **_):
             self.count += 1
-            return x - 0.5
-        self.nn.mutator = mutate
+            Xb -= 0.5
+        self.nn.callback = {'on_batch_start': mutate}
 
         for t in SPARSE_TYPES:
             sparse_matrix = getattr(scipy.sparse, t)