diff --git a/.gitignore b/.gitignore index aa521bd..0a08d74 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ var/ *.egg # Machine Learning -Lasagne/ nolearn/ scikit-learn/ diff --git a/sknn/__init__.py b/sknn/__init__.py index 0ec3d83..29be782 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -2,7 +2,7 @@ from __future__ import (absolute_import, unicode_literals, print_function) __author__ = 'alexjc, ssamot' -__version__ = '0.3' +__version__ = '0.4' import os diff --git a/sknn/backend/__init__.py b/sknn/backend/__init__.py index f574601..300dc0b 100644 --- a/sknn/backend/__init__.py +++ b/sknn/backend/__init__.py @@ -21,5 +21,5 @@ def __init__(self, _): # Automatically import the recommended backend if none was manually imported. def setup(): if name == None: - from . import pylearn2 - assert name is not None + from . import pylearn2 + assert name is not None, "No backend for module sknn was imported." diff --git a/sknn/backend/lasagne/__init__.py b/sknn/backend/lasagne/__init__.py new file mode 100644 index 0000000..e3beaf1 --- /dev/null +++ b/sknn/backend/lasagne/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) + +from ... import backend +from .mlp import MultiLayerPerceptronBackend + +# Register this implementation as the MLP backend. +backend.MultiLayerPerceptronBackend = MultiLayerPerceptronBackend +backend.name = 'lasagne' \ No newline at end of file diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py new file mode 100644 index 0000000..00adc71 --- /dev/null +++ b/sknn/backend/lasagne/mlp.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) + +__all__ = ['MultiLayerPerceptronBackend'] + +import os +import sys +import math +import time +import logging +import itertools + +log = logging.getLogger('sknn') + + +import numpy +import theano +import sklearn.base +import sklearn.pipeline +import sklearn.preprocessing +import sklearn.cross_validation + +import theano.tensor as T +import lasagne.layers +import lasagne.nonlinearities as nl + +from ..base import BaseBackend +from ...nn import Layer, Convolution, ansi + + +class MultiLayerPerceptronBackend(BaseBackend): + """ + Abstract base class for wrapping the multi-layer perceptron functionality + from Lasagne. + """ + + def __init__(self, spec): + super(MultiLayerPerceptronBackend, self).__init__(spec) + self.mlp = None + self.f = None + self.trainer = None + self.cost = None + + def _create_mlp_trainer(self, params): + # Aggregate all regularization parameters into common dictionaries. + layer_decay = {} + if self.regularize in ('L1', 'L2') or any(l.weight_decay for l in self.layers): + wd = self.weight_decay or 0.0001 + for l in self.layers: + layer_decay[l.name] = l.weight_decay or wd + assert len(layer_decay) == 0 or self.regularize in ('L1', 'L2', None) + + if len(layer_decay) > 0: + if self.regularize is None: + self.regularize = 'L2' + penalty = getattr(lasagne.regularization, self.regularize.lower()) + regularize = lasagne.regularization.apply_penalty + self.cost = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty) + for s, l in zip(self.layers, self.mlp)) + + cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'} + loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse') + assert loss_type in cost_functions,\ + "Loss type `%s` not supported by Lasagne backend." 
% loss_type + cost_fn = getattr(lasagne.objectives, cost_functions[loss_type]) + cost_eval = cost_fn(self.symbol_output, self.tensor_output).mean() + if self.cost is not None: + cost_eval = cost_eval * self.cost + return self._create_trainer(params, cost_eval) + + def _create_trainer(self, params, cost): + if self.learning_rule in ('sgd', 'adagrad', 'adadelta', 'rmsprop', 'adam'): + lr = getattr(lasagne.updates, self.learning_rule) + self._learning_rule = lr(cost, params, learning_rate=self.learning_rate) + elif self.learning_rule in ('momentum', 'nesterov'): + lasagne.updates.nesterov = lasagne.updates.nesterov_momentum + lr = getattr(lasagne.updates, self.learning_rule) + self._learning_rule = lr(cost, params, learning_rate=self.learning_rate, momentum=self.learning_momentum) + else: + raise NotImplementedError( + "Learning rule type `%s` is not supported." % self.learning_rule) + + return theano.function([self.tensor_input, self.tensor_output], cost, + updates=self._learning_rule, + allow_input_downcast=True) + + def _get_activation(self, l): + nonlinearities = {'Rectifier': nl.rectify, + 'Sigmoid': nl.sigmoid, + 'Tanh': nl.tanh, + 'Softmax': nl.softmax, + 'Linear': nl.linear} + + assert l.type in nonlinearities,\ + "Layer type `%s` is not supported for `%s`." % (l.type, l.name) + return nonlinearities[l.type] + + def _create_convolution_layer(self, name, layer, network): + self._check_layer(layer, + required=['channels', 'kernel_shape'], + optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) + + network = lasagne.layers.Conv2DLayer( + network, + num_filters=layer.channels, + filter_size=layer.kernel_shape, + stride=layer.kernel_stride, + pad=layer.border_mode, + nonlinearity=self._get_activation(layer)) + + if layer.pool_shape != (1, 1): + network = lasagne.layers.Pool2DLayer( + network, + pool_size=layer.pool_shape, + stride=layer.pool_shape) + + return network + + def _create_layer(self, name, layer, network): + dropout = layer.dropout or self.dropout_rate + if dropout is not None: + network = lasagne.layers.dropout(network, dropout) + + if isinstance(layer, Convolution): + return self._create_convolution_layer(name, layer, network) + + self._check_layer(layer, required=['units']) + return lasagne.layers.DenseLayer(network, + num_units=layer.units, + nonlinearity=self._get_activation(layer)) + + def _create_mlp(self, X): + self.tensor_input = T.tensor4('X') if self.is_convolution else T.matrix('X') + self.tensor_output = T.matrix('y') + + lasagne.random.get_rng().seed(self.random_state) + + shape = list(X.shape) + network = lasagne.layers.InputLayer([None]+shape[1:], self.tensor_input) + + # Create the layers one by one, connecting to previous. + self.mlp = [] + for i, layer in enumerate(self.layers): + network = self._create_layer(layer.name, layer, network) + self.mlp.append(network) + + log.info( + "Initializing neural network with %i layers, %i inputs and %i outputs.", + len(self.layers), self.unit_counts[0], self.layers[-1].units) + + for l, p, count in zip(self.layers, self.mlp, self.unit_counts[1:]): + space = p.output_shape + if isinstance(l, Convolution): + log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format( + ansi.BOLD, l.type, ansi.ENDC, + ansi.BOLD, repr(space[2:]), ansi.ENDC, + ansi.BOLD, space[1], ansi.ENDC)) + + # NOTE: Numbers don't match up exactly for pooling; one off. The logic is convoluted! 
+ # assert count == numpy.product(space.shape) * space.num_channels,\ + # "Mismatch in the calculated number of convolution layer outputs." + else: + log.debug(" - Dense: {}{: <10}{} Units: {}{: <4}{}".format( + ansi.BOLD, l.type, ansi.ENDC, ansi.BOLD, l.units, ansi.ENDC)) + assert count == space[1],\ + "Mismatch in the calculated number of dense layer outputs." + + if self.weights is not None: + l = min(len(self.weights), len(self.mlp)) + log.info("Reloading parameters for %i layer weights and biases." % (l,)) + self._array_to_mlp(self.weights, self.mlp) + self.weights = None + + log.debug("") + + self.symbol_output = lasagne.layers.get_output(network, deterministic=True) + self.f = theano.function([self.tensor_input], self.symbol_output, allow_input_downcast=True) + + def _initialize_impl(self, X, y=None): + if self.is_convolution: + X = numpy.transpose(X, (0, 3, 1, 2)) + + if self.mlp is None: + self._create_mlp(X) + + # Can do partial initialization when predicting, no trainer needed. + if y is None: + return + + if self.valid_size > 0.0: + assert self.valid_set is None, "Can't specify valid_size and valid_set together." + X, X_v, y, y_v = sklearn.cross_validation.train_test_split( + X, y, + test_size=self.valid_size, + random_state=self.random_state) + self.valid_set = X_v, y_v + + params = [] + for spec, mlp_layer in zip(self.layers, self.mlp): + if spec.frozen: continue + params.extend(mlp_layer.get_params()) + + self.trainer = self._create_mlp_trainer(params) + return X, y + + def _predict_impl(self, X): + if not self.is_initialized: + self._initialize_impl(X) + + if self.is_convolution: + X = numpy.transpose(X, (0, 3, 1, 2)) + return self.f(X) + + def _iterate_data(self, X, y, batch_size, shuffle=False): + def cast(array): + if type(array) != numpy.ndarray: + array = array.todense() + return array.astype(theano.config.floatX) + + total_size = X.shape[0] + indices = numpy.arange(total_size) + if shuffle: + numpy.random.shuffle(indices) + + for start_idx in range(0, total_size - batch_size + 1, batch_size): + excerpt = indices[start_idx:start_idx + batch_size] + Xb, yb = cast(X[excerpt]), cast(y[excerpt]) + if self.mutator is not None: + for x, _ in zip(Xb, yb): + self.mutator(x) + yield Xb, yb + + def _train_impl(self, X, y): + loss, batches = 0.0, 0 + for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True): + loss += self.trainer(Xb, yb) + batches += 1 + return loss / batches + + def _valid_impl(self, X, y): + loss, batches = 0.0, 0 + for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True): + ys = self.f(Xb) + loss += ((ys - yb) ** 2.0).mean() + batches += 1 + return loss / batches + + @property + def is_initialized(self): + """Check if the neural network was setup already. 
+ """ + return not (self.f is None) + + def _mlp_get_params(self, layer): + while not hasattr(layer, 'W') and not hasattr(layer, 'b'): + layer = layer.input_layer + return (layer.W.get_value(), layer.b.get_value()) + + def _mlp_to_array(self): + return [self._mlp_get_params(l) for l in self.mlp] + + def _array_to_mlp(self, array, nn): + for layer, (weights, biases) in zip(nn, array): + while not hasattr(layer, 'W') and not hasattr(layer, 'b'): + layer = layer.input_layer + + ws = tuple(layer.W.shape.eval()) + assert ws == weights.shape, "Layer weights shape mismatch: %r != %r" %\ + (ws, weights.shape) + layer.W.set_value(weights) + + bs = tuple(layer.b.shape.eval()) + assert bs == biases.shape, "Layer biases shape mismatch: %r != %r" %\ + (bs, biases.shape) + layer.b.set_value(biases) diff --git a/sknn/backend/pylearn2/__init__.py b/sknn/backend/pylearn2/__init__.py index cc18126..c707789 100644 --- a/sknn/backend/pylearn2/__init__.py +++ b/sknn/backend/pylearn2/__init__.py @@ -1,6 +1,18 @@ # -*- coding: utf-8 -*- from __future__ import (absolute_import, unicode_literals, print_function) +from ...nn import ansi + + +import warnings +warnings.warn(ansi.YELLOW + """\n +The PyLearn2 backend is deprecated; the next release will switch to Lasagne by default. + +Test the change using the following at the top of your script: +> from sknn.backend import lasagne +""" + ansi.ENDC, category=UserWarning) + + from ... import backend from .mlp import MultiLayerPerceptronBackend from .ae import AutoEncoderBackend diff --git a/sknn/backend/pylearn2/dataset.py b/sknn/backend/pylearn2/dataset.py index d3bbf4c..2c52a2e 100644 --- a/sknn/backend/pylearn2/dataset.py +++ b/sknn/backend/pylearn2/dataset.py @@ -103,7 +103,7 @@ def _mutate_fn(self, array): array = self._conv_fn(array) if self.mutator is not None: for i in range(array.shape[0]): - self.mutator(array[i]) + array[i] = self.mutator(array[i]) return array @functools.wraps(dataset.Dataset.iterator) @@ -160,17 +160,3 @@ def iterator(self, **kwargs): if self.mutator is not None: bit._convert[0] = self._conv_fn return bit - -""" -OriginalDatasetIterator = iteration.FiniteDatasetIterator - -def create_finite_iterator(*args, **kwargs): - print('create_finite_iterator', kwargs['convert']) - def conv_fn(x): - return x + 0.01 - kwargs['convert'] = [conv_fn, None] - return OriginalDatasetIterator(*args, **kwargs) - # convert=convert) - -datasets.dense_design_matrix.FiniteDatasetIterator = create_finite_iterator -""" \ No newline at end of file diff --git a/sknn/backend/pylearn2/mlp.py b/sknn/backend/pylearn2/mlp.py index 1a9b8dc..18f3b9f 100644 --- a/sknn/backend/pylearn2/mlp.py +++ b/sknn/backend/pylearn2/mlp.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (absolute_import, unicode_literals, print_function) -__all__ = ['Regressor', 'Classifier', 'Layer', 'Convolution'] +__all__ = ['MultiLayerPerceptronBackend'] import os import sys @@ -270,7 +270,10 @@ def _train_impl(self, X, y): X = self.ds.view_converter.topo_view_to_design_mat(X) self.ds.X, self.ds.y = X, y - self._train_layer(self.trainer, self.mlp, self.ds) + return self._train_layer(self.trainer, self.mlp, self.ds) + + def _valid_impl(self, X, y): + return self._valid_layer(self.mlp) @property def is_initialized(self): diff --git a/sknn/backend/pylearn2/nn.py b/sknn/backend/pylearn2/nn.py index 2c97e02..2542d5b 100644 --- a/sknn/backend/pylearn2/nn.py +++ b/sknn/backend/pylearn2/nn.py @@ -16,6 +16,7 @@ from .pywrap2 import learning_rule as lr, termination_criteria as tc from .dataset 
import DenseDesignMatrix, SparseDesignMatrix, FastVectorSpace +from ...nn import ansi from ..base import BaseBackend @@ -75,3 +76,18 @@ def _create_trainer(self, dataset, cost): learning_rate=self.learning_rate, termination_criterion=termination_criterion, monitoring_dataset=dataset) + + def _train_layer(self, trainer, layer, dataset): + # Bug in PyLearn2 that has some unicode channels, can't sort. + layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()} + + trainer.train(dataset=dataset) + return None + + def _valid_layer(self, layer): + layer.monitor.report_epoch() + layer.monitor() + + # 'objective' channel is only defined with validation set. + objective = layer.monitor.channels.get('objective', None) + return objective.val_shared.get_value() if objective else None diff --git a/sknn/mlp.py b/sknn/mlp.py index 5c68da7..bbe9e66 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -72,11 +72,11 @@ def _create_specs(self, X, y=None): assert l.kernel_shape is not None,\ "Layer `%s` requires parameter `kernel_shape` to be set." % (l.name,) if l.border_mode == 'valid': - res = (int((res[0] - l.kernel_shape[0]) / l.kernel_stride[0]) + 1, - int((res[1] - l.kernel_shape[1]) / l.kernel_stride[1]) + 1) + res = (int((res[0] - l.kernel_shape[0]) / l.pool_shape[0]) + 1, + int((res[1] - l.kernel_shape[1]) / l.pool_shape[1]) + 1) if l.border_mode == 'full': - res = (int((res[0] + l.kernel_shape[0]) / l.kernel_stride[0]) - 1, - int((res[1] + l.kernel_shape[1]) / l.kernel_stride[1]) - 1) + res = (int((res[0] + l.kernel_shape[0]) / l.pool_shape[0]) - 1, + int((res[1] + l.kernel_shape[1]) / l.pool_shape[1]) - 1) unit_count = numpy.prod(res) * l.channels else: unit_count = l.units @@ -90,6 +90,7 @@ def __getstate__(self): # this object to communicate between multiple processes. 
if self._backend is not None: d['weights'] = self._backend._mlp_to_array() + d['valid_set'] = None for k in [k for k in d.keys() if k.startswith('_')]: del d[k] @@ -116,6 +117,57 @@ def _reshape(self, X, y=None): if not self.is_convolution and X.ndim > 2: X = X.reshape((X.shape[0], numpy.product(X.shape[1:]))) return X, y + + def _train(self, X, y): + best_train_error, best_valid_error = float("inf"), float("inf") + stable = 0 + + for i in itertools.count(1): + start = time.time() + + best_train = False + avg_train_error = self._backend._train_impl(X, y) + if avg_train_error is not None: + if math.isnan(avg_train_error): + raise RuntimeError("Training diverged and returned NaN.") + + best_train_error = min(best_train_error, avg_train_error) + best_train = bool(avg_train_error < best_train_error * (1.0 + self.f_stable)) + + best_valid = False + avg_valid_error = None + if self.valid_set is not None: + avg_valid_error = self._backend._valid_impl(*self.valid_set) + if avg_valid_error is not None: + best_valid_error = min(best_valid_error, avg_valid_error) + best_valid = bool(avg_valid_error < best_valid_error * (1.0 + self.f_stable)) + + log.debug("\r{:>5} {}{}{} {}{}{} {:>5.1f}s".format( + i, + ansi.BLUE if best_train else "", + "{0:>10.3e}".format(float(avg_train_error)) if (avg_train_error is not None) else " N/A ", + ansi.ENDC if best_train else "", + + ansi.GREEN if best_valid else "", + "{:>10.3e}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ", + ansi.ENDC if best_valid else "", + + time.time() - start + )) + + if best_valid: + stable = 0 + else: + stable += 1 + + if stable >= self.n_stable: + log.debug("") + log.info("Early termination condition fired at %i iterations.", i) + break + if self.n_iter is not None and i >= self.n_iter: + log.debug("") + log.info("Terminating after specified %i total iterations.", i) + break def _fit(self, X, y): assert X.shape[0] == y.shape[0],\ @@ -141,16 +193,16 @@ def _fit(self, X, y): log.debug(" - Early termination after {} stable iterations.".format(self.n_stable)) if self.verbose: - log.debug("\nEpoch Validation Error Time" - "\n-----------------------------------") + log.debug("\nEpoch Training Error Validation Error Time" + "\n------------------------------------------------------------") try: - self._backend._train_impl(X, y) + self._train(X, y) except RuntimeError as e: log.error("\n{}{}{}\n\n{}\n".format( ansi.RED, "A runtime exception was caught during training. This likely occurred due to\n" - "a divergence of the SGD algorithm, and NaN floats were found by PyLearn2.", + "a divergence of the SGD algorithm, and NaN floats were found by the backend.", ansi.ENDC, "Try setting the `learning_rate` 10x lower to resolve this, for example:\n" " learning_rate=%f" % (self.learning_rate * 0.1))) @@ -220,6 +272,10 @@ def predict(self, X): """ return super(Regressor, self)._predict(X) + @property + def is_classifier(self): + return False + class Classifier(MultiLayerPerceptron, sklearn.base.ClassifierMixin): # Classifier compatible with sklearn that wraps various NN implementations. @@ -346,3 +402,7 @@ def predict(self, X): index += sz y = numpy.concatenate(ys, axis=1) return y + + @property + def is_classifier(self): + return True diff --git a/sknn/nn.py b/sknn/nn.py index b3d9ed7..667cc84 100644 --- a/sknn/nn.py +++ b/sknn/nn.py @@ -156,17 +156,18 @@ class Convolution(Layer): kernel_stride: tuple of ints, optional A two-dimensional tuple of integers that represents the steps taken by the kernel - through the input image. 
By default, this is set to the same as `pool_shape` but can - be customized separately even if pooling is turned off. + through the input image. By default, this is set to `(1,1)` and can be + customized separately to pooling. border_mode: str String indicating the way borders in the image should be processed, one of two options: * `valid` — Only pixels from input where the kernel fits within bounds are processed. * `full` — All pixels from input are processed, and the boundaries are zero-padded. + * `same` — The output resolution is set to the exact same as the input. The size of the output will depend on this mode, for `full` it's identical to the input, - but for `valid` it will be smaller or equal. + but for `valid` (default) it will be smaller or equal. pool_shape: tuple of ints, optional A two-dimensional tuple of integers corresponding to the pool size. This should be @@ -216,7 +217,7 @@ def __init__( if type not in ['Rectifier', 'Sigmoid', 'Tanh', 'Linear']: raise NotImplementedError("Convolution type `%s` is not implemented." % (type,)) - if border_mode not in ['valid', 'full']: + if border_mode not in ['valid', 'full', 'same']: raise NotImplementedError("Convolution border_mode `%s` is not implemented." % (border_mode,)) super(Convolution, self).__init__( @@ -231,7 +232,7 @@ def __init__( self.pool_shape = pool_shape or (1,1) self.pool_type = pool_type or ('max' if pool_shape else None) self.kernel_shape = kernel_shape - self.kernel_stride = kernel_stride or self.pool_shape + self.kernel_stride = kernel_stride or (1,1) self.border_mode = border_mode @@ -331,9 +332,11 @@ class NeuralNetwork(object): * ``mse`` — Use mean squared error, for learning to predict the mean of the data. * ``mae`` — Use mean average error, for learning to predict the median of the data. + * ``mcc`` — Use mean categorical cross-entropy, particularly for classifiers. - The default option is ``mse``, and ``mae`` can only be applied to layers of type - ``Linear`` or ``Gaussian`` and they must be used as the output layer. + The default option is ``mse`` for regressors and ``mcc`` for classifiers, but ``mae`` can + only be applied to layers of type ``Linear`` or ``Gaussian`` and they must be used as + the output layer (PyLearn2 only). mutator: callable, optional A function that takes a single training sample ``(X, y)`` at each epoch and returns @@ -380,7 +383,7 @@ def __init__( f_stable=0.001, valid_set=None, valid_size=0.0, - loss_type='mse', + loss_type=None, mutator=None, debug=False, verbose=None, @@ -413,7 +416,7 @@ def __init__( # Basic checking of the freeform string options. assert regularize in (None, 'L1', 'L2', 'dropout'),\ "Unknown type of regularization specified: %s." % regularize - assert loss_type in ('mse', 'mae'),\ + assert loss_type in ('mse', 'mae', 'mcc', None),\ "Unknown loss function type specified: %s." % loss_type self.weights = weights @@ -456,6 +459,11 @@ def is_convolution(self): """ return isinstance(self.layers[0], Convolution) + @property + def is_classifier(self): + """Is this neural network instanced as a classifier or regressor?""" + return False + def _create_logger(self): # If users have configured logging already, assume they know best. if len(log.handlers) > 0 or len(log.parent.handlers) > 0 or self.verbose is None: @@ -470,41 +478,3 @@ def _create_logger(self): hnd.setLevel(lvl) log.addHandler(hnd) log.setLevel(lvl) - - def _train_layer(self, trainer, layer, dataset): - # Bug in PyLearn2 that has some unicode channels, can't sort. 
-        layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()}
-        best_valid_error = float("inf")
-
-        for i in itertools.count(1):
-            start = time.time()
-            trainer.train(dataset=dataset)
-
-            layer.monitor.report_epoch()
-            layer.monitor()
-
-            objective = layer.monitor.channels.get('objective', None)
-            if objective:
-                avg_valid_error = objective.val_shared.get_value()
-                best_valid_error = min(best_valid_error, avg_valid_error)
-            else:
-                # 'objective' channel is only defined with validation set.
-                avg_valid_error = None
-
-            best_valid = bool(best_valid_error == avg_valid_error)
-            log.debug("{:>5} {}{}{} {:>5.1f}s".format(
-                i,
-                ansi.GREEN if best_valid else "",
-                "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ",
-                ansi.ENDC if best_valid else "",
-                time.time() - start
-                ))
-
-            if not trainer.continue_learning(layer):
-                log.debug("")
-                log.info("Early termination condition fired at %i iterations.", i)
-                break
-            if self.n_iter is not None and i >= self.n_iter:
-                log.debug("")
-                log.info("Terminating after specified %i total iterations.", i)
-                break
diff --git a/sknn/tests/test_classifier.py b/sknn/tests/test_classifier.py
index 71f9ec8..4a95880 100644
--- a/sknn/tests/test_classifier.py
+++ b/sknn/tests/test_classifier.py
@@ -12,7 +12,7 @@ class TestClassifierFunctionality(unittest.TestCase):
 
     def setUp(self):
-        self.nn = MLPC(layers=[L("Linear")], n_iter=1)
+        self.nn = MLPC(layers=[L("Softmax")], n_iter=1)
 
     def test_FitAutoInitialize(self):
         a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
@@ -69,7 +69,7 @@ def test_CalculateScore(self):
 class TestClassifierClone(TestClassifierFunctionality):
 
     def setUp(self):
-        cc = MLPC(layers=[L("Linear")], n_iter=1)
+        cc = MLPC(layers=[L("Sigmoid")], n_iter=1)
         self.nn = clone(cc)
 
     # This runs the same tests on the clone as for the original above.
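As an illustration of the Convolution changes documented in sknn/nn.py above (kernel_stride now defaults to (1,1) independently of pooling, and 'same' is accepted as a border_mode alongside 'valid' and 'full'), here is a minimal usage sketch. It is not part of the patch; the import path simply follows the existing test suite:

    from sknn.mlp import Regressor, Convolution, Layer

    # Hypothetical example in the style of sknn/tests/test_conv.py.
    nn = Regressor(layers=[
        Convolution("Rectifier", channels=4, kernel_shape=(3, 3),
                    border_mode='same',    # new option introduced by this patch
                    pool_shape=(2, 2)),    # kernel_stride defaults to (1,1), no longer tied to pool_shape
        Layer("Linear")],
        n_iter=1)
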
diff --git a/sknn/tests/test_conv.py b/sknn/tests/test_conv.py index 4be8365..26a6234 100644 --- a/sknn/tests/test_conv.py +++ b/sknn/tests/test_conv.py @@ -47,21 +47,21 @@ def test_KernelPooling(self): def test_VerticalKernel(self): self._run(MLPR( layers=[ - C("Rectifier", channels=4, kernel_shape=(16,1)), + C("Rectifier", channels=4, kernel_shape=(16,1), border_mode='valid'), L("Linear")], n_iter=1)) def test_VerticalVerbose(self): self._run(MLPR( layers=[ - C("Sigmoid", channels=4, kernel_shape=(16,1)), + C("Sigmoid", channels=4, kernel_shape=(16,1), border_mode='valid'), L("Linear")], n_iter=1, verbose=1, valid_size=0.1)) def test_HorizontalKernel(self): self._run(MLPR( layers=[ - C("Rectifier", channels=4, kernel_shape=(1,16)), + C("Rectifier", channels=4, kernel_shape=(1,16), border_mode='valid'), L("Linear")], n_iter=1)) @@ -103,7 +103,7 @@ class TestConvolutionSpecs(unittest.TestCase): def test_SmallSquareKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(3,3)), + C("Rectifier", channels=4, kernel_shape=(3,3), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,32,32,1)) @@ -121,7 +121,7 @@ def test_SquareKernelFull(self): def test_HorizontalKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=7, kernel_shape=(16,1)), + C("Rectifier", channels=7, kernel_shape=(16,1), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,16,16,1)) @@ -130,7 +130,7 @@ def test_HorizontalKernel(self): def test_VerticalKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(1,16)), + C("Rectifier", channels=4, kernel_shape=(1,16), border_mode='valid'), L("Linear", units=7)]) a_in = numpy.zeros((8,16,16,1)) @@ -139,7 +139,7 @@ def test_VerticalKernel(self): def test_SquareKernelPool(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(3,3), pool_shape=(2,2)), + C("Rectifier", channels=4, kernel_shape=(3,3), pool_shape=(2,2), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,32,32,1)) diff --git a/sknn/tests/test_deep.py b/sknn/tests/test_deep.py index 6f4f5f3..33410aa 100644 --- a/sknn/tests/test_deep.py +++ b/sknn/tests/test_deep.py @@ -23,7 +23,6 @@ def setUp(self): layers=[ L("Rectifier", units=16), L("Sigmoid", units=12), - L("Maxout", units=16, pieces=2), L("Tanh", units=4), L("Linear")], n_iter=1) @@ -45,7 +44,7 @@ def setUp(self): def run_EqualityTest(self, copier, asserter): # Only PyLearn2 supports Maxout. - extra = ["Maxout"] if sknn.backend.name != 'pylearn2' else [] + extra = ["Maxout"] if sknn.backend.name == 'pylearn2' else [] for activation in ["Rectifier", "Sigmoid", "Tanh"] + extra: nn1 = MLPR(layers=[L(activation, units=16, pieces=2), L("Linear", units=1)], random_state=1234) nn1._initialize(self.a_in, self.a_out) diff --git a/sknn/tests/test_output.py b/sknn/tests/test_output.py index 3ca4361..22fbc3b 100644 --- a/sknn/tests/test_output.py +++ b/sknn/tests/test_output.py @@ -3,12 +3,14 @@ import numpy +import sknn from sknn.mlp import Regressor as MLPR from sknn.mlp import Layer as L from . 
import test_linear +@unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') class TestGaussianOutput(test_linear.TestLinearNetwork): def setUp(self): @@ -26,6 +28,7 @@ class TestLossTypes(unittest.TestCase): def test_UnknownLossType(self): assert_raises(AssertionError, MLPR, layers=[], loss_type='unknown') + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_MeanAverageErrorLinear(self): nn = MLPR(layers=[L("Linear")], loss_type='mae', n_iter=1) self._run(nn) @@ -33,11 +36,18 @@ def test_MeanAverageErrorLinear(self): def test_MeanSquaredErrorLinear(self): nn = MLPR(layers=[L("Linear")], loss_type='mse', n_iter=1) self._run(nn) + + @unittest.skipIf(sknn.backend.name != 'lasagne', 'only lasagne') + def test_MeanSquaredErrorLinear(self): + nn = MLPR(layers=[L("Softmax")], loss_type='mcc', n_iter=1) + self._run(nn) + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_MeanAverageErrorGaussian(self): nn = MLPR(layers=[L("Gaussian")], loss_type='mae', n_iter=1) self._run(nn) + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_MeanSquaredErrorGaussian(self): nn = MLPR(layers=[L("Gaussian")], loss_type='mse', n_iter=1) self._run(nn) diff --git a/sknn/tests/test_rules.py b/sknn/tests/test_rules.py index d984885..2a3d59e 100644 --- a/sknn/tests/test_rules.py +++ b/sknn/tests/test_rules.py @@ -32,12 +32,13 @@ def _run(self, nn): class TestLearningRules(LoggingTestCase): def test_Default(self): - self._run(MLPR(layers=[L("Linear")], + activation = "Gaussian" if sknn.backend.name == 'pylearn2' else "Linear" + self._run(MLPR(layers=[L(activation)], learning_rule='sgd', n_iter=1)) def test_Momentum(self): - self._run(MLPR(layers=[L("Gaussian")], + self._run(MLPR(layers=[L("Linear")], learning_rule='momentum', n_iter=1)) @@ -104,6 +105,7 @@ def test_DropoutPerLayer(self): self._run(nn) assert_in('Using `dropout` for regularization.', self.output.getvalue()) + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_AutomaticDropout(self): nn = MLPR(layers=[L("Tanh", units=8, dropout=0.25), L("Linear")], n_iter=1) self._run(nn) diff --git a/sknn/tests/test_sklearn.py b/sknn/tests/test_sklearn.py index 402d908..9fcf105 100644 --- a/sknn/tests/test_sklearn.py +++ b/sknn/tests/test_sklearn.py @@ -14,6 +14,7 @@ class TestGridSearchRegressor(unittest.TestCase): __estimator__ = MLPR + __output__ = "Linear" def setUp(self): self.a_in = numpy.random.uniform(0.0, 1.0, (64,16)) @@ -21,33 +22,33 @@ def setUp(self): def test_GridGlobalParams(self): clf = GridSearchCV( - self.__estimator__(layers=[L("Linear")], n_iter=1), + self.__estimator__(layers=[L(self.__output__)], n_iter=1), param_grid={'learning_rate': [0.01, 0.001]}) clf.fit(self.a_in, self.a_out) def test_GridLayerParams(self): clf = GridSearchCV( - self.__estimator__(layers=[L("Rectifier", units=12), L("Linear")], n_iter=1), + self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1), param_grid={'hidden0__units': [4, 8, 12]}) clf.fit(self.a_in, self.a_out) def test_RandomGlobalParams(self): clf = RandomizedSearchCV( - self.__estimator__(layers=[L("Softmax")], n_iter=1), + self.__estimator__(layers=[L("Sigmoid")], n_iter=1), param_distributions={'learning_rate': uniform(0.001, 0.01)}, n_iter=2) clf.fit(self.a_in, self.a_out) def test_RandomLayerParams(self): clf = RandomizedSearchCV( - self.__estimator__(layers=[L("Softmax", units=12), L("Linear")], n_iter=1), + self.__estimator__(layers=[L("Rectifier", units=12), 
L(self.__output__)], n_iter=1), param_distributions={'hidden0__units': randint(4, 12)}, n_iter=2) clf.fit(self.a_in, self.a_out) def test_RandomMultipleJobs(self): clf = RandomizedSearchCV( - self.__estimator__(layers=[L("Softmax", units=12), L("Linear")], n_iter=1), + self.__estimator__(layers=[L("Sigmoid", units=12), L(self.__output__)], n_iter=1), param_distributions={'hidden0__units': randint(4, 12)}, n_iter=4, n_jobs=4) clf.fit(self.a_in, self.a_out) @@ -56,6 +57,7 @@ def test_RandomMultipleJobs(self): class TestGridSearchClassifier(TestGridSearchRegressor): __estimator__ = MLPC + __output__ = "Softmax" def setUp(self): self.a_in = numpy.random.uniform(0.0, 1.0, (64,16)) @@ -74,4 +76,4 @@ def test_Classifier(self): a_in = numpy.random.uniform(0.0, 1.0, (64,16)) a_out = numpy.random.randint(0, 4, (64,)) - cross_val_score(MLPC(layers=[L("Linear")], n_iter=1), a_in, a_out, cv=5) + cross_val_score(MLPC(layers=[L("Softmax")], n_iter=1), a_in, a_out, cv=5) diff --git a/sknn/tests/test_training.py b/sknn/tests/test_training.py index 6e0edd2..45c87f3 100644 --- a/sknn/tests/test_training.py +++ b/sknn/tests/test_training.py @@ -17,9 +17,10 @@ class TestTrainingProcedure(unittest.TestCase): def test_FitTerminateStable(self): a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4)) + activation = "Gaussian" if sknn.backend.name == "pylearn2" else "Linear" self.nn = MLP( - layers=[L("Gaussian")], learning_rate=0.001, - n_iter=None, n_stable=1, f_stable=0.1, + layers=[L(activation)], learning_rate=0.001, + n_iter=None, n_stable=1, f_stable=0.01, valid_set=(a_in, a_out)) self.nn._fit(a_in, a_out) @@ -27,7 +28,7 @@ def test_FitTerminateStable(self): def test_FitAutomaticValidation(self): a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4)) self.nn = MLP( - layers=[L("Gaussian")], learning_rate=0.001, + layers=[L("Linear")], learning_rate=0.001, n_iter=10, n_stable=1, f_stable=0.1, valid_size=0.25) @@ -73,15 +74,17 @@ def test_VerboseRegressor(self): nn = MLPR(layers=[L("Linear")], verbose=1, n_iter=1) a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4)) nn.fit(a_in, a_out) - assert_in("Epoch Validation Error Time", self.buf.getvalue()) - assert_in(" 1 N/A ", self.buf.getvalue()) + assert_in("Epoch Training Error Validation Error Time", self.buf.getvalue()) + assert_in(" 1 ", self.buf.getvalue()) + assert_in(" N/A ", self.buf.getvalue()) def test_VerboseClassifier(self): - nn = MLPC(layers=[L("Linear")], verbose=1, n_iter=1) + nn = MLPC(layers=[L("Softmax")], verbose=1, n_iter=1) a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,1), dtype=numpy.int32) nn.fit(a_in, a_out) - assert_in("Epoch Validation Error Time", self.buf.getvalue()) - assert_in(" 1 N/A ", self.buf.getvalue()) + assert_in("Epoch Training Error Validation Error Time", self.buf.getvalue()) + assert_in(" 1 ", self.buf.getvalue()) + assert_in(" N/A ", self.buf.getvalue()) def test_CaughtRuntimeError(self): nn = MLPC(layers=[L("Linear")], learning_rate=float("nan"), n_iter=1) diff --git a/sknn/tests/test_types.py b/sknn/tests/test_types.py index 8494122..77da0ea 100644 --- a/sknn/tests/test_types.py +++ b/sknn/tests/test_types.py @@ -43,8 +43,8 @@ def test_FitHybrid(self): def test_FitMutator(self): def mutate(x): - x -= 0.5 self.count += 1 + return x - 0.5 self.nn.mutator = mutate for t in SPARSE_TYPES:
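To close, a short end-to-end sketch of how the new Lasagne backend and the 'mcc' loss introduced by this patch could be exercised. Following the deprecation notice added to sknn/backend/pylearn2/__init__.py, the backend is chosen by importing it at the top of the script; the data here is random and purely illustrative:

    import numpy

    from sknn.backend import lasagne   # registers the Lasagne backend (sknn/backend/lasagne/__init__.py)
    from sknn.mlp import Classifier, Layer

    X = numpy.random.uniform(0.0, 1.0, (64, 16))
    y = numpy.random.randint(0, 4, (64,))

    # 'mcc' (categorical cross-entropy) is now the default loss for classifiers,
    # so passing it explicitly is optional.
    nn = Classifier(layers=[Layer("Rectifier", units=12), Layer("Softmax")],
                    loss_type='mcc', n_iter=5)
    nn.fit(X, y)

With verbose=1 this also prints the reworked per-epoch progress table ("Epoch  Training Error  Validation Error  Time") introduced by the changes to sknn/mlp.py.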