
Better error checking when creating layers and when the NN is constructed.

Updates #6.
alexjc committed Apr 27, 2015
1 parent 99f9155 commit bea3bba
Showing 3 changed files with 110 additions and 25 deletions.
69 changes: 51 additions & 18 deletions sknn/mlp.py
@@ -108,6 +108,10 @@ def __init__(
assert nop is None,\
"Specify layer parameters as keyword arguments, not positional arguments."

if type not in ['Rectifier', 'Sigmoid', 'Tanh', 'Maxout', 'Convolution',
'Linear', 'Softmax', 'Gaussian']:
raise NotImplementedError("Layer type `%s` is not implemented." % type)

self.name = name
self.type = type
self.units = units
@@ -121,6 +125,9 @@ def __init__(
def __eq__(self, other):
return self.__dict__ == other.__dict__

def __repr__(self):
params = ", ".join(["%s=%r" % (k, v) for k, v in self.__dict__.items() if v is not None])
return "<sknn.mlp.Layer %s: %s>" % (self.type, params)


class BaseMLP(sklearn.base.BaseEstimator):
@@ -130,18 +137,18 @@ class BaseMLP(sklearn.base.BaseEstimator):
Parameters
----------
layers : list of tuples
An iterable sequence of each layer each as a tuple: first with an
activation type and then optional parameters such as the number of
units.
layers : list[Layer]
An iterable sequence of layers, each given as a Layer instance that contains
its type, optional name, and any parameters required.
* For hidden layers, you can use the following layer types:
``Rectifier``, ``Sigmoid``, ``Tanh``, ``Maxout`` or ``Convolution``.
* For output layers, you can use the following layer types:
``Linear``, ``Softmax`` or ``Gaussian``.
You must specify at least an output layer, so the last tuple in your
layers parameter should contain ``Linear`` (for example).
You must specify exactly one output layer type, so the last entry in your
``layers`` list should contain ``Linear`` for regression, or ``Softmax`` for
classification (recommended).
random_state : int
Seed for the initialization of the neural network parameters (e.g.
@@ -275,17 +282,19 @@ def _setup(self):
def _create_trainer(self, dataset):
sgd.log.setLevel(logging.WARNING)

if self.cost == "Dropout":
probs, scales = {}, {}
for l in [l for l in self.layers if l.dropout is not None]:
incl = 1.0 - l.dropout
probs[l.name] = incl
scales[l.name] = 1.0 / incl
# Aggregate all the dropout parameters into shared dictionaries.
probs, scales = {}, {}
for l in [l for l in self.layers if l.dropout is not None]:
incl = 1.0 - l.dropout
probs[l.name] = incl
scales[l.name] = 1.0 / incl

if self.cost == "Dropout" or len(probs) > 0:
# Use the globally specified dropout rate when there are no layer-specific ones.
incl = 1.0 - self.dropout
default_prob, default_scale = incl, 1.0 / incl

# Pass all the parameters to pylearn2 as a custom cost function.
self.cost = Dropout(
default_input_include_prob=default_prob,
default_input_scale=default_scale,
@@ -308,33 +317,54 @@ def _create_trainer(self, dataset):
termination_criterion=termination_criterion,
monitoring_dataset=dataset)
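# Illustrative sketch of the dropout mapping built above (names and rates
# here are made up): a layer with dropout=0.25 is kept with probability 0.75
# and its activations are rescaled by 1/0.75; layers without their own rate
# fall back to the global `self.dropout` via default_prob and default_scale.
example = {'hidden0': 0.25}
probs = {n: 1.0 - r for n, r in example.items()}            # {'hidden0': 0.75}
scales = {n: 1.0 / (1.0 - r) for n, r in example.items()}   # {'hidden0': 1.33...}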

def _check_layer(self, layer, required, optional=[]):
required.extend(['name', 'type'])
for r in required:
if getattr(layer, r) is None:
raise ValueError("Layer type `%s` requires parameter `%s`."\
% (layer.type, r))

optional.extend(['dropout'])
for a in layer.__dict__:
if a in required+optional:
continue
if getattr(layer, a) is not None:
log.warning("Parameter `%s` is unused for layer type `%s`."\
% (a, layer.type))
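# For example (mirroring sknn/tests/test_deep.py below): a `Maxout` layer
# declared without `pieces` causes _initialize to raise
#   ValueError: Layer type `Maxout` requires parameter `pieces`.
# while an unused argument such as `kernel_shape` on a `Linear` layer only
# logs a warning: Parameter `kernel_shape` is unused for layer type `Linear`.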

def _create_hidden_layer(self, name, layer, irange=0.1):
if layer.type == "Rectifier":
self._check_layer(layer, ['units'])
return mlp.RectifiedLinear(
layer_name=name,
dim=layer.units,
irange=irange)

if layer.type == "Sigmoid":
self._check_layer(layer, ['units'])
return mlp.Sigmoid(
layer_name=name,
dim=layer.units,
irange=irange)

if layer.type == "Tanh":
self._check_layer(layer, ['units'])
return mlp.Tanh(
layer_name=name,
dim=layer.units,
irange=irange)

if layer.type == "Maxout":
self._check_layer(layer, ['units', 'pieces'])
return maxout.Maxout(
layer_name=name,
num_units=layer.units,
num_pieces=layer.pieces,
irange=irange)

if layer.type == "Convolution":
self._check_layer(layer, ['channels', 'kernel_shape'],
['pool_shape', 'pool_type'])
return mlp.ConvRectifiedLinear(
layer_name=name,
output_channels=layer.channels,
@@ -345,37 +375,40 @@ def _create_hidden_layer(self, name, layer, irange=0.1):
irange=irange)

raise NotImplementedError(
"Hidden layer type `%s` is not implemented." % layer.type)
"Hidden layer type `%s` is not supported." % layer.type)

def _create_output_layer(self, layer):
fan_in = self.unit_counts[-2]
fan_out = self.unit_counts[-1]
lim = numpy.sqrt(6) / (numpy.sqrt(fan_in + fan_out))

if layer.type == "Linear":
self._check_layer(layer, ['units'])
return mlp.Linear(
dim=layer.units,
layer_name=layer.name,
dim=layer.units,
irange=lim)

if layer.type == "Gaussian":
self._check_layer(layer, ['units'])
return mlp.LinearGaussian(
layer_name=layer.name,
init_beta=0.1,
min_beta=0.001,
max_beta=1000,
beta_lr_scale=None,
dim=layer.units,
layer_name=layer.name,
irange=lim)

if layer.type == "Softmax":
self._check_layer(layer, ['units'])
return mlp.Softmax(
layer_name=layer.name,
n_classes=layer.units,
irange=lim)

raise NotImplementedError(
"Output layer type `%s` is not implemented." % layer.type)
"Output layer type `%s` is not supported." % layer.type)

def _create_mlp(self):
# Create the layers one by one, connecting to previous.
@@ -415,8 +448,8 @@ def _create_mlp(self):

def _create_matrix_input(self, X, y):
if self.is_convolution:
# b01c arrangement of data
# http://benanne.github.io/2014/04/03/faster-convolutions-in-theano.html for more
# Using `b01c` arrangement of data, see this for details:
# http://benanne.github.io/2014/04/03/faster-convolutions-in-theano.html
# input: (batch size, channels, rows, columns)
# filters: (number of filters, channels, rows, columns)
input_space = Conv2DSpace(shape=X.shape[1:3], num_channels=X.shape[-1])
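# Illustrative shapes for the call above: a batch of 8 colour images of
# 32x24 pixels arrives channels-last, e.g. X.shape == (8, 32, 24, 3), so
# X.shape[1:3] -> (32, 24) becomes the Conv2DSpace shape and
# X.shape[-1] -> 3 the number of channels.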
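For orientation, a minimal usage sketch of the public API exercised by the tests that follow; the layer sizes, iteration count, and input array are placeholders:

import numpy
from sknn.mlp import MultiLayerPerceptronRegressor as MLPR
from sknn.mlp import Layer as L

a_in = numpy.zeros((8, 16))
nn = MLPR(layers=[L("Rectifier", units=32), L("Linear")], n_iter=1)
nn.fit(a_in, a_in)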
40 changes: 37 additions & 3 deletions sknn/tests/test_deep.py
@@ -1,11 +1,15 @@
import unittest
from nose.tools import (assert_false, assert_raises, assert_true, assert_equal)
from nose.tools import (assert_false, assert_raises, assert_true,
assert_equal, assert_in)

import io
import pickle
import numpy
import logging

from sklearn.base import clone

import sknn
from sknn.mlp import MultiLayerPerceptronRegressor as MLPR
from sknn.mlp import Layer as L

@@ -24,13 +28,16 @@ def setUp(self):
L("Linear")],
n_iter=1)

def test_UnknownLayer(self):
assert_raises(NotImplementedError, L, "Unknown")

def test_UnknownOuputActivation(self):
nn = MLPR(layers=[L("Unknown", units=16)])
nn = MLPR(layers=[L("Rectifier", units=16)])
a_in = numpy.zeros((8,16))
assert_raises(NotImplementedError, nn.fit, a_in, a_in)

def test_UnknownHiddenActivation(self):
nn = MLPR(layers=[L("Unknown", units=8), L("Linear")])
nn = MLPR(layers=[L("Gaussian", units=8), L("Linear")])
a_in = numpy.zeros((8,16))
assert_raises(NotImplementedError, nn.fit, a_in, a_in)

@@ -79,3 +86,30 @@ def serialize(nn, _):
buf.seek(0)
return pickle.load(buf)
self.run_EqualityTest(serialize, assert_true)


class TestActivations(unittest.TestCase):

def setUp(self):
self.buf = io.StringIO()
self.hnd = logging.StreamHandler(self.buf)
logging.getLogger('sknn').addHandler(self.hnd)
logging.getLogger().setLevel(logging.WARNING)

def tearDown(self):
assert_equal('', self.buf.getvalue())
sknn.mlp.log.removeHandler(self.hnd)

def test_MissingParameterException(self):
nn = MLPR(layers=[L("Maxout", units=32), L("Linear")])
a_in = numpy.zeros((8,16))
assert_raises(ValueError, nn._initialize, a_in, a_in)

def test_UnusedParameterWarning(self):
nn = MLPR(layers=[L("Linear", kernel_shape=(1,1))], n_iter=1)
a_in = numpy.zeros((8,16))
nn._initialize(a_in, a_in)

assert_in('Parameter `kernel_shape` is unused', self.buf.getvalue())
self.buf = io.StringIO() # clear

26 changes: 22 additions & 4 deletions sknn/tests/test_rules.py
@@ -1,26 +1,39 @@
import unittest
from nose.tools import (assert_is_not_none, assert_false, assert_raises, assert_equal)
from nose.tools import (assert_true, assert_raises, assert_equal)

import io
import numpy
import logging

import sknn
from sknn.mlp import MultiLayerPerceptronRegressor as MLPR
from sknn.mlp import Layer as L


class TestLearningRules(unittest.TestCase):

def setUp(self):
self.buf = io.StringIO()
self.hnd = logging.StreamHandler(self.buf)
logging.getLogger('sknn').addHandler(self.hnd)
logging.getLogger().setLevel(logging.WARNING)

def tearDown(self):
assert_equal('', self.buf.getvalue())
sknn.mlp.log.removeHandler(self.hnd)

def test_Default(self):
self._run(MLPR(layers=[L("Linear")],
learning_rule='sgd',
n_iter=1))

def test_Momentum(self):
self._run(MLPR(layers=[L("Linear")],
self._run(MLPR(layers=[L("Gaussian")],
learning_rule='momentum',
n_iter=1))

def test_Nesterov(self):
self._run(MLPR(layers=[L("Linear")],
self._run(MLPR(layers=[L("Softmax")],
learning_rule='nesterov',
n_iter=1))

@@ -53,10 +66,15 @@ def test_DropoutAsFloat(self):
n_iter=1))

def test_DropoutPerLayer(self):
self._run(MLPR(layers=[L("Tanh", units=8, dropout=0.25), L("Linear")],
self._run(MLPR(layers=[L("Maxout", units=8, pieces=2, dropout=0.25), L("Linear")],
dropout=True,
n_iter=1))

def test_AutomaticDropout(self):
nn = MLPR(layers=[L("Tanh", units=8, dropout=0.25), L("Linear")], n_iter=1)
self._run(nn)
assert_true(nn.cost is not None)

def test_UnknownRule(self):
assert_raises(NotImplementedError, MLPR,
layers=[], learning_rule='unknown')