From ac26499b38074ce2b2a94b2703f60faa9311036c Mon Sep 17 00:00:00 2001 From: Jeremy Gore Date: Wed, 23 Dec 2015 17:54:01 -0500 Subject: [PATCH 1/3] Fix bug when batch_size > dataset size If the batch size is greater than the total size of the data set, no data will be yielded from _iterate_data. Consequently, no training will happen and you will get a ZeroDivisionError for "loss / count" in _batch_impl. Also, the loop only generated batches of exactly batch_size, so any remaining data at the end was never trained on. --- sknn/backend/lasagne/mlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 6cbabf4..08a02fb 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -252,7 +252,7 @@ def cast(array): if shuffle: numpy.random.shuffle(indices) - for start_idx in range(0, total_size - batch_size + 1, batch_size): + for start_idx in range(0, total_size, batch_size): excerpt = indices[start_idx:start_idx + batch_size] Xb, yb, wb = cast(X[excerpt]), cast(y[excerpt]), None if w is not None: From 783121459721ff7976a19d1e3cacdb6d8dc5b709 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 31 Dec 2015 17:47:37 +0100 Subject: [PATCH 2/3] Tests for pull request #158. 
--- sknn/tests/test_training.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/sknn/tests/test_training.py b/sknn/tests/test_training.py index 8adf7f3..d995b6e 100644 --- a/sknn/tests/test_training.py +++ b/sknn/tests/test_training.py @@ -53,6 +53,37 @@ def terminate(**_): assert_equals(self.counter, 1) +class TestBatchSize(unittest.TestCase): + + def setUp(self): + self.batch_count = 0 + self.nn = MLP( + layers=[L("Rectifier")], + learning_rate=0.001, n_iter=1, + callback={'on_batch_start': self.on_batch_start}) + + def on_batch_start(self, **args): + self.batch_count += 1 + + def test_BatchSizeLargerThanInput(self): + self.nn.batch_size = 32 + a_in, a_out = numpy.zeros((8,16)), numpy.ones((8,4)) + self.nn._fit(a_in, a_out) + assert_equals(1, self.batch_count) + + def test_BatchSizeSmallerThanInput(self): + self.nn.batch_size = 4 + a_in, a_out = numpy.ones((8,16)), numpy.zeros((8,4)) + self.nn._fit(a_in, a_out) + assert_equals(2, self.batch_count) + + def test_BatchSizeNonMultiple(self): + self.nn.batch_size = 4 + a_in, a_out = numpy.zeros((9,16)), numpy.ones((9,4)) + self.nn._fit(a_in, a_out) + assert_equals(3, self.batch_count) + + class TestCustomLogging(unittest.TestCase): def setUp(self): From 46b4cc34680b1d89877d62f0ddb73ceb079b22dd Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Fri, 1 Jan 2016 14:25:26 +0100 Subject: [PATCH 3/3] Attempt to fix small batch size support on Python 2.7. --- sknn/backend/lasagne/mlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 977306e..f7473d5 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import (absolute_import, unicode_literals, print_function) +from __future__ import (absolute_import, division, unicode_literals, print_function) __all__ = ['MultiLayerPerceptronBackend']