Fixed bug in confusion matrix

akusok · Nov 2, 2015 · 74a2030 · 74a2030
1 parent 8643a48
commit 74a2030
Show file tree

Hide file tree

Showing 7 changed files with 123 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@
 
 # OSX stuff
 *.DS_Store
+*~
diff --git a/.gitignore~ b/.gitignore~
@@ -1,3 +1,16 @@
+# Compiled python modules
 *.pyc
+
+# Setuptools distribution folder
 /dist/
+/datasets_big/
+/benchmark/
+/try/
+
+# Python egg metadata, regenerated from source files by setuptools
 /*.egg-info
+/*.egg
+
+# OSX stuff
+*.DS_Store
+*~
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,3 +20,8 @@ All changes to 'hpelm' toolbox will be documented in this file.
 ## [0.6.21] - 03-09-2015
 ### New
 - Fixed a small GPU function name bug
+
+## [0.6.22] - 03-09-2015
+### New
+- Fixed confusion matrix bug
+
diff --git a/CHANGELOG.md~ b/CHANGELOG.md~
@@ -0,0 +1,22 @@
+# Change Log
+All changes to 'hpelm' toolbox will be documented in this file.
+
+## [0.6.13] - 18-08-2015
+### Fixed
+- Fixed a bug (typo) which caused HPELM to skip the last batch of data in training/predicting/etc.
+
+## [0.6.14] - 19-08-2015
+### New
+- Added asynchronous HDF5 file reading in HPELM.predict(). The program must be able to spawn a separate process.
+
+## [0.6.16] - 19-08-2015
+### Fixed
+- Asynchronous HDF5 file reading is now enabled by setting "async_io=True" in HPELM.predict(). On Windows, make sure you use the 'if __name__ == "__main__"' construction in your main code, or the async reader will not start.
+
+## [0.6.20] - 20-08-2015
+### New
+- Working asynchronous I/O for HDF5 files with multiprocessing! Use HPELM.train_async() and HPELM.predict_async().
+
+## [0.6.21] - 03-09-2015
+### New
+- Fixed a small GPU function name bug
diff --git a/dataset_tests/test_confusion.py b/dataset_tests/test_confusion.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+"""Check that confusion matrix bug will not repeat.
+
+Created on Sun Nov  1 23:17:48 2015
+
+@author: akusok
+"""
+
+
+from unittest import TestCase
+import numpy as np
+import os
+
+import hpelm
+
+
+def classification_conf(folder, nn, ntype="sigm", b=1):
+    folder = os.path.join(os.path.dirname(__file__), folder)
+    i = np.random.randint(0, 10)
+    print "using init number: ", i
+    # get file names
+    Xtr = np.load(os.path.join(folder, "xtrain_%d.npy" % (i + 1)))
+    Xts = np.load(os.path.join(folder, "xtest_%d.npy" % (i + 1)))
+    Ttr = np.load(os.path.join(folder, "ytrain_%d.npy" % (i + 1)))
+    Tts = np.load(os.path.join(folder, "ytest_%d.npy" % (i + 1)))
+    # train ELM
+    Bsize = Xtr.shape[0]/b + 1  # batch size larger than amount of data
+    elm = hpelm.ELM(Xtr.shape[1], Ttr.shape[1], batch = Bsize)
+    elm.add_neurons(nn, ntype)
+    elm.train(Xtr, Ttr, 'c')
+    Yts = elm.predict(Xts)
+    conf = elm.confusion(Yts, Tts)
+    return conf
+
+
+
+class TestAllDatasets(TestCase):
+
+    # how much worse our result can be
+    # tol = 1.10 means 10% worse
+    # tol = 0.90 means 10% better
+    tolerance = 1.10
+
+    def test_ConfusionSingleBatch_Iris_NonZero(self):
+        conf = classification_conf("Classification-Iris", 10, "sigm", b=1)
+        self.assertGreater(conf.sum(), 0)
+
+    def test_ConfusionMultiBatch_Iris_NonZero(self):
+        conf = classification_conf("Classification-Iris", 10, "sigm", b=10)
+        self.assertGreater(conf.sum(), 0)
+
+    def test_ConfusionUnitBatch_Iris_NonZero(self):
+        conf = classification_conf("Classification-Iris", 10, "sigm", b=100000)
+        self.assertGreater(conf.sum(), 0)
+
+    def test_ConfusionSingleBatch_Pima_NonZero(self):
+        conf = classification_conf("Classification-Pima_Indians_Diabetes", 10, "sigm", b=1)
+        self.assertGreater(conf.sum(), 0)
+
+    def test_ConfusionSingleBatch_Wine_NonZero(self):
+        conf = classification_conf("Classification-Wine", 10, "sigm", b=1)
+        self.assertGreater(conf.sum(), 0)
+
+    def test_ConfusionSingleBatch_Wisconsin_NonZero(self):
+        conf = classification_conf("Classification-Wisconsin_Breast_Cancer", 10, "sigm", b=1)
+        self.assertGreater(conf.sum(), 0)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hpelm/slfn.py b/hpelm/slfn.py
@@ -226,9 +226,7 @@ def confusion(self, Y1, T1):
         nn = np.sum([n1[1] for n1 in self.neurons])
         N = T.shape[0]
         batch = max(self.batch, nn)
-        nb = N / batch  # number of batches
-        if batch > N * nb:
-            nb += 1
+        nb = int(np.ceil(float(N) / self.batch))  # number of batches
 
         C = self.targets
         conf = np.zeros((C, C))

diff --git a/setup.py b/setup.py
@@ -13,7 +13,7 @@ def readme():
         return f.read()
 
 setup(name='hpelm',
-      version='0.6.21',
+      version='0.6.22',
       description='High-Performance implementation of an\
                    Extreme Learning Machine',
       long_description=readme(),