From 702d15dd48be58f5c2ec434737b08a673e1651fb Mon Sep 17 00:00:00 2001
From: hmendozap <heist.mendoza@gmail.com>
Date: Wed, 2 Dec 2015 16:45:04 +0100
Subject: [PATCH] Fix binary classification score and add a unit test for it

---
 autosklearn/automl.py            |  6 +++++-
 test/automl/test_start_automl.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 6103a11e95..7f358ce3e3 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -606,7 +606,11 @@ def _load_models(self):
             seed)
 
     def score(self, X, y):
+        # Binary task: keep only column 1 of the prediction, shaped (n, 1).
+        # TODO(review): should this reshape live here or in calculate_score?
         prediction = self.predict(X)
+        if self._task == BINARY_CLASSIFICATION:
+            prediction = prediction[:, 1].reshape((-1, 1))
         return calculate_score(y, prediction, self._task,
                                self._metric, self._label_num,
                                logger=self._logger)
@@ -687,4 +691,4 @@ def _delete_output_directories(self):
                     pass
                 else:
                     print("Could not delete tmp dir: %s" %
-                          self._tmp_dir)
\ No newline at end of file
+                          self._tmp_dir)
diff --git a/test/automl/test_start_automl.py b/test/automl/test_start_automl.py
index ada9a21faf..cbcba020b6 100644
--- a/test/automl/test_start_automl.py
+++ b/test/automl/test_start_automl.py
@@ -40,6 +40,38 @@ def test_fit(self):
         del automl
         self._tearDown(output)
 
+    def test_binary_score(self):
+        """
+        Check that score() works for a binary classification task, where
+        only column 1 of the prediction matrix is handed to the metric.
+        """
+        if self.travis:
+            self.skipTest('This test does currently not run on travis-ci. '
+                          'Make sure it runs locally on your machine!')
+
+        output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
+        self._setUp(output)
+
+        # A random dummy dataset is used because there is no efficient way
+        # to load a real binary dataset without changing files in
+        # paramsklearn or the automl class.
+
+        X_train = np.random.rand(100, 20)
+        Y_train = np.random.randint(0, 2, 100)
+
+        automl = autosklearn.automl.AutoML(output, output, 30, 15)
+        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
+        self.assertEqual(automl._task, BINARY_CLASSIFICATION)
+
+        X_test = np.random.rand(50, 20)
+        Y_test = np.random.randint(0, 2, 50)
+
+        score = automl.score(X_test, Y_test)
+        self.assertGreaterEqual(score, 0.0)
+
+        del automl
+        self._tearDown(output)
+
     def test_automl_outputs(self):
         output = os.path.join(self.test_dir, '..',
                               '.tmp_test_automl_outputs')