From 702d15dd48be58f5c2ec434737b08a673e1651fb Mon Sep 17 00:00:00 2001 From: hmendozap Date: Wed, 2 Dec 2015 16:45:04 +0100 Subject: [PATCH] Fix binary classification score and add unittest for it --- autosklearn/automl.py | 6 +++++- test/automl/test_start_automl.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 6103a11e95..7f358ce3e3 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -606,7 +606,11 @@ def _load_models(self): seed) def score(self, X, y): + # fix: Consider only index 1 of second dimension + # Don't know if the reshaping should be done there or in calculate_score prediction = self.predict(X) + if self._task == BINARY_CLASSIFICATION: + prediction = prediction[:, 1].reshape((-1, 1)) return calculate_score(y, prediction, self._task, self._metric, self._label_num, logger=self._logger) @@ -687,4 +691,4 @@ def _delete_output_directories(self): pass else: print("Could not delete tmp dir: %s" % - self._tmp_dir) \ No newline at end of file + self._tmp_dir) diff --git a/test/automl/test_start_automl.py b/test/automl/test_start_automl.py index ada9a21faf..cbcba020b6 100644 --- a/test/automl/test_start_automl.py +++ b/test/automl/test_start_automl.py @@ -40,6 +40,38 @@ def test_fit(self): del automl self._tearDown(output) + def test_binary_score(self): + """ + Test fix for binary classification prediction + taking the index 1 of second dimension in prediction matrix + """ + if self.travis: + self.skipTest('This test does currently not run on travis-ci. 
' + 'Make sure it runs locally on your machine!') + + output = os.path.join(self.test_dir, '..', '.tmp_test_fit') + self._setUp(output) + + # Had to use this dummy dataset because + # I cannot find a way to efficiently load a binary dataset + # without changing files in paramsklearn or automl class + + X_train = np.random.rand(100, 20) + Y_train = np.random.randint(0, 2, 100) + + automl = autosklearn.automl.AutoML(output, output, 30, 15) + automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION) + self.assertEqual(automl._task, BINARY_CLASSIFICATION) + + X_test = np.random.rand(50, 20) + Y_test = np.random.randint(0, 2, 50) + + score = automl.score(X_test, Y_test) + self.assertGreaterEqual(score, 0.0) + + del automl + self._tearDown(output) + def test_automl_outputs(self): output = os.path.join(self.test_dir, '..', '.tmp_test_automl_outputs')