From 7e6481ec0c59bc9a94741b23429f416fdbb9081a Mon Sep 17 00:00:00 2001 From: jinu Date: Sun, 12 May 2019 21:47:35 +0200 Subject: [PATCH 01/10] First Commit. Change way of computing train loss. --- autosklearn/evaluation/train_evaluator.py | 34 +++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 013af9ade5..a69203942b 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -10,7 +10,6 @@ from autosklearn.evaluation.abstract_evaluator import AbstractEvaluator from autosklearn.constants import * - __all__ = ['TrainEvaluator', 'eval_holdout', 'eval_iterative_holdout', 'eval_cv', 'eval_partial_cv', 'eval_partial_cv_iterative'] @@ -45,6 +44,7 @@ 'random_state': None} } + def _get_y_array(y, task_type): if task_type in CLASSIFICATION_TASKS and task_type != \ MULTILABEL_CLASSIFICATION: @@ -53,7 +53,6 @@ def _get_y_array(y, task_type): return y - class TrainEvaluator(AbstractEvaluator): def __init__(self, backend, queue, metric, configuration=None, @@ -137,6 +136,7 @@ def fit_predict_and_loss(self, iterative=False): train_splits = [None] * self.cv_folds y = _get_y_array(self.Y_train, self.task_type) + # TODO: mention that no additional run info is possible in this # case! -> maybe remove full CV from the train evaluator anyway and # make the user implement this! @@ -194,7 +194,33 @@ def fit_predict_and_loss(self, iterative=False): if Y_train_pred[i] is None: continue Y_train_pred_full[i][train_splits[i]] = Y_train_pred[i] + + #TODO: remove this! Y_train_pred = np.nanmean(Y_train_pred_full, axis=0) + + # New computation of training score (loss) + train_losses = [] # stores all train losses of each fold. + fold_weights = [] # used as weights when averaging train losses. + + # For each fold, compute the train loss independently. + for i in range(self.cv_folds): + i_th_Y_train_pred = Y_train_pred_full[i][train_splits[i]] + + #TODO: 1. check if y is really what we want. Check if y is the train target. + #TODO: 2. check how loss is computed. old and new lossees diverge over time! + i_th_train_loss = self._loss( + self.Y_train_targets[train_splits[i]], + i_th_Y_train_pred, + ) + train_losses.append(i_th_train_loss) + # append number of data of current fold divided by the total + # number of train data (weight of current fold). + fold_weights.append(len(train_splits[i])) + + fold_weights = [weight / sum(fold_weights) for weight in fold_weights] + train_loss = np.average(train_losses, weights=fold_weights) + #print("new train loss: ", train_loss) + if self.cv_folds == 1: Y_train_pred = Y_train_pred[ # if the first column is np.NaN, all other columns have @@ -241,6 +267,7 @@ def fit_predict_and_loss(self, iterative=False): self.finish_up( loss=loss, + # TODO: pass only the score , not pred train_pred=Y_train_pred, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, @@ -282,6 +309,9 @@ def partial_fit_predict_and_loss(self, fold, iterative=False): iterative=iterative, ) ) + # TODO: here we compute loss (score). We need to make sure that + # score is computed independently for each fold, and + # averaged in the end. 
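# [Editor's sketch -- annotation, not part of the patch] The new computation added in
# this commit, and the TODO directly above, amount to the same idea: compute the loss
# of every fold independently, then take a fold-size-weighted average of those losses.
# A minimal standalone illustration; loss_fn, y_train, fold_preds and fold_splits are
# hypothetical placeholder names, not identifiers from this codebase:
import numpy as np

def weighted_fold_loss(loss_fn, y_train, fold_preds, fold_splits):
    # loss of each fold, computed independently on that fold's training part
    losses = [loss_fn(y_train[split], pred)
              for pred, split in zip(fold_preds, fold_splits)]
    # weight each fold by the number of samples it contains;
    # np.average normalizes the weights, so raw fold sizes work here
    weights = [len(split) for split in fold_splits]
    return float(np.average(losses, weights=weights))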
loss = self._loss(self.Y_targets[fold], opt_pred) if self.cv_folds > 1: From f7051756aa750f9d241dec4754b8a6313a6e230e Mon Sep 17 00:00:00 2001 From: jinu Date: Sun, 12 May 2019 21:49:33 +0200 Subject: [PATCH 02/10] Minor fix --- autosklearn/evaluation/train_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index a69203942b..2679717e0b 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -10,6 +10,7 @@ from autosklearn.evaluation.abstract_evaluator import AbstractEvaluator from autosklearn.constants import * + __all__ = ['TrainEvaluator', 'eval_holdout', 'eval_iterative_holdout', 'eval_cv', 'eval_partial_cv', 'eval_partial_cv_iterative'] @@ -136,7 +137,6 @@ def fit_predict_and_loss(self, iterative=False): train_splits = [None] * self.cv_folds y = _get_y_array(self.Y_train, self.task_type) - # TODO: mention that no additional run info is possible in this # case! -> maybe remove full CV from the train evaluator anyway and # make the user implement this! From a9db40a4f3027e2c4745664fbb14baf5a89c37c7 Mon Sep 17 00:00:00 2001 From: jinu Date: Sun, 12 May 2019 21:50:39 +0200 Subject: [PATCH 03/10] Minor fix again --- autosklearn/evaluation/train_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 2679717e0b..f11cb15c7b 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -45,7 +45,6 @@ 'random_state': None} } - def _get_y_array(y, task_type): if task_type in CLASSIFICATION_TASKS and task_type != \ MULTILABEL_CLASSIFICATION: @@ -54,6 +53,7 @@ def _get_y_array(y, task_type): return y + class TrainEvaluator(AbstractEvaluator): def __init__(self, backend, queue, metric, configuration=None, From 39e4d041b991588f5ac98e326ce038b94ac50206 Mon Sep 17 00:00:00 2001 From: jinu Date: Mon, 13 May 2019 14:36:48 +0200 Subject: [PATCH 04/10] Update computing train loss --- autosklearn/evaluation/abstract_evaluator.py | 45 ++------------- autosklearn/evaluation/train_evaluator.py | 61 +++++--------------- 2 files changed, 20 insertions(+), 86 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 6496950a11..9aebee6f73 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -219,7 +219,7 @@ def _loss(self, y_true, y_hat, all_scoring_functions=None): return err - def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred, + def finish_up(self, loss, train_loss, opt_pred, valid_pred, test_pred, additional_run_info, file_output, final_call): """This function does everything necessary after the fitting is done: @@ -233,14 +233,14 @@ def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred, if file_output: loss_, additional_run_info_ = self.file_output( - train_pred, opt_pred, valid_pred, test_pred, + opt_pred, valid_pred, test_pred, ) else: loss_ = None additional_run_info_ = {} - train_loss, validation_loss, test_loss = self.calculate_auxiliary_losses( - train_pred, valid_pred, test_pred, + validation_loss, test_loss = self.calculate_auxiliary_losses( + valid_pred, test_pred, ) if loss_ is not None: @@ -276,42 +276,9 @@ def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred, def calculate_auxiliary_losses( self, - 
Y_train_pred, Y_valid_pred, Y_test_pred ): - # Second check makes unit tests easier as it is not necessary to - # actually inject data to compare against for calculating a loss - if Y_train_pred is not None and self.Y_actual_train is not None: - if len(self.Y_actual_train.shape) > 1: - assert ( - np.sum(np.isfinite(self.Y_actual_train[:, 0])) - == Y_train_pred.shape[0] - ), ( - np.sum(np.isfinite(self.Y_actual_train[:, 0])), - Y_train_pred.shape[0], - ) - else: - assert ( - np.sum(np.isfinite(self.Y_actual_train)) - == Y_train_pred.shape[0] - ), ( - np.sum(np.isfinite(self.Y_actual_train)), - Y_train_pred.shape[0], - ) - Y_true_tmp = self.Y_actual_train - if len(Y_true_tmp.shape) == 1: - Y_true_tmp = Y_true_tmp[np.isfinite(self.Y_actual_train)] - else: - Y_true_tmp = Y_true_tmp[np.isfinite(self.Y_actual_train[:, 0])] - train_loss = self._loss( - Y_true_tmp, - Y_train_pred, - all_scoring_functions=False, - ) - else: - train_loss = None - if Y_valid_pred is not None: if self.y_valid is not None: validation_loss = self._loss(self.y_valid, Y_valid_pred) @@ -332,11 +299,10 @@ def calculate_auxiliary_losses( else: test_loss = None - return train_loss, validation_loss, test_loss + return validation_loss, test_loss def file_output( self, - Y_train_pred, Y_optimization_pred, Y_valid_pred, Y_test_pred @@ -360,7 +326,6 @@ def file_output( ) for y, s in [ - [Y_train_pred, 'train'], [Y_optimization_pred, 'optimization'], [Y_valid_pred, 'validation'], [Y_test_pred, 'test'] diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index f11cb15c7b..b4043353fd 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -137,6 +137,10 @@ def fit_predict_and_loss(self, iterative=False): train_splits = [None] * self.cv_folds y = _get_y_array(self.Y_train, self.task_type) + + train_losses = [] # stores train loss of each fold. + fold_weights = [] # used as weights when averaging train losses. + # TODO: mention that no additional run info is possible in this # case! -> maybe remove full CV from the train evaluator anyway and # make the user implement this! @@ -179,55 +183,20 @@ def fit_predict_and_loss(self, iterative=False): Y_test_pred[i] = test_pred train_splits[i] = train_split - Y_targets = self.Y_targets - Y_train_targets = self.Y_train_targets - - Y_train_pred_full = np.array( - [ - np.ones( - (self.Y_train.shape[0], Y_train_pred[i].shape[1]) - ) * np.NaN - for _ in range(self.cv_folds) if Y_train_pred[i] is not None - ] - ) - for i in range(self.cv_folds): - if Y_train_pred[i] is None: - continue - Y_train_pred_full[i][train_splits[i]] = Y_train_pred[i] - - #TODO: remove this! - Y_train_pred = np.nanmean(Y_train_pred_full, axis=0) - - # New computation of training score (loss) - train_losses = [] # stores all train losses of each fold. - fold_weights = [] # used as weights when averaging train losses. - - # For each fold, compute the train loss independently. - for i in range(self.cv_folds): - i_th_Y_train_pred = Y_train_pred_full[i][train_splits[i]] - - #TODO: 1. check if y is really what we want. Check if y is the train target. - #TODO: 2. check how loss is computed. old and new lossees diverge over time! - i_th_train_loss = self._loss( - self.Y_train_targets[train_splits[i]], - i_th_Y_train_pred, + # Compute train loss of this fold. 
+ train_loss = self._loss( + self.Y_train_targets[train_split], + train_pred, ) - train_losses.append(i_th_train_loss) - # append number of data of current fold divided by the total - # number of train data (weight of current fold). - fold_weights.append(len(train_splits[i])) + train_losses.append(train_loss) + # number of data points for this fold. Used for weighting the average. + fold_weights.append(len(train_split)) - fold_weights = [weight / sum(fold_weights) for weight in fold_weights] + fold_weights = [w / sum(fold_weights) for w in fold_weights] train_loss = np.average(train_losses, weights=fold_weights) - #print("new train loss: ", train_loss) - - if self.cv_folds == 1: - Y_train_pred = Y_train_pred[ - # if the first column is np.NaN, all other columns have - # to be np.NaN as well - np.isfinite(Y_train_pred[:, 0]) - ] + Y_targets = self.Y_targets + Y_train_targets = self.Y_train_targets Y_optimization_pred = np.concatenate( [Y_optimization_pred[i] for i in range(self.cv_folds) @@ -268,7 +237,7 @@ def fit_predict_and_loss(self, iterative=False): self.finish_up( loss=loss, # TODO: pass only the score , not pred - train_pred=Y_train_pred, + train_loss=train_loss, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, test_pred=Y_test_pred, From acceff9e484316a88387fd9ae24d757f6c3ecf14 Mon Sep 17 00:00:00 2001 From: jinu Date: Mon, 13 May 2019 14:55:58 +0200 Subject: [PATCH 05/10] Modify finish_up() --- autosklearn/evaluation/test_evaluator.py | 2 +- autosklearn/evaluation/train_evaluator.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/autosklearn/evaluation/test_evaluator.py b/autosklearn/evaluation/test_evaluator.py index 17c429db8c..5d0a1afc12 100644 --- a/autosklearn/evaluation/test_evaluator.py +++ b/autosklearn/evaluation/test_evaluator.py @@ -51,7 +51,7 @@ def fit_predict_and_loss(self): loss, Y_pred, _, _ = self.predict_and_loss() self.finish_up( loss=loss, - train_pred=None, + train_loss=None, opt_pred=Y_pred, valid_pred=None, test_pred=None, diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index b4043353fd..3302dbec7a 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -236,7 +236,6 @@ def fit_predict_and_loss(self, iterative=False): self.finish_up( loss=loss, - # TODO: pass only the score , not pred train_loss=train_loss, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, @@ -278,9 +277,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False): iterative=iterative, ) ) - # TODO: here we compute loss (score). We need to make sure that - # score is computed independently for each fold, and - # averaged in the end. 
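# [Editor's note -- annotation, not part of the patch] Patches 04/05 change the
# evaluator contract: every code path now computes its train loss itself and passes
# the value to finish_up(), instead of handing raw train predictions around, and
# file_output() / calculate_auxiliary_losses() no longer touch the train set at all.
# Schematic stub of the resulting signature (as introduced in the
# abstract_evaluator.py hunk above; body elided):
def finish_up(self, loss, train_loss, opt_pred, valid_pred, test_pred,
              additional_run_info, file_output, final_call):
    # train_loss is a precomputed value (or None, e.g. in the test evaluator),
    # already averaged over folds where applicable.
    ...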
+ train_loss = self._loss(self.Y_actual_train, train_pred) loss = self._loss(self.Y_targets[fold], opt_pred) if self.cv_folds > 1: @@ -291,7 +288,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False): self.finish_up( loss=loss, - train_pred=train_pred, + train_loss=train_loss, opt_pred=opt_pred, valid_pred=valid_pred, test_pred=test_pred, @@ -344,6 +341,9 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices, if self.cv_folds == 1: self.model = model + train_loss = self._loss(self.Y_train[train_indices], + Y_train_pred, + ) loss = self._loss(self.Y_train[test_indices], Y_optimization_pred) additional_run_info = model.get_additional_run_info() @@ -353,7 +353,7 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices, final_call = False self.finish_up( loss=loss, - train_pred=Y_train_pred, + train_loss=train_loss, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, test_pred=Y_test_pred, @@ -385,11 +385,14 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices, train_indices=train_indices, test_indices=test_indices ) + train_loss = self._loss(self.Y_train[train_indices], + Y_train_pred, + ) loss = self._loss(self.Y_train[test_indices], Y_optimization_pred) additional_run_info = model.get_additional_run_info() self.finish_up( loss=loss, - train_pred=Y_train_pred, + train_loss=train_loss, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, test_pred=Y_test_pred, From 3ff25edbf117edfbcdb49860a70ad5199d55e7a3 Mon Sep 17 00:00:00 2001 From: jinu Date: Thu, 16 May 2019 15:30:53 +0200 Subject: [PATCH 06/10] Fix unittest errors, modify train_loss computation to account for dicts --- autosklearn/evaluation/train_evaluator.py | 15 +++++++-- .../test_abstract_evaluator.py | 9 ++--- test/test_evaluation/test_train_evaluator.py | 33 ++++++++----------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 3302dbec7a..dcc1a11a4e 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -183,7 +183,8 @@ def fit_predict_and_loss(self, iterative=False): Y_test_pred[i] = test_pred train_splits[i] = train_split - # Compute train loss of this fold. + # Compute train loss of this fold. train_loss could either be a scalar or + # a dict of scalars with metrics as keys. train_loss = self._loss( self.Y_train_targets[train_split], train_pred, @@ -193,7 +194,17 @@ def fit_predict_and_loss(self, iterative=False): fold_weights.append(len(train_split)) fold_weights = [w / sum(fold_weights) for w in fold_weights] - train_loss = np.average(train_losses, weights=fold_weights) + # train_losses is a list of either scalars of dicts. If it contains + # dicts, then train_loss is a dict as well. 
+ if all(isinstance(elem, dict) for elem in train_losses): + train_loss = {} + for metric in train_losses[0].keys(): + train_loss[metric] = np.average([train_losses[i][metric] + for i in range(self.cv_folds)], + weights=fold_weights, + ) + else: + train_loss = np.average(train_losses, weights=fold_weights) Y_targets = self.Y_targets Y_train_targets = self.Y_train_targets diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index 9430607613..cecfe6f417 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -39,7 +39,7 @@ def test_finish_up_model_predicts_NaN(self): predictions_ensemble[5, 2] = np.NaN _, loss, _, additional_run_info = ae.finish_up( loss=0.1, - train_pred=predictions_train, + train_loss=0.1, opt_pred=predictions_ensemble, valid_pred=predictions_valid, test_pred=predictions_test, @@ -57,7 +57,7 @@ def test_finish_up_model_predicts_NaN(self): predictions_valid[5, 2] = np.NaN _, loss, _, additional_run_info = ae.finish_up( loss=0.1, - train_pred=predictions_train, + train_loss=0.1, opt_pred=predictions_ensemble, valid_pred=predictions_valid, test_pred=predictions_test, @@ -75,7 +75,7 @@ def test_finish_up_model_predicts_NaN(self): predictions_test[5, 2] = np.NaN _, loss, _, additional_run_info = ae.finish_up( loss=0.1, - train_pred=predictions_train, + train_loss=0.1, opt_pred=predictions_ensemble, valid_pred=predictions_valid, test_pred=predictions_test, @@ -114,7 +114,6 @@ def test_disable_file_output(self, exists_mock): loss_, additional_run_info_ = ( ae.file_output( - predictions_train, predictions_ensemble, predictions_valid, predictions_test, @@ -138,7 +137,6 @@ def test_disable_file_output(self, exists_mock): loss_, additional_run_info_ = ( ae.file_output( - predictions_train, predictions_ensemble, predictions_valid, predictions_test, @@ -164,7 +162,6 @@ def test_disable_file_output(self, exists_mock): loss_, additional_run_info_ = ( ae.file_output( - predictions_train, predictions_ensemble, predictions_valid, predictions_test, diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 577cb2809a..8a041c88fa 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -92,10 +92,9 @@ def test_holdout(self, pipeline_mock): # four calls because of train, holdout, validation and test set self.assertEqual(pipeline_mock.predict_proba.call_count, 4) self.assertEqual(evaluator.file_output.call_count, 1) - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 45) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], 24) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][3].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) self.assertEqual(evaluator.model.fit.call_count, 1) @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') @@ -169,12 +168,10 @@ def side_effect(self, *args, **kwargs): # 20 calls because of train, holdout, validation and test set # and a total of five calls because of five iterations of fitting 
self.assertEqual(evaluator.model.predict_proba.call_count, 20) - # 2/3 of 69 - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 46) # 1/3 of 69 - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][3].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 5) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -299,10 +296,9 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): self.assertEqual(pipeline_mock.iterative_fit.call_count, 0) # four calls for train, opt, valid and test self.assertEqual(evaluator.model.predict_proba.call_count, 4) - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 46) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][3].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -346,9 +342,8 @@ def test_cv(self, pipeline_mock): # test set (4 sets x 5 folds = 20) self.assertEqual(pipeline_mock.predict_proba.call_count, 20) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], D.data['Y_train'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_train'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][3].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. 
Instead, we test whether # the if block in which model assignment is done is accessed @@ -495,7 +490,6 @@ def test_file_output(self, loss_mock, makedirs_mock, backend_mock): evaluator.model = 'model' evaluator.Y_optimization = D.data['Y_train'] rval = evaluator.file_output( - D.data['Y_train'], D.data['Y_train'], D.data['Y_valid'], D.data['Y_test'], @@ -511,7 +505,6 @@ def test_file_output(self, loss_mock, makedirs_mock, backend_mock): # for unseen data D.data['Y_valid'][0] = np.NaN rval = evaluator.file_output( - D.data['Y_train'], D.data['Y_train'], D.data['Y_valid'], D.data['Y_test'], @@ -528,7 +521,6 @@ def test_file_output(self, loss_mock, makedirs_mock, backend_mock): ) D.data['Y_train'][0] = np.NaN rval = evaluator.file_output( - D.data['Y_train'], D.data['Y_train'], D.data['Y_valid'], D.data['Y_test'], @@ -1413,7 +1405,8 @@ def test_eval_holdout_all_loss_functions(self): 'num_run': 1, 'validation_loss': 0.0, 'test_loss': 0.04, - 'train_loss': 0.0, + # Why is this here? Train loss was never computed before.. + #'train_loss': 0.0, } additional_run_info = rval[0]['additional_run_info'] From ac08d52683e7947033543c8945153f49b67e1e8b Mon Sep 17 00:00:00 2001 From: jinu Date: Wed, 22 May 2019 21:16:09 +0200 Subject: [PATCH 07/10] Fix unittest errors --- autosklearn/evaluation/abstract_evaluator.py | 10 ++++++++-- autosklearn/evaluation/train_evaluator.py | 13 ++++++------- test/test_evaluation/test_train_evaluator.py | 14 ++++++-------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 9aebee6f73..5562aa20e0 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -16,12 +16,13 @@ from autosklearn.pipeline.implementations.util import ( convert_multioutput_multiclass_to_multilabel ) -from autosklearn.metrics import calculate_score +from autosklearn.metrics import calculate_score, CLASSIFICATION_METRICS from autosklearn.util.logging_ import get_logger from ConfigSpace import Configuration + __all__ = [ 'AbstractEvaluator' ] @@ -213,7 +214,11 @@ def _loss(self, y_true, y_hat, all_scoring_functions=None): all_scoring_functions=all_scoring_functions) if hasattr(score, '__len__'): - err = {key: self.metric._optimum - score[key] for key in score} + # TODO: instead of using self.metric, it should use all metrics given by key. + # But now this throws error... + + err = {key: metric._optimum - score[key] for key, metric in + CLASSIFICATION_METRICS.items() if key in score} else: err = self.metric._optimum - score @@ -326,6 +331,7 @@ def file_output( ) for y, s in [ + # Y_train_pred deleted here. Fix unittest accordingly. [Y_optimization_pred, 'optimization'], [Y_valid_pred, 'validation'], [Y_test_pred, 'test'] diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index dcc1a11a4e..da6490e268 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -195,14 +195,13 @@ def fit_predict_and_loss(self, iterative=False): fold_weights = [w / sum(fold_weights) for w in fold_weights] # train_losses is a list of either scalars of dicts. If it contains - # dicts, then train_loss is a dict as well. + # dicts, then the train_loss is computed using the + # target metric (self.metric). 
if all(isinstance(elem, dict) for elem in train_losses): - train_loss = {} - for metric in train_losses[0].keys(): - train_loss[metric] = np.average([train_losses[i][metric] - for i in range(self.cv_folds)], - weights=fold_weights, - ) + train_loss = np.average([train_losses[i][str(self.metric)] + for i in range(self.cv_folds)], + weights=fold_weights, + ) else: train_loss = np.average(train_losses, weights=fold_weights) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 8a041c88fa..0deefe0d74 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -250,10 +250,9 @@ def side_effect(self, *args, **kwargs): # eight calls because of train, holdout, the validation and the test set # and a total of two calls each because of two iterations of fitting self.assertEqual(evaluator.model.predict_proba.call_count, 8) - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 46) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][3].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 2) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -531,7 +530,7 @@ def test_file_output(self, loss_mock, makedirs_mock, backend_mock): 1.0, { 'error': - 'Model predictions for train set contains NaNs.' + 'Model predictions for optimization set contains NaNs.' }, ) ) @@ -667,7 +666,7 @@ def test_fit_predict_and_loss_additional_run_info( metric=accuracy, ) evaluator.Y_targets[0] = np.array([1] * 23) - evaluator.Y_train_targets = np.array([1] * 46) + evaluator.Y_train_targets = np.array([1] * 69) # Y_train_target is train_train + train_opt (train part of train_test_split) evaluator.fit_predict_and_loss(iterative=False) class SideEffect(object): @@ -1405,8 +1404,7 @@ def test_eval_holdout_all_loss_functions(self): 'num_run': 1, 'validation_loss': 0.0, 'test_loss': 0.04, - # Why is this here? Train loss was never computed before.. - #'train_loss': 0.0, + 'train_loss': 0.0, } additional_run_info = rval[0]['additional_run_info'] From eb9d0df23c2cdca7c25eb4ed5a6615cf6c5b8f6c Mon Sep 17 00:00:00 2001 From: jinu Date: Sun, 2 Jun 2019 18:24:29 +0200 Subject: [PATCH 08/10] Modify the way optimization loss is computed in train_evaluator.py --- autosklearn/evaluation/train_evaluator.py | 50 +++++++++++++++++------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index da6490e268..94004cc557 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -139,7 +139,9 @@ def fit_predict_and_loss(self, iterative=False): y = _get_y_array(self.Y_train, self.task_type) train_losses = [] # stores train loss of each fold. - fold_weights = [] # used as weights when averaging train losses. + train_fold_weights = [] # used as weights when averaging train losses. + opt_losses = [] # stores opt (validation) loss of each fold. + opt_fold_weights = [] # weights for opt_losses. 
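# [Editor's sketch -- annotation, not part of the patch] The four lists initialized
# above feed one reduction after the CV loop: a fold-size-weighted average, applied
# once to the train losses and once to the optimization (validation) losses, with a
# scalar-vs-dict branch for the case where _loss() returned one value per metric
# (the patch keeps the full dict for the optimization losses when
# all_scoring_functions is set). Standalone illustration of that reduction;
# reduce_fold_losses and metric_name are hypothetical names, not identifiers from
# this codebase:
import numpy as np

def reduce_fold_losses(losses, fold_sizes, metric_name):
    weights = [size / sum(fold_sizes) for size in fold_sizes]
    if all(isinstance(loss, dict) for loss in losses):
        # per-metric dicts: keep only the target metric before averaging
        losses = [loss[metric_name] for loss in losses]
    return float(np.average(losses, weights=weights))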
# TODO: mention that no additional run info is possible in this # case! -> maybe remove full CV from the train evaluator anyway and @@ -183,27 +185,50 @@ def fit_predict_and_loss(self, iterative=False): Y_test_pred[i] = test_pred train_splits[i] = train_split - # Compute train loss of this fold. train_loss could either be a scalar or - # a dict of scalars with metrics as keys. + # Compute train loss of this fold and store it. train_loss could + # either be a scalar or a dict of scalars with metrics as keys. train_loss = self._loss( self.Y_train_targets[train_split], train_pred, ) train_losses.append(train_loss) - # number of data points for this fold. Used for weighting the average. - fold_weights.append(len(train_split)) + # number of training data points for this fold. Used for weighting + # the average. + train_fold_weights.append(len(train_split)) - fold_weights = [w / sum(fold_weights) for w in fold_weights] - # train_losses is a list of either scalars of dicts. If it contains - # dicts, then the train_loss is computed using the + # Compute validation loss of this fold and store it. + optimization_loss = self._loss( + self.Y_targets[i], + opt_pred, + ) + opt_losses.append(optimization_loss) + # number of optimization data points for this fold. Used for weighting + # the average. + opt_fold_weights.append(len(test_split)) + + # Compute weights of each fold based on the number of samples in each + # fold. + train_fold_weights = [w / sum(train_fold_weights) for w in train_fold_weights] + opt_fold_weights = [w / sum(opt_fold_weights) for w in opt_fold_weights] + + # train_ and opt_losses are lists of either scalars of dicts. If they contain + # dicts, then the train_ and opt_loss are computed using the # target metric (self.metric). if all(isinstance(elem, dict) for elem in train_losses): train_loss = np.average([train_losses[i][str(self.metric)] - for i in range(self.cv_folds)], - weights=fold_weights, + for i in range(self.cv_folds)], + weights=train_fold_weights, ) else: - train_loss = np.average(train_losses, weights=fold_weights) + train_loss = np.average(train_losses, weights=train_fold_weights) + + if all(isinstance(elem, dict) for elem in opt_losses): + opt_loss = np.average([opt_losses[i][str(self.metric)] + for i in range(self.cv_folds)], + weights=opt_fold_weights, + ) + else: + opt_loss = np.average(opt_losses, weights=opt_fold_weights) Y_targets = self.Y_targets Y_train_targets = self.Y_train_targets @@ -235,7 +260,6 @@ def fit_predict_and_loss(self, iterative=False): Y_test_pred = None self.Y_optimization = Y_targets - loss = self._loss(Y_targets, Y_optimization_pred) self.Y_actual_train = Y_train_targets if self.cv_folds > 1: @@ -245,7 +269,7 @@ def fit_predict_and_loss(self, iterative=False): self._added_empty_model = True self.finish_up( - loss=loss, + loss=opt_loss, train_loss=train_loss, opt_pred=Y_optimization_pred, valid_pred=Y_valid_pred, From 80171f7552658008e79d70f9efe83337c9f908be Mon Sep 17 00:00:00 2001 From: jinu Date: Wed, 19 Jun 2019 17:55:00 +0200 Subject: [PATCH 09/10] Modify errors --- autosklearn/evaluation/train_evaluator.py | 20 ++++++++++++-------- test/test_evaluation/test_train_evaluator.py | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py index 94004cc557..0feacf138c 100644 --- a/autosklearn/evaluation/train_evaluator.py +++ b/autosklearn/evaluation/train_evaluator.py @@ -211,9 +211,8 @@ def fit_predict_and_loss(self, 
iterative=False): train_fold_weights = [w / sum(train_fold_weights) for w in train_fold_weights] opt_fold_weights = [w / sum(opt_fold_weights) for w in opt_fold_weights] - # train_ and opt_losses are lists of either scalars of dicts. If they contain - # dicts, then the train_ and opt_loss are computed using the - # target metric (self.metric). + # train_losses is a list of either scalars or dicts. If it contains dicts, + # then train_loss is computed using the target metric (self.metric). if all(isinstance(elem, dict) for elem in train_losses): train_loss = np.average([train_losses[i][str(self.metric)] for i in range(self.cv_folds)], @@ -222,11 +221,15 @@ def fit_predict_and_loss(self, iterative=False): else: train_loss = np.average(train_losses, weights=train_fold_weights) - if all(isinstance(elem, dict) for elem in opt_losses): - opt_loss = np.average([opt_losses[i][str(self.metric)] - for i in range(self.cv_folds)], - weights=opt_fold_weights, - ) + # if all_scoring_function is true, return a dict of opt_loss. Otherwise, + # return a scalar. + if self.all_scoring_functions is True: + opt_loss = {} + for metric in opt_losses[0].keys(): + opt_loss[metric] = np.average([opt_losses[i][metric] + for i in range(self.cv_folds)], + weights=opt_fold_weights, + ) else: opt_loss = np.average(opt_losses, weights=opt_fold_weights) @@ -260,6 +263,7 @@ def fit_predict_and_loss(self, iterative=False): Y_test_pred = None self.Y_optimization = Y_targets + loss = self._loss(Y_targets, Y_optimization_pred) self.Y_actual_train = Y_train_targets if self.cv_folds > 1: diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 0deefe0d74..507ecadca0 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -335,7 +335,7 @@ def test_cv(self, pipeline_mock): self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) self.assertEqual(evaluator.file_output.call_count, 1) - self.assertEqual(result, 0.46376811594202894) + self.assertEqual(result, 0.463768115942029) self.assertEqual(pipeline_mock.fit.call_count, 5) # Fifteen calls because of the training, holdout, validation and # test set (4 sets x 5 folds = 20) @@ -1393,7 +1393,7 @@ def test_eval_holdout_all_loss_functions(self): 'f1_macro': 0.032036613272311221, 'f1_micro': 0.030303030303030276, 'f1_weighted': 0.030441716940572849, - 'log_loss': 1.0635047098903945, + 'log_loss': 0.0635047098903945, 'pac_score': 0.09242315351515851, 'precision_macro': 0.02777777777777779, 'precision_micro': 0.030303030303030276, @@ -1504,16 +1504,16 @@ def test_eval_cv_all_loss_functions(self): fixture = { 'accuracy': 0.06, - 'balanced_accuracy': 0.063508064516129004, - 'f1_macro': 0.063508064516129004, + 'balanced_accuracy': 0.06315151515151515, + 'f1_macro': 0.06369358178053833, 'f1_micro': 0.06, - 'f1_weighted': 0.06, - 'log_loss': 1.1408473360538482, - 'pac_score': 0.1973689470076717, - 'precision_macro': 0.063508064516129004, + 'f1_weighted': 0.0600621118012, + 'log_loss': 0.1408473360538482, + 'pac_score': 0.19586655485411472, + 'precision_macro': 0.0624444444444, 'precision_micro': 0.06, - 'precision_weighted': 0.06, - 'recall_macro': 0.063508064516129004, + 'precision_weighted': 0.05842424242424235, + 'recall_macro': 0.06315151515151515, 'recall_micro': 0.06, 'recall_weighted': 0.06, 'num_run': 1, From ae9a8e6ac322b122e277ce393cabcd73d3d5feda Mon Sep 17 00:00:00 2001 From: jinu Date: Fri, 26 Jul 2019 18:16:31 +0200 Subject: [PATCH 10/10] Fix Unittest --- 
test/test_evaluation/test_train_evaluator.py | 40 +++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py index 507ecadca0..b88c3d50bf 100644 --- a/test/test_evaluation/test_train_evaluator.py +++ b/test/test_evaluation/test_train_evaluator.py @@ -93,8 +93,10 @@ def test_holdout(self, pipeline_mock): self.assertEqual(pipeline_mock.predict_proba.call_count, 4) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 24) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], + D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.data['Y_test'].shape[0]) self.assertEqual(evaluator.model.fit.call_count, 1) @unittest.mock.patch('autosklearn.pipeline.classification.SimpleClassificationPipeline') @@ -170,8 +172,10 @@ def side_effect(self, *args, **kwargs): self.assertEqual(evaluator.model.predict_proba.call_count, 20) # 1/3 of 69 self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], + D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 5) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -251,8 +255,10 @@ def side_effect(self, *args, **kwargs): # and a total of two calls each because of two iterations of fitting self.assertEqual(evaluator.model.predict_proba.call_count, 8) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], + D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 2) self.assertEqual(evaluator.model.fit.call_count, 0) @@ -296,8 +302,10 @@ def test_iterative_holdout_not_iterative(self, pipeline_mock): # four calls for train, opt, valid and test self.assertEqual(evaluator.model.predict_proba.call_count, 4) self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], 23) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], + D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.data['Y_test'].shape[0]) self.assertEqual(evaluator.file_output.call_count, 1) self.assertEqual(evaluator.model.fit.call_count, 1) @@ -340,9 +348,12 @@ def test_cv(self, pipeline_mock): # Fifteen calls because of the training, holdout, validation and # test set (4 sets x 5 folds = 20) self.assertEqual(pipeline_mock.predict_proba.call_count, 20) - 
self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], D.data['Y_train'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], D.data['Y_valid'].shape[0]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], D.data['Y_test'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], + D.data['Y_train'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][1].shape[0], + D.data['Y_valid'].shape[0]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.data['Y_test'].shape[0]) # The model prior to fitting is saved, this cannot be directly tested # because of the way the mock module is used. Instead, we test whether # the if block in which model assignment is done is accessed @@ -530,7 +541,7 @@ def test_file_output(self, loss_mock, makedirs_mock, backend_mock): 1.0, { 'error': - 'Model predictions for optimization set contains NaNs.' + 'Model predictions for optimization set contains NaNs.' }, ) ) @@ -666,7 +677,7 @@ def test_fit_predict_and_loss_additional_run_info( metric=accuracy, ) evaluator.Y_targets[0] = np.array([1] * 23) - evaluator.Y_train_targets = np.array([1] * 69) # Y_train_target is train_train + train_opt (train part of train_test_split) + evaluator.Y_train_targets = np.array([1] * 69) evaluator.fit_predict_and_loss(iterative=False) class SideEffect(object): @@ -676,7 +687,7 @@ def __call__(self, *args, **kwargs): if self.n_call == 0: self.n_call += 1 return ( - np.array([[0.1, 0.9]] * 35), + np.array([[0.1, 0.9]] * 34), np.array([[0.1, 0.9]] * 35), np.array([[0.1, 0.9]] * 25), np.array([[0.1, 0.9]] * 6), @@ -701,6 +712,7 @@ def __call__(self, *args, **kwargs): ) evaluator.Y_targets[0] = np.array([1] * 35) evaluator.Y_targets[1] = np.array([1] * 34) + evaluator.Y_train_targets = np.array([1] * 69) self.assertRaises( TAEAbortException,