55 changes: 13 additions & 42 deletions autosklearn/evaluation/abstract_evaluator.py
@@ -16,12 +16,13 @@
from autosklearn.pipeline.implementations.util import (
convert_multioutput_multiclass_to_multilabel
)
from autosklearn.metrics import calculate_score
from autosklearn.metrics import calculate_score, CLASSIFICATION_METRICS
from autosklearn.util.logging_ import get_logger

from ConfigSpace import Configuration



__all__ = [
'AbstractEvaluator'
]
@@ -213,13 +214,17 @@ def _loss(self, y_true, y_hat, all_scoring_functions=None):
all_scoring_functions=all_scoring_functions)

if hasattr(score, '__len__'):
err = {key: self.metric._optimum - score[key] for key in score}
            # TODO: instead of using self.metric, this should use all the metrics
            # given by key, but for now that raises an error.

err = {key: metric._optimum - score[key] for key, metric in
CLASSIFICATION_METRICS.items() if key in score}
else:
err = self.metric._optimum - score

return err
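
A minimal, runnable sketch of what the dictionary comprehension above produces; the metric names and score values are made up, and it assumes both metrics appear in CLASSIFICATION_METRICS with an optimum of 1.0 (true for the accuracy-style scorers, but treat the concrete numbers as illustrative):

    from autosklearn.metrics import CLASSIFICATION_METRICS

    score = {'accuracy': 0.92, 'balanced_accuracy': 0.88}  # hypothetical fold scores
    err = {key: metric._optimum - score[key]
           for key, metric in CLASSIFICATION_METRICS.items() if key in score}
    # With both optima at 1.0 this gives roughly
    # {'accuracy': 0.08, 'balanced_accuracy': 0.12};
    # metric names not present in `score` are simply skipped.
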

def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred,
def finish_up(self, loss, train_loss, opt_pred, valid_pred, test_pred,
additional_run_info, file_output, final_call):
"""This function does everything necessary after the fitting is done:

@@ -233,14 +238,14 @@ def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred,

if file_output:
loss_, additional_run_info_ = self.file_output(
train_pred, opt_pred, valid_pred, test_pred,
opt_pred, valid_pred, test_pred,
)
else:
loss_ = None
additional_run_info_ = {}

train_loss, validation_loss, test_loss = self.calculate_auxiliary_losses(
train_pred, valid_pred, test_pred,
validation_loss, test_loss = self.calculate_auxiliary_losses(
valid_pred, test_pred,
)

if loss_ is not None:
@@ -276,42 +281,9 @@ def finish_up(self, loss, train_pred, opt_pred, valid_pred, test_pred,

def calculate_auxiliary_losses(
self,
Y_train_pred,
Y_valid_pred,
Y_test_pred
):
# Second check makes unit tests easier as it is not necessary to
# actually inject data to compare against for calculating a loss
if Y_train_pred is not None and self.Y_actual_train is not None:
if len(self.Y_actual_train.shape) > 1:
assert (
np.sum(np.isfinite(self.Y_actual_train[:, 0]))
== Y_train_pred.shape[0]
), (
np.sum(np.isfinite(self.Y_actual_train[:, 0])),
Y_train_pred.shape[0],
)
else:
assert (
np.sum(np.isfinite(self.Y_actual_train))
== Y_train_pred.shape[0]
), (
np.sum(np.isfinite(self.Y_actual_train)),
Y_train_pred.shape[0],
)
Y_true_tmp = self.Y_actual_train
if len(Y_true_tmp.shape) == 1:
Y_true_tmp = Y_true_tmp[np.isfinite(self.Y_actual_train)]
else:
Y_true_tmp = Y_true_tmp[np.isfinite(self.Y_actual_train[:, 0])]
train_loss = self._loss(
Y_true_tmp,
Y_train_pred,
all_scoring_functions=False,
)
else:
train_loss = None

if Y_valid_pred is not None:
if self.y_valid is not None:
validation_loss = self._loss(self.y_valid, Y_valid_pred)
@@ -332,11 +304,10 @@ def calculate_auxiliary_losses(
else:
test_loss = None

return train_loss, validation_loss, test_loss
return validation_loss, test_loss

def file_output(
self,
Y_train_pred,
Y_optimization_pred,
Y_valid_pred,
Y_test_pred
@@ -360,7 +331,7 @@
)

for y, s in [
[Y_train_pred, 'train'],
            # Y_train_pred deleted here. Fix the unit tests accordingly.
[Y_optimization_pred, 'optimization'],
[Y_valid_pred, 'validation'],
[Y_test_pred, 'test']
2 changes: 1 addition & 1 deletion autosklearn/evaluation/test_evaluator.py
@@ -51,7 +51,7 @@ def fit_predict_and_loss(self):
loss, Y_pred, _, _ = self.predict_and_loss()
self.finish_up(
loss=loss,
train_pred=None,
train_loss=None,
opt_pred=Y_pred,
valid_pred=None,
test_pred=None,
92 changes: 66 additions & 26 deletions autosklearn/evaluation/train_evaluator.py
@@ -137,6 +137,12 @@ def fit_predict_and_loss(self, iterative=False):
train_splits = [None] * self.cv_folds

y = _get_y_array(self.Y_train, self.task_type)

train_losses = [] # stores train loss of each fold.
train_fold_weights = [] # used as weights when averaging train losses.
opt_losses = [] # stores opt (validation) loss of each fold.
opt_fold_weights = [] # weights for opt_losses.

# TODO: mention that no additional run info is possible in this
# case! -> maybe remove full CV from the train evaluator anyway and
# make the user implement this!
@@ -179,30 +185,57 @@
Y_test_pred[i] = test_pred
train_splits[i] = train_split

# Compute train loss of this fold and store it. train_loss could
# either be a scalar or a dict of scalars with metrics as keys.
train_loss = self._loss(
self.Y_train_targets[train_split],
train_pred,
)
train_losses.append(train_loss)
# number of training data points for this fold. Used for weighting
# the average.
train_fold_weights.append(len(train_split))

# Compute validation loss of this fold and store it.
optimization_loss = self._loss(
self.Y_targets[i],
opt_pred,
)
opt_losses.append(optimization_loss)
# number of optimization data points for this fold. Used for weighting
# the average.
opt_fold_weights.append(len(test_split))

# Compute weights of each fold based on the number of samples in each
# fold.
train_fold_weights = [w / sum(train_fold_weights) for w in train_fold_weights]
opt_fold_weights = [w / sum(opt_fold_weights) for w in opt_fold_weights]

# train_losses is a list of either scalars or dicts. If it contains dicts,
# then train_loss is computed using the target metric (self.metric).
if all(isinstance(elem, dict) for elem in train_losses):
train_loss = np.average([train_losses[i][str(self.metric)]
for i in range(self.cv_folds)],
weights=train_fold_weights,
)
else:
train_loss = np.average(train_losses, weights=train_fold_weights)

        # If all_scoring_functions is True, opt_loss is a dict mapping each metric
        # name to its loss. Otherwise, it is a scalar.
if self.all_scoring_functions is True:
opt_loss = {}
for metric in opt_losses[0].keys():
opt_loss[metric] = np.average([opt_losses[i][metric]
for i in range(self.cv_folds)],
weights=opt_fold_weights,
)
else:
opt_loss = np.average(opt_losses, weights=opt_fold_weights)

Y_targets = self.Y_targets
Y_train_targets = self.Y_train_targets

Y_train_pred_full = np.array(
[
np.ones(
(self.Y_train.shape[0], Y_train_pred[i].shape[1])
) * np.NaN
for _ in range(self.cv_folds) if Y_train_pred[i] is not None
]
)
for i in range(self.cv_folds):
if Y_train_pred[i] is None:
continue
Y_train_pred_full[i][train_splits[i]] = Y_train_pred[i]
Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
if self.cv_folds == 1:
Y_train_pred = Y_train_pred[
# if the first column is np.NaN, all other columns have
# to be np.NaN as well
np.isfinite(Y_train_pred[:, 0])
]


Y_optimization_pred = np.concatenate(
[Y_optimization_pred[i] for i in range(self.cv_folds)
if Y_optimization_pred[i] is not None])
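
For concreteness, a self-contained numerical sketch of the fold weighting above; the fold sizes, loss values and metric names are invented, and the dict case is written as a comprehension equivalent to the per-metric loop shown in the diff:

    import numpy as np

    # Scalar case: one train loss per fold, weighted by the training-split size.
    train_losses = [0.20, 0.25, 0.22]
    train_fold_weights = [400, 300, 300]
    train_fold_weights = [w / sum(train_fold_weights) for w in train_fold_weights]
    train_loss = np.average(train_losses, weights=train_fold_weights)
    # 0.4 * 0.20 + 0.3 * 0.25 + 0.3 * 0.22 == 0.221

    # Dict case (all_scoring_functions=True): average each metric separately,
    # weighted by the size of each fold's optimization split.
    opt_losses = [{'accuracy': 0.10, 'log_loss': 0.40},
                  {'accuracy': 0.14, 'log_loss': 0.46}]
    opt_fold_weights = [0.5, 0.5]
    opt_loss = {metric: np.average([fold[metric] for fold in opt_losses],
                                   weights=opt_fold_weights)
                for metric in opt_losses[0]}
    # {'accuracy': 0.12, 'log_loss': 0.43}
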
@@ -240,8 +273,8 @@ def fit_predict_and_loss(self, iterative=False):
self._added_empty_model = True

self.finish_up(
loss=loss,
train_pred=Y_train_pred,
loss=opt_loss,
train_loss=train_loss,
opt_pred=Y_optimization_pred,
valid_pred=Y_valid_pred,
test_pred=Y_test_pred,
@@ -282,6 +315,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False):
iterative=iterative,
)
)
train_loss = self._loss(self.Y_actual_train, train_pred)
loss = self._loss(self.Y_targets[fold], opt_pred)

if self.cv_folds > 1:
Expand All @@ -292,7 +326,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False):

self.finish_up(
loss=loss,
train_pred=train_pred,
train_loss=train_loss,
opt_pred=opt_pred,
valid_pred=valid_pred,
test_pred=test_pred,
@@ -345,6 +379,9 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices,
if self.cv_folds == 1:
self.model = model

train_loss = self._loss(self.Y_train[train_indices],
Y_train_pred,
)
loss = self._loss(self.Y_train[test_indices], Y_optimization_pred)
additional_run_info = model.get_additional_run_info()

@@ -354,7 +391,7 @@
final_call = False
self.finish_up(
loss=loss,
train_pred=Y_train_pred,
train_loss=train_loss,
opt_pred=Y_optimization_pred,
valid_pred=Y_valid_pred,
test_pred=Y_test_pred,
Expand Down Expand Up @@ -386,11 +423,14 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices,
train_indices=train_indices,
test_indices=test_indices
)
train_loss = self._loss(self.Y_train[train_indices],
Y_train_pred,
)
loss = self._loss(self.Y_train[test_indices], Y_optimization_pred)
additional_run_info = model.get_additional_run_info()
self.finish_up(
loss=loss,
train_pred=Y_train_pred,
train_loss=train_loss,
opt_pred=Y_optimization_pred,
valid_pred=Y_valid_pred,
test_pred=Y_test_pred,
9 changes: 3 additions & 6 deletions test/test_evaluation/test_abstract_evaluator.py
@@ -39,7 +39,7 @@ def test_finish_up_model_predicts_NaN(self):
predictions_ensemble[5, 2] = np.NaN
_, loss, _, additional_run_info = ae.finish_up(
loss=0.1,
train_pred=predictions_train,
train_loss=0.1,
opt_pred=predictions_ensemble,
valid_pred=predictions_valid,
test_pred=predictions_test,
@@ -57,7 +57,7 @@ def test_finish_up_model_predicts_NaN(self):
predictions_valid[5, 2] = np.NaN
_, loss, _, additional_run_info = ae.finish_up(
loss=0.1,
train_pred=predictions_train,
train_loss=0.1,
opt_pred=predictions_ensemble,
valid_pred=predictions_valid,
test_pred=predictions_test,
@@ -75,7 +75,7 @@ def test_finish_up_model_predicts_NaN(self):
predictions_test[5, 2] = np.NaN
_, loss, _, additional_run_info = ae.finish_up(
loss=0.1,
train_pred=predictions_train,
train_loss=0.1,
opt_pred=predictions_ensemble,
valid_pred=predictions_valid,
test_pred=predictions_test,
@@ -114,7 +114,6 @@ def test_disable_file_output(self, exists_mock):

loss_, additional_run_info_ = (
ae.file_output(
predictions_train,
predictions_ensemble,
predictions_valid,
predictions_test,
@@ -138,7 +137,6 @@ def test_disable_file_output(self, exists_mock):

loss_, additional_run_info_ = (
ae.file_output(
predictions_train,
predictions_ensemble,
predictions_valid,
predictions_test,
@@ -164,7 +162,6 @@ def test_disable_file_output(self, exists_mock):

loss_, additional_run_info_ = (
ae.file_output(
predictions_train,
predictions_ensemble,
predictions_valid,
predictions_test,