diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py
index 2114d72ff3..6496950a11 100644
--- a/autosklearn/evaluation/abstract_evaluator.py
+++ b/autosklearn/evaluation/abstract_evaluator.py
@@ -213,9 +213,9 @@ def _loss(self, y_true, y_hat, all_scoring_functions=None):
             all_scoring_functions=all_scoring_functions)

         if hasattr(score, '__len__'):
-            err = {key: 1 - score[key] for key in score}
+            err = {key: self.metric._optimum - score[key] for key in score}
         else:
-            err = 1 - score
+            err = self.metric._optimum - score

         return err

diff --git a/autosklearn/metrics/__init__.py b/autosklearn/metrics/__init__.py
index 83f324796c..d72aec9e07 100644
--- a/autosklearn/metrics/__init__.py
+++ b/autosklearn/metrics/__init__.py
@@ -11,10 +11,11 @@


 class Scorer(object, metaclass=ABCMeta):
-    def __init__(self, name, score_func, sign, kwargs):
+    def __init__(self, name, score_func, optimum, sign, kwargs):
         self.name = name
         self._kwargs = kwargs
         self._score_func = score_func
+        self._optimum = optimum
         self._sign = sign

     @abstractmethod
@@ -133,8 +134,8 @@ def __call__(self, y_true, y_pred, sample_weight=None):
         return self._sign * self._score_func(y_true, y_pred, **self._kwargs)


-def make_scorer(name, score_func, greater_is_better=True, needs_proba=False,
-                needs_threshold=False, **kwargs):
+def make_scorer(name, score_func, optimum=1, greater_is_better=True,
+                needs_proba=False, needs_threshold=False, **kwargs):
     """Make a scorer from a performance metric or loss function.

     Factory inspired by scikit-learn which wraps scikit-learn scoring functions
@@ -146,6 +147,10 @@ def make_scorer(name, score_func, greater_is_better=True, needs_proba=False,
         Score function (or loss function) with signature
         ``score_func(y, y_pred, **kwargs)``.

+    optimum : int or float, default=1
+        The best score achievable by the score function, i.e. the maximum in
+        case of a score function and the minimum in case of a loss function.
+
     greater_is_better : boolean, default=True
         Whether score_func is a score function (default), meaning high is good,
         or a loss function, meaning low is good. In the latter case, the
@@ -174,41 +179,56 @@ def make_scorer(name, score_func, greater_is_better=True, needs_proba=False,
         cls = _ThresholdScorer
     else:
         cls = _PredictScorer
-    return cls(name, score_func, sign, kwargs)
+    return cls(name, score_func, optimum, sign, kwargs)


 # Standard regression scores
-r2 = make_scorer('r2', sklearn.metrics.r2_score)
+r2 = make_scorer('r2',
+                 sklearn.metrics.r2_score)
 mean_squared_error = make_scorer('mean_squared_error',
                                  sklearn.metrics.mean_squared_error,
+                                 optimum=0,
                                  greater_is_better=False)
 mean_absolute_error = make_scorer('mean_absolute_error',
                                   sklearn.metrics.mean_absolute_error,
+                                  optimum=0,
                                   greater_is_better=False)
 median_absolute_error = make_scorer('median_absolute_error',
                                     sklearn.metrics.median_absolute_error,
+                                    optimum=0,
                                     greater_is_better=False)

 # Standard Classification Scores
-accuracy = make_scorer('accuracy', sklearn.metrics.accuracy_score)
+accuracy = make_scorer('accuracy',
+                       sklearn.metrics.accuracy_score)
 balanced_accuracy = make_scorer('balanced_accuracy',
                                 classification_metrics.balanced_accuracy)
-f1 = make_scorer('f1', sklearn.metrics.f1_score)
+f1 = make_scorer('f1',
+                 sklearn.metrics.f1_score)

 # Score functions that need decision values
-roc_auc = make_scorer('roc_auc', sklearn.metrics.roc_auc_score,
-                      greater_is_better=True, needs_threshold=True)
+roc_auc = make_scorer('roc_auc',
+                      sklearn.metrics.roc_auc_score,
+                      greater_is_better=True,
+                      needs_threshold=True)
 average_precision = make_scorer('average_precision',
                                 sklearn.metrics.average_precision_score,
                                 needs_threshold=True)
-precision = make_scorer('precision', sklearn.metrics.precision_score)
-recall = make_scorer('recall', sklearn.metrics.recall_score)
+precision = make_scorer('precision',
+                        sklearn.metrics.precision_score)
+recall = make_scorer('recall',
+                     sklearn.metrics.recall_score)

 # Score function for probabilistic classification
-log_loss = make_scorer('log_loss', sklearn.metrics.log_loss,
-                       greater_is_better=False, needs_proba=True)
-pac_score = make_scorer('pac_score', classification_metrics.pac_score,
-                        greater_is_better=True, needs_proba=True)
+log_loss = make_scorer('log_loss',
+                       sklearn.metrics.log_loss,
+                       optimum=0,
+                       greater_is_better=False,
+                       needs_proba=True)
+pac_score = make_scorer('pac_score',
+                        classification_metrics.pac_score,
+                        greater_is_better=True,
+                        needs_proba=True)
 # TODO what about mathews correlation coefficient etc?

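Reviewer note on the semantics: a scorer built by `make_scorer` always returns `sign * score_func(...)`, and `_optimum` records the best value that call can return, so `scorer._optimum - scorer(y_true, y_pred)` is a non-negative loss for both score-like and loss-like metrics. That is exactly what the `_loss` change in `abstract_evaluator.py` now computes; the old `1 - score` was only correct for metrics whose optimum is 1. A minimal sketch, not part of the patch, assuming `autosklearn.metrics` is importable with the changes above:

```python
import numpy as np
import autosklearn.metrics

y_true = np.array([0, 0, 1, 1])
y_pred = np.array([[0.9, 0.1], [0.8, 0.2], [0.3, 0.7], [0.4, 0.6]])

# accuracy: optimum=1, sign=+1; log_loss: optimum=0, sign=-1
for scorer in (autosklearn.metrics.accuracy, autosklearn.metrics.log_loss):
    score = scorer(y_true, y_pred)    # sign * score_func(y_true, y_pred)
    loss = scorer._optimum - score    # what _loss() now returns
    print(scorer.name, score, loss)   # loss >= 0 in both cases
```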
diff --git a/example/example_metrics.py b/example/example_metrics.py
index 603b82f946..dd306e6c63 100644
--- a/example/example_metrics.py
+++ b/example/example_metrics.py
@@ -55,6 +55,7 @@ def main():
     accuracy_scorer = autosklearn.metrics.make_scorer(
         name="accu",
         score_func=accuracy,
+        optimum=1,
         greater_is_better=True,
         needs_proba=False,
         needs_threshold=False,
@@ -77,6 +78,7 @@ def main():
     accuracy_scorer = autosklearn.metrics.make_scorer(
         name="accu_add",
         score_func=accuracy_wk,
+        optimum=1,
         greater_is_better=True,
         needs_proba=False,
         needs_threshold=False,
diff --git a/test/test_metric/test_metrics.py b/test/test_metric/test_metrics.py
index 1038e0ab29..4b7da5d438 100644
--- a/test/test_metric/test_metrics.py
+++ b/test/test_metric/test_metrics.py
@@ -13,7 +13,7 @@ def test_predict_scorer_binary(self):
         y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, 1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -28,13 +28,13 @@ def test_predict_scorer_binary(self):

         scorer = autosklearn.metrics._PredictScorer(
             'bac', autosklearn.metrics.classification_metrics.balanced_accuracy,
-            1, {})
+            1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.5)

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, -1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, -1, {})

         y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -45,7 +45,7 @@ def test_predict_scorer_multiclass(self):
         y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, 1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -60,13 +60,13 @@ def test_predict_scorer_multiclass(self):

         scorer = autosklearn.metrics._PredictScorer(
             'bac', autosklearn.metrics.classification_metrics.balanced_accuracy,
-            1, {})
+            1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.333333333)

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, -1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, -1, {})

         y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -77,7 +77,7 @@ def test_predict_scorer_multilabel(self):
         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, 1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -92,13 +92,13 @@ def test_predict_scorer_multilabel(self):

         scorer = autosklearn.metrics._PredictScorer(
             'bac', autosklearn.metrics.classification_metrics.balanced_accuracy,
-            1, {})
+            1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.5)

         scorer = autosklearn.metrics._PredictScorer(
-            'accuracy', sklearn.metrics.accuracy_score, -1, {})
+            'accuracy', sklearn.metrics.accuracy_score, 1, -1, {})

         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -109,7 +109,7 @@ def test_predict_scorer_regression(self):
         y_pred = y_true.copy()

         scorer = autosklearn.metrics._PredictScorer(
-            'r2', sklearn.metrics.r2_score, 1, {})
+            'r2', sklearn.metrics.r2_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -123,7 +123,7 @@ def test_proba_scorer_binary(self):
         y_pred = [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, 1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.0)
@@ -137,7 +137,7 @@ def test_proba_scorer_binary(self):
         self.assertAlmostEqual(score, 0.69314718055994529)

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, -1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, -1, {})

         y_pred = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
         score = scorer(y_true, y_pred)
@@ -148,7 +148,7 @@ def test_proba_scorer_multiclass(self):
         y_pred = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, 1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.0)
@@ -162,7 +162,7 @@ def test_proba_scorer_multiclass(self):
         self.assertAlmostEqual(score, 1.0986122886681096)

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, -1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, -1, {})

         y_pred = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
         score = scorer(y_true, y_pred)
@@ -173,7 +173,7 @@ def test_proba_scorer_multilabel(self):
         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, 1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 0.34657359027997314)
@@ -187,7 +187,7 @@ def test_proba_scorer_multilabel(self):
         self.assertAlmostEqual(score, 0.69314718055994529)

         scorer = autosklearn.metrics._ProbaScorer(
-            'accuracy', sklearn.metrics.log_loss, -1, {})
+            'accuracy', sklearn.metrics.log_loss, 0, -1, {})

         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -198,7 +198,7 @@ def test_threshold_scorer_binary(self):
         y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])

         scorer = autosklearn.metrics._ThresholdScorer(
-            'accuracy', sklearn.metrics.roc_auc_score, 1, {})
+            'accuracy', sklearn.metrics.roc_auc_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -212,7 +212,7 @@ def test_threshold_scorer_binary(self):
         self.assertAlmostEqual(score, 0.5)

         scorer = autosklearn.metrics._ThresholdScorer(
-            'accuracy', sklearn.metrics.roc_auc_score, -1, {})
+            'accuracy', sklearn.metrics.roc_auc_score, 1, -1, {})

         y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -223,7 +223,7 @@ def test_threshold_scorer_multilabel(self):
         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])

         scorer = autosklearn.metrics._ThresholdScorer(
-            'accuracy', sklearn.metrics.roc_auc_score, 1, {})
+            'accuracy', sklearn.metrics.roc_auc_score, 1, 1, {})

         score = scorer(y_true, y_pred)
         self.assertAlmostEqual(score, 1.0)
@@ -237,7 +237,7 @@ def test_threshold_scorer_multilabel(self):
         self.assertAlmostEqual(score, 0.5)

         scorer = autosklearn.metrics._ThresholdScorer(
-            'accuracy', sklearn.metrics.roc_auc_score, -1, {})
+            'accuracy', sklearn.metrics.roc_auc_score, 1, -1, {})

         y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
         score = scorer(y_true, y_pred)
@@ -308,3 +308,128 @@ def test_classification_metrics(self):
                     pass
                 else:
                     raise e
+
+
+class TestMetric(unittest.TestCase):
+
+    def test_regression_all(self):
+
+        for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items():
+            y_true = np.array([1, 2, 3, 4])
+            y_pred = y_true.copy()
+            previous_score = scorer._optimum
+            current_score = scorer(y_true, y_pred)
+            self.assertAlmostEqual(current_score, previous_score)
+
+            y_pred = np.array([3, 4, 5, 6])
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([-1, 0, -1, 0])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([-5, 10, 7, -3])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+
+    def test_classification_binary(self):
+
+        for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items():
+            # Skip functions not applicable for binary classification.
+            # TODO: Average precision should work for binary classification,
+            # TODO: but its behavior is not right. When y_pred is completely
+            # TODO: wrong, it does return 0.5, but when it is not completely
+            # TODO: wrong, it returns a value smaller than 0.5.
+            if metric in ['average_precision', 'pac_score',
+                          'precision_samples', 'recall_samples', 'f1_samples']:
+                continue
+
+            y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
+            y_pred = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0],
+                               [1.0, 0.0], [1.0, 0.0]])
+            previous_score = scorer._optimum
+            current_score = scorer(y_true, y_pred)
+            self.assertAlmostEqual(current_score, previous_score)
+
+            y_pred = np.array([[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0],
+                               [0.0, 1.0], [1.0, 0.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0],
+                               [0.0, 1.0], [0.0, 1.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0],
+                               [0.0, 1.0], [0.0, 1.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+    def test_classification_multiclass(self):
+
+        for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items():
+            # Skip functions not applicable for multiclass classification.
+            if metric in ['pac_score', 'roc_auc', 'average_precision',
+                          'precision', 'recall', 'f1', 'precision_samples',
+                          'recall_samples', 'f1_samples']:
+                continue
+            y_true = np.array([0.0, 0.0, 1.0, 1.0, 2.0])
+            y_pred = np.array([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0],
+                               [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+            previous_score = scorer._optimum
+            current_score = scorer(y_true, y_pred)
+            self.assertAlmostEqual(current_score, previous_score)
+
+            y_pred = np.array([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0],
+                               [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0],
+                               [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 1.0, 0.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0],
+                               [1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+    def test_classification_multilabel(self):
+
+        for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items():
+            # Skip functions not applicable for multi-label classification.
+            if metric in ['roc_auc', 'log_loss',
+                          'pac_score', 'precision', 'recall', 'f1']:
+                continue
+            y_true = np.array([[1, 0, 0], [1, 1, 0], [0, 1, 1], [1, 1, 1]])
+            y_pred = y_true.copy()
+            previous_score = scorer._optimum
+            current_score = scorer(y_true, y_pred)
+            self.assertAlmostEqual(current_score, previous_score)
+
+            y_pred = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 1], [1, 1, 1]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[1, 0, 0], [0, 0, 1], [1, 0, 1], [1, 1, 0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
+
+            y_pred = np.array([[0, 1, 1], [0, 0, 1], [1, 0, 0], [0, 0, 0]])
+            previous_score = current_score
+            current_score = scorer(y_true, y_pred)
+            self.assertLess(current_score, previous_score)
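Usage note, not part of the patch: user-defined metrics passed to `make_scorer` should now also supply `optimum`, so that `optimum - score` is zero for perfect predictions, which is the invariant the new `TestMetric` cases assert above. A sketch with a hypothetical loss-style metric (`error_rate` and `error_scorer` are illustrative names, not part of auto-sklearn):

```python
import numpy as np
import sklearn.metrics
import autosklearn.metrics


def error_rate(y_true, y_pred):
    # Hypothetical loss-style metric: fraction of misclassified samples.
    return 1 - sklearn.metrics.accuracy_score(y_true, y_pred)


error_scorer = autosklearn.metrics.make_scorer(
    name='error_rate',
    score_func=error_rate,
    optimum=0,                # best achievable value of error_rate
    greater_is_better=False,  # lower is better, so the scorer sign-flips it
    needs_proba=False,
    needs_threshold=False,
)

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]])
# Perfect predictions give zero loss under the new _loss() formula.
assert error_scorer._optimum - error_scorer(y_true, y_pred) == 0
```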