Commit 7d29757

Author: Matthias Feurer (committed)

bump xgboost version number, remove grid_scores_ attribute

1 parent: 09c18ff

7 files changed: +9 −106 lines

autosklearn/__init__.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -4,13 +4,13 @@
 
 
 __MANDATORY_PACKAGES__ = '''
-numpy>=1.9,<1.12
+numpy>=1.9
 scikit-learn==0.18.1
 smac==0.4.0
 lockfile>=0.10
 ConfigSpace>=0.3.3,<0.4
 pyrfr==0.2.0
-xgboost==0.4a30
+xgboost==0.6a2
 '''
 
 dependencies.verify_packages(__MANDATORY_PACKAGES__)
```
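At import time, `dependencies.verify_packages` parses this block and raises if an installed package does not satisfy its pin. As a rough illustration only (not auto-sklearn's actual implementation), the same kind of check can be sketched with `pkg_resources`:

```python
# Illustrative sketch: verify a requirements-style string with
# pkg_resources (setuptools). Auto-sklearn's verify_packages may differ.
import pkg_resources

def verify_packages_sketch(spec_block):
    for line in spec_block.splitlines():
        requirement = line.strip()
        if not requirement:
            continue
        try:
            # Raises DistributionNotFound / VersionConflict on failure.
            pkg_resources.require(requirement)
        except pkg_resources.DistributionNotFound:
            raise ImportError('Missing package: %s' % requirement)
        except pkg_resources.VersionConflict as err:
            raise ImportError('Version mismatch for %s: %s'
                              % (requirement, err))

verify_packages_sketch('numpy>=1.9')
```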

autosklearn/automl.py
Lines changed: 0 additions & 30 deletions

```diff
@@ -14,7 +14,6 @@
 from sklearn.base import BaseEstimator
 from smac.tae.execute_ta_run import StatusType
 from smac.stats.stats import Stats
-from sklearn.grid_search import _CVScoreTuple
 
 from autosklearn.constants import *
 from autosklearn.metrics import Scorer
@@ -638,35 +637,6 @@ def score(self, X, y):
                             metric=self._metric,
                             all_scoring_functions=False)
 
-    @property
-    def grid_scores_(self):
-        grid_scores = list()
-
-        scores_per_config = defaultdict(list)
-        config_list = list()
-
-        for run_key in self.runhistory_.data:
-            run_value = self.runhistory_.data[run_key]
-
-            config_id = run_key.config_id
-            cost = run_value.cost
-
-            if config_id not in config_list:
-                config_list.append(config_id)
-
-            scores_per_config[config_id].append(cost)
-
-        for config_id in config_list:
-            scores = [1 - score for score in scores_per_config[config_id]]
-            mean_score = np.mean(scores)
-            config = self.runhistory_.ids_config[config_id]
-
-            grid_score = _CVScoreTuple(config.get_dictionary(), mean_score,
-                                       scores)
-            grid_scores.append(grid_score)
-
-        return grid_scores
-
     @property
     def cv_results_(self):
         results = dict()
```
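The deleted property turned SMAC's run history into scikit-learn-style score tuples by averaging `1 - cost` per configuration. Anyone still needing that view after this commit can reproduce the aggregation outside the class; a minimal standalone sketch, with namedtuples standing in for SMAC's runhistory entries and for the removed `_CVScoreTuple`:

```python
# Standalone sketch of the removed grid_scores_ aggregation. RunKey and
# RunValue mimic SMAC runhistory entries; CVScore replaces the dropped
# sklearn _CVScoreTuple. Assumes costs are losses, as in the runhistory.
import collections
import numpy as np

RunKey = collections.namedtuple(
    'RunKey', ['config_id', 'instance_id', 'seed'])
RunValue = collections.namedtuple(
    'RunValue', ['cost', 'time', 'status', 'additional_info'])
CVScore = collections.namedtuple(
    'CVScore', ['parameters', 'mean_validation_score',
                'cv_validation_scores'])

def grid_scores_from_runhistory(data, ids_config):
    scores_per_config = collections.defaultdict(list)
    config_order = []
    for run_key, run_value in data.items():
        if run_key.config_id not in scores_per_config:
            config_order.append(run_key.config_id)
        scores_per_config[run_key.config_id].append(run_value.cost)

    grid_scores = []
    for config_id in config_order:
        # Costs are losses, so the score is 1 - cost.
        scores = [1 - cost for cost in scores_per_config[config_id]]
        grid_scores.append(
            CVScore(ids_config[config_id], np.mean(scores), scores))
    return grid_scores

data = {RunKey(1, 1, 1): RunValue(0.25, 1.0, 'SUCCESS', '')}
print(grid_scores_from_runhistory(data, {1: {'classifier': 'random_forest'}}))
```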

autosklearn/estimators.py
Lines changed: 1 addition & 16 deletions

```diff
@@ -80,10 +80,6 @@ def show_models(self):
         """
         return self._automl.show_models()
 
-    @property
-    def grid_scores_(self):
-        return self._automl.grid_scores_
-
     @property
     def cv_results_(self):
         return self._automl.cv_results_
@@ -227,23 +223,12 @@ def __init__(self,
 
         Attributes
         ----------
-        grid_scores\_ : list of named tuples
-            Contains scores for all parameter combinations in param_grid.
-            Each entry corresponds to one parameter setting.
-            Each named tuple has the attributes:
-
-            * ``parameters``, a dict of parameter settings
-            * ``mean_validation_score``, the mean score over the
-              cross-validation folds
-            * ``cv_validation_scores``, the list of scores for each fold
 
         cv_results\_ : dict of numpy (masked) ndarrays
             A dict with keys as column headers and values as columns, that can be
             imported into a pandas ``DataFrame``.
 
-            This attribute is a backward port to already support the advanced
-            output of scikit-learn 0.18. Not all keys returned by scikit-learn
-            are supported yet.
+            Not all keys returned by scikit-learn are supported yet.
 
         """
         self.time_left_for_this_task = time_left_for_this_task
```
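The surviving `cv_results_` docstring still promises columns that load straight into pandas. A hedged usage sketch under that assumption (column names here follow the scikit-learn 0.18 convention, e.g. `mean_test_score`; per the docstring, only a subset of keys may actually be present):

```python
# Hedged sketch: load cv_results_ into a DataFrame after fitting.
# Assumes a 'mean_test_score' column (scikit-learn 0.18 convention).
import pandas as pd
import sklearn.datasets
import autosklearn.classification

X, y = sklearn.datasets.load_digits(return_X_y=True)
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=15)
automl.fit(X, y)

results = pd.DataFrame(automl.cv_results_)
print(results.sort_values('mean_test_score', ascending=False).head())
```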

example/example_holdout.py
Lines changed: 0 additions & 17 deletions

```diff
@@ -8,19 +8,6 @@
 import autosklearn.classification
 
 
-# Utility function to report best scores
-# from http://scikit-learn.org/stable/auto_examples/model_selection/randomized_search.html#example-model-selection-randomized-search-py
-def report(grid_scores, n_top=3):
-    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
-    for i, score in enumerate(top_scores):
-        print("Model with rank: {0}".format(i + 1))
-        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
-              score.mean_validation_score,
-              np.std(score.cv_validation_scores)))
-        print("Parameters: {0}".format(score.parameters))
-        print("")
-
-
 def main():
     digits = sklearn.datasets.load_digits()
     X = digits.data
@@ -35,10 +22,6 @@ def main():
                                  disable_evaluator_output=False)
     automl.fit(X_train, y_train, dataset_name='digits')
 
-    # Print the best models together with their scores - if all scores are
-    # unreasonably bad (around 0.0) you should have a look into the logging
-    # file to figure out the error
-    report(automl.grid_scores_)
     # Print the final ensemble constructed by auto-sklearn.
     print(automl.show_models())
     predictions = automl.predict(X_test)
```
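Since the example loses its `report` helper together with `grid_scores_`, a comparable ranking can be printed from `cv_results_` instead. A hedged drop-in sketch (again assuming the scikit-learn 0.18 key names `mean_test_score` and `params`):

```python
import numpy as np

def report_cv_results(cv_results, n_top=3):
    # Rank configurations by mean validation score, best first.
    # Assumes 'mean_test_score' and 'params' keys exist.
    order = np.argsort(cv_results['mean_test_score'])[::-1][:n_top]
    for rank, idx in enumerate(order, start=1):
        print("Model with rank: {0}".format(rank))
        print("Mean validation score: {0:.3f}".format(
            cv_results['mean_test_score'][idx]))
        print("Parameters: {0}".format(cv_results['params'][idx]))
        print("")

# After fitting: report_cv_results(automl.cv_results_)
```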

requirements.txt
Lines changed: 2 additions & 2 deletions

```diff
@@ -4,7 +4,7 @@ nose
 six
 Cython
 
-numpy>=1.9.0,<1.12
+numpy>=1.9.0
 scipy>=0.14.1
 
 scikit-learn==0.18.1
@@ -15,7 +15,7 @@ psutil
 pyyaml
 liac-arff
 pandas
-xgboost==0.4a30
+xgboost==0.6a20
 
 ConfigSpace>=0.3.3,<0.4
 pynisher>=0.4
```

setup.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -16,7 +16,7 @@
     "nose",
     "six",
     "Cython",
-    "numpy>=1.9.0,<1.12",
+    "numpy>=1.9.0",
     "scipy>=0.14.1",
     "scikit-learn==0.18.1",
     "lockfile",
@@ -25,7 +25,7 @@
     "pyyaml",
     "liac-arff",
     "pandas",
-    "xgboost==0.4a30",
+    "xgboost==0.6a2",
     "ConfigSpace>=0.3.3,<0.4",
     "pynisher>=0.4",
     "pyrfr",
```

test/test_automl/test_estimators.py
Lines changed: 2 additions & 37 deletions

```diff
@@ -9,7 +9,6 @@
 
 import numpy as np
 import numpy.ma as npma
-from sklearn.grid_search import _CVScoreTuple
 
 import autosklearn.pipeline.util as putil
 from autosklearn.classification import AutoSklearnClassifier
@@ -114,7 +113,9 @@ def test_fit_pSMAC(self):
         with open(true_targets_ensemble_path, 'rb') as fh:
             true_targets_ensemble = np.load(fh)
         true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
+        true_targets_ensemble = true_targets_ensemble.astype(int)
         probas = np.zeros((len(true_targets_ensemble), 3), dtype=float)
+
         for i, value in enumerate(true_targets_ensemble):
             probas[i, value] = 1.0
         dummy_predictions_path = os.path.join(output, '.auto-sklearn',
@@ -164,42 +165,6 @@ def test_fit_pSMAC(self):
         del automl
         self._tearDown(output)
 
-    def test_grid_scores(self):
-        output = os.path.join(self.test_dir, '..', '.tmp_grid_scores')
-        self._setUp(output)
-
-        cls = AutoSklearnClassifier(time_left_for_this_task=30,
-                                    per_run_time_limit=5,
-                                    output_folder=output,
-                                    tmp_folder=output,
-                                    shared_mode=False,
-                                    seed=1,
-                                    initial_configurations_via_metalearning=0,
-                                    ensemble_size=0)
-        cls_ = cls.build_automl()
-        automl = cls_._automl
-        automl.runhistory_ = unittest.mock.MagicMock()
-
-        RunKey = collections.namedtuple(
-            'RunKey', ['config_id', 'instance_id', 'seed'])
-
-        RunValue = collections.namedtuple(
-            'RunValue', ['cost', 'time', 'status', 'additional_info'])
-
-        runhistory = dict()
-        runhistory[RunKey(1, 1, 1)] = RunValue(1, 1, 1, '')
-        automl.runhistory_.data = runhistory
-        grid_scores_ = automl.grid_scores_
-
-        self.assertIsInstance(grid_scores_[0], _CVScoreTuple)
-        # In the runhistory we store losses, thus the score is zero
-        self.assertEqual(grid_scores_[0].mean_validation_score, 0)
-        self.assertEqual(grid_scores_[0].cv_validation_scores, [0])
-        self.assertIsInstance(grid_scores_[0].parameters, unittest.mock.MagicMock)
-
-        del automl
-        self._tearDown(output)
-
     def test_cv_results(self):
         # TODO restructure and actually use real SMAC output from a long run
         # to do this unittest!
```
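The mocked `test_grid_scores` disappears with the attribute; its coverage effectively moves to `test_cv_results` (truncated above). A hedged sketch of the kind of structural check such a test can make without a full SMAC run:

```python
def check_cv_results_shape(cv_results):
    # Every column should have one entry per evaluated configuration.
    # Assumes a 'params' column exists (scikit-learn 0.18 convention).
    n_configs = len(cv_results['params'])
    for key, column in cv_results.items():
        assert len(column) == n_configs, key
```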
