Improve ensemble selection memory usage #997

Merged · 4 commits · Nov 5, 2020
4 changes: 2 additions & 2 deletions .travis.yml
@@ -23,9 +23,9 @@ matrix:
     - os: linux
       env: DISTRIB="conda" DOCPUSH="true" PYTHON="3.7" SKIP_TESTS="true"
     - os: linux
-      env: DISTRIB="conda" RUN_FLAKE8="true" SKIP_TESTS="true"
+      env: DISTRIB="conda" PYTHON="3.8" RUN_FLAKE8="true" SKIP_TESTS="true"
     - os: linux
-      env: DISTRIB="conda" RUN_MYPY="true" SKIP_TESTS="true"
+      env: DISTRIB="conda" PYTHON="3.8" RUN_MYPY="true" SKIP_TESTS="true"
     - os: linux
       env: DISTRIB="conda" COVERAGE="true" PYTHON="3.6"
     - os: linux
5 changes: 4 additions & 1 deletion autosklearn/automl.py
@@ -872,7 +872,10 @@ def fit_ensemble(self, y, task=None, precision=32,
         future = manager.futures.pop()
         dask.distributed.wait([future])  # wait for the ensemble process to finish
         result = future.result()
-        self.ensemble_performance_history, _ = result
+        if result is None:
+            raise ValueError("Error building the ensemble - please check the log file and command "
+                             "line output for error messages.")
+        self.ensemble_performance_history, _, _, _, _ = result

         self._load_models()
         self._close_dask_client()
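Aside: the pattern introduced here (wait on the dask future, then validate before unpacking) matters because the worker-side builder signals failure by returning None. A minimal sketch of the same guard, with illustrative names only, not auto-sklearn's API:

```python
# Minimal sketch, assuming a dask.distributed.Client named `client` and a
# worker job that returns either None (failure) or a 5-tuple whose first
# element is the performance history.
import dask.distributed

def collect_ensemble_history(client: dask.distributed.Client):
    future = client.submit(lambda: ([], None, None, None, None))  # stand-in job
    dask.distributed.wait([future])   # block until the worker finishes
    result = future.result()
    if result is None:                # failure is signalled with None
        raise ValueError("Error building the ensemble - check the logs.")
    history, *_ = result              # keep only the performance history
    return history
```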
325 changes: 190 additions & 135 deletions autosklearn/ensemble_builder.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions autosklearn/ensembles/abstract_ensemble.py
@@ -1,5 +1,5 @@
 from abc import ABCMeta, abstractmethod
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Union

 import numpy as np
@@ -40,7 +40,7 @@ def fit(
         pass

     @abstractmethod
-    def predict(self, base_models_predictions: np.ndarray) -> np.ndarray:
+    def predict(self, base_models_predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
         """Create ensemble predictions from the base model predictions.

         Parameters
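Why widening the signature to `Union[np.ndarray, List[np.ndarray]]` is safe: `len()` and iteration behave identically on a list of 2-D prediction arrays and on a stacked 3-D array, so implementations can avoid the `np.asarray` copy. A quick standalone check (not project code):

```python
import numpy as np

# Five base models, each predicting probabilities for 4 samples x 3 classes.
as_list = [np.zeros((4, 3)) for _ in range(5)]
as_array = np.asarray(as_list)             # stacked (5, 4, 3) block

assert len(as_list) == len(as_array) == 5  # len() agrees on both forms
assert all(p.shape == (4, 3) for p in as_list)
assert all(p.shape == (4, 3) for p in as_array)  # iterating yields 2-D slices
```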
25 changes: 15 additions & 10 deletions autosklearn/ensembles/ensemble_selection.py
@@ -1,6 +1,6 @@
 import random
 from collections import Counter
-from typing import Any, Dict, List, Tuple, cast
+from typing import Any, Dict, List, Tuple, Union, cast

 import numpy as np
@@ -265,27 +265,32 @@ def _bagging(
             dtype=np.int64,
         )

-    def predict(self, predictions: np.ndarray) -> np.ndarray:
-        predictions = np.asarray(
-            predictions,
-            dtype=np.float64,
-        )
+    def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
+
+        average = np.zeros_like(predictions[0], dtype=np.float64)
+        tmp_predictions = np.empty_like(predictions[0], dtype=np.float64)

         # if predictions.shape[0] == len(self.weights_),
         # predictions include those of zero-weight models.
-        if predictions.shape[0] == len(self.weights_):
-            return np.average(predictions, axis=0, weights=self.weights_)
+        if len(predictions) == len(self.weights_):
+            for pred, weight in zip(predictions, self.weights_):
+                np.multiply(pred, weight, out=tmp_predictions)
+                np.add(average, tmp_predictions, out=average)

         # if prediction model.shape[0] == len(non_null_weights),
         # predictions do not include those of zero-weight models.
-        elif predictions.shape[0] == np.count_nonzero(self.weights_):
+        elif len(predictions) == np.count_nonzero(self.weights_):
             non_null_weights = [w for w in self.weights_ if w > 0]
-            return np.average(predictions, axis=0, weights=non_null_weights)
+            for pred, weight in zip(predictions, non_null_weights):
+                np.multiply(pred, weight, out=tmp_predictions)
+                np.add(average, tmp_predictions, out=average)

         # If none of the above applies, then something must have gone wrong.
         else:
             raise ValueError("The dimensions of ensemble predictions"
                              " and ensemble weights do not match!")
+        del tmp_predictions
+        return average

     def __str__(self) -> str:
         return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \
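The predict rewrite above is the heart of the memory improvement: the old code materialized all base-model predictions as one float64 `(n_models, n_samples, n_classes)` block via `np.asarray` and then let `np.average` allocate further temporaries, while the new code keeps the inputs as-is and accumulates into two preallocated buffers of a single prediction's shape. A self-contained sketch of the same idea, with an equivalence check against the old path:

```python
from typing import List
import numpy as np

def weighted_average(predictions: List[np.ndarray], weights: List[float]) -> np.ndarray:
    # Peak extra memory: two (n_samples, n_classes) buffers, regardless of
    # how many base models there are.
    average = np.zeros_like(predictions[0], dtype=np.float64)
    buffer = np.empty_like(predictions[0], dtype=np.float64)
    for pred, weight in zip(predictions, weights):
        np.multiply(pred, weight, out=buffer)  # scale into the reusable buffer
        np.add(average, buffer, out=average)   # accumulate in place
    return average

preds = [np.random.rand(1000, 10) for _ in range(5)]
weights = [0.4, 0.3, 0.2, 0.1, 0.0]  # sum to 1, as ensemble selection guarantees
assert np.allclose(
    weighted_average(preds, weights),
    np.average(np.asarray(preds), axis=0, weights=weights),  # the old approach
)
```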
4 changes: 2 additions & 2 deletions autosklearn/ensembles/singlebest_ensemble.py
@@ -1,5 +1,5 @@
 import os
-from typing import List, Tuple
+from typing import List, Tuple, Union

 import numpy as np
@@ -85,7 +85,7 @@ def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]:

return best_model_identifier

def predict(self, predictions: np.ndarray) -> np.ndarray:
def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
return predictions[0]

def __str__(self) -> str:
7 changes: 6 additions & 1 deletion autosklearn/estimators.py
@@ -42,6 +42,7 @@ def __init__(
         logging_config=None,
         metadata_directory=None,
         metric=None,
+        load_models: bool = True,
     ):
         """
         Parameters
@@ -216,6 +217,9 @@ def __init__(
             :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
             Metrics`_.
             If None is provided, a default metric is selected depending on the task.
+
+        load_models : bool, optional (True)
+            Whether to load the models after fitting Auto-sklearn.

         Attributes
         ----------
@@ -257,6 +261,7 @@ def __init__(
         self.logging_config = logging_config
         self.metadata_directory = metadata_directory
         self._metric = metric
+        self._load_models = load_models

         self.automl_ = None  # type: Optional[AutoML]
         # n_jobs after conversion to a number (b/c default is None)
@@ -340,7 +345,7 @@ def fit(self, **kwargs):
             tmp_folder=self.tmp_folder,
             output_folder=self.output_folder,
         )
-        self.automl_.fit(load_models=True, **kwargs)
+        self.automl_.fit(load_models=self._load_models, **kwargs)

         return self
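The new `load_models` flag lets a user skip deserializing every fitted model right after `fit()`, which pairs naturally with this PR's memory goal. A hedged usage sketch; it assumes the flag is forwarded unchanged to `AutoSklearnClassifier`, which inherits this `__init__`:

```python
from sklearn.datasets import load_digits
from autosklearn.classification import AutoSklearnClassifier

X, y = load_digits(return_X_y=True)

automl = AutoSklearnClassifier(
    time_left_for_this_task=60,
    load_models=False,  # new flag: keep fitted models on disk after fit()
)
automl.fit(X, y)
# Fitted models stay on disk; useful when only run statistics are needed
# immediately and the models can be loaded later.
```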
1 change: 1 addition & 0 deletions test/test_automl/test_automl.py
@@ -305,6 +305,7 @@ def test_automl_outputs(backend, dask_client):
         'start_time_100',
         'datamanager.pkl',
         'ensemble_read_preds.pkl',
+        'ensemble_read_scores.pkl',
         'runs',
         'ensembles',
     ]
28 changes: 15 additions & 13 deletions test/test_ensemble_builder/ensemble_utils.py
@@ -1,16 +1,14 @@
 import os
+import shutil
 import unittest

 import numpy as np

 from autosklearn.metrics import make_scorer
 from autosklearn.ensemble_builder import (
-    EnsembleBuilder,
+    EnsembleBuilder, AbstractEnsemble
 )

-this_directory = os.path.dirname(__file__)
-

 def scorer_function(a, b):
     return 0.9
@@ -21,22 +19,19 @@ def scorer_function(a, b):

 class BackendMock(object):

-    def __init__(self):
-        self.temporary_directory = os.path.join(
-            this_directory, 'data',
-        )
-        self.internals_directory = os.path.join(
-            this_directory, 'data', '.auto-sklearn',
-        )
+    def __init__(self, target_directory):
+        this_directory = os.path.abspath(
+            os.path.dirname(__file__)
+        )
+        shutil.copytree(os.path.join(this_directory, 'data'), os.path.join(target_directory))
+        self.temporary_directory = target_directory
+        self.internals_directory = os.path.join(self.temporary_directory, '.auto-sklearn')

     def load_datamanager(self):
         manager = unittest.mock.Mock()
         manager.__reduce__ = lambda self: (unittest.mock.MagicMock, ())
         array = np.load(os.path.join(
-            this_directory, 'data',
+            self.temporary_directory,
             '.auto-sklearn',
             'runs', '0_3_100.0',
             'predictions_test_0_3_100.0.npy'
@@ -60,7 +55,7 @@ def save_predictions_as_txt(self, predictions, subset, idx, prefix, precision):
         return

     def get_runs_directory(self) -> str:
-        return os.path.join(this_directory, 'data', '.auto-sklearn', 'runs')
+        return os.path.join(self.temporary_directory, '.auto-sklearn', 'runs')

     def get_numrun_directory(self, seed: int, num_run: int, budget: float) -> str:
         return os.path.join(self.get_runs_directory(), '%d_%d_%s' % (seed, num_run, budget))
@@ -97,4 +92,11 @@ def compare_read_preds(read_preds1, read_preds2):
 class EnsembleBuilderMemMock(EnsembleBuilder):

     def fit_ensemble(self, selected_keys):
         return True
+
+    def predict(self, set_: str,
+                ensemble: AbstractEnsemble,
+                selected_keys: list,
+                n_preds: int,
+                index_run: int):
+        np.ones([10000000, 1000000])
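For context on `EnsembleBuilderMemMock`: its `predict` tries to allocate a 10000000 x 1000000 float64 array (on the order of 80 TB), so any test running the builder under a memory cap can observe the limit being enforced. A rough sketch of that kind of check, assuming pynisher's 0.x API (`enforce_limits`), which auto-sklearn used at the time; the exact arguments are an assumption, not the project's test code:

```python
import numpy as np
import pynisher

def allocate():
    # Mirrors the mock above: an allocation that cannot possibly succeed.
    return np.ones([10000000, 1000000])

# Run under a 100 MB cap; pynisher returns None when the limit is breached.
safe_allocate = pynisher.enforce_limits(mem_in_mb=100)(allocate)
assert safe_allocate() is None
```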