Merged

50 commits
ecadd82
Extending Autosklearn. First commit.
ahn1340 Jul 17, 2018
b912d67
Add regression example
ahn1340 Jul 18, 2018
8e6927e
CI: upper bound numpy version due to travis failures
mfeurer Aug 1, 2018
e8130f7
CI: upper bound numpy version due to travis failures
mfeurer Aug 1, 2018
6832382
use tempfile.gettempdir() (#521)
mstreuhofer Aug 1, 2018
df273da
Remove a colon from README.md (#527)
tmielika Aug 13, 2018
8c5e3c7
fixing warnings on non-tuple sequence for indexing (#526)
tmielika Aug 14, 2018
c02dc8f
fix string formatting (#540)
KEggensperger Sep 10, 2018
9e91a33
FIX removing models wrt wrong metric in ensemble (#522)
KEggensperger Sep 10, 2018
8eaa36c
Add examples for extending auto-sklearn.
ahn1340 Oct 2, 2018
e1e8c25
.
ahn1340 Oct 2, 2018
30ec4a2
Merge branch 'development' of https://github.com/automl/auto-sklearn …
ahn1340 Oct 2, 2018
c55cbac
Change datasets used in examples from digits to breast_cancer.
ahn1340 Oct 2, 2018
ef12841
First commit
ahn1340 Oct 9, 2018
242eebf
Fixing codacy errors
ahn1340 Oct 9, 2018
6475623
Fixing bug
ahn1340 Oct 9, 2018
5cab178
[Debug] try different numpy version
ahn1340 Oct 18, 2018
a062ba0
[Debug] Try with latest numpy version
ahn1340 Oct 18, 2018
94f9d2c
Set numpy version to 1.14.5
ahn1340 Oct 18, 2018
b331251
First commit
ahn1340 Oct 9, 2018
3456920
Fixing bug
ahn1340 Oct 9, 2018
6b947c5
Modify flake8_diff.sh
ahn1340 Oct 18, 2018
9927c8f
Merge branch 'pep8_enforce' of https://github.com/ahn1340/auto-sklear…
ahn1340 Oct 18, 2018
c6229e5
Extending Autosklearn. First commit.
ahn1340 Jul 17, 2018
2a98d0c
Add regression example
ahn1340 Jul 18, 2018
a6c53b7
Add examples for extending auto-sklearn.
ahn1340 Oct 2, 2018
9db3e2e
.
ahn1340 Oct 2, 2018
15196ce
Fixing codacy errors
ahn1340 Oct 9, 2018
ba98902
Merge branch 'extend' of https://github.com/ahn1340/auto-sklearn into…
ahn1340 Oct 18, 2018
bfb1e08
Change example (#553)
ahn1340 Oct 19, 2018
9c2c245
[WIP]Add argument for custom logger configuration. (#505)
ahn1340 Oct 19, 2018
3f0ee66
FIX #566: sort ensemble correctly (#567)
mfeurer Oct 19, 2018
80517ca
Fix Line length in example_parallel.py
ahn1340 Oct 19, 2018
2afad9a
Fix line length in example_parallel.py
ahn1340 Oct 19, 2018
c16d7f6
Fix minor error
ahn1340 Oct 19, 2018
c8368f5
Fix codacy error "parameters differ from overridden 'fit' method"
ahn1340 Oct 19, 2018
763aac0
Check target type at the beginning of the fitting process. (#506)
ahn1340 Oct 19, 2018
3cf42b5
Update test_automl.py
mfeurer Oct 19, 2018
88d1554
Add python 3.7 to Travis, change python_requirement in setup.py.
ahn1340 Oct 25, 2018
9b652d5
Add solver hyperparameter in MLP classifier example, increase runtime…
ahn1340 Oct 25, 2018
278f88a
Merge pull request #510 from ahn1340/extend
ahn1340 Oct 25, 2018
aacf24b
Change all occurrences of master to development in flake8_diff.sh
ahn1340 Oct 25, 2018
f9a7b1d
numpy requirement is now >=1.9.0<=1.14.5
ahn1340 Oct 25, 2018
2c07970
Fix requirement inequality mistake
ahn1340 Oct 25, 2018
de3192f
change initial numpy version to 1.14.5.
ahn1340 Oct 26, 2018
c963f75
Merge pull request #562 from ahn1340/pep8_enforce
ahn1340 Oct 29, 2018
56af60d
Circle Drop (#575)
ahn1340 Nov 9, 2018
1b7a172
Update gmeans.py (#572)
theFool32 Nov 9, 2018
6d53d1f
Release 0.4.1 (#576)
ahn1340 Nov 9, 2018
8aae9d6
Update version information for 0.4.1
mfeurer Nov 9, 2018
27 changes: 22 additions & 5 deletions .travis.yml
@@ -16,9 +16,14 @@ matrix:
- os: linux
env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
- os: linux
env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
env: DISTRIB="conda" PYTHON_VERSION="3.6" DOCPUSH="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
- os: linux
env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
- os: linux
env: DISTRIB="conda" PYTHON_VERSION="3.7" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
- os: linux
env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"


# Temporarily disabling OSX builds because they take too long
# Set language to generic to not break travis-ci
@@ -58,17 +63,29 @@ before_install:
install:
# Install general requirements the way setup.py suggests
- pip install pep8 codecov
# Temporarily pin the numpy version for travis-ci
- pip install "numpy<=1.14.5"
- cat requirements.txt | xargs -n 1 -L 1 pip install
# Install openml dependency for metadata generation unittest
- pip install xmltodict requests
- pip install git+https://github.com/renatopp/liac-arff
- pip install xmltodict requests liac-arff
- pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps
- mkdir ~/.openml
- echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config
- pip install flake8
# Debug output to know all exact package versions!
- pip freeze
- python setup.py install

script: bash ci_scripts/test.sh
after_success: source ci_scripts/success.sh
after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result"

deploy:
provider: pages
skip-cleanup: true
github-token: $GITHUB_TOKEN # set in the settings page of my repository
keep-history: true
committer-from-gh: true
on:
all_branches: true
condition: $doc_result = "success"
local_dir: doc/$TRAVIS_BRANCH
2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement

Find the documentation [here](http://automl.github.io/auto-sklearn/)

Status for master branch:
Status for master branch

[![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn)
[![Code Health](https://landscape.io/github/automl/auto-sklearn/master/landscape.png)](https://landscape.io/github/automl/auto-sklearn/master)
2 changes: 1 addition & 1 deletion autosklearn/__version__.py
@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "0.4.0"
__version__ = "0.4.1"
7 changes: 6 additions & 1 deletion autosklearn/automl.py
@@ -82,6 +82,7 @@ def __init__(self,
disable_evaluator_output=False,
get_smac_object_callback=None,
smac_scenario_args=None,
logging_config=None,
):
super(AutoML, self).__init__()
self._backend = backend
@@ -110,6 +111,7 @@ def __init__(self,
self._disable_evaluator_output = disable_evaluator_output
self._get_smac_object_callback = get_smac_object_callback
self._smac_scenario_args = smac_scenario_args
self.logging_config = logging_config

self._datamanager = None
self._dataset_name = None
@@ -235,7 +237,10 @@ def fit_on_datamanager(self, datamanager, metric):

def _get_logger(self, name):
logger_name = 'AutoML(%d):%s' % (self._seed, name)
setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name)))
setup_logger(os.path.join(self._backend.temporary_directory,
'%s.log' % str(logger_name)),
self.logging_config,
)
return get_logger(logger_name)

@staticmethod
23 changes: 12 additions & 11 deletions autosklearn/ensemble_builder.py
@@ -171,11 +171,8 @@ def main(self):
while True:

#maximal number of iterations
if (
self.max_iterations is not None
and self.max_iterations > 0
and iteration >= self.max_iterations
):
if (self.max_iterations is not None
and 0 < self.max_iterations <= iteration):
self.logger.info("Terminate ensemble building because of max iterations: %d of %d",
self.max_iterations,
iteration)
@@ -300,7 +297,7 @@ def read_ensemble_preds(self):
Y_TEST: None,
# Lazy keys so far:
# 0 - not loaded
# 1 - loaded and ind memory
# 1 - loaded and in memory
# 2 - loaded but dropped again
"loaded": 0
}
@@ -372,14 +369,18 @@ def get_n_best_preds(self):
],
key=lambda x: x[1],
)))
# remove all that are at most as good as random, cannot assume a
# minimum number here because all kinds of metric can be used
sorted_keys = filter(lambda x: x[1] > 0.001, sorted_keys)
# remove all that are at most as good as random
# note: dummy model must have run_id=1 (there is no run_id=0)
dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0]
self.logger.debug("Use %f as dummy score" %
dummy_score[1])
sorted_keys = filter(lambda x: x[1] > dummy_score[1], sorted_keys)
# remove Dummy Classifier
sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys))
if not sorted_keys:
# no model left; try to use dummy classifier (num_run==0)
self.logger.warning("No models better than random - using Dummy Classifier!")
# no model left; try to use dummy score (num_run==0)
self.logger.warning("No models better than random - "
"using Dummy Score!")
sorted_keys = [
(k, v["ens_score"], v["num_run"]) for k, v in self.read_preds.items()
if v["seed"] == self.seed and v["num_run"] == 1
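The change above replaces the old hard-coded cutoff (`x[1] > 0.001`) with the dummy model's actual score, so the filter adapts to whatever metric is in use. A minimal sketch of the new selection logic, using made-up `(key, ens_score, num_run)` triples matching the tuple layout built in `get_n_best_preds`:

```python
# Hypothetical (key, ens_score, num_run) triples; the dummy predictor
# is identified by num_run == 1, exactly as in the diff above.
sorted_keys = [
    ("pred_3", 0.91, 3),
    ("dummy", 0.50, 1),
    ("pred_2", 0.48, 2),  # at most as good as the dummy -> filtered out
]

# score of the dummy model (num_run == 1)
dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0]

# keep only models strictly better than the dummy, then drop the dummy itself
sorted_keys = list(filter(lambda x: x[1] > dummy_score[1] and x[2] > 1,
                          sorted_keys))
print(sorted_keys)  # [('pred_3', 0.91, 3)]
```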
38 changes: 36 additions & 2 deletions autosklearn/estimators.py
@@ -3,6 +3,7 @@

from autosklearn.automl import AutoMLClassifier, AutoMLRegressor
from autosklearn.util.backend import create
from sklearn.utils.multiclass import type_of_target


class AutoSklearnEstimator(BaseEstimator):
@@ -28,7 +29,9 @@ def __init__(self,
shared_mode=False,
disable_evaluator_output=False,
get_smac_object_callback=None,
smac_scenario_args=None):
smac_scenario_args=None,
logging_config=None,
):
"""
Parameters
----------
@@ -168,6 +171,11 @@ def __init__(self,
This is an advanced feature. Use only if you are familiar with
`SMAC <https://automl.github.io/SMAC3/stable/index.html>`_.

logging_config : dict, optional (None)
dictionary object specifying the logger configuration. If None,
the default logging.yaml file is used, which can be found at
``util/logging.yaml`` relative to the installation directory.

Attributes
----------

@@ -199,6 +207,7 @@ def __init__(self,
self.disable_evaluator_output = disable_evaluator_output
self.get_smac_object_callback = get_smac_object_callback
self.smac_scenario_args = smac_scenario_args
self.logging_config = logging_config

self._automl = None
super().__init__()
@@ -238,7 +247,8 @@ def build_automl(self):
shared_mode=self.shared_mode,
get_smac_object_callback=self.get_smac_object_callback,
disable_evaluator_output=self.disable_evaluator_output,
smac_scenario_args=self.smac_scenario_args
smac_scenario_args=self.smac_scenario_args,
logging_config=self.logging_config,
)

return automl
@@ -456,6 +466,18 @@ def fit(self, X, y,
self

"""
# Before running anything else, first check that the
# type of data is compatible with auto-sklearn. Legal target
# types are: binary, multiclass, multilabel-indicator.
target_type = type_of_target(y)
if target_type in ['multiclass-multioutput',
'continuous',
'continuous-multioutput',
'unknown',
]:
raise ValueError("classification with data of type %s is"
" not supported" % target_type)

super().fit(
X=X,
y=y,
@@ -559,6 +581,18 @@ def fit(self, X, y,
self

"""
# Before running anything else, first check that the
# type of data is compatible with auto-sklearn. Legal target
# types are: continuous, binary, multiclass.
target_type = type_of_target(y)
if target_type in ['multiclass-multioutput',
'multilabel-indicator',
'continuous-multioutput',
'unknown',
]:
raise ValueError("regression with data of type %s is not"
" supported" % target_type)

# Fit is supposed to be idempotent!
# But not if we use share_mode.
super().fit(
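Both `fit` methods now reject incompatible targets up front via scikit-learn's `type_of_target`. A quick illustration of the labels that function produces and how they map onto the checks above (the sample arrays are made up):

```python
from sklearn.utils.multiclass import type_of_target

print(type_of_target([0, 1, 1, 0]))              # 'binary' -> classifier OK
print(type_of_target([1, 2, 3]))                 # 'multiclass' -> classifier OK
print(type_of_target([[1, 0], [0, 1]]))          # 'multilabel-indicator' -> classifier OK
print(type_of_target([0.1, 2.3, 4.5]))           # 'continuous' -> regressor OK
print(type_of_target([[0.1, 2.0], [3.1, 0.5]]))  # 'continuous-multioutput' -> rejected by both
```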
2 changes: 1 addition & 1 deletion autosklearn/metalearning/metalearning/clustering/gmeans.py
@@ -69,7 +69,7 @@ def fit(self, X):
break

# Refinement
KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1,
KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1,
init=np.array(cluster_centers),
random_state=self.random_state)
KMeans.fit(X)
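The one-line fix matters because scikit-learn requires the `init` array's row count to match `n_clusters`; passing `n_clusters=1` with several explicit centers is inconsistent. A small self-contained sketch of the corrected call (random data and center values are made up):

```python
import numpy as np
import sklearn.cluster

X = np.random.RandomState(1).rand(100, 2)
cluster_centers = np.array([[0.2, 0.2], [0.8, 0.8]])  # illustrative centers

# n_clusters must equal the number of rows in the explicit init array
kmeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1,
                                init=cluster_centers)
kmeans.fit(X)
print(kmeans.cluster_centers_.shape)  # (2, 2)
```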
20 changes: 9 additions & 11 deletions autosklearn/pipeline/create_searchspace_util.py
@@ -117,8 +117,8 @@ def find_active_choices(matches, node, node_idx, dataset_properties, \

choices = []
for c_idx, component in enumerate(available_components):
slices = [slice(None) if idx != node_idx else slice(c_idx, c_idx+1)
for idx in range(len(matches.shape))]
slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1)
for idx in range(len(matches.shape)))

if np.sum(matches[slices]) > 0:
choices.append(component)
@@ -200,10 +200,10 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties,
for product in itertools.product(*num_node_choices):
for node_idx, choice_idx in enumerate(product):
node_idx += start_idx
slices_ = [
slices_ = tuple(
slice(None) if idx != node_idx else
slice(choice_idx, choice_idx + 1) for idx in
range(len(matches.shape))]
range(len(matches.shape)))

if np.sum(matches[slices_]) == 0:
skip_array[product] = 1
@@ -212,13 +212,11 @@
if skip_array[product]:
continue

slices = []
for idx in range(len(matches.shape)):
if idx not in indices:
slices.append(slice(None))
else:
slices.append(slice(product[idx - start_idx],
product[idx - start_idx] + 1))
slices = tuple(
slice(None) if idx not in indices else
slice(product[idx - start_idx],
product[idx - start_idx] + 1) for idx in
range(len(matches.shape)))

# This prints the affected nodes
# print [node_choice_names[i][product[i]]
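These changes all address the same NumPy deprecation (see commit #526): indexing a multidimensional array with a list of slices raises a `FutureWarning` as of NumPy 1.15, while a tuple of slices is the supported form. A toy demonstration with a made-up array:

```python
import numpy as np

matches = np.arange(8).reshape(2, 2, 2)
node_idx, c_idx = 1, 0

# tuple of slices: the supported multidimensional index
slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx + 1)
               for idx in range(len(matches.shape)))
print(matches[slices].shape)  # (2, 1, 2)

# a list of slices, e.g. matches[list(slices)], would emit the
# "non-tuple sequence for multidimensional indexing" FutureWarning
```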
19 changes: 13 additions & 6 deletions autosklearn/util/backend.py
@@ -71,11 +71,17 @@ def _prepare_directories(self, temporary_directory, output_directory):

self.__temporary_directory = temporary_directory \
if temporary_directory \
else '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number)
else os.path.join(
tempfile.gettempdir(),
'autosklearn_tmp_%d_%d' % (pid, random_number)
)

self.__output_directory = output_directory \
if output_directory \
else '/tmp/autosklearn_output_%d_%d' % (pid, random_number)
else os.path.join(
tempfile.gettempdir(),
'autosklearn_output_%d_%d' % (pid, random_number)
)

def create_directories(self):
if self.shared_mode:
@@ -401,9 +407,10 @@ def save_ensemble(self, ensemble, idx, seed):
except Exception:
pass

filepath = os.path.join(self.get_ensemble_dir(),
'%s.%s.ensemble' % (str(seed),
str(idx)))
filepath = os.path.join(
self.get_ensemble_dir(),
'%s.%s.ensemble' % (str(seed), str(idx).zfill(10))
)
with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname(
filepath), delete=False) as fh:
pickle.dump(ensemble, fh)
@@ -460,4 +467,4 @@ def write_txt_file(self, filepath, data, name):
self.logger.debug('Created %s file %s' % (name, filepath))
else:
self.logger.debug('%s file already present %s' %
(name, filepath))
(name, filepath))
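Zero-padding the ensemble index (`zfill(10)`) makes lexicographic filename order agree with numeric order, which the ensemble-sorting fix (#567 for issue #566) appears to rely on, assuming ensemble files are picked by sorted name. A quick sketch with made-up indices:

```python
# Without padding, string sort puts index 10 before index 2
unpadded = ['0.%d.ensemble' % i for i in (2, 10, 9)]
print(sorted(unpadded))
# ['0.10.ensemble', '0.2.ensemble', '0.9.ensemble']  -> wrong order

# With zfill(10), string sort matches numeric order
padded = ['0.%s.ensemble' % str(i).zfill(10) for i in (2, 10, 9)]
print(sorted(padded))
# ['0.0000000002.ensemble', '0.0000000009.ensemble', '0.0000000010.ensemble']
```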
23 changes: 14 additions & 9 deletions autosklearn/util/logging_.py
@@ -7,18 +7,23 @@
import yaml


def setup_logger(output_file=None):
with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'),
'r') as fh:
config = yaml.load(fh)
if output_file is not None:
config['handlers']['file_handler']['filename'] = output_file
logging.config.dictConfig(config)
def setup_logger(output_file=None, logging_config=None):
# logging_config must be a dictionary object specifying the configuration
# for the loggers to be used in auto-sklearn.
if logging_config is not None:
if output_file is not None:
logging_config['handlers']['file_handler']['filename'] = output_file
logging.config.dictConfig(logging_config)
else:
with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'),
'r') as fh:
logging_config = yaml.safe_load(fh)
if output_file is not None:
logging_config['handlers']['file_handler']['filename'] = output_file
logging.config.dictConfig(logging_config)


def _create_logger(name):
logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
'message)s', datefmt='%H:%M:%S')
return logging.getLogger(name)


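For reference, a minimal sketch of a custom `logging_config` dict that the new parameter accepts, assuming the standard `logging.config.dictConfig` schema; the handler must be named `file_handler` for the `output_file` override above to apply. The formatter string mirrors `_create_logger`; everything else here is illustrative:

```python
import autosklearn.classification

logging_config = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'simple': {
            'format': '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        },
    },
    'handlers': {
        # setup_logger rewrites this handler's 'filename' when output_file is set
        'file_handler': {
            'class': 'logging.FileHandler',
            'formatter': 'simple',
            'filename': 'autosklearn.log',
        },
    },
    'root': {'level': 'DEBUG', 'handlers': ['file_handler']},
}

automl = autosklearn.classification.AutoSklearnClassifier(
    logging_config=logging_config,
)
```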
20 changes: 0 additions & 20 deletions ci_scripts/circle_install.sh

This file was deleted.
