From ecadd822dd9e017f9ac2b1387381491cfe51be63 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 17 Jul 2018 21:56:01 +0200 Subject: [PATCH 01/45] Extending Autosklearn. First commit. --- examples/example_extending_preprocessing.py | 97 +++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 examples/example_extending_preprocessing.py diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py new file mode 100644 index 0000000000..c2300ddfb2 --- /dev/null +++ b/examples/example_extending_preprocessing.py @@ -0,0 +1,97 @@ +""" +=============================================== +Extending Auto-sklearn with Custom Preprocessor +=============================================== + + +explanation goes here. +""" + +import autosklearn.pipeline.components.feature_preprocessing +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter + +from autosklearn.pipeline.components.base import \ + AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + +# Custom wrapper class for using Sklearn's polynomial feature preprocessing +# function. +class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): + def __init__(self, degree, interaction_only, include_bias, random_state=None): + # Define hyperparameters to be tuned here. + self.degree = degree + self.interaction_only = interaction_only + self.include_bias = include_bias + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y): + # wrapper function for the fit method of Sklearn's polynomial + # preprocessing function. + import sklearn.preprocessing + self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, + interaction_only=self.interaction_only, + include_bias=self.include_bias) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + # wrapper function for the transform method of sklearn's polynomial + # preprocessing function. It is also possible to implement + # a preprocessing algorithm directly in this function, provided that + # it behaves in the way compatible with that from sklearn. + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'CustomPreprocessor', + 'name': 'PolynomialFeatures', + 'handles_regression': True, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': True, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (INPUT,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + # For each hyperparameter, its type (categorical, integer, float, etc.) + # and its range and the default value must be specified here. + degree = UniformIntegerHyperparameter( + name="degree", lower=2, upper=5, default_value=2) + interaction_only = CategoricalHyperparameter( + name="interaction_only", choices=["False", "True"], default_value="False") + include_bias = CategoricalHyperparameter( + name="include_bias", choices=["True", "False"], default_value="True") + + cs = ConfigurationSpace() + cs.add_hyperparameters([degree, interaction_only, include_bias]) + + return cs + + +# Include the custom preprocessor class to auto-sklearn. 
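+# add_preprocessor() registers the component class with auto-sklearn's
+# feature-preprocessing step, so the optimizer can select it alongside
+# the built-in preprocessors.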
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor)
+
+# Import toy data from sklearn and apply train_test_split.
+from sklearn.datasets import load_boston
+from sklearn.model_selection import train_test_split
+X, y = load_boston(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
+
+# Run auto-sklearn regression with the custom preprocessor.
+import autosklearn.regression
+reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30,
+                                                  per_run_time_limit=10,
+                                                  include_preprocessors=['custom_preprocessor']
+                                                  )
+reg.fit(X_train, y_train)
+y_pred = reg.predict(X_test)
+print(reg.show_models())
+print(reg.sprint_statistics())

From b912d67eea14622d3ecade5a4c07b8b0deb8305c Mon Sep 17 00:00:00 2001
From: Jinu
Date: Wed, 18 Jul 2018 21:34:32 +0200
Subject: [PATCH 02/45] Add regression example

---
 examples/example_extending_preprocessing.py | 146 ++++++++++++++++----
 1 file changed, 116 insertions(+), 30 deletions(-)

diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py
index c2300ddfb2..bb20970d39 100644
--- a/examples/example_extending_preprocessing.py
+++ b/examples/example_extending_preprocessing.py
@@ -1,24 +1,100 @@
 """
 ===============================================
-Extending Auto-sklearn with Custom Preprocessor
+Extending Auto-sklearn
 ===============================================
 
+In order to include new machine learning algorithms in auto-sklearn's
+optimization process, users can implement a wrapper class for the algorithm
+and register it with auto-sklearn. The example code below demonstrates how
+to implement a custom regressor and a custom preprocessor (Lasso and
+polynomial feature preprocessing from sklearn, respectively), register them
+with auto-sklearn, and use them for the given task.
+A detailed walkthrough of extending auto-sklearn can be found `here `_.
 
-explanation goes here.
 """
 
-import autosklearn.pipeline.components.feature_preprocessing
 from ConfigSpace.configuration_space import ConfigurationSpace
-from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
-    UniformIntegerHyperparameter
+from ConfigSpace.hyperparameters import *
+from ConfigSpace.conditions import EqualsCondition, InCondition
 
+from autosklearn.pipeline.components.base import \
+    AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.components.base import \
     AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import *
+from autosklearn.util.common import check_for_bool
+
+
+# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn).
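+# The wrapper below follows the component contract: fit() and predict() wrap
+# the sklearn estimator, while the static methods get_properties() and
+# get_hyperparameter_search_space() tell auto-sklearn what the component can
+# handle and which hyperparameters to tune.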
+class MyRegressor(AutoSklearnRegressionAlgorithm):
+    def __init__(self, alpha, fit_intercept, tol, positive, random_state=None):
+        self.alpha = alpha
+        self.fit_intercept = fit_intercept
+        self.tol = tol
+        self.positive = positive
+
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        import sklearn.linear_model
+
+        self.alpha = float(self.alpha)
+        self.fit_intercept = check_for_bool(self.fit_intercept)
+        self.tol = float(self.tol)
+        self.positive = check_for_bool(self.positive)
+
+        self.estimator = sklearn.linear_model.\
+            Lasso(alpha=self.alpha,
+                  fit_intercept=self.fit_intercept,
+                  tol=self.tol,
+                  positive=self.positive,
+                  max_iter=300)
+
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError
+        return self.estimator.predict(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MyRegressor',
+                'name': 'MyRegressor',
+                'handles_regression': True,
+                'handles_classification': False,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (DENSE, UNSIGNED_DATA),
+                'output': (PREDICTIONS,)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        alpha = UniformFloatHyperparameter(
+            name="alpha", lower=0, upper=10, default_value=1)
+        fit_intercept = CategoricalHyperparameter(
+            name="fit_intercept", choices=[True, False], default_value=True)
+        tol = UniformFloatHyperparameter(
+            name="tol", lower=10 ** -5, upper=10 ** -1,
+            default_value=10 ** -3, log=True)
+        positive = CategoricalHyperparameter(
+            name="positive", choices=[True, False], default_value=False)
+
+        cs.add_hyperparameters([alpha, fit_intercept, tol, positive])
+
+        return cs
+
+
 # Custom wrapper class for using Sklearn's polynomial feature preprocessing
 # function.
-class custom_preprocessor(AutoSklearnPreprocessingAlgorithm):
+class MyPreprocessor(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, degree, interaction_only, include_bias, random_state=None):
         # Define hyperparameters to be tuned here.
         self.degree = degree
@@ -48,8 +124,8 @@ def transform(self, X):
 
     @staticmethod
     def get_properties(dataset_properties=None):
-        return {'shortname': 'CustomPreprocessor',
-                'name': 'PolynomialFeatures',
+        return {'shortname': 'MyPreprocessor',
+                'name': 'MyPreprocessor',
                 'handles_regression': True,
                 'handles_classification': True,
                 'handles_multiclass': True,
@@ -60,8 +136,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(dataset_properties=None):
-        # For each hyperparameter, its type (categorical, integer, float, etc.)
-        # and its range and the default value must be specified here.
+        # For each hyperparameter, its type (categorical, integer, float, etc.),
+        # range and the default value must be specified here.
         degree = UniformIntegerHyperparameter(
             name="degree", lower=2, upper=5, default_value=2)
         interaction_only = CategoricalHyperparameter(
@@ -75,23 +151,33 @@ def get_hyperparameter_search_space(dataset_properties=None):
 
         return cs
 
 
-# Include the custom preprocessor class to auto-sklearn.
-autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor)
-
-# Import toy data from sklearn and apply train_test_split.
-from sklearn.datasets import load_boston -from sklearn.model_selection import train_test_split -X, y = load_boston(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - -# Run auto-sklearn regression with the custom preprocessor. -import autosklearn.regression -reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_preprocessors=['custom_preprocessor'] - ) -reg.fit(X_train, y_train) -y_pred = reg.predict(X_test) -print(reg.show_models()) -print(reg.sprint_statistics()) - +def main(): + # Include the custom preprocessor class to auto-sklearn. + import autosklearn.pipeline.components.regression + import autosklearn.pipeline.components.feature_preprocessing + autosklearn.pipeline.components.regression.add_regressor(MyRegressor) + autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) + + # Import toy data from sklearn and apply train_test_split. + from sklearn.datasets import load_boston + from sklearn.model_selection import train_test_split + X, y = load_boston(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + + # Run auto-sklearn regression with the custom preprocessor. + import autosklearn.regression + import autosklearn.metrics + reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=["MyRegressor"], + include_preprocessors=["MyPreprocessor"]) + reg.fit(X_train, y_train) + y_pred = reg.predict(X_test) + scorer = autosklearn.metrics.r2 + print("Test score: ", scorer(y_pred, y_test)) + print(reg.show_models()) + print(reg.sprint_statistics()) + + +if __name__ == "__main__": + main() From 8e6927e87eb61f136e91d89b2fc2415cfa7cb506 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 1 Aug 2018 09:57:01 +0200 Subject: [PATCH 03/45] CI: upper bound numpy version due to travis failures --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f299f314f9..8c11d3f318 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,10 +58,11 @@ before_install: install: # Install general requirements the way setup.py suggests - pip install pep8 codecov + # Temporarily pin the numpy version for travis-ci + - pip install numpy<1.15 - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - - pip install xmltodict requests - - pip install git+https://github.com/renatopp/liac-arff + - pip install xmltodict requests liac-arff - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config From e8130f7cf7fdc80688eea545a184dd7511eb7106 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 1 Aug 2018 10:32:18 +0200 Subject: [PATCH 04/45] CI: upper bound numpy version due to travis failures --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8c11d3f318..968d8e4ec1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -59,7 +59,7 @@ install: # Install general requirements the way setup.py suggests - pip install pep8 codecov # Temporarily pin the numpy version for travis-ci - - pip install numpy<1.15 + - pip install "numpy<1.15" - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - pip install xmltodict requests 
liac-arff From 683238254025b504b78d0aebc83038457574bcd1 Mon Sep 17 00:00:00 2001 From: Manuel Streuhofer Date: Wed, 1 Aug 2018 14:41:34 +0200 Subject: [PATCH 05/45] use tempfile.gettempdir() (#521) * use tempfile.gettempdir() * follow quality review coding standards --- autosklearn/util/backend.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py index 1f7ea70f7f..52ac678762 100644 --- a/autosklearn/util/backend.py +++ b/autosklearn/util/backend.py @@ -71,11 +71,17 @@ def _prepare_directories(self, temporary_directory, output_directory): self.__temporary_directory = temporary_directory \ if temporary_directory \ - else '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_tmp_%d_%d' % (pid, random_number) + ) self.__output_directory = output_directory \ if output_directory \ - else '/tmp/autosklearn_output_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_output_%d_%d' % (pid, random_number) + ) def create_directories(self): if self.shared_mode: @@ -460,4 +466,4 @@ def write_txt_file(self, filepath, data, name): self.logger.debug('Created %s file %s' % (name, filepath)) else: self.logger.debug('%s file already present %s' % - (name, filepath)) \ No newline at end of file + (name, filepath)) From df273da9d9428902b52f7f132362c19472b9e704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taneli=20Mielik=C3=A4inen?= Date: Mon, 13 Aug 2018 04:44:40 -0700 Subject: [PATCH 06/45] Remove a colon from README.md (#527) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d0de400572..dd53e9f140 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement Find the documentation [here](http://automl.github.io/auto-sklearn/) -Status for master branch: +Status for master branch [![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn) [![Code Health](https://landscape.io/github/automl/auto-sklearn/master/landscape.png)](https://landscape.io/github/automl/auto-sklearn/master) From 8c5e3c7c1b7a729661f680df7eaf20885be3e039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taneli=20Mielik=C3=A4inen?= Date: Tue, 14 Aug 2018 06:08:30 -0700 Subject: [PATCH 07/45] fixing warnings on non-tuple sequence for indexing (#526) --- .../pipeline/create_searchspace_util.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index 5ebb7d1246..2abfa5c172 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -117,8 +117,8 @@ def find_active_choices(matches, node, node_idx, dataset_properties, \ choices = [] for c_idx, component in enumerate(available_components): - slices = [slice(None) if idx != node_idx else slice(c_idx, c_idx+1) - for idx in range(len(matches.shape))] + slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1) + for idx in range(len(matches.shape))) if np.sum(matches[slices]) > 0: choices.append(component) @@ -200,10 +200,10 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for product in itertools.product(*num_node_choices): for node_idx, choice_idx in enumerate(product): node_idx += start_idx - slices_ = [ + slices_ = 
tuple( slice(None) if idx != node_idx else slice(choice_idx, choice_idx + 1) for idx in - range(len(matches.shape))] + range(len(matches.shape))) if np.sum(matches[slices_]) == 0: skip_array[product] = 1 @@ -212,13 +212,11 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, if skip_array[product]: continue - slices = [] - for idx in range(len(matches.shape)): - if idx not in indices: - slices.append(slice(None)) - else: - slices.append(slice(product[idx - start_idx], - product[idx - start_idx] + 1)) + slices = tuple( + slice(None) if idx not in indices else + slice(product[idx - start_idx], + product[idx - start_idx] + 1) for idx in + range(len(matches.shape))) # This prints the affected nodes # print [node_choice_names[i][product[i]] From c02dc8f1609af8043c1df7c6ec26e414cdfb8696 Mon Sep 17 00:00:00 2001 From: Katharina Eggensperger Date: Mon, 10 Sep 2018 17:28:52 +0200 Subject: [PATCH 08/45] fix string formatting (#540) --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4ae310adf0..86c9c3202b 100644 --- a/setup.py +++ b/setup.py @@ -18,8 +18,8 @@ if sys.version_info < (3, 5): raise ValueError( - 'Unsupported python version %s found. Auto-sklearn requires Python ' - '3.5 or higher.' % sys.version_info + 'Unsupported Python version %d.%d.%d found. Auto-sklearn requires Python ' + '3.5 or higher.' % (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) ) From 9e91a3358bfb7ea36cf6a26310687e311e8ff698 Mon Sep 17 00:00:00 2001 From: Katharina Eggensperger Date: Mon, 10 Sep 2018 17:29:47 +0200 Subject: [PATCH 09/45] FIX removing models wrt wrong metric in ensemble (#522) --- autosklearn/ensemble_builder.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py index c3851cab13..6d96e30562 100644 --- a/autosklearn/ensemble_builder.py +++ b/autosklearn/ensemble_builder.py @@ -171,11 +171,8 @@ def main(self): while True: #maximal number of iterations - if ( - self.max_iterations is not None - and self.max_iterations > 0 - and iteration >= self.max_iterations - ): + if (self.max_iterations is not None + and 0 < self.max_iterations <= iteration): self.logger.info("Terminate ensemble building because of max iterations: %d of %d", self.max_iterations, iteration) @@ -300,7 +297,7 @@ def read_ensemble_preds(self): Y_TEST: None, # Lazy keys so far: # 0 - not loaded - # 1 - loaded and ind memory + # 1 - loaded and in memory # 2 - loaded but dropped again "loaded": 0 } @@ -372,14 +369,18 @@ def get_n_best_preds(self): ], key=lambda x: x[1], ))) - # remove all that are at most as good as random, cannot assume a - # minimum number here because all kinds of metric can be used - sorted_keys = filter(lambda x: x[1] > 0.001, sorted_keys) + # remove all that are at most as good as random + # note: dummy model must have run_id=1 (there is not run_id=0) + dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0] + self.logger.debug("Use %f as dummy score" % + dummy_score[1]) + sorted_keys = filter(lambda x: x[1] > dummy_score[1], sorted_keys) # remove Dummy Classifier sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys)) if not sorted_keys: - # no model left; try to use dummy classifier (num_run==0) - self.logger.warning("No models better than random - using Dummy Classifier!") + # no model left; try to use dummy score (num_run==0) + self.logger.warning("No models better than random - " + "using Dummy 
Score!") sorted_keys = [ (k, v["ens_score"], v["num_run"]) for k, v in self.read_preds.items() if v["seed"] == self.seed and v["num_run"] == 1 From 8eaa36cece9309b13d8a7d17de7bc9b7c5b3dde9 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 03:16:32 +0200 Subject: [PATCH 10/45] Add examples for extending auto-sklearn. --- examples/example_extending_classification.py | 124 +++++++++++++ examples/example_extending_preprocessing.py | 183 ------------------- examples/example_extending_preprocessor.py | 109 +++++++++++ examples/example_extending_regression.py | 109 +++++++++++ 4 files changed, 342 insertions(+), 183 deletions(-) create mode 100644 examples/example_extending_classification.py delete mode 100644 examples/example_extending_preprocessing.py create mode 100644 examples/example_extending_preprocessor.py create mode 100644 examples/example_extending_regression.py diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py new file mode 100644 index 0000000000..11b82503d0 --- /dev/null +++ b/examples/example_extending_classification.py @@ -0,0 +1,124 @@ +""" +==================================================================== +Extending Auto-Sklearn with Classification Component +==================================================================== + +The following example demonstrates how to create a new classification +component for using in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter, UniformFloatHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.pipeline.components.classification +import autosklearn.pipeline.components.base +from autosklearn.pipeline.constants import * + + +# Create MLP classifier component for auto-sklearn. 
+class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm):
+    def __init__(self,
+                 hidden_layer_depth,
+                 num_nodes_per_layer,
+                 activation,
+                 alpha,
+                 random_state=None,
+                 ):
+        self.hidden_layer_depth = hidden_layer_depth
+        self.num_nodes_per_layer = num_nodes_per_layer
+        self.activation = activation
+        self.alpha = alpha
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
+        self.hidden_layer_depth = int(self.hidden_layer_depth)
+        self.alpha = float(self.alpha)
+
+        from sklearn.neural_network import MLPClassifier
+        hidden_layer_sizes = tuple(self.num_nodes_per_layer \
+                                   for i in range(self.hidden_layer_depth))
+
+        self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
+                                       alpha=self.alpha,
+                                       activation=self.activation,
+                                       random_state=self.random_state,
+                                       )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict(X)
+
+    def predict_proba(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict_proba(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MLP Classifier',
+                'name': 'MLP Classifier',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': False,
+                'is_deterministic': False,
+                # Both input and output must be tuple(iterable)
+                'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
+                'output': [PREDICTIONS]
+                }
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        hidden_layer_depth = UniformIntegerHyperparameter(
+            name="hidden_layer_depth", lower=1, upper=3, default_value=1
+        )
+        num_nodes_per_layer = UniformIntegerHyperparameter(
+            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
+        )
+        activation = CategoricalHyperparameter(
+            name="activation", choices=['identity', 'logistic', 'tanh', 'relu'],
+            default_value='relu'
+        )
+        alpha = UniformFloatHyperparameter(
+            name="alpha", lower=0.0001, upper=1.0, default_value=0.0001
+        )
+        cs.add_hyperparameters([hidden_layer_depth,
+                                num_nodes_per_layer,
+                                activation,
+                                alpha,
+                                ])
+        return cs
+
+
+# Add MLP classifier component to auto-sklearn.
+autosklearn.pipeline.components.classification.add_classifier(MLPClassifier)
+cs = MLPClassifier.get_hyperparameter_search_space()
+print(cs)
+
+# Generate data.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Fit MLP classifier to the data.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    per_run_time_limit=10,
+    include_estimators=['MLPClassifier'],
+)
+clf.fit(X_train, y_train)
+
+# Print test accuracy and statistics.
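+# show_models() describes the members of the final ensemble and their
+# weights; sprint_statistics() summarizes the search itself (number of
+# successful runs, best validation score, timeouts, etc.).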
+y_pred = clf.predict(X_test) +print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test)) +print(clf.sprint_statistics()) +print(clf.show_models()) \ No newline at end of file diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py deleted file mode 100644 index bb20970d39..0000000000 --- a/examples/example_extending_preprocessing.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -=============================================== -Extending Auto-sklearn -=============================================== - -In order to include new machine learning algorithms in auto-sklearn's -optimization process, users can implement a wrapper class for the algorithm -and register it to auto-sklearn. The example code below demonstrates how -to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), -register it to auto-sklearn, and use them for the given task. -A detailed walkthrough of extending auto-sklearn can be found `here `_. - -""" - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import * -from ConfigSpace.conditions import EqualsCondition, InCondition - -from autosklearn.pipeline.components.base import \ - AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * -from autosklearn.util.common import check_for_bool - - -# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). -class MyRegressor(AutoSklearnRegressionAlgorithm): - def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): - self.alpha = alpha - self.fit_intercept = fit_intercept - #self.normalize = normalize - self.tol = tol - self.positive = positive - - self.random_state = random_state - self.estimator = None - - def fit(self, X, Y): - import sklearn.linear_model - - self.alpha = float(self.alpha) - self.fit_intercept = check_for_bool(self.fit_intercept) - self.normalize = check_for_bool(self.normalize) - self.tol = float(self.tol) - self.positive = check_for_bool(self.positive) - - self.estimator = sklearn.linear_model.\ - Lasso(alpha=self.alpha, - fit_intercept=self.fit_intercept, - tol=self.tol, - positive=self.positive, - n_iter=300) - - self.estimator.fit(X, Y) - return self - - def predict(self, X): - if self.estimator is None: - raise NotImplementedError - return self.estimator.predict(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyRegressor', - 'name': 'MyRegressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - cs = ConfigurationSpace() - alpha = UniformFloatHyperparameter( - name="alpha", lower=0, upper=10, default_value=1) - fit_intercept = CategoricalHyperparameter( - name="fit_intercept", choices=[True, False], default_value=True) - normalize = CategoricalHyperparameter( - name="normalize", choices=[True, False], default_value=False) - tol = UniformFloatHyperparameter( - name="tol", lower=10 ** -5, upper=10 ** -1, - default_value=10 ** -3, log=True) - positive = CategoricalHyperparameter( - name="positive", choices=[True, False], default_value=False) - - cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) - - return cs - - -# Custom 
wrapper class for using Sklearn's polynomial feature preprocessing -# function. -class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): - def __init__(self, degree, interaction_only, include_bias, random_state=None): - # Define hyperparameters to be tuned here. - self.degree = degree - self.interaction_only = interaction_only - self.include_bias = include_bias - self.random_state = random_state - self.preprocessor = None - - def fit(self, X, Y): - # wrapper function for the fit method of Sklearn's polynomial - # preprocessing function. - import sklearn.preprocessing - self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, - interaction_only=self.interaction_only, - include_bias=self.include_bias) - self.preprocessor.fit(X, Y) - return self - - def transform(self, X): - # wrapper function for the transform method of sklearn's polynomial - # preprocessing function. It is also possible to implement - # a preprocessing algorithm directly in this function, provided that - # it behaves in the way compatible with that from sklearn. - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyPreprocessor', - 'name': 'MyPreprocessor', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.), - # range and the default value must be specified here. - degree = UniformIntegerHyperparameter( - name="degree", lower=2, upper=5, default_value=2) - interaction_only = CategoricalHyperparameter( - name="interaction_only", choices=["False", "True"], default_value="False") - include_bias = CategoricalHyperparameter( - name="include_bias", choices=["True", "False"], default_value="True") - - cs = ConfigurationSpace() - cs.add_hyperparameters([degree, interaction_only, include_bias]) - - return cs - - -def main(): - # Include the custom preprocessor class to auto-sklearn. - import autosklearn.pipeline.components.regression - import autosklearn.pipeline.components.feature_preprocessing - autosklearn.pipeline.components.regression.add_regressor(MyRegressor) - autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) - - # Import toy data from sklearn and apply train_test_split. - from sklearn.datasets import load_boston - from sklearn.model_selection import train_test_split - X, y = load_boston(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - - # Run auto-sklearn regression with the custom preprocessor. 
- import autosklearn.regression - import autosklearn.metrics - reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_estimators=["MyRegressor"], - include_preprocessors=["MyPreprocessor"]) - reg.fit(X_train, y_train) - y_pred = reg.predict(X_test) - scorer = autosklearn.metrics.r2 - print("Test score: ", scorer(y_pred, y_test)) - print(reg.show_models()) - print(reg.sprint_statistics()) - - -if __name__ == "__main__": - main() diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py new file mode 100644 index 0000000000..5bdc024bfa --- /dev/null +++ b/examples/example_extending_preprocessor.py @@ -0,0 +1,109 @@ +""" +==================================================================== +Extending Auto-Sklearn with Preprocessor Component +==================================================================== + +The following example demonstrates how to create a wrapper around the linear +discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor +in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + UniformIntegerHyperparameter, CategoricalHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.metrics +import autosklearn.pipeline.components.feature_preprocessing +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + + +# Create LDA component for auto-sklearn. +class LDA(AutoSklearnPreprocessingAlgorithm): + def __init__(self, shrinkage, solver, n_components, tol, random_state=None): + self.solver = solver + self.shrinkage = shrinkage + self.n_components = n_components + self.tol = tol + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y=None): + self.shrinkage = float(self.shrinkage) + self.n_components = int(self.n_components) + self.tol = float(self.tol) + + import sklearn.discriminant_analysis + self.preprocessor = \ + sklearn.discriminant_analysis.LinearDiscriminantAnalysis( + shrinkage=self.shrinkage, + solver=self.solver, + n_components=self.n_components, + tol=self.tol, + ) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'LDA', + 'name': 'Linear Discriminant Analysis', + 'handles_regression': False, + 'handles_classification': True, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + solver = CategoricalHyperparameter( + name="solver", choices=['svd','lsqr','eigen'], default_value='svd' + ) + shrinkage = UniformFloatHyperparameter( + name="shrinkage", lower=0.0, upper=1.0, default_value=0.5 + ) + n_components = UniformIntegerHyperparameter( + name="n_components", lower=1, upper=29, default_value=10 + ) + tol = UniformFloatHyperparameter( + name="tol", lower=0.0001, upper=1, default_value=0.0001 + ) + cs.add_hyperparameters([solver, shrinkage, n_components, tol]) + return cs + + +# Add LDA component to auto-sklearn. 
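+# Caveat: scikit-learn's LDA accepts a shrinkage value only with the 'lsqr'
+# and 'eigen' solvers, so configurations that combine shrinkage with
+# solver='svd' will fail at fit time; in practice the two hyperparameters
+# should be coupled, e.g. with an InCondition or a forbidden clause from
+# ConfigSpace.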
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)
+
+# Create dataset.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Configuration space.
+cs = LDA.get_hyperparameter_search_space()
+print(cs)
+
+# Fit the model using LDA as preprocessor.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    include_preprocessors=['LDA'],
+)
+clf.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = clf.predict(X_test)
+print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
+print(clf.sprint_statistics())
+print(clf.show_models())
\ No newline at end of file
diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py
new file mode 100644
index 0000000000..aaea3d13b5
--- /dev/null
+++ b/examples/example_extending_regression.py
@@ -0,0 +1,109 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Regression Component
+====================================================================
+
+The following example demonstrates how to create a new regression
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
+    UniformIntegerHyperparameter, CategoricalHyperparameter
+
+import sklearn.metrics
+import autosklearn.regression
+import autosklearn.pipeline.components.regression
+from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
+from autosklearn.pipeline.constants import *
+
+
+# Implement kernel ridge regression component for auto-sklearn.
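+# Note: in scikit-learn's KernelRidge, gamma is used only by the rbf,
+# polynomial and sigmoid kernels and degree only by the polynomial kernel;
+# both are ignored when kernel='linear'.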
+class KernelRidgeRegression(AutoSklearnRegressionAlgorithm): + def __init__(self, alpha, kernel, gamma, degree, random_state=None): + self.alpha = alpha + self.kernel = kernel + self.gamma = gamma + self.degree = degree + self.random_state = random_state + self.estimator = None + + def fit(self, X, Y): + self.alpha = float(self.alpha) + self.gamma = float(self.gamma) + self.degree = int(self.degree) + + import sklearn.kernel_ridge + self.estimator = sklearn.kernel_ridge.KernelRidge(alpha=self.alpha, + kernel=self.kernel, + gamma=self.gamma, + degree=self.degree, + ) + self.estimator.fit(X, Y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError + return self.estimator.predict(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'KRR', + 'name': 'Kernel Ridge Regression', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (PREDICTIONS,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + alpha = UniformFloatHyperparameter( + name='alpha', lower=10 ** -5, upper=1, log=True, default_value=0.1) + kernel = CategoricalHyperparameter( + name='kernel', + choices=['linear', + 'rbf', + 'sigmoid', + 'polynomial', + ], + default_value='linear' + ) + gamma = UniformFloatHyperparameter( + name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True + ) + degree = UniformIntegerHyperparameter( + name='degree', lower=2, upper=5, default_value=3 + ) + cs.add_hyperparameters([alpha, kernel, gamma, degree]) + return cs + + +# Add KRR component to auto-sklearn. +autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression) +cs = KernelRidgeRegression.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_diabetes +from sklearn.model_selection import train_test_split +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit the model using KRR. +reg = autosklearn.regression.AutoSklearnRegressor( + time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=['KernelRidgeRegression'], +) +reg.fit(X_train, y_train) + +# Print prediction score and statistics. +y_pred = reg.predict(X_test) +print("r2 score: ", sklearn.metrics.r2_score(y_pred, y_test)) +print(reg.sprint_statistics()) +print(reg.show_models()) \ No newline at end of file From e1e8c2575d38fc39b8b158342c7fdbf71ec78607 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 14:00:45 +0200 Subject: [PATCH 11/45] . 
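
Tidy the imports of the extending examples: import the component base
classes directly and replace the star imports of the pipeline constants
with the names that are actually used.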
--- examples/example_extending_classification.py | 7 ++++--- examples/example_extending_preprocessor.py | 6 ++++-- examples/example_extending_regression.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index 11b82503d0..e7f8334901 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -14,12 +14,13 @@ import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.classification -import autosklearn.pipeline.components.base -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA # Create MLP classifier component for auto-sklearn. -class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm): +class MLPClassifier(AutoSklearnClassificationAlgorithm): def __init__(self, hidden_layer_depth, num_nodes_per_layer, diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 5bdc024bfa..815d698ecd 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -16,8 +16,10 @@ import autosklearn.classification import autosklearn.metrics import autosklearn.pipeline.components.feature_preprocessing -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ + UNSIGNED_DATA # Create LDA component for auto-sklearn. diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index aaea3d13b5..59f2af58eb 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -15,7 +15,8 @@ import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.constants import SPARSE, DENSE, \ + SIGNED_DATA, UNSIGNED_DATA # Implement kernel ridge regression component for auto-sklearn. From c55cbacc892c9a005f440d0ab9ccf19c48d30bb3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 21:36:12 +0200 Subject: [PATCH 12/45] Change datasets used in examples from digits to breast_cancer. --- examples/example_crossvalidation.py | 4 ++-- examples/example_eips.py | 4 ++-- examples/example_holdout.py | 4 ++-- examples/example_parallel.py | 4 ++-- examples/example_random_search.py | 6 +++--- examples/example_sequential.py | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/example_crossvalidation.py b/examples/example_crossvalidation.py index 85530b591b..52e3050f7b 100644 --- a/examples/example_crossvalidation.py +++ b/examples/example_crossvalidation.py @@ -21,7 +21,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -37,7 +37,7 @@ def main(): # fit() changes the data in place, but refit needs the original data. We # therefore copy the data. 
In practice, one should reload the data - automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits') + automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer') # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. diff --git a/examples/example_eips.py b/examples/example_eips.py index eef3c6cf11..db2a434092 100644 --- a/examples/example_eips.py +++ b/examples/example_eips.py @@ -69,7 +69,7 @@ def get_eips_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -81,7 +81,7 @@ def main(): get_smac_object_callback=get_eips_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn via ROAR. print(automl.show_models()) diff --git a/examples/example_holdout.py b/examples/example_holdout.py index fe1ff1c7a7..19a438bd87 100644 --- a/examples/example_holdout.py +++ b/examples/example_holdout.py @@ -18,7 +18,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -34,7 +34,7 @@ def main(): resampling_strategy='holdout', resampling_strategy_arguments={'train_size': 0.67} ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn. 
print(automl.show_models()) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index f5572ab97d..bcb45206c2 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -78,14 +78,14 @@ def spawn_classifier(seed, dataset_name): def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits')) + p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) p.start() processes.append(p) for p in processes: diff --git a/examples/example_random_search.py b/examples/example_random_search.py index 9d04a39974..2a64b36efb 100644 --- a/examples/example_random_search.py +++ b/examples/example_random_search.py @@ -68,7 +68,7 @@ def get_random_search_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -79,7 +79,7 @@ def main(): get_smac_object_callback=get_roar_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for ROAR.') @@ -99,7 +99,7 @@ def main(): get_smac_object_callback=get_random_search_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for random search.') diff --git a/examples/example_sequential.py b/examples/example_sequential.py index 06820e7ebe..694ea81404 100644 --- a/examples/example_sequential.py +++ b/examples/example_sequential.py @@ -17,7 +17,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -32,7 +32,7 @@ def main(): ensemble_size=0, delete_tmp_folder_after_terminate=False, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() automl.fit_ensemble(y_train, ensemble_size=50) From ef12841bab5256e3c5abadbcafea32764c7ca1fc Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 13:50:36 +0200 Subject: [PATCH 13/45] First commit --- .travis.yml | 6 ++ ci_scripts/flake8_diff.sh | 144 ++++++++++++++++++++++++++++++++++++++ ci_scripts/test.sh | 43 +++++++----- 3 files changed, 177 insertions(+), 16 deletions(-) create mode 100644 ci_scripts/flake8_diff.sh mode change 100644 => 100755 ci_scripts/test.sh diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..bf8727adf4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,10 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" 
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Temporarily disabling OSX builds because thy take too long # Set language to generic to not break travis-ci @@ -66,6 +70,8 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config + # Install flake 8. + - pip install flake8 # Debug output to know all exact package versions! - pip freeze - python setup.py install diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh new file mode 100644 index 0000000000..f0b9b083e1 --- /dev/null +++ b/ci_scripts/flake8_diff.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# automl/auto-sklearn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=automl/auto-sklearn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. 
+ LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We ignore files from sklearn/externals. Unfortunately there is no +# way to do it with flake8 directly (the --exclude does not seem to +# work with --diff). We could use the exclude magic in the git pathspec +# ':!sklearn/externals' but it is only available on git 1.9 and Travis +# uses git 1.8. 
+# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh old mode 100644 new mode 100755 index 0026279285..1e356bd125 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,22 +1,33 @@ set -e -# Get into a temp directory to run test from the installed scikit learn and -# check if we do not leave artifacts -mkdir -p $TEST_DIR +run_tests() { + # Get into a temp directory to run test from the installed scikit learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR -cwd=`pwd` -examples_dir=$cwd/examples -test_dir=$cwd/test/ + cwd=`pwd` + examples_dir=$cwd/examples + test_dir=$cwd/../test/ -cd $TEST_DIR + cd $TEST_DIR + if [[ "$COVERAGE" == "true" ]]; then + nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir + elif [[ "$EXAMPLES" == "true" ]]; then + for example in `find $examples_dir -name '*.py'` + do + python $example + done + else + nosetests --no-path-adjustment -sv $test_dir + fi +} -if [[ "$COVERAGE" == "true" ]]; then - nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir -elif [[ "$EXAMPLES" == "true" ]]; then - for example in `find $examples_dir -name '*.py'` - do - python $example - done -else - nosetests --no-path-adjustment -sv $test_dir +if [[ "$RUN_FLAKE8" ]]; then + source ci_scripts/flake8_diff.sh fi + +if [[ "$SKIP_TESTS" != "true" ]]; then + run_tests +fi + + From 242eebf195e51a631b7b5e06674243be25b8e702 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:00:25 +0200 Subject: [PATCH 14/45] Fixing codacy errors --- examples/example_extending_classification.py | 3 ++- examples/example_extending_preprocessor.py | 2 +- examples/example_extending_regression.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index e7f8334901..d4de7d79d3 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -16,7 +16,8 @@ import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.base \ import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ + PREDICTIONS # Create MLP classifier component for auto-sklearn. 
diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 815d698ecd..33a51cc953 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -18,7 +18,7 @@ import autosklearn.pipeline.components.feature_preprocessing from autosklearn.pipeline.components.base \ import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ UNSIGNED_DATA diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index 59f2af58eb..e3dbe18428 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -16,7 +16,7 @@ import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import SPARSE, DENSE, \ - SIGNED_DATA, UNSIGNED_DATA + SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS # Implement kernel ridge regression component for auto-sklearn. From 64756233ceaa480179e216abec170bd3d4b1c706 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:07:01 +0200 Subject: [PATCH 15/45] Fixing bug --- ci_scripts/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 1e356bd125..3d9551375e 100755 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -7,7 +7,7 @@ run_tests() { cwd=`pwd` examples_dir=$cwd/examples - test_dir=$cwd/../test/ + test_dir=$cwd/test/ cd $TEST_DIR if [[ "$COVERAGE" == "true" ]]; then From 5cab17879a1b7c314d49e6868a6ed4103a3e3f21 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 14:41:30 +0200 Subject: [PATCH 16/45] [Debug] try different numpy version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b65911d6cb..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 86c9c3202b..bcdaba8e4f 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "nose", "six", "Cython", - "numpy>=1.9.0", + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From a062ba028b205c5a24341378c45baaac149f5b87 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 19:31:18 +0200 Subject: [PATCH 17/45] [Debug] Try with latest numpy version --- requirements.txt | 2 +- setup.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 71c47ceb66..935d91f3ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.14.5 +numpy==1.15.2 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index bcdaba8e4f..0daf591d29 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ "nose", "six", "Cython", - "numpy==1.14.5", + # Debugging. 
Try with latest version of numpy + "numpy==1.15.2", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 94f9d2ca55a7db3bb9080ba18cf0a3e934bc9805 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 19:52:50 +0200 Subject: [PATCH 18/45] Set numpy version to 1.14.5 --- requirements.txt | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 935d91f3ae..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.15.2 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 0daf591d29..ef89ad314c 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ "nose", "six", "Cython", - # Debugging. Try with latest version of numpy - "numpy==1.15.2", + # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From b331251c2cd93d6d8149eeaa88df6475ee76aebd Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 13:50:36 +0200 Subject: [PATCH 19/45] First commit --- .travis.yml | 6 ++ ci_scripts/flake8_diff.sh | 144 ++++++++++++++++++++++++++++++++++++++ ci_scripts/test.sh | 43 +++++++----- 3 files changed, 177 insertions(+), 16 deletions(-) create mode 100644 ci_scripts/flake8_diff.sh mode change 100644 => 100755 ci_scripts/test.sh diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..bf8727adf4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,10 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Temporarily disabling OSX builds because thy take too long # Set language to generic to not break travis-ci @@ -66,6 +70,8 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config + # Install flake 8. + - pip install flake8 # Debug output to know all exact package versions! - pip freeze - python setup.py install diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh new file mode 100644 index 0000000000..f0b9b083e1 --- /dev/null +++ b/ci_scripts/flake8_diff.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# automl/auto-sklearn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=automl/auto-sklearn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. 
For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. + LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We ignore files from sklearn/externals. 
Unfortunately there is no +# way to do it with flake8 directly (the --exclude does not seem to +# work with --diff). We could use the exclude magic in the git pathspec +# ':!sklearn/externals' but it is only available on git 1.9 and Travis +# uses git 1.8. +# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh old mode 100644 new mode 100755 index 0026279285..1e356bd125 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,22 +1,33 @@ set -e -# Get into a temp directory to run test from the installed scikit learn and -# check if we do not leave artifacts -mkdir -p $TEST_DIR +run_tests() { + # Get into a temp directory to run test from the installed scikit learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR -cwd=`pwd` -examples_dir=$cwd/examples -test_dir=$cwd/test/ + cwd=`pwd` + examples_dir=$cwd/examples + test_dir=$cwd/../test/ -cd $TEST_DIR + cd $TEST_DIR + if [[ "$COVERAGE" == "true" ]]; then + nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir + elif [[ "$EXAMPLES" == "true" ]]; then + for example in `find $examples_dir -name '*.py'` + do + python $example + done + else + nosetests --no-path-adjustment -sv $test_dir + fi +} -if [[ "$COVERAGE" == "true" ]]; then - nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir -elif [[ "$EXAMPLES" == "true" ]]; then - for example in `find $examples_dir -name '*.py'` - do - python $example - done -else - nosetests --no-path-adjustment -sv $test_dir +if [[ "$RUN_FLAKE8" ]]; then + source ci_scripts/flake8_diff.sh fi + +if [[ "$SKIP_TESTS" != "true" ]]; then + run_tests +fi + + From 34569203f734b00c67417a8ce4e0778ac44c4d50 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:07:01 +0200 Subject: [PATCH 20/45] Fixing bug --- ci_scripts/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 1e356bd125..3d9551375e 100755 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -7,7 +7,7 @@ run_tests() { cwd=`pwd` examples_dir=$cwd/examples - test_dir=$cwd/../test/ + test_dir=$cwd/test/ cd $TEST_DIR if [[ "$COVERAGE" == "true" ]]; then From 6b947c5dc95cceaf5d909966ba1ceeb8f010e027 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 20:47:38 +0200 Subject: [PATCH 21/45] Modify flake8_diff.sh --- ci_scripts/flake8_diff.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh index f0b9b083e1..e40c2653bb 100644 --- a/ci_scripts/flake8_diff.sh +++ b/ci_scripts/flake8_diff.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is mostly taken from 
https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/flake8_diff.sh + # This script is used in Travis to check that PRs do not add obvious # flake8 violations. It relies on two things: # - find common ancestor between branch and @@ -36,8 +38,8 @@ echo "Remotes:" echo '--------------------------------------------------------------------------------' git remote --verbose -# Travis does the git clone with a limited depth (50 at the time of -# writing). This may not be enough to find the common ancestor with +# Travis does the git clone with a limited depth. +# This may not be enough to find the common ancestor with # $REMOTE/master so we unshallow the git checkout if [[ -a .git/shallow ]]; then echo -e '\nTrying to unshallow the repo:' @@ -113,11 +115,6 @@ echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" echo '--------------------------------------------------------------------------------' -# We ignore files from sklearn/externals. Unfortunately there is no -# way to do it with flake8 directly (the --exclude does not seem to -# work with --diff). We could use the exclude magic in the git pathspec -# ':!sklearn/externals' but it is only available on git 1.9 and Travis -# uses git 1.8. # We need the following command to exit with 0 hence the echo in case # there is no match MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" @@ -134,7 +131,7 @@ check_files() { } if [[ "$MODIFIED_FILES" == "no_match" ]]; then - echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" + echo "No file has been modified" else check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" From c6229e5f3f42c5e0fb4ffd16477ec8f1edc37270 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 17 Jul 2018 21:56:01 +0200 Subject: [PATCH 22/45] Extending Autosklearn. First commit. --- examples/example_extending_preprocessing.py | 97 +++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 examples/example_extending_preprocessing.py diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py new file mode 100644 index 0000000000..c2300ddfb2 --- /dev/null +++ b/examples/example_extending_preprocessing.py @@ -0,0 +1,97 @@ +""" +=============================================== +Extending Auto-sklearn with Custom Preprocessor +=============================================== + + +explanation goes here. +""" + +import autosklearn.pipeline.components.feature_preprocessing +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter + +from autosklearn.pipeline.components.base import \ + AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + +# Custom wrapper class for using Sklearn's polynomial feature preprocessing +# function. +class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): + def __init__(self, degree, interaction_only, include_bias, random_state=None): + # Define hyperparameters to be tuned here. + self.degree = degree + self.interaction_only = interaction_only + self.include_bias = include_bias + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y): + # wrapper function for the fit method of Sklearn's polynomial + # preprocessing function. 
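+        # Importing sklearn inside fit() mirrors auto-sklearn's built-in
+        # components: heavyweight dependencies are only loaded once the
+        # component is actually selected and trained.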
+ import sklearn.preprocessing + self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, + interaction_only=self.interaction_only, + include_bias=self.include_bias) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + # wrapper function for the transform method of sklearn's polynomial + # preprocessing function. It is also possible to implement + # a preprocessing algorithm directly in this function, provided that + # it behaves in the way compatible with that from sklearn. + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'CustomPreprocessor', + 'name': 'PolynomialFeatures', + 'handles_regression': True, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': True, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (INPUT,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + # For each hyperparameter, its type (categorical, integer, float, etc.) + # and its range and the default value must be specified here. + degree = UniformIntegerHyperparameter( + name="degree", lower=2, upper=5, default_value=2) + interaction_only = CategoricalHyperparameter( + name="interaction_only", choices=["False", "True"], default_value="False") + include_bias = CategoricalHyperparameter( + name="include_bias", choices=["True", "False"], default_value="True") + + cs = ConfigurationSpace() + cs.add_hyperparameters([degree, interaction_only, include_bias]) + + return cs + + +# Include the custom preprocessor class to auto-sklearn. +autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor) + +# Import toy data from sklearn and apply train_test_split. +from sklearn.datasets import load_boston +from sklearn.model_selection import train_test_split +X, y = load_boston(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + +# Run auto-sklearn regression with the custom preprocessor. +import autosklearn.regression +reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_preprocessors=['custom_preprocessor'] + ) +reg.fit(X_train, y_train) +y_pred = reg.predict(X_test) +print(reg.show_models()) +print(reg.sprint_statistics()) + From 2a98d0cf3bc1fbd2e73c01c9428ed15939c165d3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Wed, 18 Jul 2018 21:34:32 +0200 Subject: [PATCH 23/45] Add regression example --- examples/example_extending_preprocessing.py | 146 ++++++++++++++++---- 1 file changed, 116 insertions(+), 30 deletions(-) diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py index c2300ddfb2..bb20970d39 100644 --- a/examples/example_extending_preprocessing.py +++ b/examples/example_extending_preprocessing.py @@ -1,24 +1,100 @@ """ =============================================== -Extending Auto-sklearn with Custom Preprocessor +Extending Auto-sklearn =============================================== +In order to include new machine learning algorithms in auto-sklearn's +optimization process, users can implement a wrapper class for the algorithm +and register it to auto-sklearn. The example code below demonstrates how +to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), +register it to auto-sklearn, and use them for the given task. 
+A detailed walkthrough of extending auto-sklearn can be found `here `_. -explanation goes here. """ -import autosklearn.pipeline.components.feature_preprocessing from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import * +from ConfigSpace.conditions import EqualsCondition, InCondition +from autosklearn.pipeline.components.base import \ + AutoSklearnRegressionAlgorithm from autosklearn.pipeline.components.base import \ AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import * +from autosklearn.util.common import check_for_bool + + +# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). +class MyRegressor(AutoSklearnRegressionAlgorithm): + def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): + self.alpha = alpha + self.fit_intercept = fit_intercept + #self.normalize = normalize + self.tol = tol + self.positive = positive + + self.random_state = random_state + self.estimator = None + + def fit(self, X, Y): + import sklearn.linear_model + + self.alpha = float(self.alpha) + self.fit_intercept = check_for_bool(self.fit_intercept) + self.normalize = check_for_bool(self.normalize) + self.tol = float(self.tol) + self.positive = check_for_bool(self.positive) + + self.estimator = sklearn.linear_model.\ + Lasso(alpha=self.alpha, + fit_intercept=self.fit_intercept, + tol=self.tol, + positive=self.positive, + n_iter=300) + + self.estimator.fit(X, Y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError + return self.estimator.predict(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'MyRegressor', + 'name': 'MyRegressor', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (PREDICTIONS,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + alpha = UniformFloatHyperparameter( + name="alpha", lower=0, upper=10, default_value=1) + fit_intercept = CategoricalHyperparameter( + name="fit_intercept", choices=[True, False], default_value=True) + normalize = CategoricalHyperparameter( + name="normalize", choices=[True, False], default_value=False) + tol = UniformFloatHyperparameter( + name="tol", lower=10 ** -5, upper=10 ** -1, + default_value=10 ** -3, log=True) + positive = CategoricalHyperparameter( + name="positive", choices=[True, False], default_value=False) + + cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) + + return cs + # Custom wrapper class for using Sklearn's polynomial feature preprocessing # function. -class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): +class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): def __init__(self, degree, interaction_only, include_bias, random_state=None): # Define hyperparameters to be tuned here. 
self.degree = degree @@ -48,8 +124,8 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'CustomPreprocessor', - 'name': 'PolynomialFeatures', + return {'shortname': 'MyPreprocessor', + 'name': 'MyPreprocessor', 'handles_regression': True, 'handles_classification': True, 'handles_multiclass': True, @@ -60,8 +136,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.) - # and its range and the default value must be specified here. + # For each hyperparameter, its type (categorical, integer, float, etc.), + # range and the default value must be specified here. degree = UniformIntegerHyperparameter( name="degree", lower=2, upper=5, default_value=2) interaction_only = CategoricalHyperparameter( @@ -75,23 +151,33 @@ def get_hyperparameter_search_space(dataset_properties=None): return cs -# Include the custom preprocessor class to auto-sklearn. -autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor) - -# Import toy data from sklearn and apply train_test_split. -from sklearn.datasets import load_boston -from sklearn.model_selection import train_test_split -X, y = load_boston(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - -# Run auto-sklearn regression with the custom preprocessor. -import autosklearn.regression -reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_preprocessors=['custom_preprocessor'] - ) -reg.fit(X_train, y_train) -y_pred = reg.predict(X_test) -print(reg.show_models()) -print(reg.sprint_statistics()) - +def main(): + # Include the custom preprocessor class to auto-sklearn. + import autosklearn.pipeline.components.regression + import autosklearn.pipeline.components.feature_preprocessing + autosklearn.pipeline.components.regression.add_regressor(MyRegressor) + autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) + + # Import toy data from sklearn and apply train_test_split. + from sklearn.datasets import load_boston + from sklearn.model_selection import train_test_split + X, y = load_boston(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + + # Run auto-sklearn regression with the custom preprocessor. + import autosklearn.regression + import autosklearn.metrics + reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=["MyRegressor"], + include_preprocessors=["MyPreprocessor"]) + reg.fit(X_train, y_train) + y_pred = reg.predict(X_test) + scorer = autosklearn.metrics.r2 + print("Test score: ", scorer(y_pred, y_test)) + print(reg.show_models()) + print(reg.sprint_statistics()) + + +if __name__ == "__main__": + main() From a6c53b7e86c6854e49cc3246e3c7ff3b64f007ad Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 03:16:32 +0200 Subject: [PATCH 24/45] Add examples for extending auto-sklearn. 
---
 examples/example_extending_classification.py | 124 +++++++++++++
 examples/example_extending_preprocessing.py  | 183 -------------------
 examples/example_extending_preprocessor.py   | 109 +++++++++++
 examples/example_extending_regression.py     | 109 +++++++++++
 4 files changed, 342 insertions(+), 183 deletions(-)
 create mode 100644 examples/example_extending_classification.py
 delete mode 100644 examples/example_extending_preprocessing.py
 create mode 100644 examples/example_extending_preprocessor.py
 create mode 100644 examples/example_extending_regression.py

diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py
new file mode 100644
index 0000000000..11b82503d0
--- /dev/null
+++ b/examples/example_extending_classification.py
@@ -0,0 +1,124 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Classification Component
+====================================================================
+
+The following example demonstrates how to create a new classification
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
+    UniformIntegerHyperparameter, UniformFloatHyperparameter
+
+import sklearn.metrics
+import autosklearn.classification
+import autosklearn.pipeline.components.classification
+import autosklearn.pipeline.components.base
+from autosklearn.pipeline.constants import *
+
+
+# Create MLP classifier component for auto-sklearn.
+class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm):
+    def __init__(self,
+                 hidden_layer_depth,
+                 num_nodes_per_layer,
+                 activation,
+                 alpha,
+                 random_state=None,
+                 ):
+        self.hidden_layer_depth = hidden_layer_depth
+        self.num_nodes_per_layer = num_nodes_per_layer
+        self.activation = activation
+        self.alpha = alpha
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
+        self.hidden_layer_depth = int(self.hidden_layer_depth)
+        self.alpha = float(self.alpha)
+
+        from sklearn.neural_network import MLPClassifier
+        hidden_layer_sizes = tuple(self.num_nodes_per_layer
+                                   for i in range(self.hidden_layer_depth))
+
+        self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
+                                       alpha=self.alpha,
+                                       activation=self.activation,
+                                       random_state=self.random_state,
+                                       )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict(X)
+
+    def predict_proba(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict_proba(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MLP Classifier',
+                'name': 'MLP Classifier',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': False,
+                'is_deterministic': False,
+                # Both input and output must be iterables (e.g. tuple or list)
+                'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
+                'output': [PREDICTIONS]
+                }
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        hidden_layer_depth = UniformIntegerHyperparameter(
+            name="hidden_layer_depth", lower=1, upper=3, default_value=1
+        )
+        num_nodes_per_layer = UniformIntegerHyperparameter(
+            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
+        )
+        activation = 
CategoricalHyperparameter( + name="activation", choices=['identity', 'logistic', 'tanh', 'relu'], + default_value='relu' + ) + alpha = UniformFloatHyperparameter( + name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 + ) + cs.add_hyperparameters([hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + ]) + return cs + + +# Add MLP classifier component to auto-sklearn. +autosklearn.pipeline.components.classification.add_classifier(MLPClassifier) +cs = MLPClassifier.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split +X, y = load_breast_cancer(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit MLP classifier to the data. +clf = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=20, + per_run_time_limit=10, + include_estimators=['MLPClassifier'], +) +clf.fit(X_train, y_train) + +# Print test accuracy and statistics. +y_pred = clf.predict(X_test) +print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test)) +print(clf.sprint_statistics()) +print(clf.show_models()) \ No newline at end of file diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py deleted file mode 100644 index bb20970d39..0000000000 --- a/examples/example_extending_preprocessing.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -=============================================== -Extending Auto-sklearn -=============================================== - -In order to include new machine learning algorithms in auto-sklearn's -optimization process, users can implement a wrapper class for the algorithm -and register it to auto-sklearn. The example code below demonstrates how -to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), -register it to auto-sklearn, and use them for the given task. -A detailed walkthrough of extending auto-sklearn can be found `here `_. - -""" - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import * -from ConfigSpace.conditions import EqualsCondition, InCondition - -from autosklearn.pipeline.components.base import \ - AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * -from autosklearn.util.common import check_for_bool - - -# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). 
-class MyRegressor(AutoSklearnRegressionAlgorithm): - def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): - self.alpha = alpha - self.fit_intercept = fit_intercept - #self.normalize = normalize - self.tol = tol - self.positive = positive - - self.random_state = random_state - self.estimator = None - - def fit(self, X, Y): - import sklearn.linear_model - - self.alpha = float(self.alpha) - self.fit_intercept = check_for_bool(self.fit_intercept) - self.normalize = check_for_bool(self.normalize) - self.tol = float(self.tol) - self.positive = check_for_bool(self.positive) - - self.estimator = sklearn.linear_model.\ - Lasso(alpha=self.alpha, - fit_intercept=self.fit_intercept, - tol=self.tol, - positive=self.positive, - n_iter=300) - - self.estimator.fit(X, Y) - return self - - def predict(self, X): - if self.estimator is None: - raise NotImplementedError - return self.estimator.predict(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyRegressor', - 'name': 'MyRegressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - cs = ConfigurationSpace() - alpha = UniformFloatHyperparameter( - name="alpha", lower=0, upper=10, default_value=1) - fit_intercept = CategoricalHyperparameter( - name="fit_intercept", choices=[True, False], default_value=True) - normalize = CategoricalHyperparameter( - name="normalize", choices=[True, False], default_value=False) - tol = UniformFloatHyperparameter( - name="tol", lower=10 ** -5, upper=10 ** -1, - default_value=10 ** -3, log=True) - positive = CategoricalHyperparameter( - name="positive", choices=[True, False], default_value=False) - - cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) - - return cs - - -# Custom wrapper class for using Sklearn's polynomial feature preprocessing -# function. -class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): - def __init__(self, degree, interaction_only, include_bias, random_state=None): - # Define hyperparameters to be tuned here. - self.degree = degree - self.interaction_only = interaction_only - self.include_bias = include_bias - self.random_state = random_state - self.preprocessor = None - - def fit(self, X, Y): - # wrapper function for the fit method of Sklearn's polynomial - # preprocessing function. - import sklearn.preprocessing - self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, - interaction_only=self.interaction_only, - include_bias=self.include_bias) - self.preprocessor.fit(X, Y) - return self - - def transform(self, X): - # wrapper function for the transform method of sklearn's polynomial - # preprocessing function. It is also possible to implement - # a preprocessing algorithm directly in this function, provided that - # it behaves in the way compatible with that from sklearn. 
- if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyPreprocessor', - 'name': 'MyPreprocessor', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.), - # range and the default value must be specified here. - degree = UniformIntegerHyperparameter( - name="degree", lower=2, upper=5, default_value=2) - interaction_only = CategoricalHyperparameter( - name="interaction_only", choices=["False", "True"], default_value="False") - include_bias = CategoricalHyperparameter( - name="include_bias", choices=["True", "False"], default_value="True") - - cs = ConfigurationSpace() - cs.add_hyperparameters([degree, interaction_only, include_bias]) - - return cs - - -def main(): - # Include the custom preprocessor class to auto-sklearn. - import autosklearn.pipeline.components.regression - import autosklearn.pipeline.components.feature_preprocessing - autosklearn.pipeline.components.regression.add_regressor(MyRegressor) - autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) - - # Import toy data from sklearn and apply train_test_split. - from sklearn.datasets import load_boston - from sklearn.model_selection import train_test_split - X, y = load_boston(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - - # Run auto-sklearn regression with the custom preprocessor. - import autosklearn.regression - import autosklearn.metrics - reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_estimators=["MyRegressor"], - include_preprocessors=["MyPreprocessor"]) - reg.fit(X_train, y_train) - y_pred = reg.predict(X_test) - scorer = autosklearn.metrics.r2 - print("Test score: ", scorer(y_pred, y_test)) - print(reg.show_models()) - print(reg.sprint_statistics()) - - -if __name__ == "__main__": - main() diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py new file mode 100644 index 0000000000..5bdc024bfa --- /dev/null +++ b/examples/example_extending_preprocessor.py @@ -0,0 +1,109 @@ +""" +==================================================================== +Extending Auto-Sklearn with Preprocessor Component +==================================================================== + +The following example demonstrates how to create a wrapper around the linear +discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor +in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + UniformIntegerHyperparameter, CategoricalHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.metrics +import autosklearn.pipeline.components.feature_preprocessing +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + + +# Create LDA component for auto-sklearn. 
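+# (Assumed contract, following the base class used here: a preprocessing
+# component provides fit/transform plus the static methods get_properties
+# and get_hyperparameter_search_space, which auto-sklearn queries before
+# ever instantiating the component.)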
+class LDA(AutoSklearnPreprocessingAlgorithm):
+    def __init__(self, shrinkage, solver, n_components, tol, random_state=None):
+        self.solver = solver
+        self.shrinkage = shrinkage
+        self.n_components = n_components
+        self.tol = tol
+        self.random_state = random_state
+        self.preprocessor = None
+
+    def fit(self, X, Y=None):
+        # sklearn's LDA accepts a shrinkage value only for the 'lsqr' and
+        # 'eigen' solvers, so it is dropped when the 'svd' solver is chosen.
+        if self.solver == 'svd':
+            self.shrinkage = None
+        else:
+            self.shrinkage = float(self.shrinkage)
+        self.n_components = int(self.n_components)
+        self.tol = float(self.tol)
+
+        import sklearn.discriminant_analysis
+        self.preprocessor = \
+            sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
+                shrinkage=self.shrinkage,
+                solver=self.solver,
+                n_components=self.n_components,
+                tol=self.tol,
+            )
+        self.preprocessor.fit(X, Y)
+        return self
+
+    def transform(self, X):
+        if self.preprocessor is None:
+            raise NotImplementedError()
+        return self.preprocessor.transform(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'LDA',
+                'name': 'Linear Discriminant Analysis',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA),
+                'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        solver = CategoricalHyperparameter(
+            name="solver", choices=['svd', 'lsqr', 'eigen'], default_value='svd'
+        )
+        shrinkage = UniformFloatHyperparameter(
+            name="shrinkage", lower=0.0, upper=1.0, default_value=0.5
+        )
+        n_components = UniformIntegerHyperparameter(
+            name="n_components", lower=1, upper=29, default_value=10
+        )
+        tol = UniformFloatHyperparameter(
+            name="tol", lower=0.0001, upper=1, default_value=0.0001
+        )
+        cs.add_hyperparameters([solver, shrinkage, n_components, tol])
+        return cs
+
+
+# Add LDA component to auto-sklearn.
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)
+
+# Create dataset.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Configuration space.
+cs = LDA.get_hyperparameter_search_space()
+print(cs)
+
+# Fit the model using LDA as preprocessor.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    include_preprocessors=['LDA'],
+)
+clf.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = clf.predict(X_test)
+print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
+print(clf.sprint_statistics())
+print(clf.show_models())
\ No newline at end of file
diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py
new file mode 100644
index 0000000000..aaea3d13b5
--- /dev/null
+++ b/examples/example_extending_regression.py
@@ -0,0 +1,109 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Regression Component
+====================================================================
+
+The following example demonstrates how to create a new regression
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
+    UniformIntegerHyperparameter, CategoricalHyperparameter
+
+import sklearn.metrics
+import autosklearn.regression
+import autosklearn.pipeline.components.regression
+from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
+from autosklearn.pipeline.constants import *
+
+
+# Implement kernel ridge regression component for auto-sklearn.
+class KernelRidgeRegression(AutoSklearnRegressionAlgorithm):
+    def __init__(self, alpha, kernel, gamma, degree, random_state=None):
+        self.alpha = alpha
+        self.kernel = kernel
+        self.gamma = gamma
+        self.degree = degree
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.alpha = float(self.alpha)
+        self.gamma = float(self.gamma)
+        self.degree = int(self.degree)
+
+        import sklearn.kernel_ridge
+        self.estimator = sklearn.kernel_ridge.KernelRidge(alpha=self.alpha,
+                                                          kernel=self.kernel,
+                                                          gamma=self.gamma,
+                                                          degree=self.degree,
+                                                          )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError
+        return self.estimator.predict(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'KRR',
+                'name': 'Kernel Ridge Regression',
+                'handles_regression': True,
+                'handles_classification': False,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA),
+                'output': (PREDICTIONS,)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        alpha = UniformFloatHyperparameter(
+            name='alpha', lower=10 ** -5, upper=1, log=True, default_value=0.1)
+        kernel = CategoricalHyperparameter(
+            name='kernel',
+            choices=['linear',
+                     'rbf',
+                     'sigmoid',
+                     'polynomial',
+                     ],
+            default_value='linear'
+        )
+        gamma = UniformFloatHyperparameter(
+            name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True
+        )
+        degree = UniformIntegerHyperparameter(
+            name='degree', lower=2, upper=5, default_value=3
+        )
+        cs.add_hyperparameters([alpha, kernel, gamma, degree])
+        return cs
+
+
+# Add KRR component to auto-sklearn.
+autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression)
+cs = KernelRidgeRegression.get_hyperparameter_search_space()
+print(cs)
+
+# Generate data.
+from sklearn.datasets import load_diabetes
+from sklearn.model_selection import train_test_split
+X, y = load_diabetes(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Fit the model using KRR.
+reg = autosklearn.regression.AutoSklearnRegressor(
+    time_left_for_this_task=30,
+    per_run_time_limit=10,
+    include_estimators=['KernelRidgeRegression'],
+)
+reg.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = reg.predict(X_test)
+print("r2 score: ", sklearn.metrics.r2_score(y_test, y_pred))
+print(reg.sprint_statistics())
+print(reg.show_models())
\ No newline at end of file

From 9db3e2ec8fe17c2481dcbd724e989bcd8d6e7525 Mon Sep 17 00:00:00 2001
From: Jinu
Date: Tue, 2 Oct 2018 14:00:45 +0200
Subject: [PATCH 25/45] Use explicit imports in the extension examples

--- examples/example_extending_classification.py | 7 ++++--- examples/example_extending_preprocessor.py | 6 ++++-- examples/example_extending_regression.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index 11b82503d0..e7f8334901 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -14,12 +14,13 @@ import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.classification -import autosklearn.pipeline.components.base -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA # Create MLP classifier component for auto-sklearn. -class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm): +class MLPClassifier(AutoSklearnClassificationAlgorithm): def __init__(self, hidden_layer_depth, num_nodes_per_layer, diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 5bdc024bfa..815d698ecd 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -16,8 +16,10 @@ import autosklearn.classification import autosklearn.metrics import autosklearn.pipeline.components.feature_preprocessing -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ + UNSIGNED_DATA # Create LDA component for auto-sklearn. diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index aaea3d13b5..59f2af58eb 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -15,7 +15,8 @@ import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.constants import SPARSE, DENSE, \ + SIGNED_DATA, UNSIGNED_DATA # Implement kernel ridge regression component for auto-sklearn. From 15196ceeb4a2e99875989153120d5a36688d992f Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:00:25 +0200 Subject: [PATCH 26/45] Fixing codacy errors --- examples/example_extending_classification.py | 3 ++- examples/example_extending_preprocessor.py | 2 +- examples/example_extending_regression.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index e7f8334901..d4de7d79d3 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -16,7 +16,8 @@ import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.base \ import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ + PREDICTIONS # Create MLP classifier component for auto-sklearn. 
diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 815d698ecd..33a51cc953 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -18,7 +18,7 @@ import autosklearn.pipeline.components.feature_preprocessing from autosklearn.pipeline.components.base \ import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ UNSIGNED_DATA diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index 59f2af58eb..e3dbe18428 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -16,7 +16,7 @@ import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import SPARSE, DENSE, \ - SIGNED_DATA, UNSIGNED_DATA + SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS # Implement kernel ridge regression component for auto-sklearn. From bfb1e08986be5d41b458d6cd736d6e1e2c2c3765 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 10:23:28 +0200 Subject: [PATCH 27/45] Change example (#553) * Change datasets used in examples from digits to breast_cancer. * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Fix line length in exanple_parallel.py --- examples/example_crossvalidation.py | 4 ++-- examples/example_eips.py | 4 ++-- examples/example_holdout.py | 4 ++-- examples/example_parallel.py | 7 +++++-- examples/example_random_search.py | 6 +++--- examples/example_sequential.py | 4 ++-- requirements.txt | 2 +- setup.py | 3 ++- 8 files changed, 19 insertions(+), 15 deletions(-) diff --git a/examples/example_crossvalidation.py b/examples/example_crossvalidation.py index 85530b591b..52e3050f7b 100644 --- a/examples/example_crossvalidation.py +++ b/examples/example_crossvalidation.py @@ -21,7 +21,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -37,7 +37,7 @@ def main(): # fit() changes the data in place, but refit needs the original data. We # therefore copy the data. In practice, one should reload the data - automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits') + automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer') # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. 
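Condensed from the diff context above, the cross-validation example boils down to the following fit/refit pattern (a sketch; the time limit and fold count here are illustrative, not taken from the example):

    import sklearn.datasets
    import sklearn.model_selection
    import autosklearn.classification

    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
    )
    # fit() trains one model per fold; refit() then retrains the ensemble
    # members on the full training set before predict() is called.
    automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer')
    automl.refit(X_train.copy(), y_train.copy())
    print(automl.predict(X_test))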
diff --git a/examples/example_eips.py b/examples/example_eips.py index eef3c6cf11..db2a434092 100644 --- a/examples/example_eips.py +++ b/examples/example_eips.py @@ -69,7 +69,7 @@ def get_eips_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -81,7 +81,7 @@ def main(): get_smac_object_callback=get_eips_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn via ROAR. print(automl.show_models()) diff --git a/examples/example_holdout.py b/examples/example_holdout.py index fe1ff1c7a7..19a438bd87 100644 --- a/examples/example_holdout.py +++ b/examples/example_holdout.py @@ -18,7 +18,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -34,7 +34,7 @@ def main(): resampling_strategy='holdout', resampling_strategy_arguments={'train_size': 0.67} ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn. print(automl.show_models()) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index f5572ab97d..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -78,14 +78,17 @@ def spawn_classifier(seed, dataset_name): def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer'), + ) p.start() processes.append(p) for p in processes: diff --git a/examples/example_random_search.py b/examples/example_random_search.py index 9d04a39974..2a64b36efb 100644 --- a/examples/example_random_search.py +++ b/examples/example_random_search.py @@ -68,7 +68,7 @@ def get_random_search_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -79,7 +79,7 @@ def main(): get_smac_object_callback=get_roar_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for ROAR.') @@ -99,7 +99,7 @@ def main(): get_smac_object_callback=get_random_search_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for random search.') diff --git a/examples/example_sequential.py b/examples/example_sequential.py index 06820e7ebe..694ea81404 100644 --- a/examples/example_sequential.py +++ 
b/examples/example_sequential.py @@ -17,7 +17,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -32,7 +32,7 @@ def main(): ensemble_size=0, delete_tmp_folder_after_terminate=False, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() automl.fit_ensemble(y_train, ensemble_size=50) diff --git a/requirements.txt b/requirements.txt index b65911d6cb..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 86c9c3202b..ef89ad314c 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ "nose", "six", "Cython", - "numpy>=1.9.0", + # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 9c2c245037266871b6cd3e0018a5b6c1c05dea1c Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 10:26:35 +0200 Subject: [PATCH 28/45] [WIP]Add argument for custom logger configuration. (#505) * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Change datasets used in examples from digits to breast_cancer. * Fix codacy error * Revert codacy error fixing * [Debug] check if numpy causes error * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] numpy * [Debug] try numpy version 1.14.6 * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Fix codacy error * Revert codacy error fixing * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] try numpy version 1.14.6 * Delete libgcc_check.sh used for debugging. 
* Fix numpy version and remove blank lines * Fix line length in example_parallel.py * Fix minor error --- .travis.yml | 2 +- autosklearn/automl.py | 7 ++++- autosklearn/estimators.py | 13 +++++++-- autosklearn/util/logging_.py | 23 +++++++++------ test/test_util/example_config.yaml | 46 ++++++++++++++++++++++++++++++ test/test_util/test_logging.py | 31 ++++++++++++++++++++ 6 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 test/test_util/example_config.yaml create mode 100644 test/test_util/test_logging.py diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..c9a27004aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,7 +69,7 @@ install: # Debug output to know all exact package versions! - pip freeze - python setup.py install - + script: bash ci_scripts/test.sh after_success: source ci_scripts/success.sh diff --git a/autosklearn/automl.py b/autosklearn/automl.py index d72cbc8920..91d66d4bd2 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -82,6 +82,7 @@ def __init__(self, disable_evaluator_output=False, get_smac_object_callback=None, smac_scenario_args=None, + logging_config=None, ): super(AutoML, self).__init__() self._backend = backend @@ -110,6 +111,7 @@ def __init__(self, self._disable_evaluator_output = disable_evaluator_output self._get_smac_object_callback = get_smac_object_callback self._smac_scenario_args = smac_scenario_args + self.logging_config = logging_config self._datamanager = None self._dataset_name = None @@ -235,7 +237,10 @@ def fit_on_datamanager(self, datamanager, metric): def _get_logger(self, name): logger_name = 'AutoML(%d):%s' % (self._seed, name) - setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name))) + setup_logger(os.path.join(self._backend.temporary_directory, + '%s.log' % str(logger_name)), + self.logging_config, + ) return get_logger(logger_name) @staticmethod diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 910c68f8e8..514b469f33 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -28,7 +28,9 @@ def __init__(self, shared_mode=False, disable_evaluator_output=False, get_smac_object_callback=None, - smac_scenario_args=None): + smac_scenario_args=None, + logging_config=None, + ): """ Parameters ---------- @@ -168,6 +170,11 @@ def __init__(self, This is an advanced feature. Use only if you are familiar with `SMAC `_. + logging_config : dict, optional (None) + dictionary object specifying the logger configuration. If None, + the default logging.yaml file is used, which can be found in + the directory ``util/logging.yaml`` relative to the installation. 
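+            For illustration, a custom configuration could be loaded from a
+            YAML file and passed in as follows (``my_logging.yaml`` is a
+            hypothetical file name; any dictionary that is valid for
+            ``logging.config.dictConfig`` should work)::
+
+                import yaml
+                with open('my_logging.yaml') as fh:
+                    logging_config = yaml.safe_load(fh)
+                automl = AutoSklearnClassifier(logging_config=logging_config)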
+ Attributes ---------- @@ -199,6 +206,7 @@ def __init__(self, self.disable_evaluator_output = disable_evaluator_output self.get_smac_object_callback = get_smac_object_callback self.smac_scenario_args = smac_scenario_args + self.logging_config = logging_config self._automl = None super().__init__() @@ -238,7 +246,8 @@ def build_automl(self): shared_mode=self.shared_mode, get_smac_object_callback=self.get_smac_object_callback, disable_evaluator_output=self.disable_evaluator_output, - smac_scenario_args=self.smac_scenario_args + smac_scenario_args=self.smac_scenario_args, + logging_config=self.logging_config, ) return automl diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py index cf3f40586d..ea074a1f3f 100644 --- a/autosklearn/util/logging_.py +++ b/autosklearn/util/logging_.py @@ -7,18 +7,23 @@ import yaml -def setup_logger(output_file=None): - with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), - 'r') as fh: - config = yaml.load(fh) - if output_file is not None: - config['handlers']['file_handler']['filename'] = output_file - logging.config.dictConfig(config) +def setup_logger(output_file=None, logging_config=None): + # logging_config must be a dictionary object specifying the configuration + # for the loggers to be used in auto-sklearn. + if logging_config is not None: + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) + else: + with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), + 'r') as fh: + logging_config = yaml.safe_load(fh) + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) def _create_logger(name): - logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %(' - 'message)s', datefmt='%H:%M:%S') return logging.getLogger(name) diff --git a/test/test_util/example_config.yaml b/test/test_util/example_config.yaml new file mode 100644 index 0000000000..7c93e1b846 --- /dev/null +++ b/test/test_util/example_config.yaml @@ -0,0 +1,46 @@ +--- +version: 1 +disable_existing_loggers: False +formatters: + simple: + format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' + +handlers: + console: + class: logging.StreamHandler + level: WARNING + formatter: simple + stream: ext://sys.stdout + + file_handler: + class: logging.FileHandler + level: DEBUG + formatter: simple + filename: autosklearn.log + +root: + level: CRITICAL + handlers: [console, file_handler] + +loggers: + autosklearn.metalearning: + level: NOTSET + handlers: [file_handler] + propagate: no + + autosklearn.util.backend: + level: DEBUG + handlers: [file_handler] + propagate: no + + smac.intensification.intensification.Intensifier: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.local_search.LocalSearch: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.smbo.SMBO: + level: INFO + handlers: [file_handler, console] diff --git a/test/test_util/test_logging.py b/test/test_util/test_logging.py new file mode 100644 index 0000000000..9c18c07ec1 --- /dev/null +++ b/test/test_util/test_logging.py @@ -0,0 +1,31 @@ +import os +import unittest +import logging +import logging.config +import yaml +from autosklearn.util import logging_ + +class LoggingTest(unittest.TestCase): + + def test_setup_logger(self): + # Test that setup_logger function correctly configures the logger + # according to the given dictionary, and uses the default + # logging.yaml file if 
logging_config is not specified. + + with open(os.path.join(os.path.dirname(__file__), \ + 'example_config.yaml'), 'r') as fh: + example_config = yaml.safe_load(fh) + + # Configure logger with example_config.yaml. + logging_.setup_logger(logging_config=example_config) + + # example_config sets the root logger's level to CRITICAL, + # which corresponds to 50. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 50) + + # This time use the default configuration. + logging_.setup_logger(logging_config=None) + + # default config sets the root logger's level to DEBUG, + # which corresponds to 10. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 10) \ No newline at end of file From 3f0ee66ffa79596de323e08447de20cc46968949 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 19 Oct 2018 10:28:17 +0200 Subject: [PATCH 29/45] FIX #566: sort ensemble correctly (#567) --- autosklearn/util/backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py index 52ac678762..c59adbde5c 100644 --- a/autosklearn/util/backend.py +++ b/autosklearn/util/backend.py @@ -407,9 +407,10 @@ def save_ensemble(self, ensemble, idx, seed): except Exception: pass - filepath = os.path.join(self.get_ensemble_dir(), - '%s.%s.ensemble' % (str(seed), - str(idx))) + filepath = os.path.join( + self.get_ensemble_dir(), + '%s.%s.ensemble' % (str(seed), str(idx).zfill(10)) + ) with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( filepath), delete=False) as fh: pickle.dump(ensemble, fh) From 80517ca69b406e305077429481b91e0251049203 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:29:57 +0200 Subject: [PATCH 30/45] Fix Line length in example_parallel.py --- examples/example_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index bcb45206c2..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -85,7 +85,10 @@ def main(): processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer'), + ) p.start() processes.append(p) for p in processes: From 2afad9acc6a57468331c2d544c7e475f576f11b5 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:34:14 +0200 Subject: [PATCH 31/45] Fix line length in example_parallel.py --- examples/example_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index bcb45206c2..2935ed2b85 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -85,7 +85,10 @@ def main(): processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer') + ) p.start() processes.append(p) for p in processes: From c16d7f64a018c988d07a57c0c4367733ea9b1b46 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:34:35 +0200 Subject: [PATCH 32/45] Fix minor error --- examples/example_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/example_parallel.py 
b/examples/example_parallel.py index 2935ed2b85..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -87,7 +87,7 @@ def main(): for i in range(4): # set this at roughly half of your cores p = multiprocessing.Process( target=spawn_classifier, - args=(i, 'breast_cancer') + args=(i, 'breast_cancer'), ) p.start() processes.append(p) From c8368f5d1821225e7a1484f0dc66a77048d431b3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:52:03 +0200 Subject: [PATCH 33/45] Fix codacy error "parameters differ from overridden 'fit' method" --- examples/example_extending_classification.py | 4 ++-- examples/example_extending_preprocessor.py | 4 ++-- examples/example_extending_regression.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index d4de7d79d3..a8b0dbe3ca 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -35,7 +35,7 @@ def __init__(self, self.alpha = alpha self.random_state = random_state - def fit(self, X, Y): + def fit(self, X, y): self.num_nodes_per_layer = int(self.num_nodes_per_layer) self.hidden_layer_depth = int(self.hidden_layer_depth) self.alpha = float(self.alpha) @@ -49,7 +49,7 @@ def fit(self, X, Y): activation=self.activation, random_state=self.random_state, ) - self.estimator.fit(X, Y) + self.estimator.fit(X, y) return self def predict(self, X): diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 33a51cc953..64e866f002 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -32,7 +32,7 @@ def __init__(self, shrinkage, solver, n_components, tol, random_state=None): self.random_state = random_state self.preprocessor = None - def fit(self, X, Y=None): + def fit(self, X, y=None): self.shrinkage = float(self.shrinkage) self.n_components = int(self.n_components) self.tol = float(self.tol) @@ -45,7 +45,7 @@ def fit(self, X, Y=None): n_components=self.n_components, tol=self.tol, ) - self.preprocessor.fit(X, Y) + self.preprocessor.fit(X, y) return self def transform(self, X): diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index e3dbe18428..7b9ad21239 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -29,7 +29,7 @@ def __init__(self, alpha, kernel, gamma, degree, random_state=None): self.random_state = random_state self.estimator = None - def fit(self, X, Y): + def fit(self, X, y): self.alpha = float(self.alpha) self.gamma = float(self.gamma) self.degree = int(self.degree) @@ -40,7 +40,7 @@ def fit(self, X, Y): gamma=self.gamma, degree=self.degree, ) - self.estimator.fit(X, Y) + self.estimator.fit(X, y) return self def predict(self, X): From 763aac02fe60d97d2f7f21a8c454716243ed5e05 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 13:14:52 +0200 Subject: [PATCH 34/45] Check target type at the beginning of the fitting process. (#506) * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * Change datasets used in examples from digits to breast_cancer. * [Debug] try with numpy version 1.14.5 * [Debug] Check if numpy version 1.14.6 raises error.
* [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * [Debug] Check if numpy version 1.14.6 raises error. * Fix numpy version to 1.14.5 * Add comment to Mock in test_type_of_target * Fix line length in example_parallel.py * Fix minor error --- autosklearn/estimators.py | 25 ++++++ test/test_automl/test_estimators.py | 134 +++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 2 deletions(-) diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 514b469f33..6adedd0f56 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -3,6 +3,7 @@ from autosklearn.automl import AutoMLClassifier, AutoMLRegressor from autosklearn.util.backend import create +from sklearn.utils.multiclass import type_of_target class AutoSklearnEstimator(BaseEstimator): @@ -465,6 +466,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: binary, multiclass, multilabel-indicator. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'continuous', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("classification with data of type %s is" + " not supported" % target_type) + super().fit( X=X, y=y, @@ -568,6 +581,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: continuous, binary, multiclass. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'multilabel-indicator', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("regression with data of type %s is not" + " supported" % target_type) + # Fit is supposed to be idempotent! # But not if we use share_mode. super().fit( diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index e8a5bd0954..064dc73610 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -50,17 +50,25 @@ class EstimatorTest(Base, unittest.TestCase): # self._tearDown(output) def test_pSMAC_wrong_arguments(self): + X = np.zeros((100, 100)) + y = np.zeros((100, )) self.assertRaisesRegexp(ValueError, "If shared_mode == True tmp_folder must not " "be None.", - lambda shared_mode: AutoSklearnClassifier(shared_mode=shared_mode).fit(None, None), + lambda shared_mode: + AutoSklearnClassifier( + shared_mode=shared_mode, + ).fit(X, y), shared_mode=True) self.assertRaisesRegexp(ValueError, "If shared_mode == True output_folder must not " "be None.", lambda shared_mode, tmp_folder: - AutoSklearnClassifier(shared_mode=shared_mode, tmp_folder=tmp_folder).fit(None, None), + AutoSklearnClassifier( + shared_mode=shared_mode, + tmp_folder=tmp_folder, + ).fit(X, y), shared_mode=True, tmp_folder='/tmp/duitaredxtvbedb') @@ -85,6 +93,128 @@ def test_feat_type_wrong_arguments(self): cls.fit, X=X, y=y, feat_type=['Car']*100) + # Mock AutoSklearnEstimator.fit so the test doesn't actually run fit(). + @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.fit') + def test_type_of_target(self, mock_estimator): + # Test that classifier raises error for illegal target types.
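+        # For reference, sklearn.utils.multiclass.type_of_target maps the
+        # target arrays defined below as follows (a sketch of the expected
+        # values, assuming scikit-learn 0.19):
+        #     y_binary                 -> 'binary'
+        #     y_continuous             -> 'continuous'
+        #     y_multiclass             -> 'multiclass'
+        #     y_multilabel             -> 'multilabel-indicator'
+        #     y_multiclass_multioutput -> 'multiclass-multioutput'
+        #     y_continuous_multioutput -> 'continuous-multioutput'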
+ X = np.array([[1, 2], + [2, 3], + [3, 4], + [4, 5], + ]) + # Possible target types + y_binary = np.array([0, 0, 1, 1]) + y_continuous = np.array([0.1, 1.3, 2.1, 4.0]) + y_multiclass = np.array([0, 1, 2, 0]) + y_multilabel = np.array([[0, 1], + [1, 1], + [1, 0], + [0, 0], + ]) + y_multiclass_multioutput = np.array([[0, 1], + [1, 3], + [2, 2], + [5, 3], + ]) + y_continuous_multioutput = np.array([[0.1, 1.5], + [1.2, 3.5], + [2.7, 2.7], + [5.5, 3.9], + ]) + + cls = AutoSklearnClassifier() + # Illegal target types for classification: continuous, + # multiclass-multioutput, continuous-multioutput. + self.assertRaisesRegex(ValueError, + "classification with data of type" + " multiclass-multioutput is not supported", + cls.fit, + X=X, + y=y_multiclass_multioutput, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous is not supported", + cls.fit, + X=X, + y=y_continuous, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous-multioutput is not supported", + cls.fit, + X=X, + y=y_continuous_multioutput, + ) + + # Legal target types for classification: binary, multiclass, + # multilabel-indicator. + try: + cls.fit(X, y_binary) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "binary targets") + + try: + cls.fit(X, y_multiclass) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multiclass targets") + + try: + cls.fit(X, y_multilabel) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multilabel-indicator targets") + + # Test that regressor raises error for illegal target types. + reg = AutoSklearnRegressor() + # Illegal target types for regression: multiclass-multioutput, + # multilabel-indicator, continuous-multioutput. 
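+        # (These mirror the check added to AutoSklearnRegressor.fit above;
+        # binary and multiclass targets stay legal for regression, presumably
+        # because integer class labels are also valid continuous targets.)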
+ self.assertRaisesRegex(ValueError, "regression with data of type" " multiclass-multioutput is not supported", reg.fit, X=X, y=y_multiclass_multioutput, ) + + self.assertRaisesRegex(ValueError, "regression with data of type" " multilabel-indicator is not supported", reg.fit, X=X, y=y_multilabel, ) + + self.assertRaisesRegex(ValueError, "regression with data of type" " continuous-multioutput is not supported", reg.fit, X=X, y=y_continuous_multioutput, ) + # Legal target types: continuous, binary, multiclass + try: + reg.fit(X, y_continuous) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "continuous targets") + + try: + reg.fit(X, y_binary) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "binary targets") + + try: + reg.fit(X, y_multiclass) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "multiclass targets") + def test_fit_pSMAC(self): tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC') output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC') From 3cf42b54e2d8e3d981a04fc4db89da47ac9a4051 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 19 Oct 2018 14:06:10 +0200 Subject: [PATCH 35/45] Update test_automl.py --- test/test_automl/test_automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py index 9324566201..f8da452e84 100644 --- a/test/test_automl/test_automl.py +++ b/test/test_automl/test_automl.py @@ -224,7 +224,7 @@ def test_automl_outputs(self): fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn', 'ensembles')) - self.assertIn('100.0.ensemble', fixture) + self.assertIn('100.0000000000.ensemble', fixture) # Start time start_time_file_path = os.path.join(backend_api.temporary_directory, '.auto-sklearn', From 88d1554d84646b8cf672ea5c53f92b7499619a16 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 15:41:12 +0200 Subject: [PATCH 36/45] Add python 3.7 to Travis, change python_requires in setup.py. --- .travis.yml | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index bf8727adf4..6ec5fa1781 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,8 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.7" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" @@ -70,7 +71,6 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config - # Install flake 8. - pip install flake8 # Debug output to know all exact package versions!
- pip freeze diff --git a/setup.py b/setup.py index ef89ad314c..cfdac2867e 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,6 @@ license='BSD', platforms=['Linux'], classifiers=[], - python_requires='>=3.4.*', + python_requires='>=3.5.*', url='https://automl.github.io/auto-sklearn', ) From 9b652d571af5d16a4641bbd5128e868e99496f11 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 15:54:51 +0200 Subject: [PATCH 37/45] Add solver hyperparameter in MLP classifier example, increase runtime of classifier in example_extending_preprocessor.py --- examples/example_extending_classification.py | 9 ++++++++- examples/example_extending_preprocessor.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index a8b0dbe3ca..9f4ea4eedb 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -27,12 +27,14 @@ def __init__(self, num_nodes_per_layer, activation, alpha, + solver, random_state=None, ): self.hidden_layer_depth = hidden_layer_depth self.num_nodes_per_layer = num_nodes_per_layer self.activation = activation self.alpha = alpha + self.solver = solver self.random_state = random_state def fit(self, X, y): @@ -45,8 +47,9 @@ def fit(self, X, y): for i in range(self.hidden_layer_depth)) self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, - alpha=self.alpha, activation=self.activation, + alpha=self.alpha, + solver=self.solver, random_state=self.random_state, ) self.estimator.fit(X, y) @@ -92,10 +95,14 @@ def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 ) + solver = CategoricalHyperparameter( + name="solver", choices=['lbfgs', 'sgd', 'adam'], default_value='adam' + ) cs.add_hyperparameters([hidden_layer_depth, num_nodes_per_layer, activation, alpha, + solver, ]) return cs diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 64e866f002..e416827408 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -99,7 +99,7 @@ def get_hyperparameter_search_space(dataset_properties=None): # Fit the model using LDA as preprocessor. clf = autosklearn.classification.AutoSklearnClassifier( - time_left_for_this_task=20, + time_left_for_this_task=30, include_preprocessors=['LDA'], ) clf.fit(X_train, y_train) From aacf24bf9c6acd3ebdbd3f08ca0362997cd55f26 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 19:09:01 +0200 Subject: [PATCH 38/45] Change all occurrences of master to development in flake8_diff.sh --- ci_scripts/flake8_diff.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh index e40c2653bb..39701d5248 100644 --- a/ci_scripts/flake8_diff.sh +++ b/ci_scripts/flake8_diff.sh @@ -40,7 +40,7 @@ git remote --verbose # Travis does the git clone with a limited depth.
# This may not be enough to find the common ancestor with -# $REMOTE/master so we unshallow the git checkout +# $REMOTE/development so we unshallow the git checkout if [[ -a .git/shallow ]]; then echo -e '\nTrying to unshallow the repo:' echo '--------------------------------------------------------------------------------' @@ -61,7 +61,7 @@ if [[ "$TRAVIS" == "true" ]]; then fi else # We want to fetch the code as it is in the PR branch and not - # the result of the merge into master. This way line numbers + # the result of the merge into development. This way line numbers # reported by Travis will match with the local code. LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST # In Travis the PR target is always origin @@ -70,7 +70,7 @@ if [[ "$TRAVIS" == "true" ]]; then fi # If not using the commit range from Travis we need to find the common -# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/development if [[ -z "$COMMIT_RANGE" ]]; then if [[ -z "$LOCAL_BRANCH_REF" ]]; then LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) @@ -79,16 +79,16 @@ if [[ -z "$COMMIT_RANGE" ]]; then echo '--------------------------------------------------------------------------------' git --no-pager log -2 $LOCAL_BRANCH_REF - REMOTE_MASTER_REF="$REMOTE/master" - # Make sure that $REMOTE_MASTER_REF is a valid reference - echo -e "\nFetching $REMOTE_MASTER_REF" + REMOTE_DEVELOPMENT_REF="$REMOTE/development" + # Make sure that $REMOTE_DEVELOPMENT_REF is a valid reference + echo -e "\nFetching $REMOTE_DEVELOPMENT_REF" echo '--------------------------------------------------------------------------------' - git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + git fetch $REMOTE development:refs/remotes/$REMOTE_DEVELOPMENT_REF LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) - REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + REMOTE_DEVELOPMENT_SHORT_HASH=$(git rev-parse --short $REMOTE_DEVELOPMENT_REF) - COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ - echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_DEVELOPMENT_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_DEVELOPMENT_REF -q)" if [ -z "$COMMIT" ]; then exit 1 @@ -97,7 +97,7 @@ if [[ -z "$COMMIT_RANGE" ]]; then COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ - "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + "and $REMOTE_DEVELOPMENT_REF ($REMOTE_DEVELOPMENT_SHORT_HASH) is $COMMIT_SHORT_HASH:" echo '--------------------------------------------------------------------------------' git --no-pager show --no-patch $COMMIT_SHORT_HASH From f9a7b1de6e6515a02c76e07cd227e243d5245dc3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 20:21:07 +0200 Subject: [PATCH 39/45] numpy requirement is now >=1.9.0<=1.14.5 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 71c47ceb66..4ba8cfc10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.14.5 +numpy>=1.9.0<=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index cfdac2867e..68491b6a4f 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy 
version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy==1.14.5", + "numpy>=1.9.0<=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 2c079703ebefd05439229cddffbc969280b15a82 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 26 Oct 2018 01:55:15 +0200 Subject: [PATCH 40/45] Fix requirement inequality mistake --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4ba8cfc10e..835fe5d685 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0<=1.14.5 +numpy<=1.9.0>=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 68491b6a4f..6ef16993a5 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy>=1.9.0<=1.14.5", + "numpy<=1.9.0>=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From de3192f67442704ac727be4f40bdf226c9982a32 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 26 Oct 2018 02:13:36 +0200 Subject: [PATCH 41/45] change initial numpy version to 1.14.5. --- .travis.yml | 2 +- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ec5fa1781..ecf00953d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,7 +64,7 @@ install: # Install general requirements the way setup.py suggests - pip install pep8 codecov # Temporarily pin the numpy version for travis-ci - - pip install "numpy<1.15" + - pip install "numpy<=1.14.5" - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - pip install xmltodict requests liac-arff diff --git a/requirements.txt b/requirements.txt index 835fe5d685..4ba8cfc10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy<=1.9.0>=1.14.5 +numpy>=1.9.0<=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 6ef16993a5..68491b6a4f 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy<=1.9.0>=1.14.5", + "numpy>=1.9.0<=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 56af60d00a2838bf4cd6e7b32212abe632ad14c5 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 9 Nov 2018 15:06:02 +0100 Subject: [PATCH 42/45] Circle Drop (#575) * Extending Autosklearn. First commit. * Add regression example * CI: upper bound numpy version due to travis failures * CI: upper bound numpy version due to travis failures * use tempfile.gettempdir() (#521) * use tempfile.gettempdir() * follow quality review coding standards * Remove a colon from README.md (#527) * fixing warnings on non-tuple sequence for indexing (#526) * fix string formatting (#540) * FIX removing models wrt wrong metric in ensemble (#522) * Add examples for extending auto-sklearn. * . * Change datasets used in examples from digits to breast_cancer. * First commit * Fixing codacy errors * Fixing bug * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * First commit * Fixing bug * Modify flake8_diff.sh * Extending Autosklearn. First commit. * Add regression example * Add examples for extending auto-sklearn. * . * Fixing codacy errors * Change example (#553) * Change datasets used in examples from digits to breast_cancer. 
* [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Fix line length in example_parallel.py * [WIP] Add argument for custom logger configuration. (#505) * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Change datasets used in examples from digits to breast_cancer. * Fix codacy error * Revert codacy error fixing * [Debug] check if numpy causes error * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] numpy * [Debug] try numpy version 1.14.6 * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Fix codacy error * Revert codacy error fixing * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] try numpy version 1.14.6 * Delete libgcc_check.sh used for debugging. * Fix numpy version and remove blank lines * Fix line length in example_parallel.py * Fix minor error * FIX #566: sort ensemble correctly (#567) * Fix Line length in example_parallel.py * Fix line length in example_parallel.py * Fix minor error * Fix codacy error "parameters differ from overridden 'fit' method" * Check target type at the beginning of the fitting process. (#506) * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * Change datasets used in examples from digits to breast_cancer. * [Debug] try with numpy version 1.14.5 * [Debug] Check if numpy version 1.14.6 raises error. * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * [Debug] Check if numpy version 1.14.6 raises error. * Fix numpy version to 1.14.5 * Add comment to Mock in test_type_of_target * Fix line length in example_parallel.py * Fix minor error * Update test_automl.py * Add python 3.7 to Travis, change python_requires in setup.py. * Add solver hyperparameter in MLP classifier example, increase runtime of classifier in example_extending_preprocessor.py * Change all occurrences of master to development in flake8_diff.sh * numpy requirement is now >=1.9.0<=1.14.5 * Fix requirement inequality mistake * change initial numpy version to 1.14.5. * Deploy using travis instead of circle * FIX error in travis.yml caused by stashing * Test that deploy works. * Debugging. Set local_dir to doc/development. * Done Testing. Finalize the PR.
* Delete circle_install.sh --- .travis.yml | 14 ++++++-- ci_scripts/circle_install.sh | 20 ------------ ci_scripts/create_doc.sh | 61 +++++++++++++++++++++++++++++++++++ ci_scripts/push_doc.sh | 42 ------------------------ circle.yml | 62 ------------------------------------ 5 files changed, 73 insertions(+), 126 deletions(-) delete mode 100644 ci_scripts/circle_install.sh create mode 100644 ci_scripts/create_doc.sh delete mode 100644 ci_scripts/push_doc.sh delete mode 100644 circle.yml diff --git a/.travis.yml b/.travis.yml index 748303ec3f..9faa0ae8f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ matrix: - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux - env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + env: DISTRIB="conda" PYTHON_VERSION="3.6" DOCPUSH="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux @@ -77,5 +77,15 @@ install: - python setup.py install script: bash ci_scripts/test.sh -after_success: source ci_scripts/success.sh +after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result" +deploy: + provider: pages + skip-cleanup: true + github-token: $GITHUB_TOKEN # set in the settings page of the repository + keep-history: true + committer-from-gh: true + on: + all_branches: true + condition: $doc_result = "success" + local_dir: doc/$TRAVIS_BRANCH diff --git a/ci_scripts/circle_install.sh b/ci_scripts/circle_install.sh deleted file mode 100644 index 195ad87d54..0000000000 --- a/ci_scripts/circle_install.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!bin/bash - -# on circle ci, each command run with it's own execution context so we have to -# activate the conda testenv on a per command basis. That's why we put calls to -# python (conda) in a dedicated bash script and we activate the conda testenv -# here. -source activate testenv - -export CC=`which gcc` -# install documentation building dependencies -pip install --upgrade numpy -pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc sphinx_gallery pillow -# And finally, all other dependencies -cat requirements.txt | xargs -n 1 -L 1 pip install - -python setup.py clean -python setup.py develop - -# pipefail is necessary to propagate exit codes -set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh new file mode 100644 index 0000000000..0a794627d8 --- /dev/null +++ b/ci_scripts/create_doc.sh @@ -0,0 +1,61 @@ +# This script is mostly adopted from https://github.com/openml/openml-python/blob/develop/ci_scripts/create_doc.sh + +set -euo pipefail + +# Check if DOCPUSH is set +if ! [[ -z ${DOCPUSH+x} ]]; then + + if [[ "$DOCPUSH" == "true" ]]; then + + # install documentation building dependencies + pip install --upgrade matplotlib seaborn setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert mock + + # $1 is the branch name + # $2 is the global variable where we set the script status + + if ! 
{ [ $1 = "master" ] || [ $1 = "development" ]; }; then + { echo "Not one of the allowed branches"; exit 0; } + fi + + # delete any previous documentation folder + if [ -d doc/$1 ]; then + rm -rf doc/$1 + fi + + # create the documentation + cd doc && make html 2>&1 + + # create directory with branch name + # the documentation for dev/stable from git will be stored here + mkdir $1 + + # get previous documentation from github + git clone https://github.com/automl/auto-sklearn.git --branch gh-pages --single-branch + + # copy previous documentation + cp -r auto-sklearn/. $1 + rm -rf auto-sklearn + + # if the documentation for the branch exists, remove it + if [ -d $1/$1 ]; then + rm -rf $1/$1 + fi + + # copy the updated documentation for this branch + mkdir $1/$1 + cp -r build/html/. $1/$1 + + # takes a variable name as an argument and assigns the script outcome to a + # variable with the given name. If it got this far, the script was successful + function set_return() { + # $1 is the variable where we save the script outcome + local __result=$1 + local status='success' + eval $__result="'$status'" + } + + set_return "$2" + fi +fi +# Workaround for travis failure +set +u diff --git a/ci_scripts/push_doc.sh b/ci_scripts/push_doc.sh deleted file mode 100644 index 3fa944b64a..0000000000 --- a/ci_scripts/push_doc.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# This script is meant to be called in the "deploy" step defined in -# circle.yml. See https://circleci.com/docs/ for more details. -# The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. - -if [ ! -z "$1" ] - then DOC_FOLDER=$1 -fi - -MSG="Pushing the docs for revision for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1, folder: $DOC_FOLDER" - -cd $HOME - -# Clone the docs repo if it isnt already there -if [ ! -d $DOC_REPO ]; - then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; -fi - -# Copy the build docs to a temporary folder -rm -rf tmp -mkdir tmp -cp -R $HOME/$DOC_REPO/doc/build/html/* ./tmp/ - -cd $DOC_REPO -git branch gh-pages -git checkout -f gh-pages -git reset --hard origin/gh-pages -git clean -dfx -git rm -rf $HOME/$DOC_REPO/$DOC_FOLDER && rm -rf $HOME/$DOC_REPO/$DOC_FOLDER - -# Copy the new build docs -mkdir $DOC_FOLDER -cp -R $HOME/tmp/* ./$DOC_FOLDER/ - -git config --global user.email $EMAIL -git config --global user.name $USERNAME -git add -f ./$DOC_FOLDER/ -git commit -m "$MSG" -git push -f origin gh-pages - -echo $MSG \ No newline at end of file diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 8ff09eb573..0000000000 --- a/circle.yml +++ /dev/null @@ -1,62 +0,0 @@ -machine: - environment: - PATH: /home/ubuntu/miniconda/bin:$PATH - - # The github organization or username of the repository which hosts the - # project and documentation. - USERNAME: "automl" - - # The repository where the documentation will be hosted - DOC_REPO: "auto-sklearn" - - # The base URL for the Github page where the documentation will be hosted - DOC_URL: "" - - # The email is to be used for commits in the Github Page - EMAIL: "feurerm@informatik.uni-freiburg.de" - -dependencies: - - # Various dependencies - pre: - # Get rid of existing virtualenvs on circle ci as they conflict with conda. 
- # From nilearn: https://github.com/nilearn/nilearn/blob/master/circle.yml - - cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs - # from scikit-learn contrib - - sudo -E apt-get -yq remove texlive-binaries --purge - - sudo -E apt-get -yq update - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra - # Other stuff... - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential - # Conda installation - - wget https://repo.continuum.io/miniconda/Miniconda3-4.3.21-Linux-x86_64.sh -O ~/miniconda.sh - - bash ~/miniconda.sh -b -p $HOME/miniconda - - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig - - # The --user is needed to let sphinx see the source and the binaries - # The pipefail is requested to propagate exit code - override: - - source ci_scripts/circle_install.sh -test: - # Grep error on the documentation - override: - - cat ~/log.txt && if grep -q "Traceback (most recent call last):" ~/log.txt; then false; else true; fi -deployment: - master: - branch: master - commands: - - bash ci_scripts/push_doc.sh 'stable' - development: - branch: development - commands: - - bash ci_scripts/push_doc.sh 'dev' -general: - # Open the doc to the API - artifacts: - - "doc/_build/html" - - "~/log.txt" - # Restric the build to the branch master only - #branches: - # only: - # - development - # - master From 1b7a172929fab8fdda5c8c6bb0fc1cead24538ba Mon Sep 17 00:00:00 2001 From: theFool Date: Fri, 9 Nov 2018 22:23:07 +0800 Subject: [PATCH 43/45] Update gmeans.py (#572) fix the bug of n_clusters not equals to len(cluster_centers) --- autosklearn/metalearning/metalearning/clustering/gmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/metalearning/metalearning/clustering/gmeans.py b/autosklearn/metalearning/metalearning/clustering/gmeans.py index 23363c6248..704ecc05a7 100644 --- a/autosklearn/metalearning/metalearning/clustering/gmeans.py +++ b/autosklearn/metalearning/metalearning/clustering/gmeans.py @@ -69,7 +69,7 @@ def fit(self, X): break # Refinement - KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1, + KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1, init=np.array(cluster_centers), random_state=self.random_state) KMeans.fit(X) From 6d53d1f8b1d1d035eb2d464af17ffba6bac14f87 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 9 Nov 2018 15:27:18 +0100 Subject: [PATCH 44/45] Release 0.4.1 (#576) * . * . * AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self. * Initial commit. Work in Progress. * Fix minor printing error in sprint_statistics. * Revert "Fix#460" * Raise error if ensemble is not built (#480) * . * . * AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self. * Initial commit. Work in Progress. * Fix minor printing error in sprint_statistics. * Revert "Fix#460" * Resolve rebase conflict * combined unittests to reduce travis runtime * . * . * . * . * . * ADD Auto-sklearn 0.4.1 release note to releases.rst --- doc/releases.rst | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/doc/releases.rst b/doc/releases.rst index ab35f83634..d4d5beea23 100644 --- a/doc/releases.rst +++ b/doc/releases.rst @@ -11,6 +11,38 @@ Releases ======== +Version 0.4.1 +============= + +* Added documentation on `how to extend Auto-sklearn `_ + with custom classifier, regressor, and preprocessor. 
+* Auto-sklearn now requires a numpy version between 1.9.0 and 1.14.5, because higher versions + cause a Travis build failure. +* Examples now use ``sklearn.datasets.load_breast_cancer()`` instead of ``sklearn.datasets.load_digits()`` + to reduce memory usage during the Travis build. +* Fixes future warnings on using a non-tuple sequence for indexing. +* Fixes `#500 `_: fixes the + ensemble builder to correctly evaluate model scores with any metric. + See this `PR `_. +* Fixes `#482 `_ and + `#491 `_: users can now set up + a custom logger configuration by passing a dictionary created from a YAML file to + ``logging_config``. +* Fixes `#566 `_: ensembles are now sorted correctly. +* Fixes `#293 `_: Auto-sklearn now checks whether an appropriate + target type was given for classification and regression before calling ``fit()``. +* Travis CI now runs flake8 to enforce the PEP 8 style guide, and documentation is deployed + via Travis CI instead of Circle CI. + +Contributors +************ + +* Matthias Feurer +* Manuel Streuhofer +* Taneli Mielikäinen +* Katharina Eggensperger +* Jin Woo Ahn + Version 0.4.0 ============= From 8aae9d62d97b0c91aed7c0c2410f552e51120d01 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 9 Nov 2018 17:07:18 +0100 Subject: [PATCH 45/45] Update version information for 0.4.1 --- autosklearn/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py index 88f7ebca07..4bfd2e72b7 100644 --- a/autosklearn/__version__.py +++ b/autosklearn/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.4.0" +__version__ = "0.4.1"
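A closing note on the ensemble-sorting fix (PATCH 29/45, issue #566) and the matching test update in PATCH 35/45: zero-padding the index with str(idx).zfill(10) makes the lexicographic order of the saved .ensemble file names agree with their numeric order. A minimal sketch of the behaviour this fixes, runnable in any Python 3 interpreter:

    >>> sorted(['100.9.ensemble', '100.10.ensemble'])  # unpadded index sorts wrongly
    ['100.10.ensemble', '100.9.ensemble']
    >>> sorted('100.%s.ensemble' % str(idx).zfill(10) for idx in (9, 10))
    ['100.0000000009.ensemble', '100.0000000010.ensemble']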