From ecadd822dd9e017f9ac2b1387381491cfe51be63 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 17 Jul 2018 21:56:01 +0200 Subject: [PATCH 01/45] Extending Autosklearn. First commit. --- examples/example_extending_preprocessing.py | 97 +++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 examples/example_extending_preprocessing.py diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py new file mode 100644 index 0000000000..c2300ddfb2 --- /dev/null +++ b/examples/example_extending_preprocessing.py @@ -0,0 +1,97 @@ +""" +=============================================== +Extending Auto-sklearn with Custom Preprocessor +=============================================== + + +explanation goes here. +""" + +import autosklearn.pipeline.components.feature_preprocessing +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter + +from autosklearn.pipeline.components.base import \ + AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + +# Custom wrapper class for using Sklearn's polynomial feature preprocessing +# function. +class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): + def __init__(self, degree, interaction_only, include_bias, random_state=None): + # Define hyperparameters to be tuned here. + self.degree = degree + self.interaction_only = interaction_only + self.include_bias = include_bias + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y): + # wrapper function for the fit method of Sklearn's polynomial + # preprocessing function. + import sklearn.preprocessing + self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, + interaction_only=self.interaction_only, + include_bias=self.include_bias) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + # wrapper function for the transform method of sklearn's polynomial + # preprocessing function. It is also possible to implement + # a preprocessing algorithm directly in this function, provided that + # it behaves in the way compatible with that from sklearn. + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'CustomPreprocessor', + 'name': 'PolynomialFeatures', + 'handles_regression': True, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': True, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (INPUT,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + # For each hyperparameter, its type (categorical, integer, float, etc.) + # and its range and the default value must be specified here. + degree = UniformIntegerHyperparameter( + name="degree", lower=2, upper=5, default_value=2) + interaction_only = CategoricalHyperparameter( + name="interaction_only", choices=["False", "True"], default_value="False") + include_bias = CategoricalHyperparameter( + name="include_bias", choices=["True", "False"], default_value="True") + + cs = ConfigurationSpace() + cs.add_hyperparameters([degree, interaction_only, include_bias]) + + return cs + + +# Include the custom preprocessor class to auto-sklearn. 
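+# add_preprocessor() registers the component class with auto-sklearn's
+# feature-preprocessing step, so the optimizer can select it alongside
+# the built-in preprocessors.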
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor)
+
+# Import toy data from sklearn and apply train_test_split.
+from sklearn.datasets import load_boston
+from sklearn.model_selection import train_test_split
+X, y = load_boston(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
+
+# Run auto-sklearn regression with the custom preprocessor.
+import autosklearn.regression
+reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30,
+                                                  per_run_time_limit=10,
+                                                  include_preprocessors=['custom_preprocessor']
+                                                  )
+reg.fit(X_train, y_train)
+y_pred = reg.predict(X_test)
+print(reg.show_models())
+print(reg.sprint_statistics())

From b912d67eea14622d3ecade5a4c07b8b0deb8305c Mon Sep 17 00:00:00 2001
From: Jinu
Date: Wed, 18 Jul 2018 21:34:32 +0200
Subject: [PATCH 02/45] Add regression example

---
 examples/example_extending_preprocessing.py | 146 ++++++++++++++++----
 1 file changed, 116 insertions(+), 30 deletions(-)

diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py
index c2300ddfb2..bb20970d39 100644
--- a/examples/example_extending_preprocessing.py
+++ b/examples/example_extending_preprocessing.py
@@ -1,24 +1,100 @@
 """
 ===============================================
-Extending Auto-sklearn with Custom Preprocessor
+Extending Auto-sklearn
 ===============================================
 
+In order to include new machine learning algorithms in auto-sklearn's
+optimization process, users can implement a wrapper class for the algorithm
+and register it with auto-sklearn. The example code below demonstrates how
+to implement a custom regressor and a custom preprocessor (Lasso and
+polynomial feature preprocessing from sklearn, respectively), register them
+with auto-sklearn, and use them for the given task.
+A detailed walkthrough of extending auto-sklearn can be found `here `_.
 
-explanation goes here.
 """
 
-import autosklearn.pipeline.components.feature_preprocessing
 from ConfigSpace.configuration_space import ConfigurationSpace
-from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
-    UniformIntegerHyperparameter
+from ConfigSpace.hyperparameters import *
+from ConfigSpace.conditions import EqualsCondition, InCondition
 
+from autosklearn.pipeline.components.base import \
+    AutoSklearnRegressionAlgorithm
 from autosklearn.pipeline.components.base import \
     AutoSklearnPreprocessingAlgorithm
 from autosklearn.pipeline.constants import *
+from autosklearn.util.common import check_for_bool
+
+
+# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn).
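+# The wrapper below follows the component contract: fit() and predict() wrap
+# the sklearn estimator, while the static methods get_properties() and
+# get_hyperparameter_search_space() tell auto-sklearn what the component can
+# handle and which hyperparameters to tune.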
+class MyRegressor(AutoSklearnRegressionAlgorithm):
+    def __init__(self, alpha, fit_intercept, tol, positive, random_state=None):
+        self.alpha = alpha
+        self.fit_intercept = fit_intercept
+        self.tol = tol
+        self.positive = positive
+
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        import sklearn.linear_model
+
+        self.alpha = float(self.alpha)
+        self.fit_intercept = check_for_bool(self.fit_intercept)
+        self.tol = float(self.tol)
+        self.positive = check_for_bool(self.positive)
+
+        self.estimator = sklearn.linear_model.\
+            Lasso(alpha=self.alpha,
+                  fit_intercept=self.fit_intercept,
+                  tol=self.tol,
+                  positive=self.positive,
+                  max_iter=300)
+
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError
+        return self.estimator.predict(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MyRegressor',
+                'name': 'MyRegressor',
+                'handles_regression': True,
+                'handles_classification': False,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (DENSE, UNSIGNED_DATA),
+                'output': (PREDICTIONS,)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        alpha = UniformFloatHyperparameter(
+            name="alpha", lower=0, upper=10, default_value=1)
+        fit_intercept = CategoricalHyperparameter(
+            name="fit_intercept", choices=[True, False], default_value=True)
+        tol = UniformFloatHyperparameter(
+            name="tol", lower=10 ** -5, upper=10 ** -1,
+            default_value=10 ** -3, log=True)
+        positive = CategoricalHyperparameter(
+            name="positive", choices=[True, False], default_value=False)
+
+        cs.add_hyperparameters([alpha, fit_intercept, tol, positive])
+
+        return cs
+
+
 # Custom wrapper class for using Sklearn's polynomial feature preprocessing
 # function.
-class custom_preprocessor(AutoSklearnPreprocessingAlgorithm):
+class MyPreprocessor(AutoSklearnPreprocessingAlgorithm):
     def __init__(self, degree, interaction_only, include_bias, random_state=None):
         # Define hyperparameters to be tuned here.
         self.degree = degree
@@ -48,8 +124,8 @@ def transform(self, X):
 
     @staticmethod
     def get_properties(dataset_properties=None):
-        return {'shortname': 'CustomPreprocessor',
-                'name': 'PolynomialFeatures',
+        return {'shortname': 'MyPreprocessor',
+                'name': 'MyPreprocessor',
                 'handles_regression': True,
                 'handles_classification': True,
                 'handles_multiclass': True,
@@ -60,8 +136,8 @@ def get_properties(dataset_properties=None):
 
     @staticmethod
     def get_hyperparameter_search_space(dataset_properties=None):
-        # For each hyperparameter, its type (categorical, integer, float, etc.)
-        # and its range and the default value must be specified here.
+        # For each hyperparameter, its type (categorical, integer, float, etc.),
+        # range and the default value must be specified here.
         degree = UniformIntegerHyperparameter(
             name="degree", lower=2, upper=5, default_value=2)
         interaction_only = CategoricalHyperparameter(
@@ -75,23 +151,33 @@ def get_hyperparameter_search_space(dataset_properties=None):
 
         return cs
 
 
-# Include the custom preprocessor class to auto-sklearn.
-autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor)
-
-# Import toy data from sklearn and apply train_test_split.
-from sklearn.datasets import load_boston -from sklearn.model_selection import train_test_split -X, y = load_boston(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - -# Run auto-sklearn regression with the custom preprocessor. -import autosklearn.regression -reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_preprocessors=['custom_preprocessor'] - ) -reg.fit(X_train, y_train) -y_pred = reg.predict(X_test) -print(reg.show_models()) -print(reg.sprint_statistics()) - +def main(): + # Include the custom preprocessor class to auto-sklearn. + import autosklearn.pipeline.components.regression + import autosklearn.pipeline.components.feature_preprocessing + autosklearn.pipeline.components.regression.add_regressor(MyRegressor) + autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) + + # Import toy data from sklearn and apply train_test_split. + from sklearn.datasets import load_boston + from sklearn.model_selection import train_test_split + X, y = load_boston(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + + # Run auto-sklearn regression with the custom preprocessor. + import autosklearn.regression + import autosklearn.metrics + reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=["MyRegressor"], + include_preprocessors=["MyPreprocessor"]) + reg.fit(X_train, y_train) + y_pred = reg.predict(X_test) + scorer = autosklearn.metrics.r2 + print("Test score: ", scorer(y_pred, y_test)) + print(reg.show_models()) + print(reg.sprint_statistics()) + + +if __name__ == "__main__": + main() From 8e6927e87eb61f136e91d89b2fc2415cfa7cb506 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 1 Aug 2018 09:57:01 +0200 Subject: [PATCH 03/45] CI: upper bound numpy version due to travis failures --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f299f314f9..8c11d3f318 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,10 +58,11 @@ before_install: install: # Install general requirements the way setup.py suggests - pip install pep8 codecov + # Temporarily pin the numpy version for travis-ci + - pip install numpy<1.15 - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - - pip install xmltodict requests - - pip install git+https://github.com/renatopp/liac-arff + - pip install xmltodict requests liac-arff - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config From e8130f7cf7fdc80688eea545a184dd7511eb7106 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 1 Aug 2018 10:32:18 +0200 Subject: [PATCH 04/45] CI: upper bound numpy version due to travis failures --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8c11d3f318..968d8e4ec1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -59,7 +59,7 @@ install: # Install general requirements the way setup.py suggests - pip install pep8 codecov # Temporarily pin the numpy version for travis-ci - - pip install numpy<1.15 + - pip install "numpy<1.15" - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - pip install xmltodict requests 
liac-arff From 683238254025b504b78d0aebc83038457574bcd1 Mon Sep 17 00:00:00 2001 From: Manuel Streuhofer Date: Wed, 1 Aug 2018 14:41:34 +0200 Subject: [PATCH 05/45] use tempfile.gettempdir() (#521) * use tempfile.gettempdir() * follow quality review coding standards --- autosklearn/util/backend.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py index 1f7ea70f7f..52ac678762 100644 --- a/autosklearn/util/backend.py +++ b/autosklearn/util/backend.py @@ -71,11 +71,17 @@ def _prepare_directories(self, temporary_directory, output_directory): self.__temporary_directory = temporary_directory \ if temporary_directory \ - else '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_tmp_%d_%d' % (pid, random_number) + ) self.__output_directory = output_directory \ if output_directory \ - else '/tmp/autosklearn_output_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_output_%d_%d' % (pid, random_number) + ) def create_directories(self): if self.shared_mode: @@ -460,4 +466,4 @@ def write_txt_file(self, filepath, data, name): self.logger.debug('Created %s file %s' % (name, filepath)) else: self.logger.debug('%s file already present %s' % - (name, filepath)) \ No newline at end of file + (name, filepath)) From df273da9d9428902b52f7f132362c19472b9e704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taneli=20Mielik=C3=A4inen?= Date: Mon, 13 Aug 2018 04:44:40 -0700 Subject: [PATCH 06/45] Remove a colon from README.md (#527) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d0de400572..dd53e9f140 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement Find the documentation [here](http://automl.github.io/auto-sklearn/) -Status for master branch: +Status for master branch [![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn) [![Code Health](https://landscape.io/github/automl/auto-sklearn/master/landscape.png)](https://landscape.io/github/automl/auto-sklearn/master) From 8c5e3c7c1b7a729661f680df7eaf20885be3e039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taneli=20Mielik=C3=A4inen?= Date: Tue, 14 Aug 2018 06:08:30 -0700 Subject: [PATCH 07/45] fixing warnings on non-tuple sequence for indexing (#526) --- .../pipeline/create_searchspace_util.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index 5ebb7d1246..2abfa5c172 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -117,8 +117,8 @@ def find_active_choices(matches, node, node_idx, dataset_properties, \ choices = [] for c_idx, component in enumerate(available_components): - slices = [slice(None) if idx != node_idx else slice(c_idx, c_idx+1) - for idx in range(len(matches.shape))] + slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1) + for idx in range(len(matches.shape))) if np.sum(matches[slices]) > 0: choices.append(component) @@ -200,10 +200,10 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for product in itertools.product(*num_node_choices): for node_idx, choice_idx in enumerate(product): node_idx += start_idx - slices_ = [ + slices_ = 
tuple( slice(None) if idx != node_idx else slice(choice_idx, choice_idx + 1) for idx in - range(len(matches.shape))] + range(len(matches.shape))) if np.sum(matches[slices_]) == 0: skip_array[product] = 1 @@ -212,13 +212,11 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, if skip_array[product]: continue - slices = [] - for idx in range(len(matches.shape)): - if idx not in indices: - slices.append(slice(None)) - else: - slices.append(slice(product[idx - start_idx], - product[idx - start_idx] + 1)) + slices = tuple( + slice(None) if idx not in indices else + slice(product[idx - start_idx], + product[idx - start_idx] + 1) for idx in + range(len(matches.shape))) # This prints the affected nodes # print [node_choice_names[i][product[i]] From c02dc8f1609af8043c1df7c6ec26e414cdfb8696 Mon Sep 17 00:00:00 2001 From: Katharina Eggensperger Date: Mon, 10 Sep 2018 17:28:52 +0200 Subject: [PATCH 08/45] fix string formatting (#540) --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 4ae310adf0..86c9c3202b 100644 --- a/setup.py +++ b/setup.py @@ -18,8 +18,8 @@ if sys.version_info < (3, 5): raise ValueError( - 'Unsupported python version %s found. Auto-sklearn requires Python ' - '3.5 or higher.' % sys.version_info + 'Unsupported Python version %d.%d.%d found. Auto-sklearn requires Python ' + '3.5 or higher.' % (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) ) From 9e91a3358bfb7ea36cf6a26310687e311e8ff698 Mon Sep 17 00:00:00 2001 From: Katharina Eggensperger Date: Mon, 10 Sep 2018 17:29:47 +0200 Subject: [PATCH 09/45] FIX removing models wrt wrong metric in ensemble (#522) --- autosklearn/ensemble_builder.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py index c3851cab13..6d96e30562 100644 --- a/autosklearn/ensemble_builder.py +++ b/autosklearn/ensemble_builder.py @@ -171,11 +171,8 @@ def main(self): while True: #maximal number of iterations - if ( - self.max_iterations is not None - and self.max_iterations > 0 - and iteration >= self.max_iterations - ): + if (self.max_iterations is not None + and 0 < self.max_iterations <= iteration): self.logger.info("Terminate ensemble building because of max iterations: %d of %d", self.max_iterations, iteration) @@ -300,7 +297,7 @@ def read_ensemble_preds(self): Y_TEST: None, # Lazy keys so far: # 0 - not loaded - # 1 - loaded and ind memory + # 1 - loaded and in memory # 2 - loaded but dropped again "loaded": 0 } @@ -372,14 +369,18 @@ def get_n_best_preds(self): ], key=lambda x: x[1], ))) - # remove all that are at most as good as random, cannot assume a - # minimum number here because all kinds of metric can be used - sorted_keys = filter(lambda x: x[1] > 0.001, sorted_keys) + # remove all that are at most as good as random + # note: dummy model must have run_id=1 (there is not run_id=0) + dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0] + self.logger.debug("Use %f as dummy score" % + dummy_score[1]) + sorted_keys = filter(lambda x: x[1] > dummy_score[1], sorted_keys) # remove Dummy Classifier sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys)) if not sorted_keys: - # no model left; try to use dummy classifier (num_run==0) - self.logger.warning("No models better than random - using Dummy Classifier!") + # no model left; try to use dummy score (num_run==0) + self.logger.warning("No models better than random - " + "using Dummy 
Score!") sorted_keys = [ (k, v["ens_score"], v["num_run"]) for k, v in self.read_preds.items() if v["seed"] == self.seed and v["num_run"] == 1 From 8eaa36cece9309b13d8a7d17de7bc9b7c5b3dde9 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 03:16:32 +0200 Subject: [PATCH 10/45] Add examples for extending auto-sklearn. --- examples/example_extending_classification.py | 124 +++++++++++++ examples/example_extending_preprocessing.py | 183 ------------------- examples/example_extending_preprocessor.py | 109 +++++++++++ examples/example_extending_regression.py | 109 +++++++++++ 4 files changed, 342 insertions(+), 183 deletions(-) create mode 100644 examples/example_extending_classification.py delete mode 100644 examples/example_extending_preprocessing.py create mode 100644 examples/example_extending_preprocessor.py create mode 100644 examples/example_extending_regression.py diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py new file mode 100644 index 0000000000..11b82503d0 --- /dev/null +++ b/examples/example_extending_classification.py @@ -0,0 +1,124 @@ +""" +==================================================================== +Extending Auto-Sklearn with Classification Component +==================================================================== + +The following example demonstrates how to create a new classification +component for using in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter, UniformFloatHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.pipeline.components.classification +import autosklearn.pipeline.components.base +from autosklearn.pipeline.constants import * + + +# Create MLP classifier component for auto-sklearn. 
+class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm):
+    def __init__(self,
+                 hidden_layer_depth,
+                 num_nodes_per_layer,
+                 activation,
+                 alpha,
+                 random_state=None,
+                 ):
+        self.hidden_layer_depth = hidden_layer_depth
+        self.num_nodes_per_layer = num_nodes_per_layer
+        self.activation = activation
+        self.alpha = alpha
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
+        self.hidden_layer_depth = int(self.hidden_layer_depth)
+        self.alpha = float(self.alpha)
+
+        from sklearn.neural_network import MLPClassifier
+        hidden_layer_sizes = tuple(self.num_nodes_per_layer \
+                                   for i in range(self.hidden_layer_depth))
+
+        self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
+                                       alpha=self.alpha,
+                                       activation=self.activation,
+                                       random_state=self.random_state,
+                                       )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict(X)
+
+    def predict_proba(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict_proba(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MLP Classifier',
+                'name': 'MLP Classifier',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': False,
+                'is_deterministic': False,
+                # Both input and output must be tuple(iterable)
+                'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
+                'output': [PREDICTIONS]
+                }
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        hidden_layer_depth = UniformIntegerHyperparameter(
+            name="hidden_layer_depth", lower=1, upper=3, default_value=1
+        )
+        num_nodes_per_layer = UniformIntegerHyperparameter(
+            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
+        )
+        activation = CategoricalHyperparameter(
+            name="activation", choices=['identity', 'logistic', 'tanh', 'relu'],
+            default_value='relu'
+        )
+        alpha = UniformFloatHyperparameter(
+            name="alpha", lower=0.0001, upper=1.0, default_value=0.0001
+        )
+        cs.add_hyperparameters([hidden_layer_depth,
+                                num_nodes_per_layer,
+                                activation,
+                                alpha,
+                                ])
+        return cs
+
+
+# Add MLP classifier component to auto-sklearn.
+autosklearn.pipeline.components.classification.add_classifier(MLPClassifier)
+cs = MLPClassifier.get_hyperparameter_search_space()
+print(cs)
+
+# Generate data.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Fit MLP classifier to the data.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    per_run_time_limit=10,
+    include_estimators=['MLPClassifier'],
+)
+clf.fit(X_train, y_train)
+
+# Print test accuracy and statistics.
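+# show_models() describes the members of the final ensemble and their
+# weights; sprint_statistics() summarizes the search itself (number of
+# successful runs, best validation score, timeouts, etc.).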
+y_pred = clf.predict(X_test) +print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test)) +print(clf.sprint_statistics()) +print(clf.show_models()) \ No newline at end of file diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py deleted file mode 100644 index bb20970d39..0000000000 --- a/examples/example_extending_preprocessing.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -=============================================== -Extending Auto-sklearn -=============================================== - -In order to include new machine learning algorithms in auto-sklearn's -optimization process, users can implement a wrapper class for the algorithm -and register it to auto-sklearn. The example code below demonstrates how -to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), -register it to auto-sklearn, and use them for the given task. -A detailed walkthrough of extending auto-sklearn can be found `here `_. - -""" - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import * -from ConfigSpace.conditions import EqualsCondition, InCondition - -from autosklearn.pipeline.components.base import \ - AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * -from autosklearn.util.common import check_for_bool - - -# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). -class MyRegressor(AutoSklearnRegressionAlgorithm): - def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): - self.alpha = alpha - self.fit_intercept = fit_intercept - #self.normalize = normalize - self.tol = tol - self.positive = positive - - self.random_state = random_state - self.estimator = None - - def fit(self, X, Y): - import sklearn.linear_model - - self.alpha = float(self.alpha) - self.fit_intercept = check_for_bool(self.fit_intercept) - self.normalize = check_for_bool(self.normalize) - self.tol = float(self.tol) - self.positive = check_for_bool(self.positive) - - self.estimator = sklearn.linear_model.\ - Lasso(alpha=self.alpha, - fit_intercept=self.fit_intercept, - tol=self.tol, - positive=self.positive, - n_iter=300) - - self.estimator.fit(X, Y) - return self - - def predict(self, X): - if self.estimator is None: - raise NotImplementedError - return self.estimator.predict(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyRegressor', - 'name': 'MyRegressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - cs = ConfigurationSpace() - alpha = UniformFloatHyperparameter( - name="alpha", lower=0, upper=10, default_value=1) - fit_intercept = CategoricalHyperparameter( - name="fit_intercept", choices=[True, False], default_value=True) - normalize = CategoricalHyperparameter( - name="normalize", choices=[True, False], default_value=False) - tol = UniformFloatHyperparameter( - name="tol", lower=10 ** -5, upper=10 ** -1, - default_value=10 ** -3, log=True) - positive = CategoricalHyperparameter( - name="positive", choices=[True, False], default_value=False) - - cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) - - return cs - - -# Custom 
wrapper class for using Sklearn's polynomial feature preprocessing -# function. -class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): - def __init__(self, degree, interaction_only, include_bias, random_state=None): - # Define hyperparameters to be tuned here. - self.degree = degree - self.interaction_only = interaction_only - self.include_bias = include_bias - self.random_state = random_state - self.preprocessor = None - - def fit(self, X, Y): - # wrapper function for the fit method of Sklearn's polynomial - # preprocessing function. - import sklearn.preprocessing - self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, - interaction_only=self.interaction_only, - include_bias=self.include_bias) - self.preprocessor.fit(X, Y) - return self - - def transform(self, X): - # wrapper function for the transform method of sklearn's polynomial - # preprocessing function. It is also possible to implement - # a preprocessing algorithm directly in this function, provided that - # it behaves in the way compatible with that from sklearn. - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyPreprocessor', - 'name': 'MyPreprocessor', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.), - # range and the default value must be specified here. - degree = UniformIntegerHyperparameter( - name="degree", lower=2, upper=5, default_value=2) - interaction_only = CategoricalHyperparameter( - name="interaction_only", choices=["False", "True"], default_value="False") - include_bias = CategoricalHyperparameter( - name="include_bias", choices=["True", "False"], default_value="True") - - cs = ConfigurationSpace() - cs.add_hyperparameters([degree, interaction_only, include_bias]) - - return cs - - -def main(): - # Include the custom preprocessor class to auto-sklearn. - import autosklearn.pipeline.components.regression - import autosklearn.pipeline.components.feature_preprocessing - autosklearn.pipeline.components.regression.add_regressor(MyRegressor) - autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) - - # Import toy data from sklearn and apply train_test_split. - from sklearn.datasets import load_boston - from sklearn.model_selection import train_test_split - X, y = load_boston(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - - # Run auto-sklearn regression with the custom preprocessor. 
- import autosklearn.regression - import autosklearn.metrics - reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_estimators=["MyRegressor"], - include_preprocessors=["MyPreprocessor"]) - reg.fit(X_train, y_train) - y_pred = reg.predict(X_test) - scorer = autosklearn.metrics.r2 - print("Test score: ", scorer(y_pred, y_test)) - print(reg.show_models()) - print(reg.sprint_statistics()) - - -if __name__ == "__main__": - main() diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py new file mode 100644 index 0000000000..5bdc024bfa --- /dev/null +++ b/examples/example_extending_preprocessor.py @@ -0,0 +1,109 @@ +""" +==================================================================== +Extending Auto-Sklearn with Preprocessor Component +==================================================================== + +The following example demonstrates how to create a wrapper around the linear +discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor +in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + UniformIntegerHyperparameter, CategoricalHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.metrics +import autosklearn.pipeline.components.feature_preprocessing +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + + +# Create LDA component for auto-sklearn. +class LDA(AutoSklearnPreprocessingAlgorithm): + def __init__(self, shrinkage, solver, n_components, tol, random_state=None): + self.solver = solver + self.shrinkage = shrinkage + self.n_components = n_components + self.tol = tol + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y=None): + self.shrinkage = float(self.shrinkage) + self.n_components = int(self.n_components) + self.tol = float(self.tol) + + import sklearn.discriminant_analysis + self.preprocessor = \ + sklearn.discriminant_analysis.LinearDiscriminantAnalysis( + shrinkage=self.shrinkage, + solver=self.solver, + n_components=self.n_components, + tol=self.tol, + ) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'LDA', + 'name': 'Linear Discriminant Analysis', + 'handles_regression': False, + 'handles_classification': True, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + solver = CategoricalHyperparameter( + name="solver", choices=['svd','lsqr','eigen'], default_value='svd' + ) + shrinkage = UniformFloatHyperparameter( + name="shrinkage", lower=0.0, upper=1.0, default_value=0.5 + ) + n_components = UniformIntegerHyperparameter( + name="n_components", lower=1, upper=29, default_value=10 + ) + tol = UniformFloatHyperparameter( + name="tol", lower=0.0001, upper=1, default_value=0.0001 + ) + cs.add_hyperparameters([solver, shrinkage, n_components, tol]) + return cs + + +# Add LDA component to auto-sklearn. 
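+# Caveat: scikit-learn's LDA accepts a shrinkage value only with the 'lsqr'
+# and 'eigen' solvers, so configurations that combine shrinkage with
+# solver='svd' will fail at fit time; in practice the two hyperparameters
+# should be coupled, e.g. with an InCondition or a forbidden clause from
+# ConfigSpace.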
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)
+
+# Create dataset.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Configuration space.
+cs = LDA.get_hyperparameter_search_space()
+print(cs)
+
+# Fit the model using LDA as preprocessor.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    include_preprocessors=['LDA'],
+)
+clf.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = clf.predict(X_test)
+print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
+print(clf.sprint_statistics())
+print(clf.show_models())
\ No newline at end of file
diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py
new file mode 100644
index 0000000000..aaea3d13b5
--- /dev/null
+++ b/examples/example_extending_regression.py
@@ -0,0 +1,109 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Regression Component
+====================================================================
+
+The following example demonstrates how to create a new regression
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
+    UniformIntegerHyperparameter, CategoricalHyperparameter
+
+import sklearn.metrics
+import autosklearn.regression
+import autosklearn.pipeline.components.regression
+from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
+from autosklearn.pipeline.constants import *
+
+
+# Implement kernel ridge regression component for auto-sklearn.
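+# Note: in scikit-learn's KernelRidge, gamma is used only by the rbf,
+# polynomial and sigmoid kernels and degree only by the polynomial kernel;
+# both are ignored when kernel='linear'.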
+class KernelRidgeRegression(AutoSklearnRegressionAlgorithm): + def __init__(self, alpha, kernel, gamma, degree, random_state=None): + self.alpha = alpha + self.kernel = kernel + self.gamma = gamma + self.degree = degree + self.random_state = random_state + self.estimator = None + + def fit(self, X, Y): + self.alpha = float(self.alpha) + self.gamma = float(self.gamma) + self.degree = int(self.degree) + + import sklearn.kernel_ridge + self.estimator = sklearn.kernel_ridge.KernelRidge(alpha=self.alpha, + kernel=self.kernel, + gamma=self.gamma, + degree=self.degree, + ) + self.estimator.fit(X, Y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError + return self.estimator.predict(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'KRR', + 'name': 'Kernel Ridge Regression', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (PREDICTIONS,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + alpha = UniformFloatHyperparameter( + name='alpha', lower=10 ** -5, upper=1, log=True, default_value=0.1) + kernel = CategoricalHyperparameter( + name='kernel', + choices=['linear', + 'rbf', + 'sigmoid', + 'polynomial', + ], + default_value='linear' + ) + gamma = UniformFloatHyperparameter( + name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True + ) + degree = UniformIntegerHyperparameter( + name='degree', lower=2, upper=5, default_value=3 + ) + cs.add_hyperparameters([alpha, kernel, gamma, degree]) + return cs + + +# Add KRR component to auto-sklearn. +autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression) +cs = KernelRidgeRegression.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_diabetes +from sklearn.model_selection import train_test_split +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit the model using KRR. +reg = autosklearn.regression.AutoSklearnRegressor( + time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=['KernelRidgeRegression'], +) +reg.fit(X_train, y_train) + +# Print prediction score and statistics. +y_pred = reg.predict(X_test) +print("r2 score: ", sklearn.metrics.r2_score(y_pred, y_test)) +print(reg.sprint_statistics()) +print(reg.show_models()) \ No newline at end of file From e1e8c2575d38fc39b8b158342c7fdbf71ec78607 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 14:00:45 +0200 Subject: [PATCH 11/45] . 
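
Tidy the imports of the extending examples: import the component base
classes directly and replace the star imports of the pipeline constants
with the names that are actually used.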
--- examples/example_extending_classification.py | 7 ++++--- examples/example_extending_preprocessor.py | 6 ++++-- examples/example_extending_regression.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index 11b82503d0..e7f8334901 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -14,12 +14,13 @@ import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.classification -import autosklearn.pipeline.components.base -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA # Create MLP classifier component for auto-sklearn. -class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm): +class MLPClassifier(AutoSklearnClassificationAlgorithm): def __init__(self, hidden_layer_depth, num_nodes_per_layer, diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 5bdc024bfa..815d698ecd 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -16,8 +16,10 @@ import autosklearn.classification import autosklearn.metrics import autosklearn.pipeline.components.feature_preprocessing -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ + UNSIGNED_DATA # Create LDA component for auto-sklearn. diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index aaea3d13b5..59f2af58eb 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -15,7 +15,8 @@ import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.constants import SPARSE, DENSE, \ + SIGNED_DATA, UNSIGNED_DATA # Implement kernel ridge regression component for auto-sklearn. From c55cbacc892c9a005f440d0ab9ccf19c48d30bb3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 21:36:12 +0200 Subject: [PATCH 12/45] Change datasets used in examples from digits to breast_cancer. --- examples/example_crossvalidation.py | 4 ++-- examples/example_eips.py | 4 ++-- examples/example_holdout.py | 4 ++-- examples/example_parallel.py | 4 ++-- examples/example_random_search.py | 6 +++--- examples/example_sequential.py | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/example_crossvalidation.py b/examples/example_crossvalidation.py index 85530b591b..52e3050f7b 100644 --- a/examples/example_crossvalidation.py +++ b/examples/example_crossvalidation.py @@ -21,7 +21,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -37,7 +37,7 @@ def main(): # fit() changes the data in place, but refit needs the original data. We # therefore copy the data. 
In practice, one should reload the data - automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits') + automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer') # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. diff --git a/examples/example_eips.py b/examples/example_eips.py index eef3c6cf11..db2a434092 100644 --- a/examples/example_eips.py +++ b/examples/example_eips.py @@ -69,7 +69,7 @@ def get_eips_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -81,7 +81,7 @@ def main(): get_smac_object_callback=get_eips_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn via ROAR. print(automl.show_models()) diff --git a/examples/example_holdout.py b/examples/example_holdout.py index fe1ff1c7a7..19a438bd87 100644 --- a/examples/example_holdout.py +++ b/examples/example_holdout.py @@ -18,7 +18,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -34,7 +34,7 @@ def main(): resampling_strategy='holdout', resampling_strategy_arguments={'train_size': 0.67} ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn. 
print(automl.show_models()) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index f5572ab97d..bcb45206c2 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -78,14 +78,14 @@ def spawn_classifier(seed, dataset_name): def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits')) + p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) p.start() processes.append(p) for p in processes: diff --git a/examples/example_random_search.py b/examples/example_random_search.py index 9d04a39974..2a64b36efb 100644 --- a/examples/example_random_search.py +++ b/examples/example_random_search.py @@ -68,7 +68,7 @@ def get_random_search_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -79,7 +79,7 @@ def main(): get_smac_object_callback=get_roar_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for ROAR.') @@ -99,7 +99,7 @@ def main(): get_smac_object_callback=get_random_search_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for random search.') diff --git a/examples/example_sequential.py b/examples/example_sequential.py index 06820e7ebe..694ea81404 100644 --- a/examples/example_sequential.py +++ b/examples/example_sequential.py @@ -17,7 +17,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -32,7 +32,7 @@ def main(): ensemble_size=0, delete_tmp_folder_after_terminate=False, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() automl.fit_ensemble(y_train, ensemble_size=50) From ef12841bab5256e3c5abadbcafea32764c7ca1fc Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 13:50:36 +0200 Subject: [PATCH 13/45] First commit --- .travis.yml | 6 ++ ci_scripts/flake8_diff.sh | 144 ++++++++++++++++++++++++++++++++++++++ ci_scripts/test.sh | 43 +++++++----- 3 files changed, 177 insertions(+), 16 deletions(-) create mode 100644 ci_scripts/flake8_diff.sh mode change 100644 => 100755 ci_scripts/test.sh diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..bf8727adf4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,10 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" 
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Temporarily disabling OSX builds because thy take too long # Set language to generic to not break travis-ci @@ -66,6 +70,8 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config + # Install flake 8. + - pip install flake8 # Debug output to know all exact package versions! - pip freeze - python setup.py install diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh new file mode 100644 index 0000000000..f0b9b083e1 --- /dev/null +++ b/ci_scripts/flake8_diff.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# automl/auto-sklearn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=automl/auto-sklearn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. 
+ LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We ignore files from sklearn/externals. Unfortunately there is no +# way to do it with flake8 directly (the --exclude does not seem to +# work with --diff). We could use the exclude magic in the git pathspec +# ':!sklearn/externals' but it is only available on git 1.9 and Travis +# uses git 1.8. 
+# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh old mode 100644 new mode 100755 index 0026279285..1e356bd125 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,22 +1,33 @@ set -e -# Get into a temp directory to run test from the installed scikit learn and -# check if we do not leave artifacts -mkdir -p $TEST_DIR +run_tests() { + # Get into a temp directory to run test from the installed scikit learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR -cwd=`pwd` -examples_dir=$cwd/examples -test_dir=$cwd/test/ + cwd=`pwd` + examples_dir=$cwd/examples + test_dir=$cwd/../test/ -cd $TEST_DIR + cd $TEST_DIR + if [[ "$COVERAGE" == "true" ]]; then + nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir + elif [[ "$EXAMPLES" == "true" ]]; then + for example in `find $examples_dir -name '*.py'` + do + python $example + done + else + nosetests --no-path-adjustment -sv $test_dir + fi +} -if [[ "$COVERAGE" == "true" ]]; then - nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir -elif [[ "$EXAMPLES" == "true" ]]; then - for example in `find $examples_dir -name '*.py'` - do - python $example - done -else - nosetests --no-path-adjustment -sv $test_dir +if [[ "$RUN_FLAKE8" ]]; then + source ci_scripts/flake8_diff.sh fi + +if [[ "$SKIP_TESTS" != "true" ]]; then + run_tests +fi + + From 242eebf195e51a631b7b5e06674243be25b8e702 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:00:25 +0200 Subject: [PATCH 14/45] Fixing codacy errors --- examples/example_extending_classification.py | 3 ++- examples/example_extending_preprocessor.py | 2 +- examples/example_extending_regression.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index e7f8334901..d4de7d79d3 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -16,7 +16,8 @@ import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.base \ import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ + PREDICTIONS # Create MLP classifier component for auto-sklearn. 
diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 815d698ecd..33a51cc953 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -18,7 +18,7 @@ import autosklearn.pipeline.components.feature_preprocessing from autosklearn.pipeline.components.base \ import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ UNSIGNED_DATA diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index 59f2af58eb..e3dbe18428 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -16,7 +16,7 @@ import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import SPARSE, DENSE, \ - SIGNED_DATA, UNSIGNED_DATA + SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS # Implement kernel ridge regression component for auto-sklearn. From 64756233ceaa480179e216abec170bd3d4b1c706 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:07:01 +0200 Subject: [PATCH 15/45] Fixing bug --- ci_scripts/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 1e356bd125..3d9551375e 100755 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -7,7 +7,7 @@ run_tests() { cwd=`pwd` examples_dir=$cwd/examples - test_dir=$cwd/../test/ + test_dir=$cwd/test/ cd $TEST_DIR if [[ "$COVERAGE" == "true" ]]; then From 5cab17879a1b7c314d49e6868a6ed4103a3e3f21 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 14:41:30 +0200 Subject: [PATCH 16/45] [Debug] try different numpy version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b65911d6cb..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 86c9c3202b..bcdaba8e4f 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "nose", "six", "Cython", - "numpy>=1.9.0", + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From a062ba028b205c5a24341378c45baaac149f5b87 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 19:31:18 +0200 Subject: [PATCH 17/45] [Debug] Try with latest numpy version --- requirements.txt | 2 +- setup.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 71c47ceb66..935d91f3ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.14.5 +numpy==1.15.2 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index bcdaba8e4f..0daf591d29 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ "nose", "six", "Cython", - "numpy==1.14.5", + # Debugging. 
Try with latest version of numpy + "numpy==1.15.2", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 94f9d2ca55a7db3bb9080ba18cf0a3e934bc9805 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 19:52:50 +0200 Subject: [PATCH 18/45] Set numpy version to 1.14.5 --- requirements.txt | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 935d91f3ae..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.15.2 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 0daf591d29..ef89ad314c 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ "nose", "six", "Cython", - # Debugging. Try with latest version of numpy - "numpy==1.15.2", + # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From b331251c2cd93d6d8149eeaa88df6475ee76aebd Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 13:50:36 +0200 Subject: [PATCH 19/45] First commit --- .travis.yml | 6 ++ ci_scripts/flake8_diff.sh | 144 ++++++++++++++++++++++++++++++++++++++ ci_scripts/test.sh | 43 +++++++----- 3 files changed, 177 insertions(+), 16 deletions(-) create mode 100644 ci_scripts/flake8_diff.sh mode change 100644 => 100755 ci_scripts/test.sh diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..bf8727adf4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,10 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + # Temporarily disabling OSX builds because thy take too long # Set language to generic to not break travis-ci @@ -66,6 +70,8 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config + # Install flake 8. + - pip install flake8 # Debug output to know all exact package versions! - pip freeze - python setup.py install diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh new file mode 100644 index 0000000000..f0b9b083e1 --- /dev/null +++ b/ci_scripts/flake8_diff.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# automl/auto-sklearn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=automl/auto-sklearn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. 
For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth (50 at the time of +# writing). This may not be enough to find the common ancestor with +# $REMOTE/master so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into master. This way line numbers + # reported by Travis will match with the local code. + LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_MASTER_REF="$REMOTE/master" + # Make sure that $REMOTE_MASTER_REF is a valid reference + echo -e "\nFetching $REMOTE_MASTER_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We ignore files from sklearn/externals. 
Unfortunately there is no +# way to do it with flake8 directly (the --exclude does not seem to +# work with --diff). We could use the exclude magic in the git pathspec +# ':!sklearn/externals' but it is only available on git 1.9 and Travis +# uses git 1.8. +# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == "no_match" ]]; then + echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh old mode 100644 new mode 100755 index 0026279285..1e356bd125 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,22 +1,33 @@ set -e -# Get into a temp directory to run test from the installed scikit learn and -# check if we do not leave artifacts -mkdir -p $TEST_DIR +run_tests() { + # Get into a temp directory to run test from the installed scikit learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR -cwd=`pwd` -examples_dir=$cwd/examples -test_dir=$cwd/test/ + cwd=`pwd` + examples_dir=$cwd/examples + test_dir=$cwd/../test/ -cd $TEST_DIR + cd $TEST_DIR + if [[ "$COVERAGE" == "true" ]]; then + nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir + elif [[ "$EXAMPLES" == "true" ]]; then + for example in `find $examples_dir -name '*.py'` + do + python $example + done + else + nosetests --no-path-adjustment -sv $test_dir + fi +} -if [[ "$COVERAGE" == "true" ]]; then - nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir -elif [[ "$EXAMPLES" == "true" ]]; then - for example in `find $examples_dir -name '*.py'` - do - python $example - done -else - nosetests --no-path-adjustment -sv $test_dir +if [[ "$RUN_FLAKE8" ]]; then + source ci_scripts/flake8_diff.sh fi + +if [[ "$SKIP_TESTS" != "true" ]]; then + run_tests +fi + + From 34569203f734b00c67417a8ce4e0778ac44c4d50 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:07:01 +0200 Subject: [PATCH 20/45] Fixing bug --- ci_scripts/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 1e356bd125..3d9551375e 100755 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -7,7 +7,7 @@ run_tests() { cwd=`pwd` examples_dir=$cwd/examples - test_dir=$cwd/../test/ + test_dir=$cwd/test/ cd $TEST_DIR if [[ "$COVERAGE" == "true" ]]; then From 6b947c5dc95cceaf5d909966ba1ceeb8f010e027 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 18 Oct 2018 20:47:38 +0200 Subject: [PATCH 21/45] Modify flake8_diff.sh --- ci_scripts/flake8_diff.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh index f0b9b083e1..e40c2653bb 100644 --- a/ci_scripts/flake8_diff.sh +++ b/ci_scripts/flake8_diff.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is mostly taken from 
https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/flake8_diff.sh + # This script is used in Travis to check that PRs do not add obvious # flake8 violations. It relies on two things: # - find common ancestor between branch and @@ -36,8 +38,8 @@ echo "Remotes:" echo '--------------------------------------------------------------------------------' git remote --verbose -# Travis does the git clone with a limited depth (50 at the time of -# writing). This may not be enough to find the common ancestor with +# Travis does the git clone with a limited depth. +# This may not be enough to find the common ancestor with # $REMOTE/master so we unshallow the git checkout if [[ -a .git/shallow ]]; then echo -e '\nTrying to unshallow the repo:' @@ -113,11 +115,6 @@ echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" echo '--------------------------------------------------------------------------------' -# We ignore files from sklearn/externals. Unfortunately there is no -# way to do it with flake8 directly (the --exclude does not seem to -# work with --diff). We could use the exclude magic in the git pathspec -# ':!sklearn/externals' but it is only available on git 1.9 and Travis -# uses git 1.8. # We need the following command to exit with 0 hence the echo in case # there is no match MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" @@ -134,7 +131,7 @@ check_files() { } if [[ "$MODIFIED_FILES" == "no_match" ]]; then - echo "No file outside sklearn/externals and doc/sphinxext/sphinx_gallery has been modified" + echo "No file has been modified" else check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" From c6229e5f3f42c5e0fb4ffd16477ec8f1edc37270 Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 17 Jul 2018 21:56:01 +0200 Subject: [PATCH 22/45] Extending Autosklearn. First commit. --- examples/example_extending_preprocessing.py | 97 +++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 examples/example_extending_preprocessing.py diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py new file mode 100644 index 0000000000..c2300ddfb2 --- /dev/null +++ b/examples/example_extending_preprocessing.py @@ -0,0 +1,97 @@ +""" +=============================================== +Extending Auto-sklearn with Custom Preprocessor +=============================================== + + +explanation goes here. +""" + +import autosklearn.pipeline.components.feature_preprocessing +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter + +from autosklearn.pipeline.components.base import \ + AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + +# Custom wrapper class for using Sklearn's polynomial feature preprocessing +# function. +class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): + def __init__(self, degree, interaction_only, include_bias, random_state=None): + # Define hyperparameters to be tuned here. + self.degree = degree + self.interaction_only = interaction_only + self.include_bias = include_bias + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, Y): + # wrapper function for the fit method of Sklearn's polynomial + # preprocessing function. 
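+        # Importing sklearn inside fit() mirrors auto-sklearn's built-in
+        # components: heavyweight dependencies are only loaded once the
+        # component is actually selected and trained.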
+ import sklearn.preprocessing + self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, + interaction_only=self.interaction_only, + include_bias=self.include_bias) + self.preprocessor.fit(X, Y) + return self + + def transform(self, X): + # wrapper function for the transform method of sklearn's polynomial + # preprocessing function. It is also possible to implement + # a preprocessing algorithm directly in this function, provided that + # it behaves in the way compatible with that from sklearn. + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'CustomPreprocessor', + 'name': 'PolynomialFeatures', + 'handles_regression': True, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': True, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (INPUT,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + # For each hyperparameter, its type (categorical, integer, float, etc.) + # and its range and the default value must be specified here. + degree = UniformIntegerHyperparameter( + name="degree", lower=2, upper=5, default_value=2) + interaction_only = CategoricalHyperparameter( + name="interaction_only", choices=["False", "True"], default_value="False") + include_bias = CategoricalHyperparameter( + name="include_bias", choices=["True", "False"], default_value="True") + + cs = ConfigurationSpace() + cs.add_hyperparameters([degree, interaction_only, include_bias]) + + return cs + + +# Include the custom preprocessor class to auto-sklearn. +autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor) + +# Import toy data from sklearn and apply train_test_split. +from sklearn.datasets import load_boston +from sklearn.model_selection import train_test_split +X, y = load_boston(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + +# Run auto-sklearn regression with the custom preprocessor. +import autosklearn.regression +reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_preprocessors=['custom_preprocessor'] + ) +reg.fit(X_train, y_train) +y_pred = reg.predict(X_test) +print(reg.show_models()) +print(reg.sprint_statistics()) + From 2a98d0cf3bc1fbd2e73c01c9428ed15939c165d3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Wed, 18 Jul 2018 21:34:32 +0200 Subject: [PATCH 23/45] Add regression example --- examples/example_extending_preprocessing.py | 146 ++++++++++++++++---- 1 file changed, 116 insertions(+), 30 deletions(-) diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py index c2300ddfb2..bb20970d39 100644 --- a/examples/example_extending_preprocessing.py +++ b/examples/example_extending_preprocessing.py @@ -1,24 +1,100 @@ """ =============================================== -Extending Auto-sklearn with Custom Preprocessor +Extending Auto-sklearn =============================================== +In order to include new machine learning algorithms in auto-sklearn's +optimization process, users can implement a wrapper class for the algorithm +and register it to auto-sklearn. The example code below demonstrates how +to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), +register it to auto-sklearn, and use them for the given task. 
+A detailed walkthrough of extending auto-sklearn can be found `here `_. -explanation goes here. """ -import autosklearn.pipeline.components.feature_preprocessing from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ - UniformIntegerHyperparameter +from ConfigSpace.hyperparameters import * +from ConfigSpace.conditions import EqualsCondition, InCondition +from autosklearn.pipeline.components.base import \ + AutoSklearnRegressionAlgorithm from autosklearn.pipeline.components.base import \ AutoSklearnPreprocessingAlgorithm from autosklearn.pipeline.constants import * +from autosklearn.util.common import check_for_bool + + +# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). +class MyRegressor(AutoSklearnRegressionAlgorithm): + def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): + self.alpha = alpha + self.fit_intercept = fit_intercept + #self.normalize = normalize + self.tol = tol + self.positive = positive + + self.random_state = random_state + self.estimator = None + + def fit(self, X, Y): + import sklearn.linear_model + + self.alpha = float(self.alpha) + self.fit_intercept = check_for_bool(self.fit_intercept) + self.normalize = check_for_bool(self.normalize) + self.tol = float(self.tol) + self.positive = check_for_bool(self.positive) + + self.estimator = sklearn.linear_model.\ + Lasso(alpha=self.alpha, + fit_intercept=self.fit_intercept, + tol=self.tol, + positive=self.positive, + n_iter=300) + + self.estimator.fit(X, Y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError + return self.estimator.predict(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'MyRegressor', + 'name': 'MyRegressor', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA), + 'output': (PREDICTIONS,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + alpha = UniformFloatHyperparameter( + name="alpha", lower=0, upper=10, default_value=1) + fit_intercept = CategoricalHyperparameter( + name="fit_intercept", choices=[True, False], default_value=True) + normalize = CategoricalHyperparameter( + name="normalize", choices=[True, False], default_value=False) + tol = UniformFloatHyperparameter( + name="tol", lower=10 ** -5, upper=10 ** -1, + default_value=10 ** -3, log=True) + positive = CategoricalHyperparameter( + name="positive", choices=[True, False], default_value=False) + + cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) + + return cs + # Custom wrapper class for using Sklearn's polynomial feature preprocessing # function. -class custom_preprocessor(AutoSklearnPreprocessingAlgorithm): +class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): def __init__(self, degree, interaction_only, include_bias, random_state=None): # Define hyperparameters to be tuned here. 
self.degree = degree @@ -48,8 +124,8 @@ def transform(self, X): @staticmethod def get_properties(dataset_properties=None): - return {'shortname': 'CustomPreprocessor', - 'name': 'PolynomialFeatures', + return {'shortname': 'MyPreprocessor', + 'name': 'MyPreprocessor', 'handles_regression': True, 'handles_classification': True, 'handles_multiclass': True, @@ -60,8 +136,8 @@ def get_properties(dataset_properties=None): @staticmethod def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.) - # and its range and the default value must be specified here. + # For each hyperparameter, its type (categorical, integer, float, etc.), + # range and the default value must be specified here. degree = UniformIntegerHyperparameter( name="degree", lower=2, upper=5, default_value=2) interaction_only = CategoricalHyperparameter( @@ -75,23 +151,33 @@ def get_hyperparameter_search_space(dataset_properties=None): return cs -# Include the custom preprocessor class to auto-sklearn. -autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(custom_preprocessor) - -# Import toy data from sklearn and apply train_test_split. -from sklearn.datasets import load_boston -from sklearn.model_selection import train_test_split -X, y = load_boston(return_X_y=True) -X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - -# Run auto-sklearn regression with the custom preprocessor. -import autosklearn.regression -reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_preprocessors=['custom_preprocessor'] - ) -reg.fit(X_train, y_train) -y_pred = reg.predict(X_test) -print(reg.show_models()) -print(reg.sprint_statistics()) - +def main(): + # Include the custom preprocessor class to auto-sklearn. + import autosklearn.pipeline.components.regression + import autosklearn.pipeline.components.feature_preprocessing + autosklearn.pipeline.components.regression.add_regressor(MyRegressor) + autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) + + # Import toy data from sklearn and apply train_test_split. + from sklearn.datasets import load_boston + from sklearn.model_selection import train_test_split + X, y = load_boston(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) + + # Run auto-sklearn regression with the custom preprocessor. + import autosklearn.regression + import autosklearn.metrics + reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=["MyRegressor"], + include_preprocessors=["MyPreprocessor"]) + reg.fit(X_train, y_train) + y_pred = reg.predict(X_test) + scorer = autosklearn.metrics.r2 + print("Test score: ", scorer(y_pred, y_test)) + print(reg.show_models()) + print(reg.sprint_statistics()) + + +if __name__ == "__main__": + main() From a6c53b7e86c6854e49cc3246e3c7ff3b64f007ad Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 2 Oct 2018 03:16:32 +0200 Subject: [PATCH 24/45] Add examples for extending auto-sklearn. 
---
 examples/example_extending_classification.py | 124 +++++++++++++
 examples/example_extending_preprocessing.py  | 183 -------------------
 examples/example_extending_preprocessor.py   | 109 +++++++++++
 examples/example_extending_regression.py     | 109 +++++++++++
 4 files changed, 342 insertions(+), 183 deletions(-)
 create mode 100644 examples/example_extending_classification.py
 delete mode 100644 examples/example_extending_preprocessing.py
 create mode 100644 examples/example_extending_preprocessor.py
 create mode 100644 examples/example_extending_regression.py

diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py
new file mode 100644
index 0000000000..11b82503d0
--- /dev/null
+++ b/examples/example_extending_classification.py
@@ -0,0 +1,124 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Classification Component
+====================================================================
+
+The following example demonstrates how to create a new classification
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
+    UniformIntegerHyperparameter, UniformFloatHyperparameter
+
+import sklearn.metrics
+import autosklearn.classification
+import autosklearn.pipeline.components.classification
+import autosklearn.pipeline.components.base
+from autosklearn.pipeline.constants import *
+
+
+# Create MLP classifier component for auto-sklearn.
+class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm):
+    def __init__(self,
+                 hidden_layer_depth,
+                 num_nodes_per_layer,
+                 activation,
+                 alpha,
+                 random_state=None,
+                 ):
+        self.hidden_layer_depth = hidden_layer_depth
+        self.num_nodes_per_layer = num_nodes_per_layer
+        self.activation = activation
+        self.alpha = alpha
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
+        self.hidden_layer_depth = int(self.hidden_layer_depth)
+        self.alpha = float(self.alpha)
+
+        from sklearn.neural_network import MLPClassifier
+        hidden_layer_sizes = tuple(self.num_nodes_per_layer
+                                   for i in range(self.hidden_layer_depth))
+
+        self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
+                                       alpha=self.alpha,
+                                       activation=self.activation,
+                                       random_state=self.random_state,
+                                       )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict(X)
+
+    def predict_proba(self, X):
+        if self.estimator is None:
+            raise NotImplementedError()
+        return self.estimator.predict_proba(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'MLP Classifier',
+                'name': 'MLP Classifier',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': True,
+                'handles_multilabel': False,
+                'is_deterministic': False,
+                # Both input and output must be iterables (e.g. tuple or list)
+                'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA],
+                'output': [PREDICTIONS]
+                }
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        hidden_layer_depth = UniformIntegerHyperparameter(
+            name="hidden_layer_depth", lower=1, upper=3, default_value=1
+        )
+        num_nodes_per_layer = UniformIntegerHyperparameter(
+            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
+        )
+        activation = 
CategoricalHyperparameter( + name="activation", choices=['identity', 'logistic', 'tanh', 'relu'], + default_value='relu' + ) + alpha = UniformFloatHyperparameter( + name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 + ) + cs.add_hyperparameters([hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + ]) + return cs + + +# Add MLP classifier component to auto-sklearn. +autosklearn.pipeline.components.classification.add_classifier(MLPClassifier) +cs = MLPClassifier.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split +X, y = load_breast_cancer(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit MLP classifier to the data. +clf = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=20, + per_run_time_limit=10, + include_estimators=['MLPClassifier'], +) +clf.fit(X_train, y_train) + +# Print test accuracy and statistics. +y_pred = clf.predict(X_test) +print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test)) +print(clf.sprint_statistics()) +print(clf.show_models()) \ No newline at end of file diff --git a/examples/example_extending_preprocessing.py b/examples/example_extending_preprocessing.py deleted file mode 100644 index bb20970d39..0000000000 --- a/examples/example_extending_preprocessing.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -=============================================== -Extending Auto-sklearn -=============================================== - -In order to include new machine learning algorithms in auto-sklearn's -optimization process, users can implement a wrapper class for the algorithm -and register it to auto-sklearn. The example code below demonstrates how -to implement custom regressor and preprocessor (Lasso and polynomial processing from sklearn, respectively), -register it to auto-sklearn, and use them for the given task. -A detailed walkthrough of extending auto-sklearn can be found `here `_. - -""" - -from ConfigSpace.configuration_space import ConfigurationSpace -from ConfigSpace.hyperparameters import * -from ConfigSpace.conditions import EqualsCondition, InCondition - -from autosklearn.pipeline.components.base import \ - AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.components.base import \ - AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * -from autosklearn.util.common import check_for_bool - - -# Custom Regression algorithm added to auto-sklearn (Lasso from sklearn). 
-class MyRegressor(AutoSklearnRegressionAlgorithm): - def __init__(self, alpha, fit_intercept, tol, positive, random_state=None): - self.alpha = alpha - self.fit_intercept = fit_intercept - #self.normalize = normalize - self.tol = tol - self.positive = positive - - self.random_state = random_state - self.estimator = None - - def fit(self, X, Y): - import sklearn.linear_model - - self.alpha = float(self.alpha) - self.fit_intercept = check_for_bool(self.fit_intercept) - self.normalize = check_for_bool(self.normalize) - self.tol = float(self.tol) - self.positive = check_for_bool(self.positive) - - self.estimator = sklearn.linear_model.\ - Lasso(alpha=self.alpha, - fit_intercept=self.fit_intercept, - tol=self.tol, - positive=self.positive, - n_iter=300) - - self.estimator.fit(X, Y) - return self - - def predict(self, X): - if self.estimator is None: - raise NotImplementedError - return self.estimator.predict(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyRegressor', - 'name': 'MyRegressor', - 'handles_regression': True, - 'handles_classification': False, - 'handles_multiclass': False, - 'handles_multilabel': False, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (PREDICTIONS,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - cs = ConfigurationSpace() - alpha = UniformFloatHyperparameter( - name="alpha", lower=0, upper=10, default_value=1) - fit_intercept = CategoricalHyperparameter( - name="fit_intercept", choices=[True, False], default_value=True) - normalize = CategoricalHyperparameter( - name="normalize", choices=[True, False], default_value=False) - tol = UniformFloatHyperparameter( - name="tol", lower=10 ** -5, upper=10 ** -1, - default_value=10 ** -3, log=True) - positive = CategoricalHyperparameter( - name="positive", choices=[True, False], default_value=False) - - cs.add_hyperparameters([alpha, fit_intercept, tol, positive]) - - return cs - - -# Custom wrapper class for using Sklearn's polynomial feature preprocessing -# function. -class MyPreprocessor(AutoSklearnPreprocessingAlgorithm): - def __init__(self, degree, interaction_only, include_bias, random_state=None): - # Define hyperparameters to be tuned here. - self.degree = degree - self.interaction_only = interaction_only - self.include_bias = include_bias - self.random_state = random_state - self.preprocessor = None - - def fit(self, X, Y): - # wrapper function for the fit method of Sklearn's polynomial - # preprocessing function. - import sklearn.preprocessing - self.preprocessor = sklearn.preprocessing.PolynomialFeatures(degree=self.degree, - interaction_only=self.interaction_only, - include_bias=self.include_bias) - self.preprocessor.fit(X, Y) - return self - - def transform(self, X): - # wrapper function for the transform method of sklearn's polynomial - # preprocessing function. It is also possible to implement - # a preprocessing algorithm directly in this function, provided that - # it behaves in the way compatible with that from sklearn. 
- if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_properties(dataset_properties=None): - return {'shortname': 'MyPreprocessor', - 'name': 'MyPreprocessor', - 'handles_regression': True, - 'handles_classification': True, - 'handles_multiclass': True, - 'handles_multilabel': True, - 'is_deterministic': True, - 'input': (DENSE, UNSIGNED_DATA), - 'output': (INPUT,)} - - @staticmethod - def get_hyperparameter_search_space(dataset_properties=None): - # For each hyperparameter, its type (categorical, integer, float, etc.), - # range and the default value must be specified here. - degree = UniformIntegerHyperparameter( - name="degree", lower=2, upper=5, default_value=2) - interaction_only = CategoricalHyperparameter( - name="interaction_only", choices=["False", "True"], default_value="False") - include_bias = CategoricalHyperparameter( - name="include_bias", choices=["True", "False"], default_value="True") - - cs = ConfigurationSpace() - cs.add_hyperparameters([degree, interaction_only, include_bias]) - - return cs - - -def main(): - # Include the custom preprocessor class to auto-sklearn. - import autosklearn.pipeline.components.regression - import autosklearn.pipeline.components.feature_preprocessing - autosklearn.pipeline.components.regression.add_regressor(MyRegressor) - autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(MyPreprocessor) - - # Import toy data from sklearn and apply train_test_split. - from sklearn.datasets import load_boston - from sklearn.model_selection import train_test_split - X, y = load_boston(return_X_y=True) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1) - - # Run auto-sklearn regression with the custom preprocessor. - import autosklearn.regression - import autosklearn.metrics - reg = autosklearn.regression.AutoSklearnRegressor(time_left_for_this_task=30, - per_run_time_limit=10, - include_estimators=["MyRegressor"], - include_preprocessors=["MyPreprocessor"]) - reg.fit(X_train, y_train) - y_pred = reg.predict(X_test) - scorer = autosklearn.metrics.r2 - print("Test score: ", scorer(y_pred, y_test)) - print(reg.show_models()) - print(reg.sprint_statistics()) - - -if __name__ == "__main__": - main() diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py new file mode 100644 index 0000000000..5bdc024bfa --- /dev/null +++ b/examples/example_extending_preprocessor.py @@ -0,0 +1,109 @@ +""" +==================================================================== +Extending Auto-Sklearn with Preprocessor Component +==================================================================== + +The following example demonstrates how to create a wrapper around the linear +discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor +in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + UniformIntegerHyperparameter, CategoricalHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.metrics +import autosklearn.pipeline.components.feature_preprocessing +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import * + + +# Create LDA component for auto-sklearn. 
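+# (Assumed contract, following the base class used here: a preprocessing
+# component provides fit/transform plus the static methods get_properties
+# and get_hyperparameter_search_space, which auto-sklearn queries before
+# ever instantiating the component.)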
+class LDA(AutoSklearnPreprocessingAlgorithm):
+    def __init__(self, shrinkage, solver, n_components, tol, random_state=None):
+        self.solver = solver
+        self.shrinkage = shrinkage
+        self.n_components = n_components
+        self.tol = tol
+        self.random_state = random_state
+        self.preprocessor = None
+
+    def fit(self, X, Y=None):
+        # sklearn's LDA accepts a shrinkage value only for the 'lsqr' and
+        # 'eigen' solvers, so it is dropped when the 'svd' solver is chosen.
+        if self.solver == 'svd':
+            self.shrinkage = None
+        else:
+            self.shrinkage = float(self.shrinkage)
+        self.n_components = int(self.n_components)
+        self.tol = float(self.tol)
+
+        import sklearn.discriminant_analysis
+        self.preprocessor = \
+            sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
+                shrinkage=self.shrinkage,
+                solver=self.solver,
+                n_components=self.n_components,
+                tol=self.tol,
+            )
+        self.preprocessor.fit(X, Y)
+        return self
+
+    def transform(self, X):
+        if self.preprocessor is None:
+            raise NotImplementedError()
+        return self.preprocessor.transform(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'LDA',
+                'name': 'Linear Discriminant Analysis',
+                'handles_regression': False,
+                'handles_classification': True,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA),
+                'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        solver = CategoricalHyperparameter(
+            name="solver", choices=['svd', 'lsqr', 'eigen'], default_value='svd'
+        )
+        shrinkage = UniformFloatHyperparameter(
+            name="shrinkage", lower=0.0, upper=1.0, default_value=0.5
+        )
+        n_components = UniformIntegerHyperparameter(
+            name="n_components", lower=1, upper=29, default_value=10
+        )
+        tol = UniformFloatHyperparameter(
+            name="tol", lower=0.0001, upper=1, default_value=0.0001
+        )
+        cs.add_hyperparameters([solver, shrinkage, n_components, tol])
+        return cs
+
+
+# Add LDA component to auto-sklearn.
+autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)
+
+# Create dataset.
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Configuration space.
+cs = LDA.get_hyperparameter_search_space()
+print(cs)
+
+# Fit the model using LDA as preprocessor.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=20,
+    include_preprocessors=['LDA'],
+)
+clf.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = clf.predict(X_test)
+print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
+print(clf.sprint_statistics())
+print(clf.show_models())
\ No newline at end of file
diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py
new file mode 100644
index 0000000000..aaea3d13b5
--- /dev/null
+++ b/examples/example_extending_regression.py
@@ -0,0 +1,109 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Regression Component
+====================================================================
+
+The following example demonstrates how to create a new regression
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
+    UniformIntegerHyperparameter, CategoricalHyperparameter
+
+import sklearn.metrics
+import autosklearn.regression
+import autosklearn.pipeline.components.regression
+from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
+from autosklearn.pipeline.constants import *
+
+
+# Implement kernel ridge regression component for auto-sklearn.
+class KernelRidgeRegression(AutoSklearnRegressionAlgorithm):
+    def __init__(self, alpha, kernel, gamma, degree, random_state=None):
+        self.alpha = alpha
+        self.kernel = kernel
+        self.gamma = gamma
+        self.degree = degree
+        self.random_state = random_state
+        self.estimator = None
+
+    def fit(self, X, Y):
+        self.alpha = float(self.alpha)
+        self.gamma = float(self.gamma)
+        self.degree = int(self.degree)
+
+        import sklearn.kernel_ridge
+        self.estimator = sklearn.kernel_ridge.KernelRidge(alpha=self.alpha,
+                                                          kernel=self.kernel,
+                                                          gamma=self.gamma,
+                                                          degree=self.degree,
+                                                          )
+        self.estimator.fit(X, Y)
+        return self
+
+    def predict(self, X):
+        if self.estimator is None:
+            raise NotImplementedError
+        return self.estimator.predict(X)
+
+    @staticmethod
+    def get_properties(dataset_properties=None):
+        return {'shortname': 'KRR',
+                'name': 'Kernel Ridge Regression',
+                'handles_regression': True,
+                'handles_classification': False,
+                'handles_multiclass': False,
+                'handles_multilabel': False,
+                'is_deterministic': True,
+                'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA),
+                'output': (PREDICTIONS,)}
+
+    @staticmethod
+    def get_hyperparameter_search_space(dataset_properties=None):
+        cs = ConfigurationSpace()
+        alpha = UniformFloatHyperparameter(
+            name='alpha', lower=10 ** -5, upper=1, log=True, default_value=0.1)
+        kernel = CategoricalHyperparameter(
+            name='kernel',
+            choices=['linear',
+                     'rbf',
+                     'sigmoid',
+                     'polynomial',
+                     ],
+            default_value='linear'
+        )
+        gamma = UniformFloatHyperparameter(
+            name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True
+        )
+        degree = UniformIntegerHyperparameter(
+            name='degree', lower=2, upper=5, default_value=3
+        )
+        cs.add_hyperparameters([alpha, kernel, gamma, degree])
+        return cs
+
+
+# Add KRR component to auto-sklearn.
+autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression)
+cs = KernelRidgeRegression.get_hyperparameter_search_space()
+print(cs)
+
+# Generate data.
+from sklearn.datasets import load_diabetes
+from sklearn.model_selection import train_test_split
+X, y = load_diabetes(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Fit the model using KRR.
+reg = autosklearn.regression.AutoSklearnRegressor(
+    time_left_for_this_task=30,
+    per_run_time_limit=10,
+    include_estimators=['KernelRidgeRegression'],
+)
+reg.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = reg.predict(X_test)
+print("r2 score: ", sklearn.metrics.r2_score(y_test, y_pred))
+print(reg.sprint_statistics())
+print(reg.show_models())
\ No newline at end of file

From 9db3e2ec8fe17c2481dcbd724e989bcd8d6e7525 Mon Sep 17 00:00:00 2001
From: Jinu
Date: Tue, 2 Oct 2018 14:00:45 +0200
Subject: [PATCH 25/45] Use explicit imports in the extension examples

--- examples/example_extending_classification.py | 7 ++++--- examples/example_extending_preprocessor.py | 6 ++++-- examples/example_extending_regression.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index 11b82503d0..e7f8334901 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -14,12 +14,13 @@ import sklearn.metrics import autosklearn.classification import autosklearn.pipeline.components.classification -import autosklearn.pipeline.components.base -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA # Create MLP classifier component for auto-sklearn. -class MLPClassifier(autosklearn.pipeline.components.base.AutoSklearnClassificationAlgorithm): +class MLPClassifier(AutoSklearnClassificationAlgorithm): def __init__(self, hidden_layer_depth, num_nodes_per_layer, diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 5bdc024bfa..815d698ecd 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -16,8 +16,10 @@ import autosklearn.classification import autosklearn.metrics import autosklearn.pipeline.components.feature_preprocessing -from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.components.base \ + import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ + UNSIGNED_DATA # Create LDA component for auto-sklearn. diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index aaea3d13b5..59f2af58eb 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -15,7 +15,8 @@ import autosklearn.regression import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm -from autosklearn.pipeline.constants import * +from autosklearn.pipeline.constants import SPARSE, DENSE, \ + SIGNED_DATA, UNSIGNED_DATA # Implement kernel ridge regression component for auto-sklearn. From 15196ceeb4a2e99875989153120d5a36688d992f Mon Sep 17 00:00:00 2001 From: Jinu Date: Tue, 9 Oct 2018 14:00:25 +0200 Subject: [PATCH 26/45] Fixing codacy errors --- examples/example_extending_classification.py | 3 ++- examples/example_extending_preprocessor.py | 2 +- examples/example_extending_regression.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index e7f8334901..d4de7d79d3 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -16,7 +16,8 @@ import autosklearn.pipeline.components.classification from autosklearn.pipeline.components.base \ import AutoSklearnClassificationAlgorithm -from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ + PREDICTIONS # Create MLP classifier component for auto-sklearn. 
diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 815d698ecd..33a51cc953 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -18,7 +18,7 @@ import autosklearn.pipeline.components.feature_preprocessing from autosklearn.pipeline.components.base \ import AutoSklearnPreprocessingAlgorithm -from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, \ +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ UNSIGNED_DATA diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index 59f2af58eb..e3dbe18428 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -16,7 +16,7 @@ import autosklearn.pipeline.components.regression from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm from autosklearn.pipeline.constants import SPARSE, DENSE, \ - SIGNED_DATA, UNSIGNED_DATA + SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS # Implement kernel ridge regression component for auto-sklearn. From bfb1e08986be5d41b458d6cd736d6e1e2c2c3765 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 10:23:28 +0200 Subject: [PATCH 27/45] Change example (#553) * Change datasets used in examples from digits to breast_cancer. * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Fix line length in exanple_parallel.py --- examples/example_crossvalidation.py | 4 ++-- examples/example_eips.py | 4 ++-- examples/example_holdout.py | 4 ++-- examples/example_parallel.py | 7 +++++-- examples/example_random_search.py | 6 +++--- examples/example_sequential.py | 4 ++-- requirements.txt | 2 +- setup.py | 3 ++- 8 files changed, 19 insertions(+), 15 deletions(-) diff --git a/examples/example_crossvalidation.py b/examples/example_crossvalidation.py index 85530b591b..52e3050f7b 100644 --- a/examples/example_crossvalidation.py +++ b/examples/example_crossvalidation.py @@ -21,7 +21,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -37,7 +37,7 @@ def main(): # fit() changes the data in place, but refit needs the original data. We # therefore copy the data. In practice, one should reload the data - automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits') + automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer') # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. 
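Condensed from the diff context above, the cross-validation example boils down to the following fit/refit pattern (a sketch; the time limit and fold count here are illustrative, not taken from the example):

    import sklearn.datasets
    import sklearn.model_selection
    import autosklearn.classification

    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 5},
    )
    # fit() trains one model per fold; refit() then retrains the ensemble
    # members on the full training set before predict() is called.
    automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer')
    automl.refit(X_train.copy(), y_train.copy())
    print(automl.predict(X_test))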
diff --git a/examples/example_eips.py b/examples/example_eips.py index eef3c6cf11..db2a434092 100644 --- a/examples/example_eips.py +++ b/examples/example_eips.py @@ -69,7 +69,7 @@ def get_eips_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -81,7 +81,7 @@ def main(): get_smac_object_callback=get_eips_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn via ROAR. print(automl.show_models()) diff --git a/examples/example_holdout.py b/examples/example_holdout.py index fe1ff1c7a7..19a438bd87 100644 --- a/examples/example_holdout.py +++ b/examples/example_holdout.py @@ -18,7 +18,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -34,7 +34,7 @@ def main(): resampling_strategy='holdout', resampling_strategy_arguments={'train_size': 0.67} ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn. print(automl.show_models()) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index f5572ab97d..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -78,14 +78,17 @@ def spawn_classifier(seed, dataset_name): def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer'), + ) p.start() processes.append(p) for p in processes: diff --git a/examples/example_random_search.py b/examples/example_random_search.py index 9d04a39974..2a64b36efb 100644 --- a/examples/example_random_search.py +++ b/examples/example_random_search.py @@ -68,7 +68,7 @@ def get_random_search_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -79,7 +79,7 @@ def main(): get_smac_object_callback=get_roar_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for ROAR.') @@ -99,7 +99,7 @@ def main(): get_smac_object_callback=get_random_search_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for random search.') diff --git a/examples/example_sequential.py b/examples/example_sequential.py index 06820e7ebe..694ea81404 100644 --- a/examples/example_sequential.py +++ 
b/examples/example_sequential.py @@ -17,7 +17,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -32,7 +32,7 @@ def main(): ensemble_size=0, delete_tmp_folder_after_terminate=False, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() automl.fit_ensemble(y_train, ensemble_size=50) diff --git a/requirements.txt b/requirements.txt index b65911d6cb..71c47ceb66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0 +numpy==1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 86c9c3202b..ef89ad314c 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ "nose", "six", "Cython", - "numpy>=1.9.0", + # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. + "numpy==1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 9c2c245037266871b6cd3e0018a5b6c1c05dea1c Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 10:26:35 +0200 Subject: [PATCH 28/45] [WIP]Add argument for custom logger configuration. (#505) * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Change datasets used in examples from digits to breast_cancer. * Fix codacy error * Revert codacy error fixing * [Debug] check if numpy causes error * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] numpy * [Debug] try numpy version 1.14.6 * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Fix codacy error * Revert codacy error fixing * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] try numpy version 1.14.6 * Delete libgcc_check.sh used for debugging. 
* Fix numpy version and remove blank lines * Fix line length in example_parallel.py * Fix minor error --- .travis.yml | 2 +- autosklearn/automl.py | 7 ++++- autosklearn/estimators.py | 13 +++++++-- autosklearn/util/logging_.py | 23 +++++++++------ test/test_util/example_config.yaml | 46 ++++++++++++++++++++++++++++++ test/test_util/test_logging.py | 31 ++++++++++++++++++++ 6 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 test/test_util/example_config.yaml create mode 100644 test/test_util/test_logging.py diff --git a/.travis.yml b/.travis.yml index 968d8e4ec1..c9a27004aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,7 +69,7 @@ install: # Debug output to know all exact package versions! - pip freeze - python setup.py install - + script: bash ci_scripts/test.sh after_success: source ci_scripts/success.sh diff --git a/autosklearn/automl.py b/autosklearn/automl.py index d72cbc8920..91d66d4bd2 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -82,6 +82,7 @@ def __init__(self, disable_evaluator_output=False, get_smac_object_callback=None, smac_scenario_args=None, + logging_config=None, ): super(AutoML, self).__init__() self._backend = backend @@ -110,6 +111,7 @@ def __init__(self, self._disable_evaluator_output = disable_evaluator_output self._get_smac_object_callback = get_smac_object_callback self._smac_scenario_args = smac_scenario_args + self.logging_config = logging_config self._datamanager = None self._dataset_name = None @@ -235,7 +237,10 @@ def fit_on_datamanager(self, datamanager, metric): def _get_logger(self, name): logger_name = 'AutoML(%d):%s' % (self._seed, name) - setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name))) + setup_logger(os.path.join(self._backend.temporary_directory, + '%s.log' % str(logger_name)), + self.logging_config, + ) return get_logger(logger_name) @staticmethod diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 910c68f8e8..514b469f33 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -28,7 +28,9 @@ def __init__(self, shared_mode=False, disable_evaluator_output=False, get_smac_object_callback=None, - smac_scenario_args=None): + smac_scenario_args=None, + logging_config=None, + ): """ Parameters ---------- @@ -168,6 +170,11 @@ def __init__(self, This is an advanced feature. Use only if you are familiar with `SMAC `_. + logging_config : dict, optional (None) + dictionary object specifying the logger configuration. If None, + the default logging.yaml file is used, which can be found in + the directory ``util/logging.yaml`` relative to the installation. 
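+            For illustration, a custom configuration could be loaded from a
+            YAML file and passed in as follows (``my_logging.yaml`` is a
+            hypothetical file name; any dictionary that is valid for
+            ``logging.config.dictConfig`` should work)::
+
+                import yaml
+                with open('my_logging.yaml') as fh:
+                    logging_config = yaml.safe_load(fh)
+                automl = AutoSklearnClassifier(logging_config=logging_config)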
+ Attributes ---------- @@ -199,6 +206,7 @@ def __init__(self, self.disable_evaluator_output = disable_evaluator_output self.get_smac_object_callback = get_smac_object_callback self.smac_scenario_args = smac_scenario_args + self.logging_config = logging_config self._automl = None super().__init__() @@ -238,7 +246,8 @@ def build_automl(self): shared_mode=self.shared_mode, get_smac_object_callback=self.get_smac_object_callback, disable_evaluator_output=self.disable_evaluator_output, - smac_scenario_args=self.smac_scenario_args + smac_scenario_args=self.smac_scenario_args, + logging_config=self.logging_config, ) return automl diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py index cf3f40586d..ea074a1f3f 100644 --- a/autosklearn/util/logging_.py +++ b/autosklearn/util/logging_.py @@ -7,18 +7,23 @@ import yaml -def setup_logger(output_file=None): - with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), - 'r') as fh: - config = yaml.load(fh) - if output_file is not None: - config['handlers']['file_handler']['filename'] = output_file - logging.config.dictConfig(config) +def setup_logger(output_file=None, logging_config=None): + # logging_config must be a dictionary object specifying the configuration + # for the loggers to be used in auto-sklearn. + if logging_config is not None: + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) + else: + with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), + 'r') as fh: + logging_config = yaml.safe_load(fh) + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) def _create_logger(name): - logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %(' - 'message)s', datefmt='%H:%M:%S') return logging.getLogger(name) diff --git a/test/test_util/example_config.yaml b/test/test_util/example_config.yaml new file mode 100644 index 0000000000..7c93e1b846 --- /dev/null +++ b/test/test_util/example_config.yaml @@ -0,0 +1,46 @@ +--- +version: 1 +disable_existing_loggers: False +formatters: + simple: + format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' + +handlers: + console: + class: logging.StreamHandler + level: WARNING + formatter: simple + stream: ext://sys.stdout + + file_handler: + class: logging.FileHandler + level: DEBUG + formatter: simple + filename: autosklearn.log + +root: + level: CRITICAL + handlers: [console, file_handler] + +loggers: + autosklearn.metalearning: + level: NOTSET + handlers: [file_handler] + propagate: no + + autosklearn.util.backend: + level: DEBUG + handlers: [file_handler] + propagate: no + + smac.intensification.intensification.Intensifier: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.local_search.LocalSearch: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.smbo.SMBO: + level: INFO + handlers: [file_handler, console] diff --git a/test/test_util/test_logging.py b/test/test_util/test_logging.py new file mode 100644 index 0000000000..9c18c07ec1 --- /dev/null +++ b/test/test_util/test_logging.py @@ -0,0 +1,31 @@ +import os +import unittest +import logging +import logging.config +import yaml +from autosklearn.util import logging_ + +class LoggingTest(unittest.TestCase): + + def test_setup_logger(self): + # Test that setup_logger function correctly configures the logger + # according to the given dictionary, and uses the default + # logging.yaml file if 
logging_config is not specified. + + with open(os.path.join(os.path.dirname(__file__), \ + 'example_config.yaml'), 'r') as fh: + example_config = yaml.safe_load(fh) + + # Configure logger with example_config.yaml. + logging_.setup_logger(logging_config=example_config) + + # example_config sets the root logger's level to CRITICAL, + # which corresponds to 50. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 50) + + # This time use the default configuration. + logging_.setup_logger(logging_config=None) + + # default config sets the root logger's level to DEBUG, + # which corresponds to 10. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 10) \ No newline at end of file From 3f0ee66ffa79596de323e08447de20cc46968949 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 19 Oct 2018 10:28:17 +0200 Subject: [PATCH 29/45] FIX #566: sort ensemble correctly (#567) --- autosklearn/util/backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py index 52ac678762..c59adbde5c 100644 --- a/autosklearn/util/backend.py +++ b/autosklearn/util/backend.py @@ -407,9 +407,10 @@ def save_ensemble(self, ensemble, idx, seed): except Exception: pass - filepath = os.path.join(self.get_ensemble_dir(), - '%s.%s.ensemble' % (str(seed), - str(idx))) + filepath = os.path.join( + self.get_ensemble_dir(), + '%s.%s.ensemble' % (str(seed), str(idx).zfill(10)) + ) with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( filepath), delete=False) as fh: pickle.dump(ensemble, fh) From 80517ca69b406e305077429481b91e0251049203 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:29:57 +0200 Subject: [PATCH 30/45] Fix Line length in example_parallel.py --- examples/example_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index bcb45206c2..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -85,7 +85,10 @@ def main(): processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer'), + ) p.start() processes.append(p) for p in processes: From 2afad9acc6a57468331c2d544c7e475f576f11b5 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:34:14 +0200 Subject: [PATCH 31/45] Fix line length in example_parallel.py --- examples/example_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index bcb45206c2..2935ed2b85 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -85,7 +85,10 @@ def main(): processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'breast_cancer')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer') + ) p.start() processes.append(p) for p in processes: From c16d7f64a018c988d07a57c0c4367733ea9b1b46 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:34:35 +0200 Subject: [PATCH 32/45] Fix minor error --- examples/example_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/example_parallel.py 
b/examples/example_parallel.py index 2935ed2b85..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -87,7 +87,7 @@ def main(): for i in range(4): # set this at roughly half of your cores p = multiprocessing.Process( target=spawn_classifier, - args=(i, 'breast_cancer') + args=(i, 'breast_cancer'), ) p.start() processes.append(p) From c8368f5d1821225e7a1484f0dc66a77048d431b3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 19 Oct 2018 11:52:03 +0200 Subject: [PATCH 33/45] Fix codacy error "parameters differ from overridden 'fit' method" --- examples/example_extending_classification.py | 4 ++-- examples/example_extending_preprocessor.py | 4 ++-- examples/example_extending_regression.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index d4de7d79d3..a8b0dbe3ca 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -35,7 +35,7 @@ def __init__(self, self.alpha = alpha self.random_state = random_state - def fit(self, X, Y): + def fit(self, X, y): self.num_nodes_per_layer = int(self.num_nodes_per_layer) self.hidden_layer_depth = int(self.hidden_layer_depth) self.alpha = float(self.alpha) @@ -49,7 +49,7 @@ def fit(self, X, Y): activation=self.activation, random_state=self.random_state, ) - self.estimator.fit(X, Y) + self.estimator.fit(X, y) return self def predict(self, X): diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 33a51cc953..64e866f002 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -32,7 +32,7 @@ def __init__(self, shrinkage, solver, n_components, tol, random_state=None): self.random_state = random_state self.preprocessor = None - def fit(self, X, Y=None): + def fit(self, X, y=None): self.shrinkage = float(self.shrinkage) self.n_components = int(self.n_components) self.tol = float(self.tol) @@ -45,7 +45,7 @@ def fit(self, X, Y=None): n_components=self.n_components, tol=self.tol, ) - self.preprocessor.fit(X, Y) + self.preprocessor.fit(X, y) return self def transform(self, X): diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py index e3dbe18428..7b9ad21239 100644 --- a/examples/example_extending_regression.py +++ b/examples/example_extending_regression.py @@ -29,7 +29,7 @@ def __init__(self, alpha, kernel, gamma, degree, random_state=None): self.random_state = random_state self.estimator = None - def fit(self, X, Y): + def fit(self, X, y): self.alpha = float(self.alpha) self.gamma = float(self.gamma) self.degree = int(self.degree) @@ -40,7 +40,7 @@ def fit(self, X, Y): gamma=self.gamma, degree=self.degree, ) - self.estimator.fit(X, Y) + self.estimator.fit(X, y) return self def predict(self, X): From 763aac02fe60d97d2f7f21a8c454716243ed5e05 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 19 Oct 2018 13:14:52 +0200 Subject: [PATCH 34/45] Check target type at the beginning of the fitting process. (#506) * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * Change datasets used in examples from digits to breast_cancer. * [Debug] try with numpy version 1.14.5 * [Debug] Check if numpy version 1.14.6 raises error.
* [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * [Debug] Check if numpy version 1.14.6 raises error. * Fix numpy version to 1.14.5 * Add comment to Mock in test_type_of_target * Fix line length in example_parallel.py * Fix minor error --- autosklearn/estimators.py | 25 ++++++ test/test_automl/test_estimators.py | 134 +++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 2 deletions(-) diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 514b469f33..6adedd0f56 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -3,6 +3,7 @@ from autosklearn.automl import AutoMLClassifier, AutoMLRegressor from autosklearn.util.backend import create +from sklearn.utils.multiclass import type_of_target class AutoSklearnEstimator(BaseEstimator): @@ -465,6 +466,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: binary, multiclass, multilabel-indicator. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'continuous', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("classification with data of type %s is" + " not supported" % target_type) + super().fit( X=X, y=y, @@ -568,6 +581,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: continuous, binary, multiclass. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'multilabel-indicator', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("regression with data of type %s is not" + " supported" % target_type) + # Fit is supposed to be idempotent! # But not if we use share_mode. super().fit( diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index e8a5bd0954..064dc73610 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -50,17 +50,25 @@ class EstimatorTest(Base, unittest.TestCase): # self._tearDown(output) def test_pSMAC_wrong_arguments(self): + X = np.zeros((100, 100)) + y = np.zeros((100, )) self.assertRaisesRegexp(ValueError, "If shared_mode == True tmp_folder must not " "be None.", - lambda shared_mode: AutoSklearnClassifier(shared_mode=shared_mode).fit(None, None), + lambda shared_mode: + AutoSklearnClassifier( + shared_mode=shared_mode, + ).fit(X, y), shared_mode=True) self.assertRaisesRegexp(ValueError, "If shared_mode == True output_folder must not " "be None.", lambda shared_mode, tmp_folder: - AutoSklearnClassifier(shared_mode=shared_mode, tmp_folder=tmp_folder).fit(None, None), + AutoSklearnClassifier( + shared_mode=shared_mode, + tmp_folder=tmp_folder, + ).fit(X, y), shared_mode=True, tmp_folder='/tmp/duitaredxtvbedb') @@ -85,6 +93,128 @@ def test_feat_type_wrong_arguments(self): cls.fit, X=X, y=y, feat_type=['Car']*100) + # Mock AutoSklearnEstimator.fit so the test doesn't actually run fit(). + @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.fit') + def test_type_of_target(self, mock_estimator): + # Test that classifier raises error for illegal target types.
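+        # For reference, sklearn.utils.multiclass.type_of_target maps the
+        # target arrays defined below as follows (a sketch of the expected
+        # values, assuming scikit-learn 0.19):
+        #     y_binary                 -> 'binary'
+        #     y_continuous             -> 'continuous'
+        #     y_multiclass             -> 'multiclass'
+        #     y_multilabel             -> 'multilabel-indicator'
+        #     y_multiclass_multioutput -> 'multiclass-multioutput'
+        #     y_continuous_multioutput -> 'continuous-multioutput'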
+ X = np.array([[1, 2], + [2, 3], + [3, 4], + [4, 5], + ]) + # Possible target types + y_binary = np.array([0, 0, 1, 1]) + y_continuous = np.array([0.1, 1.3, 2.1, 4.0]) + y_multiclass = np.array([0, 1, 2, 0]) + y_multilabel = np.array([[0, 1], + [1, 1], + [1, 0], + [0, 0], + ]) + y_multiclass_multioutput = np.array([[0, 1], + [1, 3], + [2, 2], + [5, 3], + ]) + y_continuous_multioutput = np.array([[0.1, 1.5], + [1.2, 3.5], + [2.7, 2.7], + [5.5, 3.9], + ]) + + cls = AutoSklearnClassifier() + # Illegal target types for classification: continuous, + # multiclass-multioutput, continuous-multioutput. + self.assertRaisesRegex(ValueError, + "classification with data of type" + " multiclass-multioutput is not supported", + cls.fit, + X=X, + y=y_multiclass_multioutput, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous is not supported", + cls.fit, + X=X, + y=y_continuous, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous-multioutput is not supported", + cls.fit, + X=X, + y=y_continuous_multioutput, + ) + + # Legal target types for classification: binary, multiclass, + # multilabel-indicator. + try: + cls.fit(X, y_binary) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "binary targets") + + try: + cls.fit(X, y_multiclass) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multiclass targets") + + try: + cls.fit(X, y_multilabel) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multilabel-indicator targets") + + # Test that regressor raises error for illegal target types. + reg = AutoSklearnRegressor() + # Illegal target types for regression: multiclass-multioutput, + # multilabel-indicator, continuous-multioutput. 
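+        # (These mirror the check added to AutoSklearnRegressor.fit above;
+        # binary and multiclass targets stay legal for regression, presumably
+        # because integer class labels are also valid continuous targets.)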
+ self.assertRaisesRegex(ValueError, "regression with data of type" " multiclass-multioutput is not supported", reg.fit, X=X, y=y_multiclass_multioutput, ) + + self.assertRaisesRegex(ValueError, "regression with data of type" " multilabel-indicator is not supported", reg.fit, X=X, y=y_multilabel, ) + + self.assertRaisesRegex(ValueError, "regression with data of type" " continuous-multioutput is not supported", reg.fit, X=X, y=y_continuous_multioutput, ) + # Legal target types: continuous, binary, multiclass + try: + reg.fit(X, y_continuous) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "continuous targets") + + try: + reg.fit(X, y_binary) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "binary targets") + + try: + reg.fit(X, y_multiclass) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "multiclass targets") + def test_fit_pSMAC(self): tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC') output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC') From 3cf42b54e2d8e3d981a04fc4db89da47ac9a4051 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 19 Oct 2018 14:06:10 +0200 Subject: [PATCH 35/45] Update test_automl.py --- test/test_automl/test_automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py index 9324566201..f8da452e84 100644 --- a/test/test_automl/test_automl.py +++ b/test/test_automl/test_automl.py @@ -224,7 +224,7 @@ def test_automl_outputs(self): fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn', 'ensembles')) - self.assertIn('100.0.ensemble', fixture) + self.assertIn('100.0000000000.ensemble', fixture) # Start time start_time_file_path = os.path.join(backend_api.temporary_directory, '.auto-sklearn', From 88d1554d84646b8cf672ea5c53f92b7499619a16 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 15:41:12 +0200 Subject: [PATCH 36/45] Add python 3.7 to Travis, change python_requires in setup.py. --- .travis.yml | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index bf8727adf4..6ec5fa1781 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,8 @@ matrix: env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - # Add flake8 check in travis. + - os: linux + env: DISTRIB="conda" PYTHON_VERSION="3.7" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" @@ -70,7 +71,6 @@ install: - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps - mkdir ~/.openml - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config - # Install flake 8. - pip install flake8 # Debug output to know all exact package versions!
- pip freeze diff --git a/setup.py b/setup.py index ef89ad314c..cfdac2867e 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,6 @@ license='BSD', platforms=['Linux'], classifiers=[], - python_requires='>=3.4.*', + python_requires='>=3.5.*', url='https://automl.github.io/auto-sklearn', ) From 9b652d571af5d16a4641bbd5128e868e99496f11 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 15:54:51 +0200 Subject: [PATCH 37/45] Add solver hyperparameter in MLP classifier example, increase runtime of classifier in example_extending_preprocessor.py --- examples/example_extending_classification.py | 9 ++++++++- examples/example_extending_preprocessor.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py index a8b0dbe3ca..9f4ea4eedb 100644 --- a/examples/example_extending_classification.py +++ b/examples/example_extending_classification.py @@ -27,12 +27,14 @@ def __init__(self, num_nodes_per_layer, activation, alpha, + solver, random_state=None, ): self.hidden_layer_depth = hidden_layer_depth self.num_nodes_per_layer = num_nodes_per_layer self.activation = activation self.alpha = alpha + self.solver = solver self.random_state = random_state def fit(self, X, y): @@ -45,8 +47,9 @@ def fit(self, X, y): for i in range(self.hidden_layer_depth)) self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, - alpha=self.alpha, activation=self.activation, + alpha=self.alpha, + solver=self.solver, random_state=self.random_state, ) self.estimator.fit(X, y) @@ -92,10 +95,14 @@ def get_hyperparameter_search_space(dataset_properties=None): alpha = UniformFloatHyperparameter( name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 ) + solver = CategoricalHyperparameter( + name="solver", choices=['lbfgs', 'sgd', 'adam'], default_value='adam' + ) cs.add_hyperparameters([hidden_layer_depth, num_nodes_per_layer, activation, alpha, + solver, ]) return cs diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py index 64e866f002..e416827408 100644 --- a/examples/example_extending_preprocessor.py +++ b/examples/example_extending_preprocessor.py @@ -99,7 +99,7 @@ def get_hyperparameter_search_space(dataset_properties=None): # Fit the model using LDA as preprocessor. clf = autosklearn.classification.AutoSklearnClassifier( - time_left_for_this_task=20, + time_left_for_this_task=30, include_preprocessors=['LDA'], ) clf.fit(X_train, y_train) From aacf24bf9c6acd3ebdbd3f08ca0362997cd55f26 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 19:09:01 +0200 Subject: [PATCH 38/45] Change all occurrences of master to development in flake8_diff.sh --- ci_scripts/flake8_diff.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh index e40c2653bb..39701d5248 100644 --- a/ci_scripts/flake8_diff.sh +++ b/ci_scripts/flake8_diff.sh @@ -40,7 +40,7 @@ git remote --verbose # Travis does the git clone with a limited depth.
# This may not be enough to find the common ancestor with -# $REMOTE/master so we unshallow the git checkout +# $REMOTE/development so we unshallow the git checkout if [[ -a .git/shallow ]]; then echo -e '\nTrying to unshallow the repo:' echo '--------------------------------------------------------------------------------' @@ -61,7 +61,7 @@ if [[ "$TRAVIS" == "true" ]]; then fi else # We want to fetch the code as it is in the PR branch and not - # the result of the merge into master. This way line numbers + # the result of the merge into development. This way line numbers # reported by Travis will match with the local code. LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST # In Travis the PR target is always origin @@ -70,7 +70,7 @@ if [[ "$TRAVIS" == "true" ]]; then fi # If not using the commit range from Travis we need to find the common -# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/development if [[ -z "$COMMIT_RANGE" ]]; then if [[ -z "$LOCAL_BRANCH_REF" ]]; then LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) @@ -79,16 +79,16 @@ if [[ -z "$COMMIT_RANGE" ]]; then echo '--------------------------------------------------------------------------------' git --no-pager log -2 $LOCAL_BRANCH_REF - REMOTE_MASTER_REF="$REMOTE/master" - # Make sure that $REMOTE_MASTER_REF is a valid reference - echo -e "\nFetching $REMOTE_MASTER_REF" + REMOTE_DEVELOPMENT_REF="$REMOTE/development" + # Make sure that $REMOTE_DEVELOPMENT_REF is a valid reference + echo -e "\nFetching $REMOTE_DEVELOPMENT_REF" echo '--------------------------------------------------------------------------------' - git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF + git fetch $REMOTE development:refs/remotes/$REMOTE_DEVELOPMENT_REF LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) - REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + REMOTE_DEVELOPMENT_SHORT_HASH=$(git rev-parse --short $REMOTE_DEVELOPMENT_REF) - COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ - echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_DEVELOPMENT_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_DEVELOPMENT_REF -q)" if [ -z "$COMMIT" ]; then exit 1 @@ -97,7 +97,7 @@ if [[ -z "$COMMIT_RANGE" ]]; then COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ - "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" + "and $REMOTE_DEVELOPMENT_REF ($REMOTE_DEVELOPMENT_SHORT_HASH) is $COMMIT_SHORT_HASH:" echo '--------------------------------------------------------------------------------' git --no-pager show --no-patch $COMMIT_SHORT_HASH From f9a7b1de6e6515a02c76e07cd227e243d5245dc3 Mon Sep 17 00:00:00 2001 From: Jinu Date: Thu, 25 Oct 2018 20:21:07 +0200 Subject: [PATCH 39/45] numpy requirement is now >=1.9.0<=1.14.5 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 71c47ceb66..4ba8cfc10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy==1.14.5 +numpy>=1.9.0<=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index cfdac2867e..68491b6a4f 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy 
version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy==1.14.5", + "numpy>=1.9.0<=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 2c079703ebefd05439229cddffbc969280b15a82 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 26 Oct 2018 01:55:15 +0200 Subject: [PATCH 40/45] Fix requirement inequality mistake --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4ba8cfc10e..835fe5d685 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0<=1.14.5 +numpy<=1.9.0>=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 68491b6a4f..6ef16993a5 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy>=1.9.0<=1.14.5", + "numpy<=1.9.0>=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From de3192f67442704ac727be4f40bdf226c9982a32 Mon Sep 17 00:00:00 2001 From: Jinu Date: Fri, 26 Oct 2018 02:13:36 +0200 Subject: [PATCH 41/45] change initial numpy version to 1.14.5. --- .travis.yml | 2 +- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ec5fa1781..ecf00953d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,7 +64,7 @@ install: # Install general requirements the way setup.py suggests - pip install pep8 codecov # Temporarily pin the numpy version for travis-ci - - pip install "numpy<1.15" + - pip install "numpy<=1.14.5" - cat requirements.txt | xargs -n 1 -L 1 pip install # Install openml dependency for metadata generation unittest - pip install xmltodict requests liac-arff diff --git a/requirements.txt b/requirements.txt index 835fe5d685..4ba8cfc10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy<=1.9.0>=1.14.5 +numpy>=1.9.0<=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 6ef16993a5..68491b6a4f 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ "six", "Cython", # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. - "numpy<=1.9.0>=1.14.5", + "numpy>=1.9.0<=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", From 56af60d00a2838bf4cd6e7b32212abe632ad14c5 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 9 Nov 2018 15:06:02 +0100 Subject: [PATCH 42/45] Circle Drop (#575) * Extending Autosklearn. First commit. * Add regression example * CI: upper bound numpy version due to travis failures * CI: upper bound numpy version due to travis failures * use tempfile.gettempdir() (#521) * use tempfile.gettempdir() * follow quality review coding standards * Remove a colon from README.md (#527) * fixing warnings on non-tuple sequence for indexing (#526) * fix string formatting (#540) * FIX removing models wrt wrong metric in ensemble (#522) * Add examples for extending auto-sklearn. * . * Change datasets used in examples from digits to breast_cancer. * First commit * Fixing codacy errors * Fixing bug * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * First commit * Fixing bug * Modify flake8_diff.sh * Extending Autosklearn. First commit. * Add regression example * Add examples for extending auto-sklearn. * . * Fixing codacy errors * Change example (#553) * Change datasets used in examples from digits to breast_cancer. 
* [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Fix line length in example_parallel.py * [WIP] Add argument for custom logger configuration. (#505) * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Change datasets used in examples from digits to breast_cancer. * Fix codacy error * Revert codacy error fixing * [Debug] check if numpy causes error * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] numpy * [Debug] try numpy version 1.14.6 * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Add argument for custom logger configuration. First commit, work in progress. * Minor changes. * Modify suggested changes [WIP] * . * . * . * . * Fix minor details * Fix travis not recognizing example_config.yaml * . * . * . * . * . * Fix codacy error * Revert codacy error fixing * [Debug] experimenting with numpy * [Debug] try to manually install libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug] libgcc * [Debug]. * [Debug]. * [Debug] . * [Debug]. * [Debug]. * [Debug] used older numpy version * [Debug] try numpy version 1.14.6 * Delete libgcc_check.sh used for debugging. * Fix numpy version and remove blank lines * Fix line length in example_parallel.py * Fix minor error * FIX #566: sort ensemble correctly (#567) * Fix Line length in example_parallel.py * Fix line length in example_parallel.py * Fix minor error * Fix codacy error "parameters differ from overridden 'fit' method" * Check target type at the beginning of the fitting process. (#506) * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * Change datasets used in examples from digits to breast_cancer. * [Debug] try with numpy version 1.14.5 * [Debug] Check if numpy version 1.14.6 raises error. * [Debug] try different numpy version * [Debug] Try with latest numpy version * Set numpy version to 1.14.5 * Check target type at the beginning of the fitting process. * . * Fixed minor error in unittest * . * Add unittest for target type checking. * . * . * [Debug] Check if numpy version 1.14.6 raises error. * Fix numpy version to 1.14.5 * Add comment to Mock in test_type_of_target * Fix line length in example_parallel.py * Fix minor error * Update test_automl.py * Add python 3.7 to Travis, change python_requires in setup.py. * Add solver hyperparameter in MLP classifier example, increase runtime of classifier in example_extending_preprocessor.py * Change all occurrences of master to development in flake8_diff.sh * numpy requirement is now >=1.9.0<=1.14.5 * Fix requirement inequality mistake * change initial numpy version to 1.14.5. * Deploy using travis instead of circle * FIX error in travis.yml caused by stashing * Test that deploy works. * Debugging. Set local_dir to doc/development. * Done Testing. Finalize the PR.
* Delete circle_install.sh --- .travis.yml | 14 ++++++-- ci_scripts/circle_install.sh | 20 ------------ ci_scripts/create_doc.sh | 61 +++++++++++++++++++++++++++++++++++ ci_scripts/push_doc.sh | 42 ------------------------ circle.yml | 62 ------------------------------------ 5 files changed, 73 insertions(+), 126 deletions(-) delete mode 100644 ci_scripts/circle_install.sh create mode 100644 ci_scripts/create_doc.sh delete mode 100644 ci_scripts/push_doc.sh delete mode 100644 circle.yml diff --git a/.travis.yml b/.travis.yml index 748303ec3f..9faa0ae8f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ matrix: - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux - env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" + env: DISTRIB="conda" PYTHON_VERSION="3.6" DOCPUSH="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - os: linux @@ -77,5 +77,15 @@ install: - python setup.py install script: bash ci_scripts/test.sh -after_success: source ci_scripts/success.sh +after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result" +deploy: + provider: pages + skip-cleanup: true + github-token: $GITHUB_TOKEN # set in the settings page of the repository + keep-history: true + committer-from-gh: true + on: + all_branches: true + condition: $doc_result = "success" + local_dir: doc/$TRAVIS_BRANCH diff --git a/ci_scripts/circle_install.sh b/ci_scripts/circle_install.sh deleted file mode 100644 index 195ad87d54..0000000000 --- a/ci_scripts/circle_install.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!bin/bash - -# on circle ci, each command run with it's own execution context so we have to -# activate the conda testenv on a per command basis. That's why we put calls to -# python (conda) in a dedicated bash script and we activate the conda testenv -# here. -source activate testenv - -export CC=`which gcc` -# install documentation building dependencies -pip install --upgrade numpy -pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc sphinx_gallery pillow -# And finally, all other dependencies -cat requirements.txt | xargs -n 1 -L 1 pip install - -python setup.py clean -python setup.py develop - -# pipefail is necessary to propagate exit codes -set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh new file mode 100644 index 0000000000..0a794627d8 --- /dev/null +++ b/ci_scripts/create_doc.sh @@ -0,0 +1,61 @@ +# This script is mostly adopted from https://github.com/openml/openml-python/blob/develop/ci_scripts/create_doc.sh + +set -euo pipefail + +# Check if DOCPUSH is set +if ! [[ -z ${DOCPUSH+x} ]]; then + + if [[ "$DOCPUSH" == "true" ]]; then + + # install documentation building dependencies + pip install --upgrade matplotlib seaborn setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert mock + + # $1 is the branch name + # $2 is the global variable where we set the script status + + if ! 
{ [ $1 = "master" ] || [ $1 = "development" ]; }; then + { echo "Not one of the allowed branches"; exit 0; } + fi + + # delete any previous documentation folder + if [ -d doc/$1 ]; then + rm -rf doc/$1 + fi + + # create the documentation + cd doc && make html 2>&1 + + # create directory with branch name + # the documentation for dev/stable from git will be stored here + mkdir $1 + + # get previous documentation from github + git clone https://github.com/automl/auto-sklearn.git --branch gh-pages --single-branch + + # copy previous documentation + cp -r auto-sklearn/. $1 + rm -rf auto-sklearn + + # if the documentation for the branch exists, remove it + if [ -d $1/$1 ]; then + rm -rf $1/$1 + fi + + # copy the updated documentation for this branch + mkdir $1/$1 + cp -r build/html/. $1/$1 + + # takes a variable name as an argument and assigns the script outcome to a + # variable with the given name. If it got this far, the script was successful + function set_return() { + # $1 is the variable where we save the script outcome + local __result=$1 + local status='success' + eval $__result="'$status'" + } + + set_return "$2" + fi +fi +# Workaround for travis failure +set +u diff --git a/ci_scripts/push_doc.sh b/ci_scripts/push_doc.sh deleted file mode 100644 index 3fa944b64a..0000000000 --- a/ci_scripts/push_doc.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# This script is meant to be called in the "deploy" step defined in -# circle.yml. See https://circleci.com/docs/ for more details. -# The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. - -if [ ! -z "$1" ] - then DOC_FOLDER=$1 -fi - -MSG="Pushing the docs for revision for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1, folder: $DOC_FOLDER" - -cd $HOME - -# Clone the docs repo if it isnt already there -if [ ! -d $DOC_REPO ]; - then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; -fi - -# Copy the build docs to a temporary folder -rm -rf tmp -mkdir tmp -cp -R $HOME/$DOC_REPO/doc/build/html/* ./tmp/ - -cd $DOC_REPO -git branch gh-pages -git checkout -f gh-pages -git reset --hard origin/gh-pages -git clean -dfx -git rm -rf $HOME/$DOC_REPO/$DOC_FOLDER && rm -rf $HOME/$DOC_REPO/$DOC_FOLDER - -# Copy the new build docs -mkdir $DOC_FOLDER -cp -R $HOME/tmp/* ./$DOC_FOLDER/ - -git config --global user.email $EMAIL -git config --global user.name $USERNAME -git add -f ./$DOC_FOLDER/ -git commit -m "$MSG" -git push -f origin gh-pages - -echo $MSG \ No newline at end of file diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 8ff09eb573..0000000000 --- a/circle.yml +++ /dev/null @@ -1,62 +0,0 @@ -machine: - environment: - PATH: /home/ubuntu/miniconda/bin:$PATH - - # The github organization or username of the repository which hosts the - # project and documentation. - USERNAME: "automl" - - # The repository where the documentation will be hosted - DOC_REPO: "auto-sklearn" - - # The base URL for the Github page where the documentation will be hosted - DOC_URL: "" - - # The email is to be used for commits in the Github Page - EMAIL: "feurerm@informatik.uni-freiburg.de" - -dependencies: - - # Various dependencies - pre: - # Get rid of existing virtualenvs on circle ci as they conflict with conda. 
- # From nilearn: https://github.com/nilearn/nilearn/blob/master/circle.yml - - cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs - # from scikit-learn contrib - - sudo -E apt-get -yq remove texlive-binaries --purge - - sudo -E apt-get -yq update - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra - # Other stuff... - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential - # Conda installation - - wget https://repo.continuum.io/miniconda/Miniconda3-4.3.21-Linux-x86_64.sh -O ~/miniconda.sh - - bash ~/miniconda.sh -b -p $HOME/miniconda - - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig - - # The --user is needed to let sphinx see the source and the binaries - # The pipefail is requested to propagate exit code - override: - - source ci_scripts/circle_install.sh -test: - # Grep error on the documentation - override: - - cat ~/log.txt && if grep -q "Traceback (most recent call last):" ~/log.txt; then false; else true; fi -deployment: - master: - branch: master - commands: - - bash ci_scripts/push_doc.sh 'stable' - development: - branch: development - commands: - - bash ci_scripts/push_doc.sh 'dev' -general: - # Open the doc to the API - artifacts: - - "doc/_build/html" - - "~/log.txt" - # Restric the build to the branch master only - #branches: - # only: - # - development - # - master From 1b7a172929fab8fdda5c8c6bb0fc1cead24538ba Mon Sep 17 00:00:00 2001 From: theFool Date: Fri, 9 Nov 2018 22:23:07 +0800 Subject: [PATCH 43/45] Update gmeans.py (#572) fix the bug of n_clusters not equals to len(cluster_centers) --- autosklearn/metalearning/metalearning/clustering/gmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/metalearning/metalearning/clustering/gmeans.py b/autosklearn/metalearning/metalearning/clustering/gmeans.py index 23363c6248..704ecc05a7 100644 --- a/autosklearn/metalearning/metalearning/clustering/gmeans.py +++ b/autosklearn/metalearning/metalearning/clustering/gmeans.py @@ -69,7 +69,7 @@ def fit(self, X): break # Refinement - KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1, + KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1, init=np.array(cluster_centers), random_state=self.random_state) KMeans.fit(X) From 6d53d1f8b1d1d035eb2d464af17ffba6bac14f87 Mon Sep 17 00:00:00 2001 From: JinWoo <31531627+ahn1340@users.noreply.github.com> Date: Fri, 9 Nov 2018 15:27:18 +0100 Subject: [PATCH 44/45] Release 0.4.1 (#576) * . * . * AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self. * Initial commit. Work in Progress. * Fix minor printing error in sprint_statistics. * Revert "Fix#460" * Raise error if ensemble is not built (#480) * . * . * AutoSklearnClassifier/Regressor's fit, refit, fit_ensemble now return self. * Initial commit. Work in Progress. * Fix minor printing error in sprint_statistics. * Revert "Fix#460" * Resolve rebase conflict * combined unittests to reduce travis runtime * . * . * . * . * . * ADD Auto-sklearn 0.4.1 release note to releases.rst --- doc/releases.rst | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/doc/releases.rst b/doc/releases.rst index ab35f83634..d4d5beea23 100644 --- a/doc/releases.rst +++ b/doc/releases.rst @@ -11,6 +11,38 @@ Releases ======== +Version 0.4.1 +============= + +* Added documentation on `how to extend Auto-sklearn `_ + with custom classifier, regressor, and preprocessor. 
+* Auto-sklearn now requires a numpy version between 1.9.0 and 1.14.5, because higher versions + cause a Travis build failure. +* Examples now use ``sklearn.datasets.load_breast_cancer()`` instead of ``sklearn.datasets.load_digits()`` + to reduce memory usage during the Travis build. +* Fixes future warnings on using a non-tuple sequence for indexing. +* Fixes `#500 `_: fixes the + ensemble builder to correctly evaluate model scores with any metric. + See this `PR `_. +* Fixes `#482 `_ and + `#491 `_: users can now set up + a custom logger configuration by passing a dictionary created from a YAML file to + ``logging_config``. +* Fixes `#566 `_: ensembles are now sorted correctly. +* Fixes `#293 `_: Auto-sklearn now checks whether an appropriate + target type was given for classification and regression before calling ``fit()``. +* Travis CI now runs flake8 to enforce the PEP 8 style guide, and documentation is deployed + via Travis CI instead of Circle CI. + +Contributors +************ + +* Matthias Feurer +* Manuel Streuhofer +* Taneli Mielikäinen +* Katharina Eggensperger +* Jin Woo Ahn + Version 0.4.0 ============= From 8aae9d62d97b0c91aed7c0c2410f552e51120d01 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 9 Nov 2018 17:07:18 +0100 Subject: [PATCH 45/45] Update version information for 0.4.1 --- autosklearn/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py index 88f7ebca07..4bfd2e72b7 100644 --- a/autosklearn/__version__.py +++ b/autosklearn/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.4.0" +__version__ = "0.4.1"
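A closing note on the ensemble-sorting fix (PATCH 29/45, issue #566) and the matching test update in PATCH 35/45: zero-padding the index with str(idx).zfill(10) makes the lexicographic order of the saved .ensemble file names agree with their numeric order. A minimal sketch of the behaviour this fixes, runnable in any Python 3 interpreter:

    >>> sorted(['100.9.ensemble', '100.10.ensemble'])  # unpadded index sorts wrongly
    ['100.10.ensemble', '100.9.ensemble']
    >>> sorted('100.%s.ensemble' % str(idx).zfill(10) for idx in (9, 10))
    ['100.0000000009.ensemble', '100.0000000010.ensemble']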