diff --git a/.travis.yml b/.travis.yml
index f299f314f9..9faa0ae8f8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,9 +16,14 @@ matrix:
     - os: linux
       env: DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
     - os: linux
-      env: DISTRIB="conda" PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+      env: DISTRIB="conda" PYTHON_VERSION="3.6" DOCPUSH="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
     - os: linux
       env: DISTRIB="conda" PYTHON_VERSION="3.6" EXAMPLES="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+    - os: linux
+      env: DISTRIB="conda" PYTHON_VERSION="3.7" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+    - os: linux
+      env: DISTRIB="conda" PYTHON_VERSION="3.6" RUN_FLAKE8="true" SKIP_TESTS="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+
 
   # Temporarily disabling OSX builds because they take too long
   # Set language to generic to not break travis-ci
@@ -58,17 +63,29 @@ before_install:
 install:
   # Install general requirements the way setup.py suggests
   - pip install pep8 codecov
+  # Temporarily pin the numpy version for travis-ci
+  - pip install "numpy<=1.14.5"
   - cat requirements.txt | xargs -n 1 -L 1 pip install
   # Install openml dependency for metadata generation unittest
-  - pip install xmltodict requests
-  - pip install git+https://github.com/renatopp/liac-arff
+  - pip install xmltodict requests liac-arff
   - pip install git+https://github.com/openml/openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1 --no-deps
   - mkdir ~/.openml
   - echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config
+  - pip install flake8
   # Debug output to know all exact package versions!
  - pip freeze
  - python setup.py install
-
+
 script: bash ci_scripts/test.sh
 
-after_success: source ci_scripts/success.sh
+after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result"
+deploy:
+  provider: pages
+  skip-cleanup: true
+  github-token: $GITHUB_TOKEN # set in the settings page of the repository
+  keep-history: true
+  committer-from-gh: true
+  on:
+    all_branches: true
+    condition: $doc_result = "success"
+  local_dir: doc/$TRAVIS_BRANCH
diff --git a/README.md b/README.md
index d0de400572..dd53e9f140 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement
 
 Find the documentation [here](http://automl.github.io/auto-sklearn/)
 
-Status for master branch:
+Status for master branch
 
 [![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn)
 [![Code Health](https://landscape.io/github/automl/auto-sklearn/master/landscape.png)](https://landscape.io/github/automl/auto-sklearn/master)
diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py
index 88f7ebca07..4bfd2e72b7 100644
--- a/autosklearn/__version__.py
+++ b/autosklearn/__version__.py
@@ -1,4 +1,4 @@
 """Version information."""
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.4.0"
+__version__ = "0.4.1"
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index d72cbc8920..91d66d4bd2 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -82,6 +82,7 @@ def __init__(self,
                  disable_evaluator_output=False,
                  get_smac_object_callback=None,
                  smac_scenario_args=None,
+                 logging_config=None,
                  ):
         super(AutoML, self).__init__()
         self._backend = backend
@@ -110,6 +111,7 @@ def __init__(self,
         self._disable_evaluator_output = disable_evaluator_output
         self._get_smac_object_callback = get_smac_object_callback
         self._smac_scenario_args = smac_scenario_args
+        self.logging_config = logging_config
 
         self._datamanager = None
         self._dataset_name = None
@@ -235,7 +237,10 @@ def fit_on_datamanager(self, datamanager, metric):
 
     def _get_logger(self, name):
         logger_name = 'AutoML(%d):%s' % (self._seed, name)
-        setup_logger(os.path.join(self._backend.temporary_directory, '%s.log' % str(logger_name)))
+        setup_logger(os.path.join(self._backend.temporary_directory,
+                                  '%s.log' % str(logger_name)),
+                     self.logging_config,
+                     )
         return get_logger(logger_name)
 
     @staticmethod
diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py
index c3851cab13..6d96e30562 100644
--- a/autosklearn/ensemble_builder.py
+++ b/autosklearn/ensemble_builder.py
@@ -171,11 +171,8 @@ def main(self):
         while True:
 
             #maximal number of iterations
-            if (
-                self.max_iterations is not None
-                and self.max_iterations > 0
-                and iteration >= self.max_iterations
-            ):
+            if (self.max_iterations is not None
+                    and 0 < self.max_iterations <= iteration):
                 self.logger.info("Terminate ensemble building because of max iterations: %d of %d",
                                  self.max_iterations,
                                  iteration)
@@ -300,7 +297,7 @@ def read_ensemble_preds(self):
                     Y_TEST: None,
                     # Lazy keys so far:
                     # 0 - not loaded
-                    # 1 - loaded and ind memory
+                    # 1 - loaded and in memory
                     # 2 - loaded but dropped again
                     "loaded": 0
                 }
@@ -372,14 +369,18 @@ def get_n_best_preds(self):
             ],
             key=lambda x: x[1],
         )))
-        # remove all that are at most as good as random, cannot assume a
-        # minimum number here because all kinds of metric can be used
-        sorted_keys = filter(lambda x: x[1] > 0.001, sorted_keys)
+        # remove all that
are at most as good as random + # note: dummy model must have run_id=1 (there is not run_id=0) + dummy_score = list(filter(lambda x: x[2] == 1, sorted_keys))[0] + self.logger.debug("Use %f as dummy score" % + dummy_score[1]) + sorted_keys = filter(lambda x: x[1] > dummy_score[1], sorted_keys) # remove Dummy Classifier sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys)) if not sorted_keys: - # no model left; try to use dummy classifier (num_run==0) - self.logger.warning("No models better than random - using Dummy Classifier!") + # no model left; try to use dummy score (num_run==0) + self.logger.warning("No models better than random - " + "using Dummy Score!") sorted_keys = [ (k, v["ens_score"], v["num_run"]) for k, v in self.read_preds.items() if v["seed"] == self.seed and v["num_run"] == 1 diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 910c68f8e8..6adedd0f56 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -3,6 +3,7 @@ from autosklearn.automl import AutoMLClassifier, AutoMLRegressor from autosklearn.util.backend import create +from sklearn.utils.multiclass import type_of_target class AutoSklearnEstimator(BaseEstimator): @@ -28,7 +29,9 @@ def __init__(self, shared_mode=False, disable_evaluator_output=False, get_smac_object_callback=None, - smac_scenario_args=None): + smac_scenario_args=None, + logging_config=None, + ): """ Parameters ---------- @@ -168,6 +171,11 @@ def __init__(self, This is an advanced feature. Use only if you are familiar with `SMAC `_. + logging_config : dict, optional (None) + dictionary object specifying the logger configuration. If None, + the default logging.yaml file is used, which can be found in + the directory ``util/logging.yaml`` relative to the installation. + Attributes ---------- @@ -199,6 +207,7 @@ def __init__(self, self.disable_evaluator_output = disable_evaluator_output self.get_smac_object_callback = get_smac_object_callback self.smac_scenario_args = smac_scenario_args + self.logging_config = logging_config self._automl = None super().__init__() @@ -238,7 +247,8 @@ def build_automl(self): shared_mode=self.shared_mode, get_smac_object_callback=self.get_smac_object_callback, disable_evaluator_output=self.disable_evaluator_output, - smac_scenario_args=self.smac_scenario_args + smac_scenario_args=self.smac_scenario_args, + logging_config=self.logging_config, ) return automl @@ -456,6 +466,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: binary, multiclass, multilabel-indicator. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'continuous', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("classification with data of type %s is" + " not supported" % target_type) + super().fit( X=X, y=y, @@ -559,6 +581,18 @@ def fit(self, X, y, self """ + # Before running anything else, first check that the + # type of data is compatible with auto-sklearn. Legal target + # types are: continuous, binary, multiclass. + target_type = type_of_target(y) + if target_type in ['multiclass-multioutput', + 'multilabel-indicator', + 'continuous-multioutput', + 'unknown', + ]: + raise ValueError("regression with data of type %s is not" + " supported" % target_type) + # Fit is supposed to be idempotent! # But not if we use share_mode. 
super().fit( diff --git a/autosklearn/metalearning/metalearning/clustering/gmeans.py b/autosklearn/metalearning/metalearning/clustering/gmeans.py index 23363c6248..704ecc05a7 100644 --- a/autosklearn/metalearning/metalearning/clustering/gmeans.py +++ b/autosklearn/metalearning/metalearning/clustering/gmeans.py @@ -69,7 +69,7 @@ def fit(self, X): break # Refinement - KMeans = sklearn.cluster.KMeans(n_clusters=1, n_init=1, + KMeans = sklearn.cluster.KMeans(n_clusters=len(cluster_centers), n_init=1, init=np.array(cluster_centers), random_state=self.random_state) KMeans.fit(X) diff --git a/autosklearn/pipeline/create_searchspace_util.py b/autosklearn/pipeline/create_searchspace_util.py index 5ebb7d1246..2abfa5c172 100644 --- a/autosklearn/pipeline/create_searchspace_util.py +++ b/autosklearn/pipeline/create_searchspace_util.py @@ -117,8 +117,8 @@ def find_active_choices(matches, node, node_idx, dataset_properties, \ choices = [] for c_idx, component in enumerate(available_components): - slices = [slice(None) if idx != node_idx else slice(c_idx, c_idx+1) - for idx in range(len(matches.shape))] + slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx+1) + for idx in range(len(matches.shape))) if np.sum(matches[slices]) > 0: choices.append(component) @@ -200,10 +200,10 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, for product in itertools.product(*num_node_choices): for node_idx, choice_idx in enumerate(product): node_idx += start_idx - slices_ = [ + slices_ = tuple( slice(None) if idx != node_idx else slice(choice_idx, choice_idx + 1) for idx in - range(len(matches.shape))] + range(len(matches.shape))) if np.sum(matches[slices_]) == 0: skip_array[product] = 1 @@ -212,13 +212,11 @@ def add_forbidden(conf_space, pipeline, matches, dataset_properties, if skip_array[product]: continue - slices = [] - for idx in range(len(matches.shape)): - if idx not in indices: - slices.append(slice(None)) - else: - slices.append(slice(product[idx - start_idx], - product[idx - start_idx] + 1)) + slices = tuple( + slice(None) if idx not in indices else + slice(product[idx - start_idx], + product[idx - start_idx] + 1) for idx in + range(len(matches.shape))) # This prints the affected nodes # print [node_choice_names[i][product[i]] diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py index 1f7ea70f7f..c59adbde5c 100644 --- a/autosklearn/util/backend.py +++ b/autosklearn/util/backend.py @@ -71,11 +71,17 @@ def _prepare_directories(self, temporary_directory, output_directory): self.__temporary_directory = temporary_directory \ if temporary_directory \ - else '/tmp/autosklearn_tmp_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_tmp_%d_%d' % (pid, random_number) + ) self.__output_directory = output_directory \ if output_directory \ - else '/tmp/autosklearn_output_%d_%d' % (pid, random_number) + else os.path.join( + tempfile.gettempdir(), + 'autosklearn_output_%d_%d' % (pid, random_number) + ) def create_directories(self): if self.shared_mode: @@ -401,9 +407,10 @@ def save_ensemble(self, ensemble, idx, seed): except Exception: pass - filepath = os.path.join(self.get_ensemble_dir(), - '%s.%s.ensemble' % (str(seed), - str(idx))) + filepath = os.path.join( + self.get_ensemble_dir(), + '%s.%s.ensemble' % (str(seed), str(idx).zfill(10)) + ) with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( filepath), delete=False) as fh: pickle.dump(ensemble, fh) @@ -460,4 +467,4 @@ def write_txt_file(self, filepath, data, name): 
self.logger.debug('Created %s file %s' % (name, filepath)) else: self.logger.debug('%s file already present %s' % - (name, filepath)) \ No newline at end of file + (name, filepath)) diff --git a/autosklearn/util/logging_.py b/autosklearn/util/logging_.py index cf3f40586d..ea074a1f3f 100644 --- a/autosklearn/util/logging_.py +++ b/autosklearn/util/logging_.py @@ -7,18 +7,23 @@ import yaml -def setup_logger(output_file=None): - with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), - 'r') as fh: - config = yaml.load(fh) - if output_file is not None: - config['handlers']['file_handler']['filename'] = output_file - logging.config.dictConfig(config) +def setup_logger(output_file=None, logging_config=None): + # logging_config must be a dictionary object specifying the configuration + # for the loggers to be used in auto-sklearn. + if logging_config is not None: + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) + else: + with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), + 'r') as fh: + logging_config = yaml.safe_load(fh) + if output_file is not None: + logging_config['handlers']['file_handler']['filename'] = output_file + logging.config.dictConfig(logging_config) def _create_logger(name): - logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %(' - 'message)s', datefmt='%H:%M:%S') return logging.getLogger(name) diff --git a/ci_scripts/circle_install.sh b/ci_scripts/circle_install.sh deleted file mode 100644 index 195ad87d54..0000000000 --- a/ci_scripts/circle_install.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!bin/bash - -# on circle ci, each command run with it's own execution context so we have to -# activate the conda testenv on a per command basis. That's why we put calls to -# python (conda) in a dedicated bash script and we activate the conda testenv -# here. -source activate testenv - -export CC=`which gcc` -# install documentation building dependencies -pip install --upgrade numpy -pip install --upgrade matplotlib setuptools nose coverage sphinx==1.5.5 sphinx_bootstrap_theme numpydoc sphinx_gallery pillow -# And finally, all other dependencies -cat requirements.txt | xargs -n 1 -L 1 pip install - -python setup.py clean -python setup.py develop - -# pipefail is necessary to propagate exit codes -set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh new file mode 100644 index 0000000000..0a794627d8 --- /dev/null +++ b/ci_scripts/create_doc.sh @@ -0,0 +1,61 @@ +# This script is mostly adopted from https://github.com/openml/openml-python/blob/develop/ci_scripts/create_doc.sh + +set -euo pipefail + +# Check if DOCPUSH is set +if ! [[ -z ${DOCPUSH+x} ]]; then + + if [[ "$DOCPUSH" == "true" ]]; then + + # install documentation building dependencies + pip install --upgrade matplotlib seaborn setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert mock + + # $1 is the branch name + # $2 is the global variable where we set the script status + + if ! 
{ [ $1 = "master" ] || [ $1 = "development" ]; }; then + { echo "Not one of the allowed branches"; exit 0; } + fi + + # delete any previous documentation folder + if [ -d doc/$1 ]; then + rm -rf doc/$1 + fi + + # create the documentation + cd doc && make html 2>&1 + + # create directory with branch name + # the documentation for dev/stable from git will be stored here + mkdir $1 + + # get previous documentation from github + git clone https://github.com/automl/auto-sklearn.git --branch gh-pages --single-branch + + # copy previous documentation + cp -r auto-sklearn/. $1 + rm -rf auto-sklearn + + # if the documentation for the branch exists, remove it + if [ -d $1/$1 ]; then + rm -rf $1/$1 + fi + + # copy the updated documentation for this branch + mkdir $1/$1 + cp -r build/html/. $1/$1 + + # takes a variable name as an argument and assigns the script outcome to a + # variable with the given name. If it got this far, the script was successful + function set_return() { + # $1 is the variable where we save the script outcome + local __result=$1 + local status='success' + eval $__result="'$status'" + } + + set_return "$2" + fi +fi +# Workaround for travis failure +set +u diff --git a/ci_scripts/flake8_diff.sh b/ci_scripts/flake8_diff.sh new file mode 100644 index 0000000000..39701d5248 --- /dev/null +++ b/ci_scripts/flake8_diff.sh @@ -0,0 +1,141 @@ +#!/bin/bash + +# This script is mostly taken from https://github.com/scikit-learn/scikit-learn/blob/master/build_tools/travis/flake8_diff.sh + +# This script is used in Travis to check that PRs do not add obvious +# flake8 violations. It relies on two things: +# - find common ancestor between branch and +# automl/auto-sklearn remote +# - run flake8 --diff on the diff between the branch and the common +# ancestor +# +# Additional features: +# - the line numbers in Travis match the local branch on the PR +# author machine. +# - ./build_tools/travis/flake8_diff.sh can be run locally for quick +# turn-around + +set -e +# pipefail is necessary to propagate exit codes +set -o pipefail + +PROJECT=automl/auto-sklearn +PROJECT_URL=https://github.com/$PROJECT.git + +# Find the remote with the project name (upstream in most cases) +REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') + +# Add a temporary remote if needed. For example this is necessary when +# Travis is configured to run in a fork. In this case 'origin' is the +# fork and not the reference repo we want to diff against. +if [[ -z "$REMOTE" ]]; then + TMP_REMOTE=tmp_reference_upstream + REMOTE=$TMP_REMOTE + git remote add $REMOTE $PROJECT_URL +fi + +echo "Remotes:" +echo '--------------------------------------------------------------------------------' +git remote --verbose + +# Travis does the git clone with a limited depth. 
+# This may not be enough to find the common ancestor with +# $REMOTE/development so we unshallow the git checkout +if [[ -a .git/shallow ]]; then + echo -e '\nTrying to unshallow the repo:' + echo '--------------------------------------------------------------------------------' + git fetch --unshallow +fi + +if [[ "$TRAVIS" == "true" ]]; then + if [[ "$TRAVIS_PULL_REQUEST" == "false" ]] + then + # In main repo, using TRAVIS_COMMIT_RANGE to test the commits + # that were pushed into a branch + if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then + if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then + echo "New branch, no commit range from Travis so passing this test by convention" + exit 0 + fi + COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + fi + else + # We want to fetch the code as it is in the PR branch and not + # the result of the merge into development. This way line numbers + # reported by Travis will match with the local code. + LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + # In Travis the PR target is always origin + git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + fi +fi + +# If not using the commit range from Travis we need to find the common +# ancestor between $LOCAL_BRANCH_REF and $REMOTE/development +if [[ -z "$COMMIT_RANGE" ]]; then + if [[ -z "$LOCAL_BRANCH_REF" ]]; then + LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD) + fi + echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" + echo '--------------------------------------------------------------------------------' + git --no-pager log -2 $LOCAL_BRANCH_REF + + REMOTE_DEVELOPMENT_REF="$REMOTE/development" + # Make sure that $REMOTE_DEVELOPMENT_REF is a valid reference + echo -e "\nFetching $REMOTE_DEVELOPMENT_REF" + echo '--------------------------------------------------------------------------------' + git fetch $REMOTE development:refs/remotes/$REMOTE_DEVELOPMENT_REF + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) + REMOTE_DEVELOPMENT_SHORT_HASH=$(git rev-parse --short $REMOTE_DEVELOPMENT_REF) + + COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_DEVELOPMENT_REF) || \ + echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_DEVELOPMENT_REF -q)" + + if [ -z "$COMMIT" ]; then + exit 1 + fi + + COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + + echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ + "and $REMOTE_DEVELOPMENT_REF ($REMOTE_DEVELOPMENT_SHORT_HASH) is $COMMIT_SHORT_HASH:" + echo '--------------------------------------------------------------------------------' + git --no-pager show --no-patch $COMMIT_SHORT_HASH + + COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" + + if [[ -n "$TMP_REMOTE" ]]; then + git remote remove $TMP_REMOTE + fi + +else + echo "Got the commit range from Travis: $COMMIT_RANGE" +fi + +echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ + "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" +echo '--------------------------------------------------------------------------------' + +# We need the following command to exit with 0 hence the echo in case +# there is no match +MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")" + +check_files() { + files="$1" + shift + options="$*" + if [ -n "$files" ]; then + # Conservative approach: diff without context (--unified=0) so that code + # that was not changed does not create failures + git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + fi +} + +if [[ "$MODIFIED_FILES" == 
"no_match" ]]; then + echo "No file has been modified" +else + + check_files "$(echo "$MODIFIED_FILES" | grep -v ^examples)" + check_files "$(echo "$MODIFIED_FILES" | grep ^examples)" \ + --config ./examples/.flake8 +fi +echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/push_doc.sh b/ci_scripts/push_doc.sh deleted file mode 100644 index 3fa944b64a..0000000000 --- a/ci_scripts/push_doc.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# This script is meant to be called in the "deploy" step defined in -# circle.yml. See https://circleci.com/docs/ for more details. -# The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. - -if [ ! -z "$1" ] - then DOC_FOLDER=$1 -fi - -MSG="Pushing the docs for revision for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1, folder: $DOC_FOLDER" - -cd $HOME - -# Clone the docs repo if it isnt already there -if [ ! -d $DOC_REPO ]; - then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; -fi - -# Copy the build docs to a temporary folder -rm -rf tmp -mkdir tmp -cp -R $HOME/$DOC_REPO/doc/build/html/* ./tmp/ - -cd $DOC_REPO -git branch gh-pages -git checkout -f gh-pages -git reset --hard origin/gh-pages -git clean -dfx -git rm -rf $HOME/$DOC_REPO/$DOC_FOLDER && rm -rf $HOME/$DOC_REPO/$DOC_FOLDER - -# Copy the new build docs -mkdir $DOC_FOLDER -cp -R $HOME/tmp/* ./$DOC_FOLDER/ - -git config --global user.email $EMAIL -git config --global user.name $USERNAME -git add -f ./$DOC_FOLDER/ -git commit -m "$MSG" -git push -f origin gh-pages - -echo $MSG \ No newline at end of file diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh old mode 100644 new mode 100755 index 0026279285..3d9551375e --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -1,22 +1,33 @@ set -e -# Get into a temp directory to run test from the installed scikit learn and -# check if we do not leave artifacts -mkdir -p $TEST_DIR +run_tests() { + # Get into a temp directory to run test from the installed scikit learn and + # check if we do not leave artifacts + mkdir -p $TEST_DIR -cwd=`pwd` -examples_dir=$cwd/examples -test_dir=$cwd/test/ + cwd=`pwd` + examples_dir=$cwd/examples + test_dir=$cwd/test/ -cd $TEST_DIR + cd $TEST_DIR + if [[ "$COVERAGE" == "true" ]]; then + nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir + elif [[ "$EXAMPLES" == "true" ]]; then + for example in `find $examples_dir -name '*.py'` + do + python $example + done + else + nosetests --no-path-adjustment -sv $test_dir + fi +} -if [[ "$COVERAGE" == "true" ]]; then - nosetests --no-path-adjustment -sv --with-coverage --cover-package=$MODULE $test_dir -elif [[ "$EXAMPLES" == "true" ]]; then - for example in `find $examples_dir -name '*.py'` - do - python $example - done -else - nosetests --no-path-adjustment -sv $test_dir +if [[ "$RUN_FLAKE8" ]]; then + source ci_scripts/flake8_diff.sh fi + +if [[ "$SKIP_TESTS" != "true" ]]; then + run_tests +fi + + diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 8ff09eb573..0000000000 --- a/circle.yml +++ /dev/null @@ -1,62 +0,0 @@ -machine: - environment: - PATH: /home/ubuntu/miniconda/bin:$PATH - - # The github organization or username of the repository which hosts the - # project and documentation. 
- USERNAME: "automl" - - # The repository where the documentation will be hosted - DOC_REPO: "auto-sklearn" - - # The base URL for the Github page where the documentation will be hosted - DOC_URL: "" - - # The email is to be used for commits in the Github Page - EMAIL: "feurerm@informatik.uni-freiburg.de" - -dependencies: - - # Various dependencies - pre: - # Get rid of existing virtualenvs on circle ci as they conflict with conda. - # From nilearn: https://github.com/nilearn/nilearn/blob/master/circle.yml - - cd && rm -rf ~/.pyenv && rm -rf ~/virtualenvs - # from scikit-learn contrib - - sudo -E apt-get -yq remove texlive-binaries --purge - - sudo -E apt-get -yq update - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra - # Other stuff... - - sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install build-essential - # Conda installation - - wget https://repo.continuum.io/miniconda/Miniconda3-4.3.21-Linux-x86_64.sh -O ~/miniconda.sh - - bash ~/miniconda.sh -b -p $HOME/miniconda - - conda create -n testenv --yes python=3.6 pip wheel nose gcc swig - - # The --user is needed to let sphinx see the source and the binaries - # The pipefail is requested to propagate exit code - override: - - source ci_scripts/circle_install.sh -test: - # Grep error on the documentation - override: - - cat ~/log.txt && if grep -q "Traceback (most recent call last):" ~/log.txt; then false; else true; fi -deployment: - master: - branch: master - commands: - - bash ci_scripts/push_doc.sh 'stable' - development: - branch: development - commands: - - bash ci_scripts/push_doc.sh 'dev' -general: - # Open the doc to the API - artifacts: - - "doc/_build/html" - - "~/log.txt" - # Restric the build to the branch master only - #branches: - # only: - # - development - # - master diff --git a/doc/releases.rst b/doc/releases.rst index ab35f83634..d4d5beea23 100644 --- a/doc/releases.rst +++ b/doc/releases.rst @@ -11,6 +11,38 @@ Releases ======== +Version 0.4.1 +============= + +* Added documentation on `how to extend Auto-sklearn `_ + with custom classifier, regressor, and preprocessor. +* Auto-sklearn now requires numpy version between 1.9.0 and 1.14.5, due to higher versions + causing travis failure. +* Examples now use ``sklearn.datasets.load_breast_cancer()`` instead of ``sklearn.datasets.load_digits()`` + to reduce memory usage for travis build. +* Fixes future warnings on non-tuple sequence for indexing. +* Fixes `#500 `_: fixes + ensemble builder to correctly evaluate model score with any metrics. + See this `PR `_. +* Fixes `#482 `_ and + `#491 `_: Users can now set up + custom logger configuration by passing a dictionary created by a yaml file to + ``logging_config``. +* Fixes `#566 `_: ensembles are now sorted correctly. +* Fixes `#293 `_: Auto-sklearn checks if appropriate + target type was given for classification and regression before call to ``fit()``. +* Travis-ci now runs flake8 to enforce pep8 style guide, and uses travis-ci instead of circle-ci + for deployment. 
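As a quick illustration of the ``logging_config`` option mentioned in the release notes above (a minimal sketch, not part of the patch: the path ``my_logging.yaml`` is a placeholder, and the file is expected to follow the same dictConfig layout as the bundled ``autosklearn/util/logging.yaml``):

    import yaml
    import autosklearn.classification

    # Read a logger configuration into a plain dictionary and hand it to the
    # estimator; passing logging_config=None keeps the packaged default.
    with open('my_logging.yaml') as fh:
        logging_config = yaml.safe_load(fh)

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60,
        logging_config=logging_config,
    )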
+ +Contributors +************ + +* Matthias Feurer +* Manuel Streuhofer +* Taneli Mielikäinen +* Katharina Eggensperger +* Jin Woo Ahn + Version 0.4.0 ============= diff --git a/examples/example_crossvalidation.py b/examples/example_crossvalidation.py index 85530b591b..52e3050f7b 100644 --- a/examples/example_crossvalidation.py +++ b/examples/example_crossvalidation.py @@ -21,7 +21,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -37,7 +37,7 @@ def main(): # fit() changes the data in place, but refit needs the original data. We # therefore copy the data. In practice, one should reload the data - automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits') + automl.fit(X_train.copy(), y_train.copy(), dataset_name='breast_cancer') # During fit(), models are fit on individual cross-validation folds. To use # all available data, we call refit() which trains all models in the # final ensemble on the whole dataset. diff --git a/examples/example_eips.py b/examples/example_eips.py index eef3c6cf11..db2a434092 100644 --- a/examples/example_eips.py +++ b/examples/example_eips.py @@ -69,7 +69,7 @@ def get_eips_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -81,7 +81,7 @@ def main(): get_smac_object_callback=get_eips_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn via ROAR. print(automl.show_models()) diff --git a/examples/example_extending_classification.py b/examples/example_extending_classification.py new file mode 100644 index 0000000000..9f4ea4eedb --- /dev/null +++ b/examples/example_extending_classification.py @@ -0,0 +1,133 @@ +""" +==================================================================== +Extending Auto-Sklearn with Classification Component +==================================================================== + +The following example demonstrates how to create a new classification +component for using in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, \ + UniformIntegerHyperparameter, UniformFloatHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.pipeline.components.classification +from autosklearn.pipeline.components.base \ + import AutoSklearnClassificationAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA, \ + PREDICTIONS + + +# Create MLP classifier component for auto-sklearn. 
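+# The component subclasses AutoSklearnClassificationAlgorithm and supplies the
+# pieces auto-sklearn needs to place it in a pipeline: fit(X, y), predict(X)
+# and predict_proba(X) wrapping sklearn's MLPClassifier, get_properties()
+# describing the kinds of data it handles, and get_hyperparameter_search_space()
+# declaring the tunable hyperparameters. It is registered below through
+# add_classifier() and then selected via include_estimators=['MLPClassifier'].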
+class MLPClassifier(AutoSklearnClassificationAlgorithm): + def __init__(self, + hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + solver, + random_state=None, + ): + self.hidden_layer_depth = hidden_layer_depth + self.num_nodes_per_layer = num_nodes_per_layer + self.activation = activation + self.alpha = alpha + self.solver = solver + self.random_state = random_state + + def fit(self, X, y): + self.num_nodes_per_layer = int(self.num_nodes_per_layer) + self.hidden_layer_depth = int(self.hidden_layer_depth) + self.alpha = float(self.alpha) + + from sklearn.neural_network import MLPClassifier + hidden_layer_sizes = tuple(self.num_nodes_per_layer \ + for i in range(self.hidden_layer_depth)) + + self.estimator = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, + activation=self.activation, + alpha=self.alpha, + solver=self.solver, + random_state=self.random_state, + ) + self.estimator.fit(X, y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError() + return self.estimator.predict(X) + + def predict_proba(self, X): + if self.estimator is None: + raise NotImplementedError() + return self.estimator.predict_proba(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname':'MLP Classifier', + 'name': 'MLP CLassifier', + 'handles_regression': False, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': False, + 'is_deterministic': False, + # Both input and output must be tuple(iterable) + 'input': [DENSE, SIGNED_DATA, UNSIGNED_DATA], + 'output': [PREDICTIONS] + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + hidden_layer_depth = UniformIntegerHyperparameter( + name="hidden_layer_depth", lower=1, upper=3, default_value=1 + ) + num_nodes_per_layer = UniformIntegerHyperparameter( + name="num_nodes_per_layer", lower=16, upper=216, default_value=32 + ) + activation = CategoricalHyperparameter( + name="activation", choices=['identity', 'logistic', 'tanh', 'relu'], + default_value='relu' + ) + alpha = UniformFloatHyperparameter( + name="alpha", lower=0.0001, upper=1.0, default_value=0.0001 + ) + solver = CategoricalHyperparameter( + name="solver", choices=['lbfgs', 'sgd', 'adam'], default_value='adam' + ) + cs.add_hyperparameters([hidden_layer_depth, + num_nodes_per_layer, + activation, + alpha, + solver, + ]) + return cs + + +# Add MLP classifier component to auto-sklearn. +autosklearn.pipeline.components.classification.add_classifier(MLPClassifier) +cs = MLPClassifier.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split +X, y = load_breast_cancer(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit MLP classifier to the data. +clf = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=20, + per_run_time_limit=10, + include_estimators=['MLPClassifier'], +) +clf.fit(X_train, y_train) + +# Print test accuracy and statistics. 
+y_pred = clf.predict(X_test) +print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test)) +print(clf.sprint_statistics()) +print(clf.show_models()) \ No newline at end of file diff --git a/examples/example_extending_preprocessor.py b/examples/example_extending_preprocessor.py new file mode 100644 index 0000000000..e416827408 --- /dev/null +++ b/examples/example_extending_preprocessor.py @@ -0,0 +1,111 @@ +""" +==================================================================== +Extending Auto-Sklearn with Preprocessor Component +==================================================================== + +The following example demonstrates how to create a wrapper around the linear +discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor +in auto-sklearn. +""" + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ + UniformIntegerHyperparameter, CategoricalHyperparameter + +import sklearn.metrics +import autosklearn.classification +import autosklearn.metrics +import autosklearn.pipeline.components.feature_preprocessing +from autosklearn.pipeline.components.base \ + import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, \ + UNSIGNED_DATA + + +# Create LDA component for auto-sklearn. +class LDA(AutoSklearnPreprocessingAlgorithm): + def __init__(self, shrinkage, solver, n_components, tol, random_state=None): + self.solver = solver + self.shrinkage = shrinkage + self.n_components = n_components + self.tol = tol + self.random_state = random_state + self.preprocessor = None + + def fit(self, X, y=None): + self.shrinkage = float(self.shrinkage) + self.n_components = int(self.n_components) + self.tol = float(self.tol) + + import sklearn.discriminant_analysis + self.preprocessor = \ + sklearn.discriminant_analysis.LinearDiscriminantAnalysis( + shrinkage=self.shrinkage, + solver=self.solver, + n_components=self.n_components, + tol=self.tol, + ) + self.preprocessor.fit(X, y) + return self + + def transform(self, X): + if self.preprocessor is None: + raise NotImplementedError() + return self.preprocessor.transform(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'LDA', + 'name': 'Linear Discriminant Analysis', + 'handles_regression': False, + 'handles_classification': True, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (DENSE, UNSIGNED_DATA, SIGNED_DATA)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + solver = CategoricalHyperparameter( + name="solver", choices=['svd','lsqr','eigen'], default_value='svd' + ) + shrinkage = UniformFloatHyperparameter( + name="shrinkage", lower=0.0, upper=1.0, default_value=0.5 + ) + n_components = UniformIntegerHyperparameter( + name="n_components", lower=1, upper=29, default_value=10 + ) + tol = UniformFloatHyperparameter( + name="tol", lower=0.0001, upper=1, default_value=0.0001 + ) + cs.add_hyperparameters([solver, shrinkage, n_components, tol]) + return cs + + +# Add LDA component to auto-sklearn. +autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA) + +# Create dataset. 
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+# Configuration space.
+cs = LDA.get_hyperparameter_search_space()
+print(cs)
+
+# Fit the model using LDA as preprocessor.
+clf = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=30,
+    include_preprocessors=['LDA'],
+)
+clf.fit(X_train, y_train)
+
+# Print prediction score and statistics.
+y_pred = clf.predict(X_test)
+print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
+print(clf.sprint_statistics())
+print(clf.show_models())
\ No newline at end of file
diff --git a/examples/example_extending_regression.py b/examples/example_extending_regression.py
new file mode 100644
index 0000000000..7b9ad21239
--- /dev/null
+++ b/examples/example_extending_regression.py
@@ -0,0 +1,110 @@
+"""
+====================================================================
+Extending Auto-Sklearn with Regression Component
+====================================================================
+
+The following example demonstrates how to create a new regression
+component for use in auto-sklearn.
+"""
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
+    UniformIntegerHyperparameter, CategoricalHyperparameter
+
+import sklearn.metrics
+import autosklearn.regression
+import autosklearn.pipeline.components.regression
+from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
+from autosklearn.pipeline.constants import SPARSE, DENSE, \
+    SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS
+
+
+# Implement kernel ridge regression component for auto-sklearn.
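+# As in the classification example above, the component subclasses an
+# auto-sklearn base class (AutoSklearnRegressionAlgorithm) and implements
+# fit(X, y), predict(X), get_properties() and get_hyperparameter_search_space();
+# a regressor needs no predict_proba(). It is registered below through
+# add_regressor() and selected via include_estimators=['KernelRidgeRegression'].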
+class KernelRidgeRegression(AutoSklearnRegressionAlgorithm): + def __init__(self, alpha, kernel, gamma, degree, random_state=None): + self.alpha = alpha + self.kernel = kernel + self.gamma = gamma + self.degree = degree + self.random_state = random_state + self.estimator = None + + def fit(self, X, y): + self.alpha = float(self.alpha) + self.gamma = float(self.gamma) + self.degree = int(self.degree) + + import sklearn.kernel_ridge + self.estimator = sklearn.kernel_ridge.KernelRidge(alpha=self.alpha, + kernel=self.kernel, + gamma=self.gamma, + degree=self.degree, + ) + self.estimator.fit(X, y) + return self + + def predict(self, X): + if self.estimator is None: + raise NotImplementedError + return self.estimator.predict(X) + + @staticmethod + def get_properties(dataset_properties=None): + return {'shortname': 'KRR', + 'name': 'Kernel Ridge Regression', + 'handles_regression': True, + 'handles_classification': False, + 'handles_multiclass': False, + 'handles_multilabel': False, + 'is_deterministic': True, + 'input': (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA), + 'output': (PREDICTIONS,)} + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + alpha = UniformFloatHyperparameter( + name='alpha', lower=10 ** -5, upper=1, log=True, default_value=0.1) + kernel = CategoricalHyperparameter( + name='kernel', + choices=['linear', + 'rbf', + 'sigmoid', + 'polynomial', + ], + default_value='linear' + ) + gamma = UniformFloatHyperparameter( + name='gamma', lower=0.00001, upper=1, default_value=0.1, log=True + ) + degree = UniformIntegerHyperparameter( + name='degree', lower=2, upper=5, default_value=3 + ) + cs.add_hyperparameters([alpha, kernel, gamma, degree]) + return cs + + +# Add KRR component to auto-sklearn. +autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression) +cs = KernelRidgeRegression.get_hyperparameter_search_space() +print(cs) + +# Generate data. +from sklearn.datasets import load_diabetes +from sklearn.model_selection import train_test_split +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Fit the model using KRR. +reg = autosklearn.regression.AutoSklearnRegressor( + time_left_for_this_task=30, + per_run_time_limit=10, + include_estimators=['KernelRidgeRegression'], +) +reg.fit(X_train, y_train) + +# Print prediction score and statistics. +y_pred = reg.predict(X_test) +print("r2 score: ", sklearn.metrics.r2_score(y_pred, y_test)) +print(reg.sprint_statistics()) +print(reg.show_models()) \ No newline at end of file diff --git a/examples/example_holdout.py b/examples/example_holdout.py index fe1ff1c7a7..19a438bd87 100644 --- a/examples/example_holdout.py +++ b/examples/example_holdout.py @@ -18,7 +18,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -34,7 +34,7 @@ def main(): resampling_strategy='holdout', resampling_strategy_arguments={'train_size': 0.67} ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # Print the final ensemble constructed by auto-sklearn. 
print(automl.show_models()) diff --git a/examples/example_parallel.py b/examples/example_parallel.py index f5572ab97d..ff599e59d0 100644 --- a/examples/example_parallel.py +++ b/examples/example_parallel.py @@ -78,14 +78,17 @@ def spawn_classifier(seed, dataset_name): def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) processes = [] spawn_classifier = get_spawn_classifier(X_train, y_train) for i in range(4): # set this at roughly half of your cores - p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits')) + p = multiprocessing.Process( + target=spawn_classifier, + args=(i, 'breast_cancer'), + ) p.start() processes.append(p) for p in processes: diff --git a/examples/example_random_search.py b/examples/example_random_search.py index 9d04a39974..2a64b36efb 100644 --- a/examples/example_random_search.py +++ b/examples/example_random_search.py @@ -68,7 +68,7 @@ def get_random_search_object_callback( def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -79,7 +79,7 @@ def main(): get_smac_object_callback=get_roar_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for ROAR.') @@ -99,7 +99,7 @@ def main(): get_smac_object_callback=get_random_search_object_callback, initial_configurations_via_metalearning=0, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') print('#' * 80) print('Results for random search.') diff --git a/examples/example_sequential.py b/examples/example_sequential.py index 06820e7ebe..694ea81404 100644 --- a/examples/example_sequential.py +++ b/examples/example_sequential.py @@ -17,7 +17,7 @@ def main(): - X, y = sklearn.datasets.load_digits(return_X_y=True) + X, y = sklearn.datasets.load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = \ sklearn.model_selection.train_test_split(X, y, random_state=1) @@ -32,7 +32,7 @@ def main(): ensemble_size=0, delete_tmp_folder_after_terminate=False, ) - automl.fit(X_train, y_train, dataset_name='digits') + automl.fit(X_train, y_train, dataset_name='breast_cancer') # This call to fit_ensemble uses all models trained in the previous call # to fit to build an ensemble which can be used with automl.predict() automl.fit_ensemble(y_train, ensemble_size=50) diff --git a/requirements.txt b/requirements.txt index b65911d6cb..4ba8cfc10e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ nose six Cython -numpy>=1.9.0 +numpy>=1.9.0<=1.14.5 scipy>=0.14.1 scikit-learn>=0.19,<0.20 diff --git a/setup.py b/setup.py index 4ae310adf0..68491b6a4f 100644 --- a/setup.py +++ b/setup.py @@ -18,8 +18,8 @@ if sys.version_info < (3, 5): raise ValueError( - 'Unsupported python version %s found. Auto-sklearn requires Python ' - '3.5 or higher.' % sys.version_info + 'Unsupported Python version %d.%d.%d found. Auto-sklearn requires Python ' + '3.5 or higher.' 
% (sys.version_info.major, sys.version_info.minor, sys.version_info.micro) ) @@ -35,7 +35,8 @@ "nose", "six", "Cython", - "numpy>=1.9.0", + # Numpy version of higher than 1.14.5 causes libgcc_s.so.1 error. + "numpy>=1.9.0<=1.14.5", "scipy>=0.14.1", "scikit-learn>=0.19,<0.20", "lockfile", @@ -68,6 +69,6 @@ license='BSD', platforms=['Linux'], classifiers=[], - python_requires='>=3.4.*', + python_requires='>=3.5.*', url='https://automl.github.io/auto-sklearn', ) diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py index 9324566201..f8da452e84 100644 --- a/test/test_automl/test_automl.py +++ b/test/test_automl/test_automl.py @@ -224,7 +224,7 @@ def test_automl_outputs(self): fixture = os.listdir(os.path.join(backend_api.temporary_directory, '.auto-sklearn', 'ensembles')) - self.assertIn('100.0.ensemble', fixture) + self.assertIn('100.0000000000.ensemble', fixture) # Start time start_time_file_path = os.path.join(backend_api.temporary_directory, '.auto-sklearn', diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index e8a5bd0954..064dc73610 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -50,17 +50,25 @@ class EstimatorTest(Base, unittest.TestCase): # self._tearDown(output) def test_pSMAC_wrong_arguments(self): + X = np.zeros((100, 100)) + y = np.zeros((100, )) self.assertRaisesRegexp(ValueError, "If shared_mode == True tmp_folder must not " "be None.", - lambda shared_mode: AutoSklearnClassifier(shared_mode=shared_mode).fit(None, None), + lambda shared_mode: + AutoSklearnClassifier( + shared_mode=shared_mode, + ).fit(X, y), shared_mode=True) self.assertRaisesRegexp(ValueError, "If shared_mode == True output_folder must not " "be None.", lambda shared_mode, tmp_folder: - AutoSklearnClassifier(shared_mode=shared_mode, tmp_folder=tmp_folder).fit(None, None), + AutoSklearnClassifier( + shared_mode=shared_mode, + tmp_folder=tmp_folder, + ).fit(X, y), shared_mode=True, tmp_folder='/tmp/duitaredxtvbedb') @@ -85,6 +93,128 @@ def test_feat_type_wrong_arguments(self): cls.fit, X=X, y=y, feat_type=['Car']*100) + # Mock AutoSklearnEstimator.fit so the test doesn't actually run fit(). + @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.fit') + def test_type_of_target(self, mock_estimator): + # Test that classifier raises error for illegal target types. + X = np.array([[1, 2], + [2, 3], + [3, 4], + [4, 5], + ]) + # Possible target types + y_binary = np.array([0, 0, 1, 1]) + y_continuous = np.array([0.1, 1.3, 2.1, 4.0]) + y_multiclass = np.array([0, 1, 2, 0]) + y_multilabel = np.array([[0, 1], + [1, 1], + [1, 0], + [0, 0], + ]) + y_multiclass_multioutput = np.array([[0, 1], + [1, 3], + [2, 2], + [5, 3], + ]) + y_continuous_multioutput = np.array([[0.1, 1.5], + [1.2, 3.5], + [2.7, 2.7], + [5.5, 3.9], + ]) + + cls = AutoSklearnClassifier() + # Illegal target types for classification: continuous, + # multiclass-multioutput, continuous-multioutput. 
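+        # For reference, sklearn.utils.multiclass.type_of_target maps the
+        # fixtures above to: y_binary -> 'binary', y_multiclass -> 'multiclass',
+        # y_multilabel -> 'multilabel-indicator', y_continuous -> 'continuous',
+        # y_multiclass_multioutput -> 'multiclass-multioutput', and
+        # y_continuous_multioutput -> 'continuous-multioutput'; the last three
+        # are the ones the classifier must reject.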
+ self.assertRaisesRegex(ValueError, + "classification with data of type" + " multiclass-multioutput is not supported", + cls.fit, + X=X, + y=y_multiclass_multioutput, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous is not supported", + cls.fit, + X=X, + y=y_continuous, + ) + + self.assertRaisesRegex(ValueError, + "classification with data of type" + " continuous-multioutput is not supported", + cls.fit, + X=X, + y=y_continuous_multioutput, + ) + + # Legal target types for classification: binary, multiclass, + # multilabel-indicator. + try: + cls.fit(X, y_binary) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "binary targets") + + try: + cls.fit(X, y_multiclass) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multiclass targets") + + try: + cls.fit(X, y_multilabel) + except ValueError: + self.fail("cls.fit() raised ValueError while fitting " + "multilabel-indicator targets") + + # Test that regressor raises error for illegal target types. + reg = AutoSklearnRegressor() + # Illegal target types for regression: multiclass-multioutput, + # multilabel-indicator, continuous-multioutput. + self.assertRaisesRegex(ValueError, + "regression with data of type" + " multiclass-multioutput is not supported", + reg.fit, + X=X, + y=y_multiclass_multioutput, + ) + + self.assertRaisesRegex(ValueError, + "regression with data of type" + " multilabel-indicator is not supported", + reg.fit, + X=X, + y=y_multilabel, + ) + + self.assertRaisesRegex(ValueError, + "regression with data of type" + " continuous-multioutput is not supported", + reg.fit, + X=X, + y=y_continuous_multioutput, + ) + # Legal target types: continuous, binary, multiclass + try: + reg.fit(X, y_continuous) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "continuous targets") + + try: + reg.fit(X, y_binary) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "binary targets") + + try: + reg.fit(X, y_multiclass) + except ValueError: + self.fail("reg.fit() raised ValueError while fitting " + "multiclass targets") + def test_fit_pSMAC(self): tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC') output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC') diff --git a/test/test_util/example_config.yaml b/test/test_util/example_config.yaml new file mode 100644 index 0000000000..7c93e1b846 --- /dev/null +++ b/test/test_util/example_config.yaml @@ -0,0 +1,46 @@ +--- +version: 1 +disable_existing_loggers: False +formatters: + simple: + format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' + +handlers: + console: + class: logging.StreamHandler + level: WARNING + formatter: simple + stream: ext://sys.stdout + + file_handler: + class: logging.FileHandler + level: DEBUG + formatter: simple + filename: autosklearn.log + +root: + level: CRITICAL + handlers: [console, file_handler] + +loggers: + autosklearn.metalearning: + level: NOTSET + handlers: [file_handler] + propagate: no + + autosklearn.util.backend: + level: DEBUG + handlers: [file_handler] + propagate: no + + smac.intensification.intensification.Intensifier: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.local_search.LocalSearch: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.smbo.SMBO: + level: INFO + handlers: [file_handler, console] diff --git a/test/test_util/test_logging.py b/test/test_util/test_logging.py new file mode 100644 index 
0000000000..9c18c07ec1 --- /dev/null +++ b/test/test_util/test_logging.py @@ -0,0 +1,31 @@ +import os +import unittest +import logging +import logging.config +import yaml +from autosklearn.util import logging_ + +class LoggingTest(unittest.TestCase): + + def test_setup_logger(self): + # Test that setup_logger function correctly configures the logger + # according to the given dictionary, and uses the default + # logging.yaml file if logging_config is not specified. + + with open(os.path.join(os.path.dirname(__file__), \ + 'example_config.yaml'), 'r') as fh: + example_config = yaml.safe_load(fh) + + # Configure logger with example_config.yaml. + logging_.setup_logger(logging_config=example_config) + + # example_config sets the root logger's level to CRITICAL, + # which corresponds to 50. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 50) + + # This time use the default configuration. + logging_.setup_logger(logging_config=None) + + # default config sets the root logger's level to DEBUG, + # which corresponds to 10. + self.assertEqual(logging.getLogger().getEffectiveLevel(), 10) \ No newline at end of file
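For reference, the integer levels asserted in ``test_setup_logger`` above are the standard library's logging constants; a small illustrative check (not part of the test suite):

    import logging

    # example_config.yaml sets the root logger to CRITICAL, while the bundled
    # logging.yaml sets it to DEBUG; these names map to the numbers the test
    # compares against.
    print(logging.CRITICAL)          # 50
    print(logging.DEBUG)             # 10
    print(logging.getLevelName(50))  # 'CRITICAL'
    print(logging.getLevelName(10))  # 'DEBUG'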