diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000..704b67af15 --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] +max-line-length = 100 +show-source = True +application-import-names = autosklearn +exclude = + venv + build diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml new file mode 100644 index 0000000000..ada0593183 --- /dev/null +++ b/.github/workflows/dist.yml @@ -0,0 +1,31 @@ +name: dist-check + +on: [push, pull_request] + +jobs: + dist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Build dist + run: | + python setup.py sdist + - name: Twine check + run: | + pip install twine + last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1) + twine_output=`twine check "$last_dist"` + if [[ "$twine_output" != "Checking $last_dist: PASSED" ]]; then echo $twine_output && exit 1;fi + - name: Install dist + run: | + last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1) + pip install $last_dist + - name: PEP 561 Compliance + run: | + pip install mypy + cd .. # required to use the installed version of autosklearn + if ! python -c "import autosklearn"; then exit 1; fi diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000..59831864ef --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,43 @@ +name: Docs +on: [pull_request, push] + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install -e .[docs,examples,examples_unix] + - name: Make docs + run: | + cd doc + make html + - name: Pull latest gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + cd .. + git clone https://github.com/automl/auto-sklearn.git --branch gh-pages --single-branch gh-pages + - name: Copy new doc into gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + branch_name=${GITHUB_REF##*/} + cd ../gh-pages + rm -rf $branch_name + cp -r ../autosklearn/doc/build/html $branch_name + - name: Push to gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + last_commit=$(git log --pretty=format:"%an: %s") + cd ../gh-pages + branch_name=${GITHUB_REF##*/} + git add $branch_name/ + git config --global user.name 'Github Actions' + git config --global user.email 'not@mail.com' + git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} + git commit -am "$last_commit" + git push diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 0000000000..eabada7e8d --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,20 @@ +name: pre-commit + +on: [push, pull_request] + +jobs: + run-all-files: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pre-commit + run: | + pip install pre-commit + pre-commit install + - name: Run pre-commit + run: | + pre-commit run --all-files diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000000..a0265bedf1 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,83 @@ +name: Tests + +on: [push, pull_request] + +jobs: + ubuntu: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + use-conda: [true, false] + use-dist: [false] + include: + - python-version: 3.8 + code-cov: true + - python-version: 3.7 + use-conda: false + use-dist: true + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Conda Install test dependencies + if: matrix.use-conda == true + run: | + # Miniconda is available in $CONDA env var + $CONDA/bin/conda create -n testenv --yes pip wheel gxx_linux-64 gcc_linux-64 swig python=${{ matrix.python-version }} + $CONDA/envs/testenv/bin/python3 -m pip install --upgrade pip + $CONDA/envs/testenv/bin/pip3 install -e .[test] + - name: Install test dependencies + if: matrix.use-conda == false && matrix.use-dist == false + run: | + python -m pip install --upgrade pip + pip install -e .[test] + sudo apt-get update + sudo apt-get remove swig + sudo apt-get install swig3.0 + sudo ln -s /usr/bin/swig3.0 /usr/bin/swig + - name: Dist Install test dependencies + if: matrix.use-conda == false && matrix.use-dist == true + run: | + python -m pip install --upgrade pip + sudo apt-get update + sudo apt-get remove swig + sudo apt-get install swig3.0 + sudo ln -s /usr/bin/swig3.0 /usr/bin/swig + # We need to install for the dependencies, like pytest + pip install -e .[test] + # Then we remove autosklearn and install from DIST + pip uninstall --yes auto-sklearn + python setup.py sdist + last_dist=$(ls -t dist/auto-sklearn-*.tar.gz | head -n 1) + pip install $last_dist + - name: Conda Run tests + if: matrix.use-conda == true + run: | + export OPENBLAS_NUM_THREADS=1 + export OMP_NUM_THREADS=1 + export MKL_NUM_THREADS=1 + # We activate conda as metalearning uses python directly, so we need + # to change the default python + export PATH="$CONDA/envs/testenv/bin:$PATH" + if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autosklearn --cov-report=xml'; fi + $CONDA/envs/testenv/bin/python3 -m pytest --durations=20 -sv $codecov test + - name: Run tests + if: matrix.use-conda == false + run: | + export OPENBLAS_NUM_THREADS=1 + export OMP_NUM_THREADS=1 + export MKL_NUM_THREADS=1 + if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autosklearn --cov-report=xml'; fi + pytest --durations=20 -sv $codecov test + - name: Upload coverage + if: matrix.code-cov && always() + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: true + verbose: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..1bc50bec0c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,45 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.761 + hooks: + - id: mypy + name: mypy auto-sklearn-ensembles + files: autosklearn/ensembles + - id: mypy + name: mypy auto-sklearn-metrics + files: autosklearn/metrics + - id: mypy + name: mypy auto-sklearn-data + files: autosklearn/data + - id: mypy + name: mypy auto-sklearn-util + files: autosklearn/util + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + name: flake8 auto-sklearn + files: autosklearn/* + - id: flake8 + name: flake8 file-order-data + files: autosklearn/data + additional_dependencies: + - flake8-import-order + - id: flake8 + name: flake8 file-order-ensemble + files: autosklearn/ensembles + additional_dependencies: + - flake8-import-order + - id: flake8 + name: flake8 file-order-metrics + files: autosklearn/metrics + additional_dependencies: + - flake8-import-order + - id: flake8 + name: flake8 file-order-util + files: autosklearn/util + additional_dependencies: + - flake8-import-order + - id: flake8 + name: flake8 autosklearn-test + files: test/* diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 576dbbe64d..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,83 +0,0 @@ -language: python - -sudo: false - -dist: xenial - -env: - global: - - OPENBLAS_NUM_THREADS=1 - - OMP_NUM_THREADS=1 - - MKL_NUM_THREADS=1 - # Directory where tests are run from - - TEST_DIR=/tmp/test_dir/ - - EXAMP_DIR=/tmp/examp_dir/ - - MODULE=autosklearn - - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" - -matrix: - allow_failures: - - os: osx - - include: - - os: linux - env: DISTRIB="conda" DOCPUSH="true" PYTHON="3.7" SKIP_TESTS="true" - - os: linux - env: DISTRIB="conda" PYTHON="3.8" RUN_FLAKE8="true" SKIP_TESTS="true" - - os: linux - env: DISTRIB="conda" PYTHON="3.8" RUN_MYPY="true" SKIP_TESTS="true" - - os: linux - env: DISTRIB="conda" COVERAGE="true" PYTHON="3.6" - - os: linux - env: DISTRIB="conda" TEST_DIST="true" PYTHON="3.7" - - os: linux - env: DISTRIB="conda" PYTHON="3.8" - - os: linux - python: 3.6 - env: DISTRIB="ubuntu" - - os: linux - python: 3.7 - env: DISTRIB="ubuntu" - - os: linux - python: 3.8 - env: DISTRIB="ubuntu" - # Temporarily disabling OSX builds because thy take too long - # Set language to generic to not break travis-ci - # https://github.com/travis-ci/travis-ci/issues/2312#issuecomment-195620855 - # so far, this issue is still open and there is no good solution - # python will then be installed by anaconda - #- os: osx - # sudo: required - # language: generic - # env: DISTRIB="conda" PYTHON_VERSION="3.5" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh" - -cache: - # We use three different cache directory - # to work around a Travis bug with multi-platform cache - directories: - - $HOME/.cache/pip - - $HOME/download - pip: true - -git: - depth: 5 - -before_install: - - source ci_scripts/install_env.sh - -install: - - source ci_scripts/install.sh - -script: bash ci_scripts/test.sh -after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result" - -deploy: - provider: pages - skip_cleanup: true - github_token: $GITHUB_TOKEN # set in the settings page of my repository - keep-history: true - committer-from-gh: true - on: - all_branches: true - condition: $doc_result = "success" - local_dir: doc/$TRAVIS_BRANCH diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh deleted file mode 100644 index 39a3af323b..0000000000 --- a/ci_scripts/create_doc.sh +++ /dev/null @@ -1,61 +0,0 @@ -# This script is mostly adopted from https://github.com/openml/openml-python/blob/develop/ci_scripts/create_doc.sh - -set -euo pipefail - -# Check if DOCPUSH is set -if ! [[ -z ${DOCPUSH+x} ]]; then - - if [[ "$DOCPUSH" == "true" ]]; then - - # install documentation building dependencies - pip install --upgrade matplotlib seaborn setuptools pytest mypy coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert mock - - # $1 is the branch name - # $2 is the global variable where we set the script status - - # delete any previous documentation folder - if [ -d doc/$1 ]; then - rm -rf doc/$1 - fi - - # create the documentation - cd doc && make html 2>&1 - - if ! { [ $1 = "master" ] || [ $1 = "development" ]; }; then - { echo "Not one of the allowed branches"; exit 0; } - fi - - # create directory with branch name - # the documentation for dev/stable from git will be stored here - mkdir $1 - - # get previous documentation from github - git clone https://github.com/automl/auto-sklearn.git --branch gh-pages --single-branch - - # copy previous documentation - cp -r auto-sklearn/. $1 - rm -rf auto-sklearn - - # if the documentation for the branch exists, remove it - if [ -d $1/$1 ]; then - rm -rf $1/$1 - fi - - # copy the updated documentation for this branch - mkdir $1/$1 - cp -r build/html/. $1/$1 - - # takes a variable name as an argument and assigns the script outcome to a - # variable with the given name. If it got this far, the script was successful - function set_return() { - # $1 is the variable where we save the script outcome - local __result=$1 - local status='success' - eval $__result="'$status'" - } - - set_return "$2" - fi -fi -# Workaround for travis failure -set +u diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh deleted file mode 100644 index 6ee526f0bd..0000000000 --- a/ci_scripts/install.sh +++ /dev/null @@ -1,34 +0,0 @@ -set -e - -# Install general requirements the way setup.py suggests -pip install pytest==4.6.* pep8 codecov pytest-cov flake8 flaky mypy flake8-import-order matplotlib - -# Install the packages in the correct order specified by the requirements.txt file -cat requirements.txt | xargs -n 1 -L 1 pip install - -# Debug output to know all exact package versions! - pip freeze - -if [[ "$TEST_DIST" == "true" ]]; then - pip install twine - python setup.py sdist - # Find file which was modified last as done in https://stackoverflow.com/a/4561987 - dist=`find dist -type f -printf '%T@ %p\n' | sort -n | tail -1 | cut -f2- -d" "` - echo "Installing $dist" - pip install "$dist" - twine_output=`twine check "$dist"` - if [[ "$twine_output" != "Checking $dist: PASSED" ]]; then - echo $twine_output - exit 1 - else - echo "Check with Twine: OK: $twine_output" - fi -else - python setup.py check -m -s - python setup.py install -fi - -# Install openml dependency for metadata generation unittest -pip install openml -mkdir ~/.openml -echo "apikey = 610344db6388d9ba34f6db45a3cf71de" > ~/.openml/config diff --git a/ci_scripts/install_env.sh b/ci_scripts/install_env.sh deleted file mode 100644 index 5886129520..0000000000 --- a/ci_scripts/install_env.sh +++ /dev/null @@ -1,30 +0,0 @@ -set -e - -if [[ "$DISTRIB" == "conda" ]]; then - - wget $MINICONDA_URL -O miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - # check if Conda was installed - if [[ `which conda` ]]; then echo 'Conda installation successful'; else exit 1; fi - conda create -n testenv --yes pip wheel gxx_linux-64 gcc_linux-64 swig python="$PYTHON" - source activate testenv - -else - - sudo apt install -y python3-dev - pip install --upgrade setuptools - - # install linux packages - sudo apt-get update - # https://github.com/automl/auto-sklearn/issues/314 - sudo apt-get remove swig - sudo apt-get install swig3.0 - sudo ln -s /usr/bin/swig3.0 /usr/bin/swig - -fi - -python3 -V -which python -pip3 --version -which pip3 diff --git a/ci_scripts/run_flake8.sh b/ci_scripts/run_flake8.sh deleted file mode 100755 index 0862b99849..0000000000 --- a/ci_scripts/run_flake8.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# Default ignore is 'W504', 'E24', 'E126', 'W503', 'E123', -# 'E704', 'E121', 'E226' -# The I* are flake order -# Add flake 8 order -flake8 --application-import-names=autosklearn --ignore=I100,I101,I201,I202,W504,E24,E126,W503,E123,E704,E121,E226 --max-line-length=100 --show-source \ - autosklearn \ - test \ - examples \ - || exit 1 - -# Support for incremental flake-8 order -flake8 --application-import-names=autosklearn --max-line-length=100 --show-source \ - autosklearn/data \ - autosklearn/ensembles \ - autosklearn/metrics \ - autosklearn/util \ - || exit 1 -echo -e "No problem detected by flake8\n" diff --git a/ci_scripts/run_mypy.sh b/ci_scripts/run_mypy.sh deleted file mode 100755 index 8d92077d17..0000000000 --- a/ci_scripts/run_mypy.sh +++ /dev/null @@ -1,23 +0,0 @@ -#MYPYPATH=smac -MYPYOPTS="" - -MYPYOPS="$MYPYOPS --ignore-missing-imports --follow-imports skip" -# We would like to have the following options set, but for now we have to use the ones above to get started -#MYPYOPTS="--ignore-missing-imports --strict" -#MYPYOPTS="$MYPYOPS --disallow-any-unimported" -#MYPYOPTS="$MYPYOPS --disallow-any-expr" -#MYPYOPTS="$MYPYOPS --disallow-any-decorated" -#MYPYOPTS="$MYPYOPS --disallow-any-explicit" -#MYPYOPTS="$MYPYOPS --disallow-any-generics" -MYPYOPTS="$MYPYOPS --disallow-untyped-decorators" -MYPYOPTS="$MYPYOPS --disallow-incomplete-defs" -MYPYOPTS="$MYPYOPS --disallow-untyped-defs" -# Add the following once the scenario is removed from teh main code or typed -# https://mypy.readthedocs.io/en/stable/command_line.html#configuring-warnings -# MYPYOPTS="$MYPYOPS --warn-unused-ignores" - -mypy $MYPYOPTS --show-error-codes \ - autosklearn/data/ \ - autosklearn/ensembles \ - autosklearn/metrics \ - autosklearn/util diff --git a/ci_scripts/success.sh b/ci_scripts/success.sh deleted file mode 100644 index acc37d9bcc..0000000000 --- a/ci_scripts/success.sh +++ /dev/null @@ -1,11 +0,0 @@ -set -e - -if [[ "$COVERAGE" == "true" ]]; then - - python setup.py install - cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR - codecov - coverage report - -fi \ No newline at end of file diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh deleted file mode 100755 index 2608f42621..0000000000 --- a/ci_scripts/test.sh +++ /dev/null @@ -1,77 +0,0 @@ -set -e - -run_tests() { - # Get into a temp directory to run test from the installed scikit learn and - # check if we do not leave artifacts - mkdir -p $TEST_DIR - - cwd=`pwd` - test_dir=$cwd/test/ - - cd $TEST_DIR - - python -c 'import autosklearn; print("Auto-sklearn imported from: %s" % autosklearn.__file__)' - - test_params="" - if [[ "$COVERAGE" == "true" ]]; then - test_params="--cov=$MODULE" - fi - - python -m pytest $test_dir -v $test_params - - cd $cwd -} - -run_examples() { - cwd=`pwd` - examples_dir=$cwd/examples/ - - # Get into a temp directory to run test from the installed scikit learn and - # check if we do not leave artifacts - mkdir -p $EXAMP_DIR - cd $EXAMP_DIR - - python -c 'import autosklearn; print("Auto-sklearn imported from: %s" % autosklearn.__file__)' - for example in `find $examples_dir -name '*.py'` - do - echo '***********************************************************' - echo "Running example $example" - python $example - done - - cd $cwd -} - -if [[ "$RUN_FLAKE8" ]]; then - echo '***********************************************************' - echo '***********************************************************' - echo 'Running flake8' - echo '***********************************************************' - source ci_scripts/run_flake8.sh -fi - -if [[ "$RUN_MYPY" ]]; then - echo '***********************************************************' - echo '***********************************************************' - echo 'Running Mypy' - echo '***********************************************************' - source ci_scripts/run_mypy.sh -fi - -if [[ "$SKIP_TESTS" != "true" ]]; then - echo '***********************************************************' - echo '***********************************************************' - echo 'Running unittests' - echo '***********************************************************' - run_tests -fi - -if [[ "$EXAMPLES" ]]; then - echo '***********************************************************' - echo '***********************************************************' - echo 'Running examples' - echo '***********************************************************' - run_examples -fi - - diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000000..8c8b6589af --- /dev/null +++ b/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +# Reports any config lines that are not recognized +warn_unused_configs=True +ignore_missing_imports=True +follow_imports=skip +disallow_untyped_defs=True +disallow_incomplete_defs=True +disallow_untyped_decorators=True diff --git a/setup.py b/setup.py index 9617112e3e..7c2d58471d 100644 --- a/setup.py +++ b/setup.py @@ -32,9 +32,17 @@ "pytest-xdist", "pytest-timeout", "flaky", - "pytest-cov", "openml", - ] + "pre-commit", + "pytest-cov", + ], + "examples": [ + "matplotlib", + "jupyter", + "notebook", + "seaborn", + ], + "docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"], } with open("autosklearn/__version__.py") as fh: diff --git a/test/test_automl/automl_utils.py b/test/test_automl/automl_utils.py index 1d1d6e2827..90ccf99f98 100644 --- a/test/test_automl/automl_utils.py +++ b/test/test_automl/automl_utils.py @@ -1,10 +1,21 @@ # -*- encoding: utf-8 -*- import os +import glob import numpy as np -def extract_msg_from_log(log_file): +def print_debug_information(automl): + + # In case it is called with estimator, + # Get the automl object + if hasattr(automl, 'automl_'): + automl = automl.automl_ + + # Log file path + log_file = glob.glob(os.path.join( + automl._backend.temporary_directory, 'AutoML*.log'))[0] + include_messages = ['INFO', 'DEBUG', 'WARN', 'CRITICAL', 'ERROR', 'FATAL'] @@ -22,6 +33,20 @@ def extract_msg_from_log(log_file): except Exception as e: return str(e) + + # Also add the run history if any + if hasattr(automl, 'runhistory_') and hasattr(automl.runhistory_, 'data'): + for k, v in automl.runhistory_.data.items(): + content += ["{}->{}".format(k, v)] + else: + content += ['No RunHistory'] + + # Also add the ensemble history if any + if len(automl.ensemble_performance_history) > 0: + content += [str(h) for h in automl.ensemble_performance_history] + else: + content += ['No Ensemble History'] + return os.linesep.join(content) diff --git a/test/test_automl/test_automl.py b/test/test_automl/test_automl.py index 6cc701976a..5b974ddfb1 100644 --- a/test/test_automl/test_automl.py +++ b/test/test_automl/test_automl.py @@ -24,7 +24,7 @@ from smac.tae import StatusType sys.path.append(os.path.dirname(__file__)) -from automl_utils import extract_msg_from_log, count_succeses # noqa (E402: module level import not at top of file) +from automl_utils import print_debug_information, count_succeses # noqa (E402: module level import not at top of file) class AutoMLStub(AutoML): @@ -290,10 +290,6 @@ def test_automl_outputs(backend, dask_client): task=MULTICLASS_CLASSIFICATION, ) - # Log file path - log_file_path = glob.glob(os.path.join( - backend.temporary_directory, 'AutoML*.log'))[0] - # pickled data manager (without one hot encoding!) with open(data_manager_file, 'rb') as fh: D = pickle.load(fh) @@ -335,7 +331,7 @@ def test_automl_outputs(backend, dask_client): '.auto-sklearn', "start_time_100") with open(start_time_file_path, 'r') as fh: start_time = float(fh.read()) - assert time.time() - start_time >= 10, extract_msg_from_log(log_file_path) + assert time.time() - start_time >= 10, print_debug_information(auto) del auto diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py index 4db354dbe7..e317b359f6 100644 --- a/test/test_automl/test_estimators.py +++ b/test/test_automl/test_estimators.py @@ -28,7 +28,7 @@ from autosklearn.smbo import get_smac_object sys.path.append(os.path.dirname(__file__)) -from automl_utils import extract_msg_from_log, count_succeses # noqa (E402: module level import not at top of file) +from automl_utils import print_debug_information, count_succeses # noqa (E402: module level import not at top of file) def test_fit_n_jobs(tmp_dir, output_dir): @@ -388,15 +388,13 @@ def test_multilabel(tmp_dir, output_dir, dask_client): output_folder=output_dir) automl.fit(X_train, Y_train) - # Log file path - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] + predictions = automl.predict(X_test) - assert predictions.shape == (50, 3), extract_msg_from_log(log_file_path) - assert count_succeses(automl.cv_results_) > 0, extract_msg_from_log(log_file_path) + assert predictions.shape == (50, 3), print_debug_information(automl) + assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) score = f1_macro(Y_test, predictions) - assert score >= 0.9, extract_msg_from_log(log_file_path) + assert score >= 0.9, print_debug_information(automl) probs = automl.predict_proba(X_train) assert np.mean(probs) == pytest.approx(0.33, rel=1e-1) @@ -414,17 +412,16 @@ def test_binary(tmp_dir, output_dir, dask_client): automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test, dataset_name='binary_test_dataset') - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] + predictions = automl.predict(X_test) - assert predictions.shape == (50, ), extract_msg_from_log(log_file_path) + assert predictions.shape == (50, ), print_debug_information(automl) score = accuracy(Y_test, predictions) - assert score > 0.9, extract_msg_from_log(log_file_path) - assert count_succeses(automl.cv_results_) > 0, extract_msg_from_log(log_file_path) + assert score > 0.9, print_debug_information(automl) + assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) output_files = glob.glob(os.path.join(output_dir, 'binary_test_dataset_test_*.predict')) - assert len(output_files) > 0, (output_files, extract_msg_from_log(log_file_path)) + assert len(output_files) > 0, (output_files, print_debug_information(automl)) def test_classification_pandas_support(tmp_dir, output_dir, dask_client): @@ -453,12 +450,9 @@ def test_classification_pandas_support(tmp_dir, output_dir, dask_client): automl.fit(X, y) - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] - # Make sure that at least better than random. # We use same X_train==X_test to test code quality - assert automl.score(X, y) > 0.555, extract_msg_from_log(log_file_path) + assert automl.score(X, y) > 0.555, print_debug_information(automl) automl.refit(X, y) @@ -482,10 +476,6 @@ def test_regression(tmp_dir, output_dir, dask_client): automl.fit(X_train, Y_train) - # Log file path - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] - predictions = automl.predict(X_test) assert predictions.shape == (356,) score = mean_squared_error(Y_test, predictions) @@ -493,7 +483,7 @@ def test_regression(tmp_dir, output_dir, dask_client): # On average np.sqrt(30) away from the target -> ~5.5 on average # Results with select rates drops avg score to a range of -32.40 to -37, on 30 seconds # constraint. With more time_left_for_this_task this is no longer an issue - assert score >= -37, extract_msg_from_log(log_file_path) + assert score >= -37, print_debug_information(automl) assert count_succeses(automl.cv_results_) > 0 @@ -513,15 +503,11 @@ def test_cv_regression(tmp_dir, output_dir, dask_client): automl.fit(X_train, Y_train) - # Log file path - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] - predictions = automl.predict(X_test) assert predictions.shape == (206,) score = r2(Y_test, predictions) - assert score >= 0.1, extract_msg_from_log(log_file_path) - assert count_succeses(automl.cv_results_) > 0, extract_msg_from_log(log_file_path) + assert score >= 0.1, print_debug_information(automl) + assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) def test_regression_pandas_support(tmp_dir, output_dir, dask_client): @@ -545,18 +531,15 @@ def test_regression_pandas_support(tmp_dir, output_dir, dask_client): # Make sure we error out because y is not encoded automl.fit(X, y) - log_file_path = glob.glob(os.path.join( - tmp_dir, 'AutoML*.log'))[0] - # Make sure that at least better than random. # We use same X_train==X_test to test code quality - assert automl.score(X, y) >= 0.5, extract_msg_from_log(log_file_path) + assert automl.score(X, y) >= 0.5, print_debug_information(automl) automl.refit(X, y) # Make sure that at least better than random. - assert r2(y, automl.predict(X)) > 0.5, extract_msg_from_log(log_file_path) - assert count_succeses(automl.cv_results_) > 0, extract_msg_from_log(log_file_path) + assert r2(y, automl.predict(X)) > 0.5, print_debug_information(automl) + assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl) # Currently this class only tests that the methods of AutoSklearnClassifier @@ -615,6 +598,8 @@ def test_autosklearn2_classification_methods_returns_self(dask_client): assert automl is automl_refitted predictions = automl_fitted.predict(X_test) - assert sklearn.metrics.accuracy_score(y_test, predictions) >= 2 / 3 + assert sklearn.metrics.accuracy_score( + y_test, predictions + ) >= 2 / 3, print_debug_information(automl) pickle.dumps(automl_fitted)