From 4216d43b483f01b510e193288e08b322d7100d5b Mon Sep 17 00:00:00 2001 From: chico Date: Fri, 18 Jun 2021 09:16:09 +0200 Subject: [PATCH 1/3] Enabled threadpoolctl --- autosklearn/evaluation/abstract_evaluator.py | 5 +++++ requirements.txt | 1 + 2 files changed, 6 insertions(+) diff --git a/autosklearn/evaluation/abstract_evaluator.py b/autosklearn/evaluation/abstract_evaluator.py index 50a9cd272e..56f55ac543 100644 --- a/autosklearn/evaluation/abstract_evaluator.py +++ b/autosklearn/evaluation/abstract_evaluator.py @@ -12,6 +12,8 @@ from smac.tae import StatusType +from threadpoolctl import threadpool_limits + import autosklearn.pipeline.classification import autosklearn.pipeline.regression from autosklearn.constants import ( @@ -193,6 +195,9 @@ def __init__( budget_type: Optional[str] = None, ): + # Limit the number of threads that numpy uses + threadpool_limits(limits=1) + self.starttime = time.time() self.configuration = configuration diff --git a/requirements.txt b/requirements.txt index a29774a201..e0588f79eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ distributed>=2.2.0 pyyaml pandas>=1.0 liac-arff +threadpoolctl ConfigSpace>=0.4.14,<0.5 pynisher>=0.6.3 From 4a098bf98db2f33e4e1eba7a6e565e8946017488 Mon Sep 17 00:00:00 2001 From: chico Date: Fri, 25 Jun 2021 19:00:28 +0200 Subject: [PATCH 2/3] doc thread variables --- doc/manual.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/manual.rst b/doc/manual.rst index ee5595fd6d..2e57f185e1 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -172,13 +172,14 @@ is exhausted. **Note:** *auto-sklearn* requires all workers to have access to a shared file system for storing training data and models. -Furthermore, depending on the installation of scikit-learn and numpy, -the model building procedure may use up to all cores. 
Such behaviour is -unintended by *auto-sklearn* and is most likely due to numpy being installed -from `pypi` as a binary wheel (`see here `_). Executing -``export OPENBLAS_NUM_THREADS=1`` should disable such behaviours and make numpy -only use a single core at a time. +*auto-sklearn* employs `threadpoolctl <https://github.com/joblib/threadpoolctl/>`_ to control the number of threads employed by scientific libraries like numpy or scikit-learn. This is done exclusively during the building procedure of models, not during inference. In particular, *auto-sklearn* allows each pipeline to use at most 1 thread during training. At predicting and scoring time this limitation is not enforced by *auto-sklearn*. You can control the number of resources +employed by the pipelines by setting the following variables in your environment, prior to running *auto-sklearn*: + +.. role:: bash(code) + :language: bash + export OPENBLAS_NUM_THREADS=1 + export MKL_NUM_THREADS=1 + export OMP_NUM_THREADS=1 Model persistence ================= From 4010f80be86029423d46a81b4872d1bf6ea7a0a5 Mon Sep 17 00:00:00 2001 From: chico Date: Wed, 30 Jun 2021 12:58:25 +0200 Subject: [PATCH 3/3] Fix documentation --- doc/manual.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/manual.rst b/doc/manual.rst index 2e57f185e1..1e3ac0ae66 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -175,11 +175,14 @@ is exhausted. *auto-sklearn* employs `threadpoolctl <https://github.com/joblib/threadpoolctl/>`_ to control the number of threads employed by scientific libraries like numpy or scikit-learn. This is done exclusively during the building procedure of models, not during inference. In particular, *auto-sklearn* allows each pipeline to use at most 1 thread during training. At predicting and scoring time this limitation is not enforced by *auto-sklearn*. You can control the number of resources employed by the pipelines by setting the following variables in your environment, prior to running *auto-sklearn*: -.. 
role:: bash(code) - :language: bash - export OPENBLAS_NUM_THREADS=1 - export MKL_NUM_THREADS=1 - export OMP_NUM_THREADS=1 +.. code-block:: shell-session + + $ export OPENBLAS_NUM_THREADS=1 + $ export MKL_NUM_THREADS=1 + $ export OMP_NUM_THREADS=1 + + +For further information about how scikit-learn handles multiprocessing, please check the `Parallelism, resource management, and configuration <https://scikit-learn.org/stable/computing/parallelism.html>`_ documentation from the library. Model persistence =================