diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index 6a8337005e..79c6b98912 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -230,8 +230,6 @@ def __init__(self,
 
     def _create_dask_client(self):
         self._is_dask_client_internally_created = True
-        if self._n_jobs is not None and self._n_jobs > 1:
-            dask.config.set({'distributed.worker.daemon': False})
        self._dask_client = dask.distributed.Client(
            dask.distributed.LocalCluster(
                n_workers=self._n_jobs,
diff --git a/examples/60_search/example_parallel_manual_spawning.py b/examples/60_search/example_parallel_manual_spawning.py
index a7e8a2709c..53be0c2bca 100644
--- a/examples/60_search/example_parallel_manual_spawning.py
+++ b/examples/60_search/example_parallel_manual_spawning.py
@@ -36,7 +36,14 @@
 # Dask configuration
 # ==================
 #
-# Auto-sklearn requires dask workers to not run in the daemon setting
+# Auto-sklearn uses threads in Dask to launch memory-constrained jobs.
+# The number of threads can be provided directly via the n_jobs argument
+# when creating the AutoSklearnClassifier. Additionally, the user can
+# provide a dask_client argument, which may have been created with
+# processes=True. When processes=True is used, the setting below is
+# required so that internally spawned processes are allowed.
+# Optionally, you can provide a dask client created with processes=False
+# and remove the following line.
 dask.config.set({'distributed.worker.daemon': False})
 
 
@@ -76,6 +83,9 @@ async def do_work():
 # one can also start a dask scheduler from the command line), see the
 # `dask cli docs `_ for
 # further information.
+# Please note that DASK_DISTRIBUTED__WORKER__DAEMON=False is required in
+# this case, as dask-worker creates a new process. That is, it is equivalent
+# to the setting described above with dask.distributed.Client and processes=True.
 #
 # Again, we need to make sure that we do not start the workers in a daemon
 # mode.
diff --git a/test/conftest.py b/test/conftest.py
index e9a42efd3f..4db32f6af7 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -3,7 +3,6 @@
 import time
 import unittest.mock
 
-import dask
 from dask.distributed import Client, get_client
 import psutil
 import pytest
@@ -124,9 +123,8 @@ def dask_client(request):
     Workers are in subprocesses to not create deadlocks with the pynisher and
     logging.
     """
-    dask.config.set({'distributed.worker.daemon': False})
-    client = Client(n_workers=2, threads_per_worker=1, processes=True)
+    client = Client(n_workers=2, threads_per_worker=1, processes=False)
 
     print("Started Dask client={}\n".format(client))
 
     def get_finalizer(address):
@@ -150,7 +148,6 @@ def dask_client_single_worker(request):
    Using this might cause deadlocks with the pynisher and the logging module.
    However, it is used very rarely to avoid this issue as much as possible.
    """
-    dask.config.set({'distributed.worker.daemon': False})
     client = Client(n_workers=1, threads_per_worker=1, processes=False)
 
     print("Started Dask client={}\n".format(client))
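
Usage note (not part of the patch): the documentation added to
example_parallel_manual_spawning.py describes two ways to parallelize
auto-sklearn, either via n_jobs alone or via a user-supplied dask_client.
Below is a minimal sketch of the dask_client route with processes=False,
the variant that needs no daemon configuration; the data variables X and y
are hypothetical placeholders, not part of this change.

    import dask.distributed
    from autosklearn.classification import AutoSklearnClassifier

    # Workers are threads (processes=False), so no daemon setting is needed.
    client = dask.distributed.Client(
        dask.distributed.LocalCluster(
            n_workers=2, threads_per_worker=1, processes=False,
        )
    )
    automl = AutoSklearnClassifier(n_jobs=2, dask_client=client)
    # automl.fit(X, y)  # X, y: your training data (placeholders)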
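For the manually spawned dask-worker case, the patch points out that
DASK_DISTRIBUTED__WORKER__DAEMON=False is required. This relies on standard
dask behaviour (not auto-sklearn-specific): environment variables prefixed
with DASK_ are mapped into the dask config tree, with double underscores
denoting nesting, so the variable targets the same key as the programmatic
call. A sketch of the equivalence, assuming the variable is set before dask
is imported:

    # Shell equivalent (scheduler address is a placeholder):
    #   DASK_DISTRIBUTED__WORKER__DAEMON=False dask-worker <scheduler-address>
    # Programmatic equivalent:
    import dask
    dask.config.set({'distributed.worker.daemon': False})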