Skip to content

Commit

Permalink
Add extra bootstrap config to VM based clusters (#319)
Browse files Browse the repository at this point in the history
* Release 2021.9.0

* Add extra bootstrap options
  • Loading branch information
jacobtomlinson committed Nov 23, 2021
1 parent a919b6d commit bf62720
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 4 deletions.
2 changes: 2 additions & 0 deletions dask_cloudprovider/azure/azurevm.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ class AzureVMCluster(VMCluster):
By default the ``daskdev/dask:latest`` image will be used.
docker_args: string (optional)
Extra command line arguments to pass to Docker.
extra_bootstrap: list[str] (optional)
Extra commands to be run during the bootstrap phase.
silence_logs: bool
Whether or not we should silence logging when setting up the cluster.
asynchronous: bool
Expand Down
8 changes: 7 additions & 1 deletion dask_cloudprovider/azure/tests/test_azurevm.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def skip_without_credentials(func):
"""
)(func)

rg = dask.config.get("cloudprovider.azure.azurevm.resource_group", None)
rg = dask.config.get("cloudprovider.azure.resource_group", None)
vnet = dask.config.get("cloudprovider.azure.azurevm.vnet", None)
security_group = dask.config.get("cloudprovider.azure.azurevm.security_group", None)
location = dask.config.get("cloudprovider.azure.location", None)
Expand Down Expand Up @@ -121,3 +121,9 @@ def gpu_mem():
async def test_render_cloud_init():
cloud_init = AzureVMCluster.get_cloud_init(docker_args="--privileged")
assert " --privileged " in cloud_init

cloud_init = AzureVMCluster.get_cloud_init(
extra_bootstrap=["echo 'hello world'", "echo 'foo bar'"]
)
assert "- echo 'hello world'" in cloud_init
assert "- echo 'foo bar'" in cloud_init
2 changes: 2 additions & 0 deletions dask_cloudprovider/digitalocean/droplet.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ class DropletCluster(VMCluster):
By default the ``daskdev/dask:latest`` image will be used.
docker_args: string (optional)
Extra command line arguments to pass to Docker.
extra_bootstrap: list[str] (optional)
Extra commands to be run during the bootstrap phase.
env_vars: dict (optional)
Environment variables to be passed to the worker.
silence_logs: bool
Expand Down
2 changes: 2 additions & 0 deletions dask_cloudprovider/gcp/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,8 @@ class GCPCluster(VMCluster):
By default the ``daskdev/dask:latest`` image will be used.
docker_args: string (optional)
Extra command line arguments to pass to Docker.
extra_bootstrap: list[str] (optional)
Extra commands to be run during the bootstrap phase.
ngpus: int (optional)
The number of GPUs to attach to the instance.
Default is ``0``.
Expand Down
6 changes: 6 additions & 0 deletions dask_cloudprovider/generic/cloud-init.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ runcmd:
- systemctl restart docker
{% endif %}

{% if extra_bootstrap %}
{% for command in extra_bootstrap %}
- {{ command }}
{% endfor %}
{% endif %}

# Run container
- 'docker run --net=host {%+ if gpu_instance %}--gpus=all{% endif %} {% for key in env_vars %} -e {{key}}="{{env_vars[key]}}" {% endfor %}{%+ if docker_args %}{{docker_args}}{% endif %} {{image}} {{ command }}'

Expand Down
9 changes: 8 additions & 1 deletion dask_cloudprovider/generic/vmcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
class VMInterface(ProcessInterface):
"""A superclass for VM Schedulers, Workers and Nannies."""

def __init__(self, docker_args: str = "", **kwargs):
def __init__(self, docker_args: str = "", extra_bootstrap: list = None, **kwargs):
super().__init__()
self.name = None
self.command = None
Expand All @@ -29,6 +29,7 @@ def __init__(self, docker_args: str = "", **kwargs):
self.bootstrap = None
self.docker_image = "daskdev/dask:latest"
self.docker_args = docker_args
self.extra_bootstrap = extra_bootstrap
self.auto_shutdown = True
self.set_env = 'env DASK_INTERNAL_INHERIT_CONFIG="{}"'.format(
dask.config.serialize(dask.config.global_config)
Expand Down Expand Up @@ -190,6 +191,8 @@ class VMCluster(SpecCluster):
By default the ``daskdev/dask:latest`` image will be used.
docker_args: string (optional)
Extra command line arguments to pass to Docker.
extra_bootstrap: list[str] (optional)
Extra commands to be run during the bootstrap phase.
silence_logs: bool
Whether or not we should silence logging when setting up the cluster.
asynchronous: bool
Expand Down Expand Up @@ -223,6 +226,7 @@ def __init__(
scheduler_options: dict = {},
docker_image="daskdev/dask:latest",
docker_args: str = "",
extra_bootstrap: list = None,
env_vars: dict = {},
security: bool = True,
protocol: str = None,
Expand Down Expand Up @@ -278,6 +282,7 @@ def __init__(
self.scheduler_options["scheduler_options"] = scheduler_options
self.worker_options["env_vars"] = env_vars
self.options["docker_args"] = docker_args
self.options["extra_bootstrap"] = extra_bootstrap
self.scheduler_options["docker_args"] = docker_args
self.worker_options["docker_args"] = docker_args
self.worker_options["docker_image"] = image
Expand Down Expand Up @@ -333,6 +338,7 @@ def render_process_cloud_init(self, process):
image=process.docker_image,
command=process.command,
docker_args=process.docker_args,
extra_bootstrap=process.extra_bootstrap,
gpu_instance=process.gpu_instance,
bootstrap=process.bootstrap,
auto_shutdown=process.auto_shutdown,
Expand All @@ -357,6 +363,7 @@ def get_cloud_init(cls, *args, **kwargs):
image=cluster.options["docker_image"],
command="dask-scheduler --version",
docker_args=cluster.options["docker_args"],
extra_bootstrap=cluster.options["extra_bootstrap"],
gpu_instance=cluster.gpu_instance,
bootstrap=cluster.bootstrap,
auto_shutdown=cluster.auto_shutdown,
Expand Down
2 changes: 2 additions & 0 deletions dask_cloudprovider/hetzner/vserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ class HetznerCluster(VMCluster):
See :class:`distributed.scheduler.Scheduler`.
env_vars: dict
Environment variables to be passed to the worker.
extra_bootstrap: list[str] (optional)
Extra commands to be run during the bootstrap phase.
Example
--------
Expand Down
19 changes: 17 additions & 2 deletions doc/source/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ So, for example, code like this will result in an error

.. code-block:: python
from dask_cloudprovider import FargateCluster
from dask_cloudprovider.aws import FargateCluster
cluster = FargateCluster(
image="daskdev/dask:latest",
worker_cpu=256,
Expand Down Expand Up @@ -63,4 +63,19 @@ However, to get the desired cluster configuration you'll need to request a servi

Go to ``https://<region>.aws.amazon.com/servicequotas/home/services/ec2/quotas`` and
`request an increase <https://docs.aws.amazon.com/servicequotas/latest/userguide/request-quota-increase.html>`_ for
"Running On-Demand Standard (A, C, D, H, I, M, R, T, Z) instances".
"Running On-Demand Standard (A, C, D, H, I, M, R, T, Z) instances".

Pulling private Docker images
-----------------------------------

For cluster managers like ``EC2Cluster``, ``AzureVMCluster`` and ``GCPCluster``, Docker images will be pulled onto VMs created on the cloud of your choice.

If you need to pull a private Docker image which requires authentication, each VM will need to be configured with credentials. These cluster managers accept
an ``extra_bootstrap`` argument where you can provide additional bash commands to be run during startup. This is a good place to log into your Docker registry.

.. code-block:: python
from dask_cloudprovider.azure import AzureVMCluster
cluster = AzureVMCluster(...
docker_image="my_private_image:latest",
extra_bootstrap=["docker login -u 'username' -p 'password'"])

0 comments on commit bf62720

Please sign in to comment.