Merge pull request #110 from dsludwig/feat/default-toleration
Feat/default toleration
jacobtomlinson committed Nov 16, 2018
2 parents 9e2474e + 24fdf00 commit 25fb05f
Showing 3 changed files with 91 additions and 11 deletions.
16 changes: 16 additions & 0 deletions dask_kubernetes/objects.py
@@ -134,6 +134,22 @@ def make_pod_spec(
                     env=[client.V1EnvVar(name=k, value=v)
                          for k, v in env.items()],
                 )
             ],
+            tolerations=[
+                client.V1Toleration(
+                    key='k8s.dask.org/dedicated',
+                    operator='Equal',
+                    value='worker',
+                    effect='NoSchedule',
+                ),
+                # GKE currently does not permit creating taints on a node pool
+                # with a `/` in the key field
+                client.V1Toleration(
+                    key='k8s.dask.org_dedicated',
+                    operator='Equal',
+                    value='worker',
+                    effect='NoSchedule',
+                ),
+            ]
         )
     )
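
For illustration, a pod spec built with make_pod_spec should now carry both
default tolerations. A minimal sketch, assuming this branch is installed and
using an arbitrary example image tag:

    from dask_kubernetes.objects import make_pod_spec

    # Build a worker pod spec; the image tag is only an example.
    pod = make_pod_spec(image='daskdev/dask:latest')

    # Both default tolerations should be present on the pod spec.
    for toleration in pod.spec.tolerations:
        print(toleration.key, toleration.operator,
              toleration.value, toleration.effect)
    # Expected keys: k8s.dask.org/dedicated and k8s.dask.org_dedicated
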
69 changes: 58 additions & 11 deletions dask_kubernetes/tests/test_core.py
@@ -379,7 +379,7 @@ def test_scale_up_down(cluster, client):
 
     a, b = list(cluster.scheduler.workers)
     x = client.submit(np.ones, 1, workers=a)
-    y = client.submit(np.ones, 100_000_000, workers=b)
+    y = client.submit(np.ones, 50_000_000, workers=b)
 
     wait([x, y])
 
@@ -531,17 +531,13 @@ def test_repr(cluster):
     assert "workers=0" in text
 
 
-def test_escape_username(pod_spec, loop, ns):
-    old_logname = os.environ.get('LOGNAME')
-    os.environ['LOGNAME'] = 'foo!'
+def test_escape_username(pod_spec, loop, ns, monkeypatch):
+    monkeypatch.setenv('LOGNAME', 'foo!')
 
-    try:
-        with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
-            assert 'foo' in cluster.name
-            assert '!' not in cluster.name
-            assert 'foo' in cluster.pod_template.metadata.labels['user']
-    finally:
-        os.environ['LOGNAME'] = old_logname
+    with KubeCluster(pod_spec, loop=loop, namespace=ns) as cluster:
+        assert 'foo' in cluster.name
+        assert '!' not in cluster.name
+        assert 'foo' in cluster.pod_template.metadata.labels['user']
 
 
 def test_escape_name(pod_spec, loop, ns):
@@ -564,3 +560,54 @@ def test_maximum(cluster):
 
     result = logger.getvalue()
     assert "scale beyond maximum number of workers" in result.lower()
+
+
+def test_default_toleration(pod_spec):
+    tolerations = pod_spec.to_dict()['spec']['tolerations']
+    assert {
+        'key': 'k8s.dask.org/dedicated',
+        'operator': 'Equal',
+        'value': 'worker',
+        'effect': 'NoSchedule',
+        'toleration_seconds': None
+    } in tolerations
+    assert {
+        'key': 'k8s.dask.org_dedicated',
+        'operator': 'Equal',
+        'value': 'worker',
+        'effect': 'NoSchedule',
+        'toleration_seconds': None
+    } in tolerations
+
+
+def test_default_toleration_preserved(image_name):
+    pod_spec = make_pod_spec(
+        image=image_name,
+        extra_pod_config={
+            'tolerations': [
+                {
+                    'key': 'example.org/toleration',
+                    'operator': 'Exists',
+                    'effect': 'NoSchedule',
+                }
+            ],
+        }
+    )
+    tolerations = pod_spec.to_dict()['spec']['tolerations']
+    assert {
+        'key': 'k8s.dask.org/dedicated',
+        'operator': 'Equal',
+        'value': 'worker',
+        'effect': 'NoSchedule',
+    } in tolerations
+    assert {
+        'key': 'k8s.dask.org_dedicated',
+        'operator': 'Equal',
+        'value': 'worker',
+        'effect': 'NoSchedule',
+    } in tolerations
+    assert {
+        'key': 'example.org/toleration',
+        'operator': 'Exists',
+        'effect': 'NoSchedule',
+    } in tolerations
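
The second test above doubles as a usage pattern: tolerations supplied through
extra_pod_config are appended alongside the two defaults rather than replacing
them. A sketch of the same call from user code (the image tag and the
example.org key are placeholders):

    from dask_kubernetes.objects import make_pod_spec

    pod = make_pod_spec(
        image='daskdev/dask:latest',  # placeholder image tag
        extra_pod_config={
            'tolerations': [
                {
                    'key': 'example.org/toleration',  # placeholder key
                    'operator': 'Exists',
                    'effect': 'NoSchedule',
                }
            ],
        },
    )

    tolerations = pod.to_dict()['spec']['tolerations']
    assert len(tolerations) == 3  # two defaults plus the custom toleration
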
17 changes: 17 additions & 0 deletions doc/source/index.rst
@@ -124,6 +124,23 @@ More information about these images is available at the
 `Dask documentation <https://docs.dask.org/en/latest/setup/docker.html>`_.
 
 
+Deployment Details
+------------------
+
+Workers are created directly as simple pods. These worker pods are configured
+to shut down if they are unable to connect to the scheduler for 60 seconds.
+The pods are cleaned up when :meth:`~dask_kubernetes.KubeCluster.close` is
+called or the scheduler process exits.
+
+The pods are created with two default `tolerations <https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/>`_:
+
+* ``k8s.dask.org/dedicated=worker:NoSchedule``
+* ``k8s.dask.org_dedicated=worker:NoSchedule``
+
+If you have nodes with the corresponding taints, then the worker pods will
+schedule to those nodes (and no other pods will be able to schedule to those
+nodes).
+
 .. toctree::
    :maxdepth: 1
    :hidden:
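
For the tolerations documented above to matter, the matching taint must exist
on the target nodes. Tainting is normally done with kubectl or when creating a
node pool; as a minimal sketch, the same can be done with the official
Kubernetes Python client (the node name is a placeholder):

    from kubernetes import client, config

    config.load_kube_config()
    v1 = client.CoreV1Api()

    # Patch a node to carry a taint matching the first default toleration.
    # On GKE, use the 'k8s.dask.org_dedicated' key instead.
    body = {
        'spec': {
            'taints': [
                {
                    'key': 'k8s.dask.org/dedicated',
                    'value': 'worker',
                    'effect': 'NoSchedule',
                }
            ]
        }
    }
    v1.patch_node('my-node', body)  # 'my-node' is a placeholder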
