Skip to content

Commit

Permalink
Merge pull request #303 from girder/support-nvidia-docker2
Browse files Browse the repository at this point in the history
Support nvidia-docker version 2.0
  • Loading branch information
msmolens committed Aug 27, 2018
2 parents 973ab64 + bed7bae commit 31c28c6
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 157 deletions.
86 changes: 0 additions & 86 deletions girder_worker/docker/nvidia.py
@@ -1,89 +1,3 @@
"""
This module is necessary since the docker python library does not yet provide built-in support
for nvidia-docker tooling. If some incarnation of this feature[1] makes its way into a release
of docker-py, this module can go away. Until then, the ideas in that PR have been copied and
lightly modified here.
[1] https://github.com/docker/docker-py/pull/1560
"""

import os
import re
import requests

from docker.api.client import APIClient
from docker.client import DockerClient
from docker.errors import DockerException
from docker.utils.utils import parse_devices

NVIDIA_DEFAULT_HOST = 'localhost'
NVIDIA_DEFAULT_PORT = 3476


def get_nvidia_docker_endpoint():
    """Return the URL of the nvidia-docker plugin's Docker CLI JSON endpoint.

    The base URL is read from the ``NV_HOST`` environment variable. When that
    variable is unset or empty, ``http://<NVIDIA_DEFAULT_HOST>:<NVIDIA_DEFAULT_PORT>``
    is used instead.

    :returns: The base URL with ``/docker/cli/json`` appended.
    """
    host = os.environ.get('NV_HOST')
    if not host:
        # An empty NV_HOST would otherwise yield a URL with no scheme or host.
        host = 'http://%s:%s' % (NVIDIA_DEFAULT_HOST, NVIDIA_DEFAULT_PORT)
    # Tolerate a trailing slash on NV_HOST so the path never starts with '//'.
    return host.rstrip('/') + '/docker/cli/json'


def get_nvidia_configuration():
    """Fetch the nvidia-docker plugin configuration as parsed JSON.

    Issues a GET request to the URL returned by
    ``get_nvidia_docker_endpoint()`` and decodes the response body as JSON.

    :returns: The decoded JSON response.
    :raises NvidiaConnectionError: If the request fails with
        ``requests.exceptions.ConnectionError``.
    """
    url = get_nvidia_docker_endpoint()
    try:
        return requests.get(url).json()
    except requests.exceptions.ConnectionError:
        # Report the URL that was actually requested; re-deriving it here could
        # name a different endpoint if the environment changed in the meantime.
        message = (
            'Couldn\'t connect to nvidia-driver-plugin at {url} - is it running and accessible?.\n'
            'Try: "curl {url}" or "systemctl start nvidia-docker"').format(url=url)
        raise NvidiaConnectionError(message, url)


def is_nvidia_image(api, image):
    """Return whether ``image`` is labelled as needing the nvidia driver volume.

    :param api: Object exposing ``inspect_image(image)`` that returns a dict.
    :param image: Image reference passed through to ``inspect_image``.
    :returns: ``True`` only when the image's ``com.nvidia.volumes.needed``
        label equals ``'nvidia_driver'``; ``False`` otherwise.
    """
    # 'Config' and 'Labels' may be present but null, in which case a plain
    # ``.get(key, {})`` would hand back None; normalise both to an empty dict.
    config = api.inspect_image(image).get('Config') or {}
    labels = config.get('Labels') or {}
    return labels.get('com.nvidia.volumes.needed') == 'nvidia_driver'


def add_nvidia_docker_to_config(container_config):
    """Merge the nvidia-docker plugin settings into ``container_config``.

    The configuration is fetched with ``get_nvidia_configuration()`` and its
    volume driver, volumes and devices are added to
    ``container_config['HostConfig']`` in place. An existing ``VolumeDriver``
    is kept; binds and devices are appended to whatever is already there.

    When the ``NV_GPU`` environment variable lists card numbers (separated by
    spaces and/or commas), ``/nvidiaN`` devices whose number is not listed are
    left out. Devices that do not match that pattern are always added.

    :param container_config: Mapping describing the container; mutated in place.
    """
    if not container_config.get('HostConfig', None):
        container_config['HostConfig'] = {}

    nvidia_config = get_nvidia_configuration()
    host_config = container_config['HostConfig']

    # Volumes: keep a caller-supplied driver, append the nvidia binds.
    host_config.setdefault('VolumeDriver', nvidia_config['VolumeDriver'])
    host_config.setdefault('Binds', []).extend(nvidia_config['Volumes'])

    # Devices: start from whatever the caller already requested.
    devices = host_config.get('Devices', [])
    # Support both '0 1' and '0, 1' formats, just like nvidia-docker.
    visible_gpus = os.getenv('NV_GPU', '').replace(',', ' ').split()
    card_pattern = re.compile(r'/nvidia([0-9]+)$')
    for device_path in nvidia_config['Devices']:
        # Only numbered card devices are subject to NV_GPU filtering.
        match = card_pattern.search(device_path) if visible_gpus else None
        if match and match.group(1) not in visible_gpus:
            continue
        devices.extend(parse_devices([device_path]))

    host_config['Devices'] = devices


class NvidiaDockerClient(DockerClient):
    """``DockerClient`` whose low-level ``api`` is an ``NvidiaAPIClient``."""

    def __init__(self, *args, **kwargs):
        """Build the client, forwarding all arguments to ``NvidiaAPIClient``."""
        # The base initialiser is intentionally not invoked here; only the
        # ``api`` attribute is set, using the nvidia-aware API client.
        self.api = NvidiaAPIClient(*args, **kwargs)


class NvidiaAPIClient(APIClient):
    """``APIClient`` that adds nvidia-docker settings for nvidia images."""

    def create_container_config(self, image, *args, **kwargs):
        """Create the container config, adding nvidia settings when needed.

        The config is first built by the parent class. If
        ``is_nvidia_image(self, image)`` is true, it is then passed to
        ``add_nvidia_docker_to_config`` before being returned.

        :param image: Image the container will be created from.
        :returns: The container config produced by the parent class.
        """
        parent = super(NvidiaAPIClient, self)
        config = parent.create_container_config(image, *args, **kwargs)

        if not is_nvidia_image(self, image):
            return config

        add_nvidia_docker_to_config(config)
        return config


class NvidiaConnectionError(DockerException):
    """Raised when the nvidia-docker plugin endpoint cannot be reached.

    :param msg: Human-readable description of the failure; stored as ``msg``.
    :param nvidia_url: Endpoint URL that was being contacted; stored as ``url``.
    """

    def __init__(self, msg, nvidia_url):
        # Forward both arguments so ``args`` keeps mirroring the constructor
        # signature, exactly as it did before this initialiser called its base.
        super(NvidiaConnectionError, self).__init__(msg, nvidia_url)
        self.msg = msg
        self.url = nvidia_url

    def __str__(self):
        # Without this, str(exc) is the repr of the (msg, url) args tuple,
        # which makes logged errors hard to read.
        return str(self.msg)
20 changes: 7 additions & 13 deletions girder_worker/docker/tasks/__init__.py
Expand Up @@ -47,21 +47,15 @@ def _pull_image(image):
def _run_container(image, container_args, **kwargs):
    """Run ``image`` with ``container_args`` using the Docker client from the environment.

    If the caller does not supply a ``runtime`` keyword argument and
    ``nvidia.is_nvidia_image`` reports the image as an nvidia image, the
    ``'nvidia'`` runtime is selected; otherwise the caller's value (or
    ``None``) is passed through.

    :param image: Image to run.
    :param container_args: Arguments passed to ``client.containers.run``.
    :param kwargs: Extra keyword arguments forwarded to ``client.containers.run``;
        ``runtime`` is extracted and forwarded explicitly.
    :returns: Whatever ``client.containers.run`` returns.
    :raises DockerException: Logged and re-raised unchanged.
    """
    # TODO we could allow configuration of non default socket
    client = docker.from_env(version='auto')

    # An explicit runtime from the caller always wins over auto-detection.
    runtime = kwargs.pop('runtime', None)
    if runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    logger.info('Running container: image: %s args: %s runtime: %s kwargs: %s'
                % (image, container_args, runtime, kwargs))
    try:
        return client.containers.run(image, container_args, runtime=runtime, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
Expand Down
63 changes: 5 additions & 58 deletions tests/test_docker_nvidia.py
@@ -1,78 +1,25 @@
import os
import pytest
import mock
import requests

from girder_worker.docker.nvidia import (
NVIDIA_DEFAULT_HOST,
NVIDIA_DEFAULT_PORT,
NvidiaAPIClient,
NvidiaConnectionError,
get_nvidia_docker_endpoint,
get_nvidia_configuration,
is_nvidia_image
)
from docker.api.client import APIClient


def test_nvidia_docker_endpoint_returns_defaults():
    """The endpoint is built from the default host and port.

    NOTE(review): assumes NV_HOST is not set in the test environment.
    """
    expected = 'http://{}:{}/docker/cli/json'.format(NVIDIA_DEFAULT_HOST, NVIDIA_DEFAULT_PORT)
    assert get_nvidia_docker_endpoint() == expected


def test_docker_endpoint_responds_to_NV_HOST(monkeypatch):
    """Setting NV_HOST changes the base URL of the endpoint."""
    monkeypatch.setitem(os.environ, 'NV_HOST', 'http://bogus.com:8888')
    endpoint = get_nvidia_docker_endpoint()
    assert endpoint == 'http://bogus.com:8888/docker/cli/json'


def test_get_nvidia_configuration_calls_docker_endpoint_url():
    """``get_nvidia_configuration`` requests the nvidia-docker endpoint URL."""
    with mock.patch('girder_worker.docker.nvidia.requests.get') as mocked_get:
        get_nvidia_configuration()
        expected_url = get_nvidia_docker_endpoint()
        mocked_get.assert_called_with(expected_url)


def test_get_nvidia_configuration_raises_NvidiaConnectionError_on_requests_ConnectionError():
    """A requests ConnectionError surfaces as NvidiaConnectionError."""
    connection_error = requests.exceptions.ConnectionError()
    with mock.patch('girder_worker.docker.nvidia.requests.get',
                    side_effect=connection_error):
        with pytest.raises(NvidiaConnectionError):
            get_nvidia_configuration()
from girder_worker.docker.nvidia import is_nvidia_image


def test_is_nvidia_image_no_labels_returns_false():
    """An image whose inspect data has no labels is not an nvidia image."""
    # A single mock suffices; the earlier duplicate assignment was a dead store.
    api = mock.MagicMock(spec=APIClient)
    api.inspect_image.return_value = {}
    assert is_nvidia_image(api, 'bogus/image:latest') is False


def test_is_nvidia_image_no_nvidia_labels_returns_false():
    """An image with only unrelated labels is not an nvidia image."""
    # A single mock suffices; the earlier duplicate assignment was a dead store.
    api = mock.MagicMock(spec=APIClient)
    api.inspect_image.return_value = {'Config': {'Labels': {'some': 'label'}}}
    assert is_nvidia_image(api, 'bogus/image:latest') is False


def test_is_nvidia_image_returns_true():
    """An image carrying the nvidia driver volume label is an nvidia image."""
    # A single mock suffices; the earlier duplicate assignment was a dead store.
    api = mock.MagicMock(spec=APIClient)
    api.inspect_image.return_value = {
        'Config': {
            'Labels': {'com.nvidia.volumes.needed': 'nvidia_driver'},
        },
    }
    assert is_nvidia_image(api, 'bogus/image:latest') is True


def test_NvidiaAPIClient_create_container_config_is_nvidia_image_calls_add_nvidia_docker():
    """For an nvidia image, the nvidia settings are added exactly once."""
    with mock.patch('girder_worker.docker.nvidia.APIClient.create_container_config'), \
            mock.patch('girder_worker.docker.nvidia.is_nvidia_image', return_value=True), \
            mock.patch('girder_worker.docker.nvidia.add_nvidia_docker_to_config') as add_config:
        client = NvidiaAPIClient()
        client.create_container_config('bogus/image:latest')
        add_config.assert_called_once()


def test_NvidiaAPIClient_create_container_config_is_nvidia_image_does_not_call_add_nvidia_docker():
    """For a non-nvidia image, the nvidia settings are never added."""
    with mock.patch('girder_worker.docker.nvidia.APIClient.create_container_config'), \
            mock.patch('girder_worker.docker.nvidia.is_nvidia_image', return_value=False), \
            mock.patch('girder_worker.docker.nvidia.add_nvidia_docker_to_config') as add_config:
        client = NvidiaAPIClient()
        client.create_container_config('bogus/image:latest')
        add_config.assert_not_called()


# TODO: add_nvidia_docker_to_config

0 comments on commit 31c28c6

Please sign in to comment.