Skip to content

Commit

Permalink
Allow Galaxy to use cached mulled containers for Singularity.
Browse files Browse the repository at this point in the history
- Change undocumented (and probably unused) config option container_image_cache_path default to "database/container_cache".
- Update container resolver framework to let plugins assert they only produce containers of a single type.
- Update mulled container resolvers to indicate they produce Docker containers only - update names also.
- Refactor mulled caching stuff to allow reuse with Singularity (where the names come from is different - but a lot of the logic is the same).
- Add a cached singularity container resolver - that searches "config.container_image_cache_path/singularity/mulled" for an image with the correct name.

Next up - similarly refactor the BuildMulledDockerContainerResolver to allow reuse and implement the BuildMulledSingularityContainerResolver.
  • Loading branch information
jmchilton committed Jun 12, 2017
1 parent d36c405 commit 8d224ed
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 56 deletions.
4 changes: 4 additions & 0 deletions lib/galaxy/config.py
Expand Up @@ -247,7 +247,11 @@ def __init__( self, **kwargs ):
log.warn("preserve_python_environment set to unknown value [%s], defaulting to legacy_only")
preserve_python_environment = "legacy_only"
self.preserve_python_environment = preserve_python_environment
# Older default container cache path, I don't think anyone is using it anymore and it wasn't documented - we
# should probably drop the backward compatibility to save the path check.
self.container_image_cache_path = self.resolve_path( kwargs.get( "container_image_cache_path", "database/container_images" ) )
if not os.path.exists( self.container_image_cache_path ):
self.container_image_cache_path = self.resolve_path( kwargs.get( "container_image_cache_path", "database/container_cache" ) )
self.outputs_to_working_directory = string_as_bool( kwargs.get( 'outputs_to_working_directory', False ) )
self.output_size_limit = int( kwargs.get( 'output_size_limit', 0 ) )
self.retry_job_output_collection = int( kwargs.get( 'retry_job_output_collection', 0 ) )
Expand Down
168 changes: 118 additions & 50 deletions lib/galaxy/tools/deps/container_resolvers/mulled.py
Expand Up @@ -2,6 +2,7 @@

import collections
import logging
import os

import six

Expand Down Expand Up @@ -37,7 +38,7 @@
CachedV2MulledImageMultiTarget.multi_target = "v2"


def list_cached_mulled_images(namespace=None, hash_func="v2"):
def list_docker_cached_mulled_images(namespace=None, hash_func="v2"):
command = build_docker_images_command(truncate=True, sudo=False)
command = "%s | tail -n +2 | tr -s ' ' | cut -d' ' -f1,2" % command
images_and_versions = check_output(command)
Expand All @@ -46,46 +47,58 @@ def list_cached_mulled_images(namespace=None, hash_func="v2"):
def output_line_to_image(line):
image_name, version = line.split(" ", 1)
identifier = "%s:%s" % (image_name, version)
url, namespace, package_description = image_name.split("/")
if not version or version == "latest":
version = None
image = identifier_to_cached_target(identifier, hash_func)
return image

image = None
if package_description.startswith("mulled-v1-"):
if hash_func == "v2":
return None
# TODO: Sort on build ...
raw_images = [output_line_to_image(_) for _ in filter(name_filter, images_and_versions.splitlines())]
return [i for i in raw_images if i is not None]

hash = package_description
build = None
if version and version.isdigit():
build = version
image = CachedV1MulledImageMultiTarget(hash, build, identifier)
elif package_description.startswith("mulled-v2-"):
if hash_func == "v1":
return None

version_hash = None
build = None
def identifier_to_cached_target(identifier, hash_func):
    """Convert a cached image identifier into a cached mulled target object.

    ``identifier`` may be a Docker style ``[registry/namespace/]name:tag``
    string or a bare image name with no ``:`` and no ``/`` (for example a
    file name listed from an image cache directory).

    :param identifier: image identifier to parse.
    :param hash_func: mulled hash scheme being resolved, ``"v1"`` or ``"v2"``.
    :returns: a ``CachedV1MulledImageMultiTarget``,
        ``CachedV2MulledImageMultiTarget`` or ``CachedMulledImageSingleTarget``
        describing the image, or ``None`` when the image name carries a
        ``mulled-v1-``/``mulled-v2-`` prefix that does not match ``hash_func``.
    """
    # Do not assume a tag separator is present: unpacking rsplit(":", 1)
    # raises ValueError for identifiers that carry no tag.
    if ":" in identifier:
        image_name, version = identifier.rsplit(":", 1)
    else:
        image_name, version = identifier, None

    # Likewise the registry/namespace prefix is optional; the last path
    # component is the package description either way.
    package_description = image_name.rsplit("/", 1)[-1]

    if not version or version == "latest":
        version = None

    if package_description.startswith("mulled-v1-"):
        if hash_func == "v2":
            return None

        # For v1 images a purely numeric tag is the build number.
        build = version if version and version.isdigit() else None
        return CachedV1MulledImageMultiTarget(package_description, build, identifier)

    if package_description.startswith("mulled-v2-"):
        if hash_func == "v1":
            return None

        version_hash = None
        build = None
        if version and "-" in version:
            version_hash, build = version.rsplit("-", 1)
        elif version and version.isdigit():
            # Guard on ``version`` first: it is None for untagged/latest images.
            build = version
        elif version:
            log.debug("Unparsable mulled image tag encountered [%s]" % version)

        return CachedV2MulledImageMultiTarget(package_description, version_hash, build, identifier)

    # Anything else is treated as a single-target image whose tag may be
    # "<version>--<build>".
    build = None
    if version and "--" in version:
        version, build = split_tag(version)

    return CachedMulledImageSingleTarget(image_name, version, build, identifier)


def list_cached_mulled_images_from_path(directory, hash_func="v2"):
    """List cached mulled image targets found in ``directory``.

    Every entry name in ``directory`` is passed to
    ``identifier_to_cached_target``; entries it maps to ``None`` are dropped.

    :param directory: path of the directory to list.
    :param hash_func: mulled hash scheme forwarded to
        ``identifier_to_cached_target``.
    :returns: list of cached target objects, ordered by entry name.
    """
    # os.listdir returns entries in arbitrary order; sort so that the result
    # (and therefore which image a caller picks first) is deterministic.
    names = sorted(os.listdir(directory))
    candidates = (identifier_to_cached_target(name, hash_func) for name in names)
    return [target for target in candidates if target is not None]


Expand All @@ -94,11 +107,10 @@ def get_filter(namespace):
return lambda name: name.startswith(prefix) and name.count("/") == 2


def cached_container_description(targets, namespace, hash_func="v2"):
def find_best_matching_cached_image(targets, cached_images, hash_func):
if len(targets) == 0:
return None

cached_images = list_cached_mulled_images(namespace, hash_func=hash_func)
image = None
if len(targets) == 1:
target = targets[0]
Expand Down Expand Up @@ -142,6 +154,16 @@ def cached_container_description(targets, namespace, hash_func="v2"):
image = cached_image
break

return image


def docker_cached_container_description(targets, namespace, hash_func="v2"):
if len(targets) == 0:
return None

cached_images = list_docker_cached_mulled_images(namespace, hash_func=hash_func)
image = find_best_matching_cached_image(targets, cached_images, hash_func)

container = None
if image:
container = ContainerDescription(
Expand All @@ -152,13 +174,34 @@ def cached_container_description(targets, namespace, hash_func="v2"):
return container


def singularity_cached_container_description(targets, cache_directory, hash_func="v2"):
    """Describe the cached Singularity image that best matches ``targets``.

    :param targets: mulled targets to satisfy; an empty value yields ``None``.
    :param cache_directory: directory listed for cached image files.
    :param hash_func: mulled hash scheme used when matching images.
    :returns: a ``ContainerDescription`` of type ``"singularity"`` whose
        identifier is the image path inside ``cache_directory``, or ``None``
        when there are no targets, the cache directory is not a directory, or
        no cached image matches.
    """
    if not targets:
        return None

    # isdir rather than exists: a regular file at this path would otherwise
    # pass the check and make os.listdir raise further down.
    if not os.path.isdir(cache_directory):
        return None

    cached_images = list_cached_mulled_images_from_path(cache_directory, hash_func=hash_func)
    image = find_best_matching_cached_image(targets, cached_images, hash_func)
    if not image:
        return None

    return ContainerDescription(
        os.path.join(cache_directory, image.image_identifier),
        type="singularity",
    )


@six.python_2_unicode_compatible
class CachedMulledContainerResolver(ContainerResolver):
class CachedMulledDockerContainerResolver(ContainerResolver):

resolver_type = "cached_mulled"
container_type = "docker"

def __init__(self, app_info=None, namespace=None, hash_func="v2"):
super(CachedMulledContainerResolver, self).__init__(app_info)
super(CachedMulledDockerContainerResolver, self).__init__(app_info)
self.namespace = namespace
self.hash_func = hash_func

Expand All @@ -167,20 +210,43 @@ def resolve(self, enabled_container_types, tool_info):
return None

targets = mulled_targets(tool_info)
return cached_container_description(targets, self.namespace, hash_func=self.hash_func)
return docker_cached_container_description(targets, self.namespace, hash_func=self.hash_func)

def __str__(self):
return "CachedMulledDockerContainerResolver[namespace=%s]" % self.namespace


@six.python_2_unicode_compatible
class CachedMulledSingularityContainerResolver(ContainerResolver):
    """Resolve tool requirements against cached Singularity mulled images.

    Images are searched for in
    ``<app_info.container_image_cache_path>/singularity/mulled``.
    """

    resolver_type = "cached_mulled_singularity"
    container_type = "singularity"

    def __init__(self, app_info=None, hash_func="v2"):
        # super() must name this class; naming the Docker resolver raises
        # TypeError because ``self`` is not an instance of it.
        super(CachedMulledSingularityContainerResolver, self).__init__(app_info)
        # NOTE(review): app_info defaults to None but is dereferenced here, so
        # callers must always supply it - confirm whether None should be allowed.
        self.cache_directory = os.path.join(app_info.container_image_cache_path, "singularity", "mulled")
        self.hash_func = hash_func

    def resolve(self, enabled_container_types, tool_info):
        """Return a cached Singularity container description or ``None``.

        ``None`` is returned when the tool requires Galaxy's Python
        environment or when no cached image matches its mulled targets.
        """
        if tool_info.requires_galaxy_python_environment:
            return None

        targets = mulled_targets(tool_info)
        # cache_directory is a required positional argument of the helper.
        return singularity_cached_container_description(targets, self.cache_directory, hash_func=self.hash_func)

    def __str__(self):
        # This resolver has no namespace attribute; report the cache directory.
        return "CachedMulledSingularityContainerResolver[cache_directory=%s]" % self.cache_directory


@six.python_2_unicode_compatible
class MulledContainerResolver(ContainerResolver):
class MulledDockerContainerResolver(ContainerResolver):
"""Look for mulled images matching tool dependencies."""

resolver_type = "mulled"
container_type = "docker"

def __init__(self, app_info=None, namespace="biocontainers", hash_func="v2"):
super(MulledContainerResolver, self).__init__(app_info)
super(MulledDockerContainerResolver, self).__init__(app_info)
self.namespace = namespace
self.hash_func = hash_func

Expand Down Expand Up @@ -235,21 +301,22 @@ def tags_if_available(image_name):
if name:
return ContainerDescription(
"quay.io/%s/%s" % (self.namespace, name),
type="docker",
type=self.container_type,
)

def __str__(self):
return "MulledContainerResolver[namespace=%s]" % self.namespace
return "MulledDockerContainerResolver[namespace=%s]" % self.namespace


@six.python_2_unicode_compatible
class BuildMulledContainerResolver(ContainerResolver):
class BuildMulledDockerContainerResolver(ContainerResolver):
"""Look for mulled images matching tool dependencies."""

resolver_type = "build_mulled"
container_type = "docker"

def __init__(self, app_info=None, namespace="local", hash_func="v2", **kwds):
super(BuildMulledContainerResolver, self).__init__(app_info)
super(BuildMulledDockerContainerResolver, self).__init__(app_info)
self._involucro_context_kwds = {
'involucro_bin': self._get_config_option("involucro_path", None)
}
Expand All @@ -275,23 +342,24 @@ def resolve(self, enabled_container_types, tool_info):
hash_func=self.hash_func,
**self._mulled_kwds
)
return cached_container_description(targets, self.namespace, hash_func=self.hash_func)
return docker_cached_container_description(targets, self.namespace, hash_func=self.hash_func)

def _get_involucro_context(self):
involucro_context = InvolucroContext(**self._involucro_context_kwds)
self.enabled = ensure_installed(involucro_context, self.auto_init)
return involucro_context

def __str__(self):
return "BuildContainerResolver[namespace=%s]" % self.namespace
return "BuildDockerContainerResolver[namespace=%s]" % self.namespace


def mulled_targets(tool_info):
    """Return the result of ``requirements_to_mulled_targets`` for ``tool_info.requirements``."""
    return requirements_to_mulled_targets(tool_info.requirements)


__all__ = (
"CachedMulledContainerResolver",
"MulledContainerResolver",
"BuildMulledContainerResolver",
"CachedMulledDockerContainerResolver",
"CachedMulledSingularityContainerResolver",
"MulledDockerContainerResolver",
"BuildMulledDockerContainerResolver",
)
17 changes: 11 additions & 6 deletions lib/galaxy/tools/deps/containers.py
Expand Up @@ -14,9 +14,10 @@

from .container_resolvers.explicit import ExplicitContainerResolver
from .container_resolvers.mulled import (
BuildMulledContainerResolver,
CachedMulledContainerResolver,
MulledContainerResolver,
BuildMulledDockerContainerResolver,
CachedMulledDockerContainerResolver,
CachedMulledSingularityContainerResolver,
MulledDockerContainerResolver,
)
from .requirements import ContainerDescription
from .requirements import DEFAULT_CONTAINER_RESOLVE_DEPENDENCIES, DEFAULT_CONTAINER_SHELL
Expand Down Expand Up @@ -218,9 +219,10 @@ def __default_containers_resolvers(self):
]
if self.enable_beta_mulled_containers:
default_resolvers.extend([
CachedMulledContainerResolver(self.app_info),
MulledContainerResolver(self.app_info, namespace="biocontainers"),
BuildMulledContainerResolver(self.app_info),
CachedMulledDockerContainerResolver(self.app_info),
MulledDockerContainerResolver(self.app_info, namespace="biocontainers"),
BuildMulledDockerContainerResolver(self.app_info),
CachedMulledSingularityContainerResolver(self.app_info),
])
return default_resolvers

Expand All @@ -231,6 +233,9 @@ def __resolvers_dict( self ):
def find_best_container_description(self, enabled_container_types, tool_info):
"""Yield best container description of supplied types matching tool info."""
for container_resolver in self.container_resolvers:
if hasattr(container_resolver, "container_type"):
if container_resolver.container_type not in enabled_container_types:
continue
container_description = container_resolver.resolve(enabled_container_types, tool_info)
log.info("Checking with container resolver [%s] found description [%s]" % (container_resolver, container_description))
if container_description:
Expand Down

0 comments on commit 8d224ed

Please sign in to comment.