Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for caching the Singularity cached images directory #12524

Merged
merged 3 commits into from Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
123 changes: 107 additions & 16 deletions lib/galaxy/tool_util/deps/container_resolvers/mulled.py
Expand Up @@ -3,6 +3,10 @@
import logging
import os
import subprocess
from abc import (
ABCMeta,
abstractmethod,
)
from typing import NamedTuple, Optional

from galaxy.util import (
Expand Down Expand Up @@ -75,6 +79,75 @@ def package_hash(target):
return image_name.rsplit("/")[-1]


class CacheDirectory(metaclass=ABCMeta):
def __init__(self, path, hash_func="v2"):
self.path = path
self.hash_func = hash_func

def _list_cached_mulled_images_from_path(self):
contents = os.listdir(self.path)
sorted_images = version_sorted(contents)
raw_images = map(lambda name: identifier_to_cached_target(name, self.hash_func), sorted_images)
return list([i for i in raw_images if i is not None])

@abstractmethod
def list_cached_mulled_images_from_path(self):
"""Generate a list of cached, mulled images in the cache."""

@abstractmethod
def invalidate_cache(self):
"""Invalidate the cache."""


class UncachedCacheDirectory(CacheDirectory):
cacher_type = "uncached"

def list_cached_mulled_images_from_path(self):
return self._list_cached_mulled_images_from_path()

def invalidate_cache(self):
pass


class DirMtimeCacheDirectory(CacheDirectory):
cacher_type = "dir_mtime"

def __init__(self, path, **kwargs):
super().__init__(path, **kwargs)
self.invalidate_cache()

def __get_mtime(self):
return os.stat(self.path).st_mtime

def __cache(self):
self.__contents = self._list_cached_mulled_images_from_path()
self.__mtime = self.__get_mtime()
log.debug(f"Cached images in path {self.path} at directory mtime {self.__mtime}")

def list_cached_mulled_images_from_path(self):
mtime = self.__get_mtime()
if mtime != self.__mtime:
if mtime < self.__mtime:
log.warning(f"Modification time '{mtime}' of cache directory '{self.path}' is older than previous "
f"modification time '{self.__mtime}'! Cache directory will be recached")
self.__cache()
return self.__contents

def invalidate_cache(self):
self.__mtime = -1
self.__contents = []


def get_cache_directory_cacher(cacher_type):
# these can become a separate module and use plugin_config if we need more
cachers = {
UncachedCacheDirectory.cacher_type: UncachedCacheDirectory,
DirMtimeCacheDirectory.cacher_type: DirMtimeCacheDirectory,
}
cacher_type = cacher_type or "uncached"
return cachers[cacher_type]


def list_docker_cached_mulled_images(namespace=None, hash_func="v2", resolution_cache=None):
cache_key = "galaxy.tool_util.deps.container_resolvers.mulled:cached_images"
if resolution_cache is not None and cache_key in resolution_cache:
Expand Down Expand Up @@ -148,13 +221,6 @@ def identifier_to_cached_target(identifier, hash_func, namespace=None):
return image


def list_cached_mulled_images_from_path(directory, hash_func="v2"):
contents = os.listdir(directory)
sorted_images = version_sorted(contents)
raw_images = map(lambda name: identifier_to_cached_target(name, hash_func), sorted_images)
return [i for i in raw_images if i is not None]


def get_filter(namespace):
prefix = "quay.io/" if namespace is None else f"quay.io/{namespace}"
return lambda name: name.startswith(prefix) and name.count("/") == 2
Expand Down Expand Up @@ -231,16 +297,16 @@ def singularity_cached_container_description(targets, cache_directory, hash_func
if len(targets) == 0:
return None

if not os.path.exists(cache_directory):
if not os.path.exists(cache_directory.path):
return None

cached_images = list_cached_mulled_images_from_path(cache_directory, hash_func=hash_func)
cached_images = cache_directory.list_cached_mulled_images_from_path()
image = find_best_matching_cached_image(targets, cached_images, hash_func)

container = None
if image:
container = ContainerDescription(
os.path.abspath(os.path.join(cache_directory, image.image_identifier)),
os.path.abspath(os.path.join(cache_directory.path, image.image_identifier)),
type="singularity",
shell=shell,
)
Expand Down Expand Up @@ -363,8 +429,15 @@ class SingularityCliContainerResolver(CliContainerResolver):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.cache_directory = kwargs.get("cache_directory", os.path.join(kwargs['app_info'].container_image_cache_path, "singularity", "mulled"))
safe_makedirs(self.cache_directory)
self.cache_directory_path = kwargs.get("cache_directory", os.path.join(kwargs['app_info'].container_image_cache_path, "singularity", "mulled"))
self.cache_directory_cacher_type = kwargs.get("cache_directory_cacher_type", None)
self.cache_directory = None
self.hash_func = None

def _init_cache_directory(self):
cacher_class = get_cache_directory_cacher(self.cache_directory_cacher_type)
self.cache_directory = cacher_class(self.cache_directory_path, hash_func=self.hash_func)
safe_makedirs(self.cache_directory.path)


class CachedMulledDockerContainerResolver(CliContainerResolver):
Expand All @@ -382,6 +455,7 @@ def resolve(self, enabled_container_types, tool_info, **kwds):
return None

targets = mulled_targets(tool_info)
log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}")
resolution_cache = kwds.get("resolution_cache")
return docker_cached_container_description(targets, self.namespace, hash_func=self.hash_func, shell=self.shell, resolution_cache=resolution_cache)

Expand All @@ -397,16 +471,18 @@ class CachedMulledSingularityContainerResolver(SingularityCliContainerResolver):
def __init__(self, app_info=None, hash_func="v2", **kwds):
super().__init__(app_info=app_info, **kwds)
self.hash_func = hash_func
self._init_cache_directory()

def resolve(self, enabled_container_types, tool_info, **kwds):
if tool_info.requires_galaxy_python_environment or self.container_type not in enabled_container_types:
return None

targets = mulled_targets(tool_info)
log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}")
return singularity_cached_container_description(targets, self.cache_directory, hash_func=self.hash_func, shell=self.shell)

def __str__(self):
return f"CachedMulledSingularityContainerResolver[cache_directory={self.cache_directory}]"
return f"CachedMulledSingularityContainerResolver[cache_directory={self.cache_directory.path}]"


class MulledDockerContainerResolver(CliContainerResolver):
Expand Down Expand Up @@ -446,6 +522,7 @@ def resolve(self, enabled_container_types, tool_info, install=False, session=Non
return None

targets = mulled_targets(tool_info)
log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}")
if len(targets) == 0:
return None

Expand Down Expand Up @@ -499,6 +576,7 @@ def __init__(self, app_info=None, namespace="biocontainers", hash_func="v2", aut
super().__init__(app_info=app_info, **kwds)
self.namespace = namespace
self.hash_func = hash_func
self._init_cache_directory()
self.auto_install = string_as_bool(auto_install)

def cached_container_description(self, targets, namespace, hash_func, resolution_cache):
Expand All @@ -513,8 +591,9 @@ def can_list_containers(self):

def pull(self, container):
if self.cli_available:
cmds = container.build_mulled_singularity_pull_command(cache_directory=self.cache_directory, namespace=self.namespace)
cmds = container.build_mulled_singularity_pull_command(cache_directory=self.cache_directory.path, namespace=self.namespace)
shell(cmds=cmds)
self.cache_directory.invalidate_cache()

def __str__(self):
return f"MulledSingularityContainerResolver[namespace={self.namespace}]"
Expand Down Expand Up @@ -548,6 +627,7 @@ def resolve(self, enabled_container_types, tool_info, install=False, **kwds):
return None

targets = mulled_targets(tool_info)
log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}")
if len(targets) == 0:
return None
if self.auto_install or install:
Expand Down Expand Up @@ -580,13 +660,14 @@ def __init__(self, app_info=None, hash_func="v2", auto_install=True, **kwds):
'involucro_bin': self._get_config_option("involucro_path", None)
}
self.hash_func = hash_func
self._init_cache_directory()
self.auto_install = string_as_bool(auto_install)
self._mulled_kwds = {
'channels': self._get_config_option("mulled_channels", DEFAULT_CHANNELS),
'hash_func': self.hash_func,
'command': 'build-and-test',
'singularity': True,
'singularity_image_dir': self.cache_directory,
'singularity_image_dir': self.cache_directory.path,
}
self.auto_init = self._get_config_option("involucro_auto_init", True)

Expand All @@ -595,6 +676,7 @@ def resolve(self, enabled_container_types, tool_info, install=False, **kwds):
return None

targets = mulled_targets(tool_info)
log.debug(f"Image name for tool {tool_info.tool_id}: {image_name(targets, self.hash_func)}")
if len(targets) == 0:
return None

Expand All @@ -612,13 +694,22 @@ def _get_involucro_context(self):
return involucro_context

def __str__(self):
return f"BuildSingularityContainerResolver[cache_directory={self.cache_directory}]"
return f"BuildSingularityContainerResolver[cache_directory={self.cache_directory.path}]"


def mulled_targets(tool_info):
return requirements_to_mulled_targets(tool_info.requirements)


def image_name(targets, hash_func):
if len(targets) == 0:
return "no targets"
elif hash_func == "v2":
return v2_image_name(targets)
else:
return v1_image_name(targets)


__all__ = (
"CachedMulledDockerContainerResolver",
"CachedMulledSingularityContainerResolver",
Expand Down
34 changes: 34 additions & 0 deletions test/unit/tool_util/test_container_resolution.py
Expand Up @@ -2,12 +2,16 @@

from galaxy.tool_util.deps.container_resolvers.mulled import (
CachedMulledDockerContainerResolver,
CachedMulledSingularityContainerResolver,
MulledDockerContainerResolver,
)
from galaxy.tool_util.deps.dependencies import ToolInfo
from galaxy.tool_util.deps.requirements import ToolRequirement


SINGULARITY_IMAGES = ('foo:1.0--bar', 'baz:2.22', 'mulled-v2-fe8a3b846bc50d24e5df78fa0b562c43477fe9ce:9f946d13f673ab2903cb0da849ad42916d619d18-0')


def test_docker_container_resolver_detects_docker_cli_absent(mocker):
mocker.patch('galaxy.tool_util.deps.container_resolvers.mulled.which', return_value=None)
resolver = CachedMulledDockerContainerResolver()
Expand Down Expand Up @@ -51,3 +55,33 @@ def test_docker_container_docker_cli_exception_resolve(mocker):
assert resolver.cli_available is True
assert container_description.type == 'docker'
assert container_description.identifier == 'quay.io/biocontainers/samtools:1.10--h2e538c0_3'


def test_cached_singularity_container_resolver_uncached(mocker):
mocker.patch('os.listdir', return_value=SINGULARITY_IMAGES)
mocker.patch('os.path.exists', return_value=True)
mocker.patch('galaxy.tool_util.deps.container_resolvers.mulled.safe_makedirs')
resolver = CachedMulledSingularityContainerResolver(app_info=mocker.Mock(container_image_cache_path='/'))
requirement = ToolRequirement(name="foo", version="1.0", type="package")
tool_info = ToolInfo(requirements=[requirement])
container_description = resolver.resolve(enabled_container_types=['singularity'], tool_info=tool_info)
assert container_description.type == 'singularity'
assert container_description.identifier == '/singularity/mulled/foo:1.0--bar'


def test_cached_singularity_container_resolver_dir_mtime_cached(mocker):
mocker.patch('os.listdir', return_value=SINGULARITY_IMAGES)
mocker.patch('os.path.exists', return_value=True)
mocker.patch('galaxy.tool_util.deps.container_resolvers.mulled.safe_makedirs')
mocker.patch('os.stat', return_value=mocker.Mock(st_mtime=42))
resolver = CachedMulledSingularityContainerResolver(app_info=mocker.Mock(container_image_cache_path='/'), cache_directory_cacher_type='dir_mtime')
requirement = ToolRequirement(name="baz", version="2.22", type="package")
tool_info = ToolInfo(requirements=[requirement])
container_description = resolver.resolve(enabled_container_types=['singularity'], tool_info=tool_info)
assert container_description.type == 'singularity'
assert container_description.identifier == '/singularity/mulled/baz:2.22'
requirement = ToolRequirement(name="foo", version="1.0", type="package")
tool_info.requirements.append(requirement)
container_description = resolver.resolve(enabled_container_types=['singularity'], tool_info=tool_info)
assert container_description.type == 'singularity'
assert container_description.identifier == '/singularity/mulled/mulled-v2-fe8a3b846bc50d24e5df78fa0b562c43477fe9ce:9f946d13f673ab2903cb0da849ad42916d619d18-0'