Skip to content

Commit

Permalink
Merge pull request #3227 from mvdbeek/cached_deps_16.10
Browse files Browse the repository at this point in the history
[16.10] Backport #3106 and #3222: Cached conda environments and API to manage them
  • Loading branch information
martenson committed Nov 29, 2016
2 parents e3f8b72 + 2a4ccac commit d6f0e6b
Show file tree
Hide file tree
Showing 15 changed files with 266 additions and 46 deletions.
12 changes: 12 additions & 0 deletions config/galaxy.ini.sample
Expand Up @@ -217,6 +217,18 @@ paste.app_factory = galaxy.web.buildapp:app_factory
# of extra disk space usage and extra time spent copying packages.
#conda_copy_dependencies = False

# Certain dependency resolvers (namely Conda) take a considerable amount of
# time to build an isolated job environment in the job_working_directory if the
# job working directory is on a network share. Set the following option to True
# to cache the dependencies in a folder. This option is beta and should only be
# used if you experience long waiting times before a job is actually submitted
# to your cluster.
#use_cached_dependency_manager = False

# By default, tool_dependency_cache_dir is the _cache subdirectory
# of the tool dependency directory.
#tool_dependency_cache_dir = <tool_dependency_dir>/_cache

# File containing the Galaxy Tool Sheds that should be made available to
# install from in the admin interface (.sample used if default does not exist).
#tool_sheds_config_file = config/tool_sheds_conf.xml
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/config.py
Expand Up @@ -325,6 +325,8 @@ def __init__( self, **kwargs ):
else:
self.tool_dependency_dir = None
self.use_tool_dependencies = os.path.exists(self.dependency_resolvers_config_file)
self.use_cached_dependency_manager = string_as_bool(kwargs.get("use_cached_dependency_manager", 'False'))
self.tool_dependency_cache_dir = kwargs.get( 'tool_dependency_cache_dir', os.path.join(self.tool_dependency_dir, '_cache'))

self.enable_beta_mulled_containers = string_as_bool( kwargs.get( 'enable_beta_mulled_containers', 'False' ) )
containers_resolvers_config_file = kwargs.get( 'containers_resolvers_config_file', None )
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/datatypes/converters/fasta_to_2bit.xml
Expand Up @@ -2,6 +2,7 @@
<!-- <description>__NOT_USED_CURRENTLY_FOR_CONVERTERS__</description> -->
<!-- Used on the metadata edit page. -->
<requirements>
<requirement type="package" version="332">ucsc-fatotwobit</requirement>
<requirement type="package">ucsc_tools</requirement>
</requirements>
<command>faToTwoBit '$input' '$output'</command>
Expand Down
24 changes: 19 additions & 5 deletions lib/galaxy/tools/__init__.py
Expand Up @@ -34,6 +34,7 @@
from galaxy.tools.actions.data_manager import DataManagerToolAction
from galaxy.tools.actions.model_operations import ModelOperationToolAction
from galaxy.tools.deps import views
from galaxy.tools.deps import CachedDependencyManager
from galaxy.tools.parameters import params_to_incoming, check_param, params_from_strings, params_to_strings, visit_input_values
from galaxy.tools.parameters import output_collect
from galaxy.tools.parameters.basic import (BaseURLToolParameter,
Expand Down Expand Up @@ -1305,17 +1306,30 @@ def validate_inputs( input, value, error, parent, context, prefixed_name, prefix
visit_input_values( self.inputs, values, validate_inputs )
return messages

def build_dependency_cache(self, **kwds):
    """Pre-build cached environments for this tool's requirements.

    This is a no-op unless the application is configured with a
    CachedDependencyManager; extra keyword arguments (e.g. force_rebuild)
    are forwarded to the manager's build_cache().
    """
    manager = self.app.toolbox.dependency_manager
    if not isinstance(manager, CachedDependencyManager):
        return
    manager.build_cache(
        requirements=self.requirements,
        installed_tool_dependencies=self.installed_tool_dependencies,
        tool_dir=self.tool_dir,
        job_directory=None,
        metadata=False,
        tool_instance=self,
        **kwds
    )

def build_dependency_shell_commands( self, job_directory=None, metadata=False ):
    """
    Return a list of commands to be run to populate the current environment
    to include this tool's requirements.

    :param job_directory: job working directory, forwarded so per-job
        environments can be built there when needed.
    :param metadata: True when resolving dependencies for metadata
        generation rather than tool execution.
    """
    # Delegate entirely to the dependency manager; passing tool_instance
    # lets the manager record resolved dependencies back onto this tool.
    return self.app.toolbox.dependency_manager.dependency_shell_commands(
        requirements=self.requirements,
        installed_tool_dependencies=self.installed_tool_dependencies,
        tool_dir=self.tool_dir,
        job_directory=job_directory,
        metadata=metadata,
        tool_instance=self
    )

@property
def installed_tool_dependencies(self):
Expand Down
62 changes: 60 additions & 2 deletions lib/galaxy/tools/deps/__init__.py
Expand Up @@ -2,12 +2,17 @@
Dependency management for tools.
"""

import json
import logging
import os.path
import shutil

from collections import OrderedDict

from galaxy.util import plugin_config
from galaxy.util import (
hash_util,
plugin_config
)

from .resolvers import NullDependency
from .resolvers.conda import CondaDependencyResolver, DEFAULT_ENSURE_CHANNELS
Expand Down Expand Up @@ -45,7 +50,11 @@ def build_dependency_manager( config ):
if value is CONFIG_VAL_NOT_FOUND:
value = default_value
dependency_manager_kwds[key] = value
dependency_manager = DependencyManager( **dependency_manager_kwds )
if config.use_cached_dependency_manager:
dependency_manager_kwds['tool_dependency_cache_dir'] = config.tool_dependency_cache_dir
dependency_manager = CachedDependencyManager(**dependency_manager_kwds)
else:
dependency_manager = DependencyManager( **dependency_manager_kwds )
else:
dependency_manager = NullDependencyManager()

Expand Down Expand Up @@ -109,6 +118,8 @@ def requirements_to_dependencies(self, requirements, **kwds):
log.debug(dependency.resolver_msg)
if dependency.dependency_type:
requirement_to_dependency[requirement] = dependency
if 'tool_instance' in kwds:
kwds['tool_instance'].dependencies = [dep.to_dict() for dep in requirement_to_dependency.values()]
return requirement_to_dependency

def uses_tool_shed_dependencies(self):
Expand Down Expand Up @@ -155,3 +166,50 @@ def __parse_resolver_conf_xml(self, plugin_source):
def __resolvers_dict( self ):
    # Import lazily (inside the method) to avoid a circular import at
    # module load time.
    from galaxy.tools.deps import resolvers as resolvers_module
    return plugin_config.plugins_dict( resolvers_module, 'resolver_type' )


class CachedDependencyManager(DependencyManager):
    """Dependency manager that builds and reuses cached (hashed) environments.

    Cacheable dependencies are installed once under the configured
    ``tool_dependency_cache_dir``, in a directory named after a short hash of
    the resolved requirements, instead of being rebuilt per job.
    """

    def __init__(self, default_base_path, conf_file=None, **extra_config):
        super(CachedDependencyManager, self).__init__(default_base_path=default_base_path, conf_file=conf_file, **extra_config)

    def build_cache(self, requirements, **kwds):
        """Resolve ``requirements`` and build cache entries for the cacheable ones.

        Pass ``force_rebuild=True`` in ``kwds`` to delete and rebuild an
        existing cache entry.
        """
        resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds)
        cacheable_dependencies = [dep for dep in resolved_dependencies.values() if dep.cacheable]
        hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies)
        if kwds.get('force_rebuild', False) and os.path.exists(hashed_requirements_dir):
            try:
                shutil.rmtree(hashed_requirements_dir)
            except Exception:
                # Best-effort removal: a stale cache is preferable to failing
                # the whole build.
                log.warning("Could not delete cached requirements directory '%s'" % hashed_requirements_dir)
        # Plain loop (not a throwaway list comprehension) for side effects.
        for dependency in cacheable_dependencies:
            dependency.build_cache(hashed_requirements_dir)

    def dependency_shell_commands( self, requirements, **kwds ):
        """
        Runs a set of requirements through the dependency resolvers and returns
        a list of commands required to activate the dependencies. If dependencies
        are cacheable and the cache exists, will generate commands to activate
        cached environments.
        """
        resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds)
        cacheable_dependencies = [dep for dep in resolved_dependencies.values() if dep.cacheable]
        hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies)
        if os.path.exists(hashed_requirements_dir):
            for dependency in cacheable_dependencies:
                dependency.set_cache_path(hashed_requirements_dir)
        return [dep.shell_commands(req) for req, dep in resolved_dependencies.items()]

    def hash_requirements(self, resolved_dependencies):
        """Return a short hash identifying the resolved dependencies.

        The (name, version, exact, dependency_type) tuples are sorted before
        hashing so the result does not depend on resolution order. The
        previous implementation wrapped the tuple list in an extra list,
        which made ``sorted`` a no-op and the hash order-sensitive, causing
        spurious cache misses/duplicates for the same set of requirements.
        NOTE(review): this changes hash values, so existing cache entries
        are rebuilt once.
        """
        dependency_tuples = sorted(
            (dep.name, dep.version, dep.exact, dep.dependency_type) for dep in resolved_dependencies
        )
        hash_str = json.dumps(dependency_tuples)
        return hash_util.new_secure_hash(hash_str)[:8]  # short hash

    def get_hashed_requirements_path(self, resolved_dependencies):
        """
        Return the path of the hashed requirements directory (the path is not
        checked for existence).

        :param resolved_dependencies: list of resolved Dependency objects
        :return: path under ``tool_dependency_cache_dir``
        """
        req_hash = self.hash_requirements(resolved_dependencies)
        return os.path.join(self.extra_config['tool_dependency_cache_dir'], req_hash)
15 changes: 14 additions & 1 deletion lib/galaxy/tools/deps/conda_util.py
Expand Up @@ -54,7 +54,7 @@ class CondaContext(installable.InstallableContext):

def __init__(self, conda_prefix=None, conda_exec=None,
shell_exec=None, debug=False, ensure_channels='',
condarc_override=None, use_path_exec=USE_PATH_EXEC_DEFAULT):
condarc_override=None, use_path_exec=USE_PATH_EXEC_DEFAULT, copy_dependencies=False):
self.condarc_override = condarc_override
if not conda_exec and use_path_exec:
conda_exec = commands.which("conda")
Expand All @@ -63,6 +63,7 @@ def __init__(self, conda_prefix=None, conda_exec=None,
self.conda_exec = conda_exec
self.debug = debug
self.shell_exec = shell_exec or commands.shell
self.copy_dependencies = copy_dependencies

if conda_prefix is None:
info = self.conda_info()
Expand Down Expand Up @@ -213,6 +214,17 @@ def exec_install(self, args):
install_base_args.extend(args)
return self.exec_command("install", install_base_args)

def exec_clean(self, args=None):
    """
    Clean up after conda installation by removing downloaded package
    tarballs (runs ``conda clean --tarballs -y``).

    :param args: optional extra arguments appended to the base ``clean``
        arguments. The former mutable default (``[]``) was replaced with
        ``None`` to avoid the shared-mutable-default pitfall; behavior for
        callers is unchanged.
    """
    clean_base_args = [
        "--tarballs",
        "-y"
    ]
    if args:
        clean_base_args.extend(args)
    return self.exec_command("clean", clean_base_args)

def export_list(self, name, path):
return self.exec_command("list", [
"--name", name,
Expand Down Expand Up @@ -487,6 +499,7 @@ def build_isolated_environment(

return (path or tempdir_name, exit_code)
finally:
conda_context.exec_clean()
shutil.rmtree(tempdir)


Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/tools/deps/requirements.py
Expand Up @@ -25,6 +25,9 @@ def from_dict( dict ):
type = dict.get("type", None)
return ToolRequirement( name=name, type=type, version=version )

def __eq__(self, other):
    """Requirements are equal when name, type, and version all match."""
    # Guard against comparison with unrelated types: previously this raised
    # AttributeError (or silently compared duck-typed objects); returning
    # NotImplemented lets Python fall back to its default handling.
    if not isinstance(other, ToolRequirement):
        return NotImplemented
    return self.name == other.name and self.type == other.type and self.version == other.version

def __ne__(self, other):
    # Python 2 does not derive __ne__ from __eq__, so define it explicitly
    # to keep ``!=`` consistent with ``==``.
    result = self.__eq__(other)
    if result is NotImplemented:
        return result
    return not result


DEFAULT_CONTAINER_TYPE = "docker"
DEFAULT_CONTAINER_RESOLVE_DEPENDENCIES = False
Expand Down
7 changes: 6 additions & 1 deletion lib/galaxy/tools/deps/resolvers/__init__.py
Expand Up @@ -81,8 +81,9 @@ def install_dependency(self, name, version, type, **kwds):


class Dependency(Dictifiable, object):
dict_collection_visible_keys = ['dependency_type', 'exact', 'name', 'version']
dict_collection_visible_keys = ['dependency_type', 'exact', 'name', 'version', 'cacheable']
__metaclass__ = ABCMeta
cacheable = False

@abstractmethod
def shell_commands( self, requirement ):
Expand Down Expand Up @@ -121,3 +122,7 @@ def resolver_msg(self):

def shell_commands( self, requirement ):
    """A null dependency contributes no activation commands; always None."""
    return None


class DependencyException(Exception):
    """Raised when a resolved dependency fails to set up its environment."""
    pass
85 changes: 51 additions & 34 deletions lib/galaxy/tools/deps/resolvers/conda.py
Expand Up @@ -21,6 +21,7 @@
)
from ..resolvers import (
Dependency,
DependencyException,
DependencyResolver,
InstallableDependencyResolver,
ListableDependencyResolver,
Expand Down Expand Up @@ -66,6 +67,7 @@ def get_option(name):
dependency_manager.default_base_path, DEFAULT_CONDARC_OVERRIDE
)

copy_dependencies = _string_as_bool(get_option("copy_dependencies"))
conda_exec = get_option("exec")
debug = _string_as_bool(get_option("debug"))
ensure_channels = get_option("ensure_channels")
Expand All @@ -84,18 +86,21 @@ def get_option(name):
ensure_channels=ensure_channels,
condarc_override=condarc_override,
use_path_exec=use_path_exec,
copy_dependencies=copy_dependencies
)
self.ensure_channels = ensure_channels

# Conda operations options (these define how resolution will occur)
auto_install = _string_as_bool(get_option("auto_install"))
copy_dependencies = _string_as_bool(get_option("copy_dependencies"))
self.auto_init = _string_as_bool(get_option("auto_init"))
self.conda_context = conda_context
self.disabled = not galaxy.tools.deps.installable.ensure_installed(conda_context, install_conda, self.auto_init)
self.auto_install = auto_install
self.copy_dependencies = copy_dependencies

def clean(self, **kwds):
return self.conda_context.exec_clean()

def resolve(self, name, version, type, **kwds):
# Check for conda just not being there, this way we can enable
# conda by default and just do nothing in not configured.
Expand All @@ -115,20 +120,7 @@ def resolve(self, name, version, type, **kwds):
)

job_directory = kwds.get("job_directory", None)
if job_directory is None: # Job directory is None when resolve() called by find_dep()
if is_installed:
return CondaDependency(
False,
os.path.join(self.conda_context.envs_path, conda_target.install_environment),
exact,
name=name,
version=version
)
else:
log.warning("Conda dependency resolver not sent job directory.")
return NullDependency(version=version, name=name)

if not is_installed and self.auto_install:
if not is_installed and self.auto_install and job_directory:
is_installed = self.install_dependency(name=name, version=version, type=type)

if not is_installed:
Expand All @@ -140,23 +132,19 @@ def resolve(self, name, version, type, **kwds):
conda_env = "conda-metadata-env"
else:
conda_env = "conda-env"
conda_environment = os.path.join(job_directory, conda_env)
env_path, exit_code = build_isolated_environment(
conda_target,
path=conda_environment,
copy=self.copy_dependencies,
conda_context=self.conda_context,
)
if not exit_code:
return CondaDependency(
self.conda_context.activate,
conda_environment,
exact,
name,
version
)

if job_directory:
conda_environment = os.path.join(job_directory, conda_env)
else:
return NullDependency(version=version, name=name)
conda_environment = None

return CondaDependency(
self.conda_context,
conda_environment,
exact,
name,
version
)

def list_dependencies(self):
for install_target in installed_conda_targets(self.conda_context):
Expand Down Expand Up @@ -204,13 +192,16 @@ def prefix(self):
class CondaDependency(Dependency):
dict_collection_visible_keys = Dependency.dict_collection_visible_keys + ['environment_path', 'name', 'version']
dependency_type = 'conda'
cacheable = True

def __init__(self, activate, environment_path, exact, name=None, version=None):
self.activate = activate
def __init__(self, conda_context, environment_path, exact, name=None, version=None):
self.activate = conda_context.activate
self.conda_context = conda_context
self.environment_path = environment_path
self._exact = exact
self._name = name
self._version = version
self.cache_path = None

@property
def exact(self):
Expand All @@ -224,8 +215,34 @@ def name(self):
def version(self):
return self._version

def build_cache(self, cache_path):
    """Point this dependency at ``cache_path`` and build the environment there."""
    self.set_cache_path(cache_path)
    self.build_environment()

def set_cache_path(self, cache_path):
    # Redirect the environment path to the shared cache directory so the
    # generated activation commands use the cached environment instead of a
    # per-job one.
    self.cache_path = cache_path
    self.environment_path = cache_path

def build_environment(self):
    """Build the isolated conda environment at ``self.environment_path``.

    Raises DependencyException when the build fails; a long absolute path
    (over 79 characters) gets a more specific message, since that is
    presumably a conda_build prefix-length limitation (see TODO below).
    """
    _, exit_code = build_isolated_environment(
        CondaTarget(self.name, self.version),
        path=self.environment_path,
        copy=self.conda_context.copy_dependencies,
        conda_context=self.conda_context,
    )
    if not exit_code:
        return
    message = "Conda dependency seemingly installed but failed to build job environment."
    if len(os.path.abspath(self.environment_path)) > 79:
        # TODO: remove this once conda_build version 2 is released and packages have been rebuilt.
        message = ("Conda dependency failed to build job environment. "
                   "This is most likely a limitation in conda. "
                   "You can try to shorten the path to the job_working_directory.")
    raise DependencyException(message)

def shell_commands(self, requirement):
return """[ "$CONDA_DEFAULT_ENV" = "%s" ] || . %s '%s' 2>&1 """ % (
if not self.cache_path:
# Build an isolated environment if not using a cached dependency manager
self.build_environment()
return """[ "$CONDA_DEFAULT_ENV" = "%s" ] || . %s '%s' > conda_activate.log 2>&1 """ % (
self.environment_path,
self.activate,
self.environment_path
Expand Down

0 comments on commit d6f0e6b

Please sign in to comment.