From f919a98601252ab8f87bffff6e449105e770aadf Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 2 Nov 2016 11:59:10 +0100 Subject: [PATCH] Build dependency cache at install time and only activate cached environments if they exist. --- lib/galaxy/tools/__init__.py | 12 ++++++ lib/galaxy/tools/deps/__init__.py | 39 +++++++------------ lib/galaxy/tools/deps/resolvers/__init__.py | 4 ++ lib/galaxy/tools/deps/resolvers/conda.py | 24 +++++++++--- .../galaxy_install/install_manager.py | 10 +++-- 5 files changed, 55 insertions(+), 34 deletions(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 04f4117dced3..34a34b37f31d 100755 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -34,6 +34,7 @@ from galaxy.tools.actions.data_manager import DataManagerToolAction from galaxy.tools.actions.model_operations import ModelOperationToolAction from galaxy.tools.deps import views +from galaxy.tools.deps import CachedDependencyManager from galaxy.tools.parameters import params_to_incoming, check_param, params_from_strings, params_to_strings, visit_input_values from galaxy.tools.parameters import output_collect from galaxy.tools.parameters.basic import (BaseURLToolParameter, @@ -1305,6 +1306,17 @@ def validate_inputs( input, value, error, parent, context, prefixed_name, prefix visit_input_values( self.inputs, values, validate_inputs ) return messages + def build_dependency_cache(self): + if isinstance(self.app.toolbox.dependency_manager, CachedDependencyManager): + self.app.toolbox.dependency_manager.build_cache( + requirements=self.requirements, + installed_tool_dependencies=self.installed_tool_dependencies, + tool_dir=self.tool_dir, + job_directory=None, + metadata=False, + tool_instance=self + ) + def build_dependency_shell_commands( self, job_directory=None, metadata=False ): """ Return a list of commands to be run to populate the current environment to include this tools requirements. 
diff --git a/lib/galaxy/tools/deps/__init__.py b/lib/galaxy/tools/deps/__init__.py index 13154cd65d87..4f0a6d2a7d48 100644 --- a/lib/galaxy/tools/deps/__init__.py +++ b/lib/galaxy/tools/deps/__init__.py @@ -171,26 +171,32 @@ class CachedDependencyManager(DependencyManager): def __init__(self, default_base_path, conf_file=None, **extra_config): super(CachedDependencyManager, self).__init__(default_base_path=default_base_path, conf_file=conf_file, **extra_config) + def build_cache(self, requirements, **kwds): + resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds) + cacheable_dependencies = [dep for req, dep in resolved_dependencies.items() if dep.cacheable] + hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies) + [dep.build_cache(hashed_requirements_dir) for dep in cacheable_dependencies] + def dependency_shell_commands( self, requirements, **kwds ): """ Runs a set of requirements through the dependency resolvers and returns - a list of commands required to activate the dependencies. For dependencies - that are cacheable (currently only conda), calculates a hash based on the name, - version, exact and dependency_type attributes for all dependencies. The hash - will be used as name for the folder where the environment will be created, - which allows re-using these environments. + a list of commands required to activate the dependencies. If dependencies + are cacheable and the cache exists, will generate commands to activate + cached environments. 
""" resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds) cacheable_dependencies = [dep for req, dep in resolved_dependencies.items() if dep.cacheable] hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies) - [dep.set_cache_path(hashed_requirements_dir) for dep in cacheable_dependencies] + if os.path.exists(hashed_requirements_dir): + [dep.set_cache_path(hashed_requirements_dir) for dep in cacheable_dependencies] commands = [dep.shell_commands(req) for req, dep in resolved_dependencies.items()] return commands def hash_requirements(self, resolved_dependencies): """Return hash for requirements""" - hashable_str = json.dumps([(dep.name, dep.version, dep.exact, dep.dependency_type) for dep in resolved_dependencies]) - return hash_util.new_secure_hash(hashable_str)[:8] # short hash + resolved_dependencies = [(dep.name, dep.version, dep.exact, dep.dependency_type) for dep in resolved_dependencies] + hash_str = json.dumps(sorted(resolved_dependencies)) # sort the tuples so the hash does not depend on requirement order + return hash_util.new_secure_hash(hash_str)[:8] # short hash def get_hashed_requirements_path(self, resolved_dependencies): """ @@ -200,20 +206,3 @@ def get_hashed_requirements_path(self, resolved_dependencies): """ req_hashes = self.hash_requirements(resolved_dependencies) return os.path.join(self.extra_config['tool_dependency_cache_dir'], req_hashes) - - def get_cached_commands(self, requirements, **kwargs): - """ - Return commands for activating cached env if it exists - :param requirements_hash: - :return: list of commands - """ - if not requirements: # if tool has no requirements - return [] - hashed_requirements_dir = self.get_hashed_requirements_path(requirements) - if not os.path.exists(os.path.join(hashed_requirements_dir)): - return [] - else: - if 'tool_instance' in kwargs: - dependencies = json.load(open(os.path.join(hashed_requirements_dir, 'packages.json'))) - kwargs['tool_instance'].dependencies = dependencies - return [line.strip() for line in 
open(os.path.join(hashed_requirements_dir, 'dep_commands.sh'))] diff --git a/lib/galaxy/tools/deps/resolvers/__init__.py b/lib/galaxy/tools/deps/resolvers/__init__.py index 226b7a5f063b..c24c107887a3 100644 --- a/lib/galaxy/tools/deps/resolvers/__init__.py +++ b/lib/galaxy/tools/deps/resolvers/__init__.py @@ -122,3 +122,7 @@ def resolver_msg(self): def shell_commands( self, requirement ): return None + + +class DependencyException(Exception): + pass diff --git a/lib/galaxy/tools/deps/resolvers/conda.py b/lib/galaxy/tools/deps/resolvers/conda.py index 0443093e4df8..935895a1ce99 100644 --- a/lib/galaxy/tools/deps/resolvers/conda.py +++ b/lib/galaxy/tools/deps/resolvers/conda.py @@ -21,6 +21,7 @@ ) from ..resolvers import ( Dependency, + DependencyException, DependencyResolver, InstallableDependencyResolver, ListableDependencyResolver, @@ -196,6 +197,7 @@ def __init__(self, conda_context, environment_path, exact, name=None, version=No self._exact = exact self._name = name self._version = version + self.cache_path = None @property def exact(self): @@ -209,10 +211,15 @@ def name(self): def version(self): return self._version + def build_cache(self, cache_path): + self.set_cache_path(cache_path) + self.build_environment() + def set_cache_path(self, cache_path): + self.cache_path = cache_path self.environment_path = cache_path - def shell_commands(self, requirement, cache_path=None): + def build_environment(self): env_path, exit_code = build_isolated_environment( CondaTarget(self.name, self.version), path=self.environment_path, @@ -220,12 +227,17 @@ def shell_commands(self, requirement, cache_path=None): conda_context=self.conda_context, ) if exit_code: - if len(self.conda_environment) > 79: + if len(os.path.abspath(self.environment_path)) > 79: # TODO: remove this once conda_build version 2 is released and packages have been rebuilt. - raise Exception("Conda dependency failed to build job environment. " - "This is most likely a limitation in conda. 
" - "You can try to shorten the path to the job_working_directory.") - raise Exception("Conda dependency seemingly installed but failed to build job environment.") + raise DependencyException("Conda dependency failed to build job environment. " + "This is most likely a limitation in conda. " + "You can try to shorten the path to the job_working_directory.") + raise DependencyException("Conda dependency seemingly installed but failed to build job environment.") + + def shell_commands(self, requirement): + if not self.cache_path: + # Build an isolated environment if not using a cached dependency manager + self.build_environment() return """[ "$CONDA_DEFAULT_ENV" = "%s" ] || . %s '%s' > conda_activate.log 2>&1 """ % ( self.environment_path, self.activate, diff --git a/lib/tool_shed/galaxy_install/install_manager.py b/lib/tool_shed/galaxy_install/install_manager.py index 699797dcbd96..06c4ff7a4de6 100644 --- a/lib/tool_shed/galaxy_install/install_manager.py +++ b/lib/tool_shed/galaxy_install/install_manager.py @@ -904,6 +904,13 @@ def install_tool_shed_repository( self, tool_shed_repository, repo_info_dict, to self.install_model.context.refresh( tool_shed_repository ) metadata = tool_shed_repository.metadata if 'tools' in metadata: + if install_resolver_dependencies: + requirements = suc.get_unique_requirements_from_repository(tool_shed_repository) + [self._view.install_dependency(id=None, **req) for req in requirements] + for tool_d in metadata['tools']: + tool = self.app.toolbox._tools_by_id.get(tool_d['guid'], None) + if tool: + tool.build_dependency_cache() # Get the tool_versions from the tool shed for each tool in the installed change set. 
self.update_tool_shed_repository_status( tool_shed_repository, self.install_model.ToolShedRepository.installation_status.SETTING_TOOL_VERSIONS ) @@ -913,9 +920,6 @@ def install_tool_shed_repository( self, tool_shed_repository, repo_info_dict, to error_message += "Version information for the tools included in the %s repository is missing. " % tool_shed_repository.name error_message += "Reset all of this repository's metadata in the tool shed, then set the installed tool versions " error_message += "from the installed repository's Repository Actions menu. " - if install_resolver_dependencies: - requirements = suc.get_unique_requirements_from_repository(tool_shed_repository) - [self._view.install_dependency(id=None, **req) for req in requirements] if install_tool_dependencies and tool_shed_repository.tool_dependencies and 'tool_dependencies' in metadata: work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-itsr" ) # Install tool dependencies.