Skip to content

Commit

Permalink
Build dependency cache at install time
Browse files Browse the repository at this point in the history
and only activate cached environments if they exist.
  • Loading branch information
mvdbeek committed Nov 29, 2016
1 parent 2b17657 commit f919a98
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 34 deletions.
12 changes: 12 additions & 0 deletions lib/galaxy/tools/__init__.py
Expand Up @@ -34,6 +34,7 @@
from galaxy.tools.actions.data_manager import DataManagerToolAction
from galaxy.tools.actions.model_operations import ModelOperationToolAction
from galaxy.tools.deps import views
from galaxy.tools.deps import CachedDependencyManager
from galaxy.tools.parameters import params_to_incoming, check_param, params_from_strings, params_to_strings, visit_input_values
from galaxy.tools.parameters import output_collect
from galaxy.tools.parameters.basic import (BaseURLToolParameter,
Expand Down Expand Up @@ -1305,6 +1306,17 @@ def validate_inputs( input, value, error, parent, context, prefixed_name, prefix
visit_input_values( self.inputs, values, validate_inputs )
return messages

def build_dependency_cache(self):
    """Pre-build this tool's cached dependency environment.

    No-op unless the toolbox's dependency manager is a
    CachedDependencyManager; otherwise delegates to its ``build_cache``.
    """
    dependency_manager = self.app.toolbox.dependency_manager
    if not isinstance(dependency_manager, CachedDependencyManager):
        return
    dependency_manager.build_cache(
        requirements=self.requirements,
        installed_tool_dependencies=self.installed_tool_dependencies,
        tool_dir=self.tool_dir,
        job_directory=None,
        metadata=False,
        tool_instance=self
    )

def build_dependency_shell_commands( self, job_directory=None, metadata=False ):
"""
Return a list of commands to be run to populate the current environment to include this tools requirements.
Expand Down
39 changes: 14 additions & 25 deletions lib/galaxy/tools/deps/__init__.py
Expand Up @@ -171,26 +171,32 @@ class CachedDependencyManager(DependencyManager):
def __init__(self, default_base_path, conf_file=None, **extra_config):
super(CachedDependencyManager, self).__init__(default_base_path=default_base_path, conf_file=conf_file, **extra_config)

def build_cache(self, requirements, **kwds):
    """Resolve *requirements* and build cached environments for the cacheable ones.

    :param requirements: tool requirements to resolve
    :param kwds: passed through to ``requirements_to_dependencies``
    """
    resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds)
    # Only dependencies that declare themselves cacheable (currently conda) are cached.
    cacheable_dependencies = [dep for dep in resolved_dependencies.values() if dep.cacheable]
    hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies)
    # Plain loop instead of a side-effect list comprehension.
    for dep in cacheable_dependencies:
        dep.build_cache(hashed_requirements_dir)

def dependency_shell_commands( self, requirements, **kwds ):
    """
    Runs a set of requirements through the dependency resolvers and returns
    a list of commands required to activate the dependencies. If dependencies
    are cacheable and the cache exists, will generate commands to activate
    cached environments.
    """
    resolved_dependencies = self.requirements_to_dependencies(requirements, **kwds)
    cacheable_dependencies = [dep for dep in resolved_dependencies.values() if dep.cacheable]
    hashed_requirements_dir = self.get_hashed_requirements_path(cacheable_dependencies)
    # Only activate a cached environment when the cache has actually been
    # built (see build_cache); otherwise each dependency builds as usual.
    if os.path.exists(hashed_requirements_dir):
        for dep in cacheable_dependencies:
            dep.set_cache_path(hashed_requirements_dir)
    commands = [dep.shell_commands(req) for req, dep in resolved_dependencies.items()]
    return commands

def hash_requirements(self, resolved_dependencies):
    """Return a short (8 character) hash identifying a set of resolved dependencies.

    The (name, version, exact, dependency_type) tuples are sorted so the hash
    is independent of resolution order; ``sorted([x])`` on a single-element
    wrapper (as in the merged diff) sorted nothing.
    """
    hashable = sorted((dep.name, dep.version, dep.exact, dep.dependency_type) for dep in resolved_dependencies)
    hash_str = json.dumps(hashable)
    return hash_util.new_secure_hash(hash_str)[:8]  # short hash

def get_hashed_requirements_path(self, resolved_dependencies):
"""
Expand All @@ -200,20 +206,3 @@ def get_hashed_requirements_path(self, resolved_dependencies):
"""
req_hashes = self.hash_requirements(resolved_dependencies)
return os.path.join(self.extra_config['tool_dependency_cache_dir'], req_hashes)

def get_cached_commands(self, requirements, **kwargs):
    """
    Return commands for activating a cached environment if it exists.

    :param requirements: resolved dependencies used to locate the cache directory
    :return: list of shell commands; empty when the tool has no requirements
             or no cache directory has been built for them
    """
    if not requirements:  # tool has no requirements, nothing to activate
        return []
    hashed_requirements_dir = self.get_hashed_requirements_path(requirements)
    if not os.path.exists(hashed_requirements_dir):
        return []
    if 'tool_instance' in kwargs:
        # Restore the dependency metadata recorded when the cache was built.
        # Files are opened via ``with`` so handles are closed promptly.
        with open(os.path.join(hashed_requirements_dir, 'packages.json')) as packages_file:
            kwargs['tool_instance'].dependencies = json.load(packages_file)
    with open(os.path.join(hashed_requirements_dir, 'dep_commands.sh')) as commands_file:
        return [line.strip() for line in commands_file]
4 changes: 4 additions & 0 deletions lib/galaxy/tools/deps/resolvers/__init__.py
Expand Up @@ -122,3 +122,7 @@ def resolver_msg(self):

def shell_commands( self, requirement ):
return None


class DependencyException(Exception):
    # Raised by dependency resolvers when a resolved dependency fails to
    # build its environment (e.g. conda environment creation exits non-zero).
    pass
24 changes: 18 additions & 6 deletions lib/galaxy/tools/deps/resolvers/conda.py
Expand Up @@ -21,6 +21,7 @@
)
from ..resolvers import (
Dependency,
DependencyException,
DependencyResolver,
InstallableDependencyResolver,
ListableDependencyResolver,
Expand Down Expand Up @@ -196,6 +197,7 @@ def __init__(self, conda_context, environment_path, exact, name=None, version=No
self._exact = exact
self._name = name
self._version = version
self.cache_path = None

@property
def exact(self):
Expand All @@ -209,23 +211,33 @@ def name(self):
def version(self):
return self._version

def build_cache(self, cache_path):
    """Point this dependency at *cache_path* and build its environment there."""
    self.set_cache_path(cache_path)
    self.build_environment()

def set_cache_path(self, cache_path):
    """Use *cache_path* as both the cache marker and the environment location."""
    self.cache_path = cache_path
    self.environment_path = cache_path

def build_environment(self):
    """Create an isolated conda environment for this dependency at ``self.environment_path``.

    :raises DependencyException: when conda reports a non-zero exit code; a
        more specific message is used when the absolute environment path
        exceeds 79 characters -- presumably a conda/conda-build prefix-length
        limitation (see TODO below).
    """
    env_path, exit_code = build_isolated_environment(
        CondaTarget(self.name, self.version),
        path=self.environment_path,
        copy=self.conda_context.copy_dependencies,
        conda_context=self.conda_context,
    )
    if exit_code:
        if len(os.path.abspath(self.environment_path)) > 79:
            # TODO: remove this once conda_build version 2 is released and packages have been rebuilt.
            raise DependencyException("Conda dependency failed to build job environment. "
                                      "This is most likely a limitation in conda. "
                                      "You can try to shorten the path to the job_working_directory.")
        raise DependencyException("Conda dependency seemingly installed but failed to build job environment.")

def shell_commands(self, requirement):
if not self.cache_path:
# Build an isolated environment if not using a cached dependency manager
self.build_environment()
return """[ "$CONDA_DEFAULT_ENV" = "%s" ] || . %s '%s' > conda_activate.log 2>&1 """ % (
self.environment_path,
self.activate,
Expand Down
10 changes: 7 additions & 3 deletions lib/tool_shed/galaxy_install/install_manager.py
Expand Up @@ -904,6 +904,13 @@ def install_tool_shed_repository( self, tool_shed_repository, repo_info_dict, to
self.install_model.context.refresh( tool_shed_repository )
metadata = tool_shed_repository.metadata
if 'tools' in metadata:
if install_resolver_dependencies:
requirements = suc.get_unique_requirements_from_repository(tool_shed_repository)
[self._view.install_dependency(id=None, **req) for req in requirements]
for tool_d in metadata['tools']:
tool = self.app.toolbox._tools_by_id.get(tool_d['guid'], None)
if tool:
tool.build_dependency_cache()
# Get the tool_versions from the tool shed for each tool in the installed change set.
self.update_tool_shed_repository_status( tool_shed_repository,
self.install_model.ToolShedRepository.installation_status.SETTING_TOOL_VERSIONS )
Expand All @@ -913,9 +920,6 @@ def install_tool_shed_repository( self, tool_shed_repository, repo_info_dict, to
error_message += "Version information for the tools included in the <b>%s</b> repository is missing. " % tool_shed_repository.name
error_message += "Reset all of this repository's metadata in the tool shed, then set the installed tool versions "
error_message += "from the installed repository's <b>Repository Actions</b> menu. "
if install_resolver_dependencies:
requirements = suc.get_unique_requirements_from_repository(tool_shed_repository)
[self._view.install_dependency(id=None, **req) for req in requirements]
if install_tool_dependencies and tool_shed_repository.tool_dependencies and 'tool_dependencies' in metadata:
work_dir = tempfile.mkdtemp( prefix="tmp-toolshed-itsr" )
# Install tool dependencies.
Expand Down

0 comments on commit f919a98

Please sign in to comment.