From be6d14b494d02c80298a3c0e5446328799d3ea3b Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Sun, 15 Mar 2015 09:39:59 -0400
Subject: [PATCH] Implement a small subset of the Common Workflow Language tool format.

- Draft 2 is targeted currently, but stubs are in place for draft 1 as
  well - it shouldn't be too hard to support both.
- Implemented integer params, data params, and arrays thereof. The
  semantics of required versus optional parameters need some work (they
  may be subtly difficult to mesh with Galaxy). Union types, null types,
  etc. are not implemented yet.
- Docker integration is only partial - simple images pulled right from
  Docker Hub work (so cat3-tool.cwl works). The full semantics of CWL's
  Docker support are not yet implemented - so, for instance, the bwa-mem
  image pulled from SBG does not yet work.

Implementation:

The reference implementation Python library (mainly developed by Peter
Amstutz - https://github.com/common-workflow-language/common-workflow-language/tree/master/reference)
is used to load tool files ending with ".json" or ".cwl", and proxy
objects are created to adapt these tools to Galaxy representations. In
particular, input and output descriptions are loaded from the tool.

When the tool is submitted, a specialized tool class is used to build a
cwltool-compatible job description from the supplied Galaxy inputs, and
the CWL reference implementation is used to generate a Job object. A
command line is generated from this Job object. As a result, Galaxy
largely does not need to worry about the details of command-line
adapters, expressions, etc.

Galaxy writes a description of the CWL job to the job working directory
so that it can be reloaded later. After the process is complete (on the
Galaxy compute server, but outside the Docker container), this
representation is reloaded and the dynamic outputs are discovered and
moved to fixed locations as expected by Galaxy (CWL allows for much more
expressive output locations than Galaxy - for better or worse).

Unfortunately, none of the outputs have datatypes, so Galaxy's view of
the datasets is quite limited. They cannot really be used within the
rest of the Galaxy ecosystem, no preview or visualization of the
datasets is enabled, etc. The fix is threefold: 1) Galaxy's metadata
collection needs to be extended so datasets can be marked as requiring
sniffing before metadata collection. 2) CWL should support EDAM
declarations of types, and Galaxy should provide a mapping to core
datatypes to skip sniffing if types are found. 3) For finer-grained
control within Galaxy, extensions to CWL should allow setting actual
Galaxy output types on outputs. (The distinction between fastq and
fastqsanger in Galaxy is very important, for instance.)

Testing:

% git clone https://github.com/jmchilton/galaxy.git
% cd galaxy
% git checkout cwl
% virtualenv .venv
% . .venv/bin/activate
% cd ..
% git clone https://github.com/common-workflow-language/common-workflow-language.git
% cd common-workflow-language/reference
% easy_install .
% cd ../../galaxy
% cp -r test/unit/tools/cwl_tools tools/
% cp config/galaxy.ini.sample config/galaxy.ini
% cp config/tool_conf.xml.sample config/tool_conf.xml
% vi config/tool_conf.xml

Add the following section:
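  <section id="cwl" name="CWL Test Tools">
    <tool file="cwl_tools/draft1/cat1-tool.json" />
    <tool file="cwl_tools/draft2/cat1-tool.cwl" />
    <tool file="cwl_tools/draft2/cat3-tool.cwl" />
    <tool file="cwl_tools/draft2/bwa-mem-tool.cwl" />
  </section>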
Start Galaxy.

% GALAXY_CONFIG_ENABLE_BETA_TOOL_FORMATS=True run.sh --reload

Open http://localhost:8080/ and see CWL Test Tools in the left-hand tool
panel.

To go a step further and actually run CWL jobs within their designated
Docker containers, copy the following minimal Galaxy job configuration
file to config/job_conf.xml (adjust the docker_sudo parameter based on
how you execute Docker):

https://gist.github.com/jmchilton/3997fa471d1b4c556966

Run an API test demonstrating the cat3 demo tool with the following
command:

./run_tests.sh -with_framework_test_tools -api test/api/test_tools.py:ToolsTestCase.test_cwl_cat3

Rework output collection: set outputs as working-directory outputs and
relocate them before metadata generation occurs; this way metadata
collection happens on the relocated outputs.

Enable Docker containers.
---
 CWL_TODO                                      |   8 +
 lib/galaxy/jobs/command_factory.py            |   6 +
 lib/galaxy/tools/__init__.py                  |  62 +++-
 lib/galaxy/tools/cwl/__init__.py              |   7 +
 lib/galaxy/tools/cwl/cwltool_deps.py          |  55 ++++
 lib/galaxy/tools/cwl/parser.py                | 273 ++++++++++++++++++
 lib/galaxy/tools/cwl/runtime_actions.py       |  29 ++
 lib/galaxy/tools/evaluation.py                |   2 +-
 lib/galaxy/tools/parser/cwl.py                | 137 +++++++++
 lib/galaxy/tools/parser/factory.py            |   4 +
 lib/galaxy/tools/parser/yaml.py               |   3 -
 lib/galaxy_ext/metadata/set_metadata.py       |  12 +
 test/api/test_tools.py                        |  12 +
 test/functional/tools/cwl_tools               |   1 +
 test/functional/tools/samples_tool_conf.xml   |   4 +
 .../tools/cwl_tools/draft1/cat1-tool.json     |  36 +++
 .../tools/cwl_tools/draft2/bwa-mem-tool.cwl   |  78 +++++
 .../unit/tools/cwl_tools/draft2/cat1-tool.cwl |  29 ++
 .../unit/tools/cwl_tools/draft2/cat3-tool.cwl |  28 ++
 test/unit/tools/test_cwl.py                   |  48 +++
 20 files changed, 827 insertions(+), 7 deletions(-)
 create mode 100644 CWL_TODO
 create mode 100644 lib/galaxy/tools/cwl/__init__.py
 create mode 100644 lib/galaxy/tools/cwl/cwltool_deps.py
 create mode 100644 lib/galaxy/tools/cwl/parser.py
 create mode 100644 lib/galaxy/tools/cwl/runtime_actions.py
 create mode 100644 lib/galaxy/tools/parser/cwl.py
 create mode 120000 test/functional/tools/cwl_tools
 create mode 100644 test/unit/tools/cwl_tools/draft1/cat1-tool.json
 create mode 100644 test/unit/tools/cwl_tools/draft2/bwa-mem-tool.cwl
 create mode 100644 test/unit/tools/cwl_tools/draft2/cat1-tool.cwl
 create mode 100644 test/unit/tools/cwl_tools/draft2/cat3-tool.cwl
 create mode 100644 test/unit/tools/test_cwl.py

diff --git a/CWL_TODO b/CWL_TODO
new file mode 100644
index 000000000000..e7d479045a43
--- /dev/null
+++ b/CWL_TODO
@@ -0,0 +1,8 @@
+ - Sniff the outputs so Galaxy can generate the correct datatypes.
+ - More parameter types and constructs (e.g. tmap implementation).
+
+ - Explore Galaxy extensions to CWL -
+   - Annotate datatypes.
+   - Annotate parameter labels and help.
+   - Annotate tool help.
+   - Annotate requirements.
diff --git a/lib/galaxy/jobs/command_factory.py b/lib/galaxy/jobs/command_factory.py
index f64f52ad7185..7ca3fd6d10a2 100644
--- a/lib/galaxy/jobs/command_factory.py
+++ b/lib/galaxy/jobs/command_factory.py
@@ -1,7 +1,9 @@
 from os import getcwd
 from os import chmod
+from os import fdopen
 from os.path import join
 from os.path import abspath
+import tempfile

 CAPTURE_RETURN_CODE = "return_code=$?"
 YIELD_CAPTURED_CODE = 'sh -c "exit $return_code"'
@@ -114,6 +116,10 @@ def __handle_work_dir_outputs(commands_builder, job_wrapper, runner, remote_comm
     work_dir_outputs = runner.get_work_dir_outputs( job_wrapper, **work_dir_outputs_kwds )
     if work_dir_outputs:
         commands_builder.capture_return_code()
+        fd, fp = tempfile.mkstemp( suffix='.py', dir=job_wrapper.working_directory, prefix="relocate_dynamic_outputs_" )
+        metadata_script_file = abspath( fp )
+        fdopen( fd, 'w' ).write( 'from galaxy_ext.metadata.set_metadata import relocate_dynamic_outputs; relocate_dynamic_outputs()' )
+        commands_builder.append_command("python %s" % metadata_script_file)
         copy_commands = map(__copy_if_exists_command, work_dir_outputs)
         commands_builder.append_commands(copy_commands)
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index dd50e17e2aeb..41bf9e810630 100755
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -2100,7 +2100,7 @@ def call_hook( self, hook_name, *args, **kwargs ):
     def exec_before_job( self, app, inp_data, out_data, param_dict={} ):
         pass

-    def exec_after_process( self, app, inp_data, out_data, param_dict, job=None ):
+    def exec_after_process( self, app, inp_data, out_data, param_dict, job=None, **kwds ):
         pass

     def job_failed( self, job_wrapper, message, exception=False ):
@@ -2836,7 +2836,7 @@ class SetMetadataTool( Tool ):
     tool_type = 'set_metadata'
     requires_setting_metadata = False

-    def exec_after_process( self, app, inp_data, out_data, param_dict, job=None ):
+    def exec_after_process( self, app, inp_data, out_data, param_dict, job=None, **kwds ):
         for name, dataset in inp_data.iteritems():
             external_metadata = JobExternalOutputMetadataWrapper( job )
             if external_metadata.external_metadata_set_successfully( dataset, app.model.context ):
@@ -2881,6 +2881,62 @@ class GenomeIndexTool( Tool ):
     tool_type = 'index_genome'


+class CwlTool( Tool ):
+    tool_type = 'cwl'
+
+    def exec_before_job( self, app, inp_data, out_data, param_dict=None ):
+        super( CwlTool, self ).exec_before_job( app, inp_data, out_data, param_dict=param_dict )
+        if param_dict is None:
+            raise Exception("Internal error - param_dict is empty.")
+        # Working directory on the Galaxy server (instead of remote compute).
+        local_working_directory = param_dict["__local_working_directory__"]
+        log.info("exec_before_job for CWL tool")
+        input_json = {}
+
+        def simple_value(input, param_dict_value):
+            if isinstance(input, DataToolParameter):
+                return {"path": str(param_dict_value)}
+            elif isinstance(input, IntegerToolParameter):
+                return int(str(param_dict_value))
+            else:
+                return str(param_dict_value)
+
+        for input_name, input in self.inputs.iteritems():
+            if isinstance(input, Repeat):
+                only_input = input.inputs.values()[0]
+                array_value = []
+                for instance in param_dict[input_name]:
+                    array_value.append(simple_value(only_input, instance[input_name[:-len("_repeat")]]))
+                input_json[input_name[:-len("_repeat")]] = array_value
+            else:
+                input_json[input_name] = simple_value(input, param_dict[input_name])
+
+        log.info("input_json is %s" % input_json)
+        input_json["allocatedResources"] = {
+            "cpu": "$GALAXY_SLOTS",
+        }
+        cwl_job_proxy = self._cwl_tool_proxy.job_proxy(input_json, local_working_directory)
+        # Write representation to disk that can be reloaded at runtime
+        # and outputs collected before Galaxy metadata is gathered.
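+        # Note: save_job() below serializes the tool path and job inputs to
+        # the .cwl_job.json file in the working directory (JOB_JSON_FILE in
+        # lib/galaxy/tools/cwl/parser.py); load_job_proxy() reads it back
+        # after the process runs so the dynamic outputs can be collected.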
+        cwl_job_proxy.save_job()
+        cwl_job = cwl_job_proxy.cwl_job()
+        command_line = " ".join(cwl_job.command_line)
+        if cwl_job.stdin:
+            command_line += '< "' + cwl_job.stdin + '"'
+        if cwl_job.stdout:
+            command_line += '> "' + cwl_job.stdout + '"'
+        # TODO: handle generatefiles
+        param_dict["__cwl_command"] = command_line
+        log.info("CwlTool.exec_before_job() generated command_line %s" % command_line)
+
+    def parse( self, tool_source, guid=None ):
+        super( CwlTool, self ).parse( tool_source, guid=guid )
+        cwl_tool_proxy = getattr( tool_source, 'tool_proxy', None )
+        if cwl_tool_proxy is None:
+            raise Exception("CwlTool.parse() called on a tool source that does not define a proxy object to the underlying CWL tool.")
+        self._cwl_tool_proxy = cwl_tool_proxy
+
+
 class DataManagerTool( OutputParameterJSONTool ):
     tool_type = 'manage_data'
     default_tool_action = DataManagerToolAction
@@ -2952,7 +3008,7 @@ def allow_user_access( self, user ):
 tool_types = {}
 for tool_class in [ Tool, SetMetadataTool, OutputParameterJSONTool,
                     DataManagerTool, DataSourceTool, AsyncDataSourceTool,
-                    DataDestinationTool ]:
+                    DataDestinationTool, CwlTool ]:
     tool_types[ tool_class.tool_type ] = tool_class
diff --git a/lib/galaxy/tools/cwl/__init__.py b/lib/galaxy/tools/cwl/__init__.py
new file mode 100644
index 000000000000..24595e0cf272
--- /dev/null
+++ b/lib/galaxy/tools/cwl/__init__.py
@@ -0,0 +1,7 @@
+from .parser import tool_proxy
+from .runtime_actions import handle_outputs
+
+__all__ = [
+    'tool_proxy',
+    'handle_outputs',
+]
diff --git a/lib/galaxy/tools/cwl/cwltool_deps.py b/lib/galaxy/tools/cwl/cwltool_deps.py
new file mode 100644
index 000000000000..868ea625b5fc
--- /dev/null
+++ b/lib/galaxy/tools/cwl/cwltool_deps.py
@@ -0,0 +1,55 @@
+""" This module contains logic for dealing with cwltool as an optional
+dependency for Galaxy and/or applications which use Galaxy as a library.
+"""
+
+try:
+    from galaxy import eggs
+    eggs.require("requests")
+except Exception:
+    pass
+
+try:
+    import requests
+except ImportError:
+    requests = None
+
+try:
+    from cwltool import (
+        ref_resolver,
+        draft1tool,
+        draft2tool,
+    )
+except ImportError:
+    ref_resolver = None
+    draft1tool = None
+    draft2tool = None
+
+try:
+    import jsonschema
+except ImportError:
+    jsonschema = None
+
+try:
+    import avro
+except ImportError:
+    avro = None
+
+
+def ensure_cwltool_available():
+    if ref_resolver is None:
+        message = "This feature requires cwltool and its dependencies to be available - they are not."
+        if avro is None:
+            message += " Library avro unavailable."
+        if jsonschema is None:
+            message += " Library jsonschema unavailable."
+        if requests is None:
+            message += " Library requests unavailable."
+        raise ImportError(message)
+
+
+__all__ = [
+    'ref_resolver',
+    'draft1tool',
+    'draft2tool',
+    'ensure_cwltool_available',
+]
diff --git a/lib/galaxy/tools/cwl/parser.py b/lib/galaxy/tools/cwl/parser.py
new file mode 100644
index 000000000000..6e15f562b138
--- /dev/null
+++ b/lib/galaxy/tools/cwl/parser.py
@@ -0,0 +1,273 @@
+""" This module provides proxy objects around objects from the common
+workflow language reference implementation library cwltool. These proxies
+adapt cwltool to Galaxy features and abstract the library away from the rest
+of the framework.
+""" +from __future__ import absolute_import +from abc import ABCMeta, abstractmethod +import json +import os + +from .cwltool_deps import ( + draft1tool, + draft2tool, + ref_resolver, + ensure_cwltool_available, +) + +from galaxy.util.bunch import Bunch + +JOB_JSON_FILE = ".cwl_job.json" + + +def tool_proxy(tool_path): + """ Provide a proxy object to cwltool data structures to just + grab relevant data. + """ + ensure_cwltool_available() + tool = to_cwl_tool_object(tool_path) + return tool + + +def load_job_proxy(job_directory): + ensure_cwltool_available() + job_objects_path = os.path.join(job_directory, JOB_JSON_FILE) + job_objects = json.load(open(job_objects_path, "r")) + tool_path = job_objects["tool_path"] + job_inputs = job_objects["job_inputs"] + cwl_tool = tool_proxy(tool_path) + cwl_job = cwl_tool.job_proxy(job_inputs, job_directory=job_directory) + return cwl_job + + +def to_cwl_tool_object(tool_path): + if ref_resolver is None: + raise Exception("Using CWL tools requires cwltool module.") + proxy_class = None + cwl_tool = None + toolpath_object = ref_resolver.from_url(tool_path) + if "schema" in toolpath_object: + proxy_class = Draft1ToolProxy + cwl_tool = draft1tool.Tool(toolpath_object) + if "class" in toolpath_object: + if toolpath_object["class"] == "CommandLineTool": + proxy_class = Draft2ToolProxy + cwl_tool = draft2tool.CommandLineTool(toolpath_object) + if proxy_class is None: + raise Exception("Unsupported CWL object encountered.") + proxy = proxy_class(cwl_tool, tool_path) + return proxy + + +class ToolProxy( object ): + __metaclass__ = ABCMeta + + def __init__(self, tool, tool_path): + self._tool = tool + self._tool_path = tool_path + + def job_proxy(self, input_dict, job_directory="."): + """ Build a cwltool.job.Job describing computation using a input_json + Galaxy will generate mapping the Galaxy description of the inputs into + a cwltool compatible variant. + """ + return JobProxy(self, input_dict, job_directory=job_directory) + + @abstractmethod + def input_instances(self): + """ Return InputInstance objects describing mapping to Galaxy inputs. """ + + @abstractmethod + def output_instances(self): + """ Return OutputInstance objects describing mapping to Galaxy inputs. """ + + @abstractmethod + def docker_identifier(self): + """ Return docker identifier for embedding in tool description. 
""" + + +class Draft1ToolProxy(ToolProxy): + + def input_instances(self): + # TODO + return self._tool.tool["inputs"] + + def output_instances(self): + # TODO + return [] + + def docker_identifier(self): + tool = self._tool + requirements = tool.get("requirements", {}) + environment = requirements.get("environment", {}) + container = environment.get("container", {}) + container_type = container.get("type", "docker") + if container_type != "docker": + return None + else: + return container.get("uri", None) + + +class Draft2ToolProxy(ToolProxy): + + def input_instances(self): + return self._find_inputs(self._tool.inputs_record_schema) + + def _find_inputs(self, schema): + schema_type = schema["type"] + if isinstance(schema_type, list): + raise Exception("Union types not yet implemented.") + elif isinstance(schema_type, dict): + return self._find_inputs(schema_type) + else: + if schema_type in self._tool.schemaDefs: + schema = self._tool.schemaDefs[schema_type] + + if schema["type"] == "record": + return map(_simple_field_to_input, schema["fields"]) + + def output_instances(self): + outputs_schema = self._tool.outputs_record_schema + return self._find_outputs(outputs_schema) + + def _find_outputs(self, schema): + rval = [] + if not rval and schema["type"] == "record": + for output in schema["fields"]: + output_type = output.get("type", None) + if output_type != "File": + raise Exception("Unhandled output type [%s] encountered.") + rval.append(_simple_field_to_output(output)) + + return rval + + def docker_identifier(self): + tool = self._tool.tool + reqs_and_hints = tool.get("requirements", []) + tool.get("hints", []) + for hint in reqs_and_hints: + if hint["class"] == "DockerRequirement": + if "dockerImageId" in hint: + return hint["dockerImageId"] + else: + return hint["dockerPull"] + return None + + +class JobProxy(object): + + def __init__(self, tool_proxy, input_dict, job_directory): + self._tool_proxy = tool_proxy + self._input_dict = input_dict + self._job_directory = job_directory + + def cwl_job(self): + return self._tool_proxy._tool.job( + self._input_dict, + basedir=self._job_directory, + use_container=False + ) + + def save_job(self): + job_file = JobProxy._job_file(self._job_directory) + job_objects = { + "tool_path": os.path.abspath(self._tool_proxy._tool_path), + "job_inputs": self._input_dict, + } + json.dump(job_objects, open(job_file, "w")) + + # @staticmethod + # def load_job(tool_proxy, job_directory): + # job_file = JobProxy._job_file(job_directory) + # input_dict = json.load(open(job_file, "r")) + # return JobProxy(tool_proxy, input_dict, job_directory) + + @staticmethod + def _job_file(job_directory): + return os.path.join(job_directory, JOB_JSON_FILE) + + +def _simple_field_to_input(field): + print field + name = field["name"] + field_type = field["type"] + if isinstance(field_type, dict): + field_type = field_type["type"] + + if isinstance(field_type, list): + if len(field_type) > 1: + raise Exception("Union types not yet implemented.") + else: + field_type = field_type[0] + + if field_type in ("File", "int"): + if field_type == "File": + input_type = INPUT_TYPE.DATA + else: + input_type = INPUT_TYPE.INTEGER + return InputInstance(name, input_type) + elif field_type == "array": + if isinstance(field["type"], dict): + array_type = field["type"]["items"] + else: + array_type = field["items"] + if array_type in ("File", "int"): + if array_type == "File": + input_type = INPUT_TYPE.DATA + elif array_type == "int": + input_type = INPUT_TYPE.INTEGER + else: + raise 
Exception("Unhandled array type encountered - [%s]." % array_type) + return InputInstance(name, input_type, array=True) + else: + raise Exception("Unhandled field type encountered - [%s]." % field_type) + + +def _simple_field_to_output(field): + name = field["name"] + output_instance = OutputInstance(name, output_type=OUTPUT_TYPE.GLOB) + return output_instance + + +INPUT_TYPE = Bunch( + DATA="data", + INTEGER="integer", +) + + +class InputInstance(object): + + def __init__(self, name, input_type, array=False): + self.input_type = input_type + self.name = name + self.required = True + self.array = array + + def to_dict(self): + as_dict = dict( + name=self.name, + label=self.name, + type=self.input_type, + optional=not self.required, + ) + if self.input_type == INPUT_TYPE.INTEGER: + as_dict["value"] = "0" + return as_dict + +OUTPUT_TYPE = Bunch( + GLOB="glob", + STDOUT="stdout", +) + + +class OutputInstance(object): + + def __init__(self, name, output_type, path=None): + self.name = name + self.output_type = output_type + self.path = path + + +__all__ = [ + 'tool_proxy', + 'load_job_proxy', +] diff --git a/lib/galaxy/tools/cwl/runtime_actions.py b/lib/galaxy/tools/cwl/runtime_actions.py new file mode 100644 index 000000000000..cd4b2a0a1cfb --- /dev/null +++ b/lib/galaxy/tools/cwl/runtime_actions.py @@ -0,0 +1,29 @@ +import os +import shutil + +from .parser import ( + JOB_JSON_FILE, + load_job_proxy, +) + + +def handle_outputs(job_directory=None): + # Relocate dynamically collected files to pre-determined locations + # registered with ToolOutput objects via from_work_dir handling. + if job_directory is None: + job_directory = os.getcwd() + cwl_job_file = os.path.join(job_directory, JOB_JSON_FILE) + if not os.path.exists(cwl_job_file): + # Not a CWL job, just continue + return + job_proxy = load_job_proxy(job_directory) + cwl_job = job_proxy.cwl_job() + outputs = cwl_job.collect_outputs(job_directory) + for output_name, output in outputs.iteritems(): + target_path = os.path.join(job_directory, "__cwl_output_%s" % output_name) + shutil.move(output["path"], target_path) + + +__all__ = [ + 'handle_outputs', +] diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index bfe7fecf37de..6842dd0a5498 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -347,7 +347,7 @@ def get_data_table_entry(table_name, query_attr, query_val, return_attr): param_dict['__tool_directory__'] = self.compute_environment.tool_directory() param_dict['__get_data_table_entry__'] = get_data_table_entry - + param_dict['__local_working_directory__'] = self.local_working_directory # We add access to app here, this allows access to app.config, etc param_dict['__app__'] = RawObjectWrapper( self.app ) # More convienent access to app.config.new_file_path; we don't need to diff --git a/lib/galaxy/tools/parser/cwl.py b/lib/galaxy/tools/parser/cwl.py new file mode 100644 index 000000000000..fa3937f44e0f --- /dev/null +++ b/lib/galaxy/tools/parser/cwl.py @@ -0,0 +1,137 @@ +import logging +import os + +from .interface import ToolSource +from .interface import PagesSource +from .interface import PageSource +from .yaml import YamlInputSource + +from galaxy.tools.deps import requirements +from galaxy.tools.cwl import tool_proxy + +import galaxy.tools + +from galaxy.util.odict import odict + +log = logging.getLogger(__name__) + + +class CwlToolSource(ToolSource): + + def __init__(self, tool_file): + self._cwl_tool_file = tool_file + self._id, _ = 
+        self._tool_proxy = tool_proxy(tool_file)
+
+    @property
+    def tool_proxy(self):
+        return self._tool_proxy
+
+    def parse_tool_type(self):
+        return 'cwl'
+
+    def parse_id(self):
+        log.warn("TOOL ID is %s" % self._id)
+        return self._id
+
+    def parse_name(self):
+        return self._id
+
+    def parse_command(self):
+        return "$__cwl_command"
+
+    def parse_help(self):
+        return ""
+
+    def parse_stdio(self):
+        # TODO: remove duplication with YAML
+        from galaxy.jobs.error_level import StdioErrorLevel
+
+        # New format - starting out just using exit code.
+        exit_code_lower = galaxy.tools.ToolStdioExitCode()
+        exit_code_lower.range_start = float("-inf")
+        exit_code_lower.range_end = -1
+        exit_code_lower.error_level = StdioErrorLevel.FATAL
+        exit_code_high = galaxy.tools.ToolStdioExitCode()
+        exit_code_high.range_start = 1
+        exit_code_high.range_end = float("inf")
+        exit_code_high.error_level = StdioErrorLevel.FATAL
+        return [exit_code_lower, exit_code_high], []
+
+    def parse_interpreter(self):
+        return None
+
+    def parse_version(self):
+        return "0.0.1"
+
+    def parse_description(self):
+        return ""
+
+    def parse_input_pages(self):
+        page_source = CwlPageSource(self._tool_proxy)
+        return PagesSource([page_source])
+
+    def parse_outputs(self, tool):
+        output_instances = self._tool_proxy.output_instances()
+        outputs = odict()
+        output_defs = []
+        for output_instance in output_instances:
+            output_defs.append(self._parse_output(tool, output_instance))
+        # TODO: parse output collections
+        for output_def in output_defs:
+            outputs[output_def.name] = output_def
+        return outputs, odict()
+
+    def _parse_output(self, tool, output_instance):
+        name = output_instance.name
+        # TODO: handle filters, actions, change_format
+        output = galaxy.tools.ToolOutput( name )
+        output.format = "auto"
+        output.change_format = []
+        output.format_source = None
+        output.metadata_source = ""
+        output.parent = None
+        output.label = None
+        output.count = None
+        output.filters = []
+        output.tool = tool
+        output.from_work_dir = "__cwl_output_%s" % name
+        output.hidden = ""
+        output.dataset_collectors = []
+        output.actions = galaxy.tools.ToolOutputActionGroup( output, None )
+        return output
+
+    def parse_requirements_and_containers(self):
+        containers = []
+        docker_identifier = self._tool_proxy.docker_identifier()
+        if docker_identifier:
+            containers.append({"type": "docker",
+                               "identifier": docker_identifier})
+        return requirements.parse_requirements_from_dict(dict(
+            requirements=[],  # TODO: enable via extensions
+            containers=containers,
+        ))
+
+
+class CwlPageSource(PageSource):
+
+    def __init__(self, tool_proxy):
+        cwl_instances = tool_proxy.input_instances()
+        self._input_list = map(self._to_input_source, cwl_instances)
+
+    def _to_input_source(self, input_instance):
+        if input_instance.array:
+            as_dict = dict(
+                type="repeat",
+                name="%s_repeat" % input_instance.name,
+                title="%s" % input_instance.name,
+                blocks=[
+                    input_instance.to_dict()
+                ]
+            )
+        else:
+            as_dict = input_instance.to_dict()
+        return YamlInputSource(as_dict)
+
+    def parse_input_sources(self):
+        return self._input_list
diff --git a/lib/galaxy/tools/parser/factory.py b/lib/galaxy/tools/parser/factory.py
index 2a2b089bfbee..0aef13e89af2 100644
--- a/lib/galaxy/tools/parser/factory.py
+++ b/lib/galaxy/tools/parser/factory.py
@@ -11,6 +11,7 @@
 from .yaml import YamlToolSource
 from .xml import XmlToolSource
 from .xml import XmlInputSource
+from .cwl import CwlToolSource
 from .interface import InputSource


@@ -32,6 +33,9 @@ def get_tool_source(config_file, enable_beta_formats=True):
         with open(config_file, "r") as f:
             as_dict = yaml.load(f)
         return YamlToolSource(as_dict)
+    elif config_file.endswith(".json") or config_file.endswith(".cwl"):
+        log.info("Loading CWL tool - this is experimental - the tool will likely not function in the future, at least not in the same way.")
+        return CwlToolSource(config_file)
     else:
         tree = load_tool_xml(config_file)
         root = tree.getroot()
diff --git a/lib/galaxy/tools/parser/yaml.py b/lib/galaxy/tools/parser/yaml.py
index 11710021fdfc..282bb0ed12c1 100644
--- a/lib/galaxy/tools/parser/yaml.py
+++ b/lib/galaxy/tools/parser/yaml.py
@@ -77,9 +77,6 @@ def parse_outputs(self, tool):
         output_defs = []
         for name, output_dict in outputs.items():
             output_defs.append(self._parse_output(tool, name, output_dict))
-        outputs = odict()
-        for output in output_defs:
-            outputs[output.name] = output
         # TODO: parse outputs collections
         return output_defs, odict()
diff --git a/lib/galaxy_ext/metadata/set_metadata.py b/lib/galaxy_ext/metadata/set_metadata.py
index 6f7c022ed2d2..1233fd2451a0 100644
--- a/lib/galaxy_ext/metadata/set_metadata.py
+++ b/lib/galaxy_ext/metadata/set_metadata.py
@@ -47,6 +47,18 @@ def set_meta_with_tool_provided( dataset_instance, file_dict, set_meta_kwds ):
     for metadata_name, metadata_value in file_dict.get( 'metadata', {} ).iteritems():
         setattr( dataset_instance.metadata, metadata_name, metadata_value )

+
+def relocate_dynamic_outputs():
+    # In order to handle metadata for dynamically pathed CWL outputs, we must
+    # first relocate them to predetermined, fixed locations as expected by
+    # Galaxy.
+
+    # TODO: make this conditional on being a CWL job.
+    # TODO: Separate this from this module.
+    from galaxy.tools.cwl import handle_outputs
+    handle_outputs()
+
+
 def set_metadata():
     # locate galaxy_root for loading datatypes
     galaxy_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir))
diff --git a/test/api/test_tools.py b/test/api/test_tools.py
index e1b5485ce5b8..91ceefeca968 100644
--- a/test/api/test_tools.py
+++ b/test/api/test_tools.py
@@ -918,6 +918,18 @@ def test_combined_mapping_and_subcollection_mapping( self ):
         }
         self._check_combined_mapping_and_subcollection_mapping( history_id, inputs )

+    @skip_without_tool( "cat3-tool" )
+    def test_cwl_cat3( self ):
+        history_id = self.dataset_populator.new_history()
+        hda1 = dataset_to_param( self.dataset_populator.new_dataset( history_id, content='1\t2\t3' ) )
+        inputs = {
+            "f1": hda1,
+        }
+        response = self._run( "cat3-tool", history_id, inputs, assert_ok=True )
+        output1 = response[ "outputs" ][ 0 ]
+        output1_content = self.dataset_populator.get_history_dataset_content( history_id, dataset=output1 )
+        assert output1_content == "1\t2\t3\n", output1_content
+
     def _check_combined_mapping_and_subcollection_mapping( self, history_id, inputs ):
         self.dataset_populator.wait_for_history( history_id, assert_ok=True )
         outputs = self._run_and_get_outputs( "collection_mixed_param", history_id, inputs )
diff --git a/test/functional/tools/cwl_tools b/test/functional/tools/cwl_tools
new file mode 120000
index 000000000000..f7aa3669f89a
--- /dev/null
+++ b/test/functional/tools/cwl_tools
@@ -0,0 +1 @@
+../../unit/tools/cwl_tools
\ No newline at end of file
diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml
index 2632fe565fe7..2c7a37c4ec6f 100644
--- a/test/functional/tools/samples_tool_conf.xml
+++ b/test/functional/tools/samples_tool_conf.xml
@@ -49,6 +49,10 @@
+    <tool file="cwl_tools/draft1/cat1-tool.json" />
+    <tool file="cwl_tools/draft2/cat1-tool.cwl" />
+    <tool file="cwl_tools/draft2/cat3-tool.cwl" />
+    <tool file="cwl_tools/draft2/bwa-mem-tool.cwl" />
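
For reference, a minimal sketch of exercising the new parser API from a
Python shell follows (illustrative only - it assumes cwltool and its
dependencies are installed and that Galaxy's lib directory is on the
PYTHONPATH; the tool path points at one of the demo tools added above):

    from galaxy.tools.cwl import tool_proxy

    # Build a proxy around the draft 2 cat3 demo tool.
    proxy = tool_proxy("test/unit/tools/cwl_tools/draft2/cat3-tool.cwl")
    # Galaxy-facing views of the CWL inputs and the Docker image.
    for input_instance in proxy.input_instances():
        print input_instance.to_dict()
    print proxy.docker_identifier()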