Merge pull request #7944 from mvdbeek/expression_tool_improvements

[19.05] Allow mapping over of expression tool params
galaxyproject · May 14, 2019 · 703c968
2 parents 54190a9 + 3fb32df
commit 703c968
Showing 6 changed files with 83 additions and 24 deletions.
diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
@@ -1,9 +1,9 @@
 """
 Modules used in building workflows
 """
+import json
 import logging
 import re
-from json import loads
 from xml.etree.ElementTree import (
     Element,
     XML
@@ -31,6 +31,7 @@
     visit_input_values
 )
 from galaxy.tools.parameters.basic import (
+    BaseDataToolParameter,
     BooleanToolParameter,
     ConnectedValue,
     DataCollectionToolParameter,
@@ -1159,7 +1160,7 @@ def decode_runtime_state(self, runtime_state):
         if self.tool:
             state = super(ToolModule, self).decode_runtime_state(runtime_state)
             if RUNTIME_STEP_META_STATE_KEY in runtime_state:
-                self.__restore_step_meta_runtime_state(loads(runtime_state[RUNTIME_STEP_META_STATE_KEY]))
+                self.__restore_step_meta_runtime_state(json.loads(runtime_state[RUNTIME_STEP_META_STATE_KEY]))
             return state
         else:
             raise ToolMissingException("Tool %s missing. Cannot recover runtime state." % self.tool_id,
@@ -1204,11 +1205,13 @@ def callback(input, prefixed_name, **kwargs):
                 input_dict = all_inputs_by_name[prefixed_name]
 
                 replacement = NO_REPLACEMENT
+                dataset_instance = None
                 if iteration_elements and prefixed_name in iteration_elements:
-                    if isinstance(input, DataToolParameter) and hasattr(iteration_elements[prefixed_name], 'dataset_instance'):
+                    dataset_instance = getattr(iteration_elements[prefixed_name], 'dataset_instance', None)
+                    if isinstance(input, DataToolParameter) and dataset_instance:
                         # Pull out dataset instance (=HDA) from element and set a temporary element_identifier attribute
                         # See https://github.com/galaxyproject/galaxy/pull/1693 for context.
-                        replacement = iteration_elements[prefixed_name].dataset_instance
+                        replacement = dataset_instance
                         if hasattr(iteration_elements[prefixed_name], u'element_identifier') and iteration_elements[prefixed_name].element_identifier:
                             replacement.element_identifier = iteration_elements[prefixed_name].element_identifier
                     else:
@@ -1218,6 +1221,12 @@ def callback(input, prefixed_name, **kwargs):
                     replacement = progress.replacement_for_input(step, input_dict)
 
                 if replacement is not NO_REPLACEMENT:
+                    if not isinstance(input, BaseDataToolParameter):
+                        # Probably a parameter that can be replaced
+                        dataset = dataset_instance or replacement
+                        if getattr(dataset, 'extension', None) == 'expression.json':
+                            with open(dataset.file_name, 'r') as f:
+                                replacement = json.load(f)
                     found_replacement_keys.add(prefixed_name)
 
                 return replacement

diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py
@@ -1,4 +1,3 @@
-import json
 import logging
 import uuid
 
@@ -369,15 +368,24 @@ def replacement_for_connection(self, connection, is_data=True):
                 delayed_why = "dependent collection [%s] not yet populated with datasets" % replacement.id
                 raise modules.DelayedWorkflowEvaluation(why=delayed_why)
 
-        is_hda = isinstance(replacement, model.HistoryDatasetAssociation)
-        if not is_data and is_hda:
-            if replacement.is_ok:
-                with open(replacement.file_name, 'r') as f:
-                    replacement = json.load(f)
-            elif replacement.is_pending:
-                raise modules.DelayedWorkflowEvaluation()
+        data_inputs = (model.HistoryDatasetAssociation, model.HistoryDatasetCollectionAssociation, model.DatasetCollection)
+        if not is_data and isinstance(replacement, data_inputs):
+            if isinstance(replacement, model.HistoryDatasetAssociation):
+                if replacement.is_pending:
+                    raise modules.DelayedWorkflowEvaluation()
+                if not replacement.is_ok:
+                    raise modules.CancelWorkflowEvaluation()
             else:
-                raise modules.CancelWorkflowEvaluation()
+                if not replacement.collection.populated:
+                    raise modules.DelayedWorkflowEvaluation()
+                pending = False
+                for dataset_instance in replacement.dataset_instances:
+                    if dataset_instance.is_pending:
+                        pending = True
+                    elif not dataset_instance.is_ok:
+                        raise modules.CancelWorkflowEvaluation()
+                if pending:
+                    raise modules.DelayedWorkflowEvaluation()
 
         return replacement
 

diff --git a/test/api/test_workflows.py b/test/api/test_workflows.py
@@ -2111,6 +2111,46 @@ def test_run_with_numeric_input_connection(self):
         assert int(str_43) == 43
         assert abs(float(str_4point14) - 4.14) < .0001
 
+    @skip_without_tool("param_value_from_file")
+    def test_expression_tool_map_over(self):
+        history_id = self.dataset_populator.new_history()
+        self._run_jobs("""
+class: GalaxyWorkflow
+inputs:
+  text_input1: collection
+steps:
+- label: param_out
+  tool_id: param_value_from_file
+  in:
+     input1: text_input1
+- label: consume_expression_parameter
+  tool_id: validation_default
+  in:
+    input1: param_out#text_param
+  outputs:
+    out_file1:
+      rename: "replaced_param_collection"
+test_data:
+  text_input1:
+    type: list
+    elements:
+      - identifier: A
+        content: A
+      - identifier: B
+        content: B
+""", history_id=history_id)
+        history_contents = self._get('histories/{history_id}/contents'.format(history_id=history_id)).json()
+        collection = [c for c in history_contents if c['history_content_type'] == 'dataset_collection' and c['name'] == 'replaced_param_collection'][0]
+        collection_details = self._get(collection['url']).json()
+        assert collection_details['element_count'] == 2
+        elements = collection_details['elements']
+        assert elements[0]['element_identifier'] == 'A'
+        assert elements[1]['element_identifier'] == 'B'
+        element_a_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=elements[0]['object'])
+        element_b_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=elements[1]['object'])
+        assert element_a_content.strip() == 'A'
+        assert element_b_content.strip() == 'B'
+
     @skip_without_tool('cat1')
     def test_workflow_rerun_with_use_cached_job(self):
         workflow = self.workflow_populator.load_workflow(name="test_for_run")

diff --git a/test/functional/tools/parse_values_from_file.xml b/test/functional/tools/parse_values_from_file.xml
@@ -0,0 +1 @@
+../../../tools/expression_tools/parse_values_from_file.xml
diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml
@@ -54,6 +54,7 @@
   <tool file="checksum.xml" />
   <tool file="composite_shapefile.xml" />
   <tool file="is_valid_xml.xml" />
+  <tool file="param_value_from_file.xml"/>
   <!--
   TODO: Figure out why this transiently fails on Jenkins.
   <tool file="maxseconds.xml" />

diff --git a/tools/expression_tools/parse_values_from_file.xml b/tools/expression_tools/parse_values_from_file.xml
@@ -2,7 +2,7 @@
     <description>from dataset</description>
     <expression type="ecma5.1">{
 var output;
-if ($job.remove_newlines || $job.param_type != 'string') {
+if ($job.remove_newlines || $job.param_type != 'text') {
    $job.input1.contents = $job.input1.contents.trim();
 }
 if ($job.param_type == 'integer') {
@@ -29,16 +29,16 @@ return {'output': output};
     <inputs>
         <param type="data" label="Input file containing parameter to parse out of" load_contents="64000" name="input1" />
         <param name="param_type" type="select" label="Select type of parameter to parse">
-            <option value="string">String</option>
+            <option value="text">Text</option>
             <option value="integer">Integer</option>
             <option value="float">Float</option>
             <option value="boolean">Boolean</option>
         </param>
         <param name="remove_newlines" checked="true" type="boolean" label="Remove newlines ?" help="Uncheck this only if newlines should be preserved in parameter"/>
     </inputs>
     <outputs>
-        <output type="string" name="string_param" from="output">
-            <filter>param_type == 'string'</filter>
+        <output type="text" name="text_param" from="output">
+            <filter>param_type == 'text'</filter>
         </output>
         <output type="integer" name="integer_param" from="output">
             <filter>param_type == 'integer'</filter>
@@ -53,19 +53,19 @@ return {'output': output};
     <tests>
         <test expect_num_outputs="1">
             <param name="input1" value="simple_line.txt"/>
-            <param name="param_type" value="string"/>
+            <param name="param_type" value="text"/>
             <param name="remove_newlines" value="true"/>
-            <output name="string_param">
+            <output name="text_param">
                 <assert_contents>
                     <has_line line="&quot;This is a line of text.&quot;"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="1">
             <param name="input1" value="simple_line.txt"/>
-            <param name="param_type" value="string"/>
+            <param name="param_type" value="text"/>
             <param name="remove_newlines" value="false"/>
-            <output name="string_param">
+            <output name="text_param">
                 <assert_contents>
                     <has_line line="&quot;This is a line of text.\n&quot;"/>
                 </assert_contents>
@@ -75,7 +75,7 @@ return {'output': output};
             <param name="input1" value="1.integer.txt"/>
             <param name="param_type" value="integer"/>
             <param name="remove_newlines" value="false"/>
-            <output name="string_param">
+            <output name="integer_param">
                 <assert_contents>
                     <has_line line="1"/>
                 </assert_contents>
@@ -85,7 +85,7 @@ return {'output': output};
             <param name="input1" value="1.integer.txt"/>
             <param name="param_type" value="float"/>
             <param name="remove_newlines" value="false"/>
-            <output name="string_param">
+            <output name="float_param">
                 <assert_contents>
                     <has_line line="1"/>
                 </assert_contents>
@@ -95,7 +95,7 @@ return {'output': output};
             <param name="input1" value="1.bool.txt"/>
             <param name="param_type" value="boolean"/>
             <param name="remove_newlines" value="false"/>
-            <output name="string_param">
+            <output name="boolean_param">
                 <assert_contents>
                     <has_line line="false"/>
                 </assert_contents>