galaxyproject · mvdbeek · Oct 23, 2020 · Oct 15, 2020 · Oct 16, 2020 · bernt-matthias
diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py
@@ -513,7 +513,8 @@ def parse_tests_to_dict(self):
 
         if tests_elem is not None:
             for i, test_elem in enumerate(tests_elem.findall("test")):
-                tests.append(_test_elem_to_dict(test_elem, i))
+                profile = self.parse_profile()
+                tests.append(_test_elem_to_dict(test_elem, i, profile))
 
         return rval
 
@@ -533,10 +534,10 @@ def parse_python_template_version(self):
         return python_template_version
 
 
-def _test_elem_to_dict(test_elem, i):
+def _test_elem_to_dict(test_elem, i, profile=None):
     rval = dict(
         outputs=__parse_output_elems(test_elem),
-        output_collections=__parse_output_collection_elems(test_elem),
+        output_collections=__parse_output_collection_elems(test_elem, profile=profile),
         inputs=__parse_input_elems(test_elem, i),
         expect_num_outputs=test_elem.get("expect_num_outputs"),
         command=__parse_assert_list_from_elem(test_elem.find("assert_command")),
@@ -579,36 +580,38 @@ def __parse_command_elem(test_elem):
     return __parse_assert_list_from_elem(assert_elem)
 
 
-def __parse_output_collection_elems(test_elem):
+def __parse_output_collection_elems(test_elem, profile=None):
     output_collections = []
     for output_collection_elem in test_elem.findall("output_collection"):
-        output_collection_def = __parse_output_collection_elem(output_collection_elem)
+        output_collection_def = __parse_output_collection_elem(output_collection_elem, profile=profile)
         output_collections.append(output_collection_def)
     return output_collections
 
 
-def __parse_output_collection_elem(output_collection_elem):
+def __parse_output_collection_elem(output_collection_elem, profile=None):
     attrib = dict(output_collection_elem.attrib)
     name = attrib.pop('name', None)
     if name is None:
         raise Exception("Test output collection does not have a 'name'")
-    element_tests = __parse_element_tests(output_collection_elem)
+    element_tests = __parse_element_tests(output_collection_elem, profile=profile)
     return TestCollectionOutputDef(name, attrib, element_tests).to_dict()
 
 
-def __parse_element_tests(parent_element):
+def __parse_element_tests(parent_element, profile=None):
     element_tests = {}
     for idx, element in enumerate(parent_element.findall("element")):
         element_attrib = dict(element.attrib)
         identifier = element_attrib.pop('name', None)
         if identifier is None:
             raise Exception("Test primary dataset does not have a 'identifier'")
-        element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True)
-        element_tests[identifier][1]["element_index"] = idx
+        element_tests[identifier] = __parse_test_attributes(element, element_attrib, parse_elements=True, profile=profile)
+        if profile and profile >= "20.09":
+            element_tests[identifier][1]["expected_sort_order"] = idx
+
     return element_tests
 
 
-def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False):
+def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_discovered_datasets=False, profile=None):
     assert_list = __parse_assert_list(output_elem)
 
     # Allow either file or value to specify a target file to compare result with
@@ -638,7 +641,7 @@ def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_dis
     checksum = attrib.get("checksum", None)
     element_tests = {}
     if parse_elements:
-        element_tests = __parse_element_tests(output_elem)
+        element_tests = __parse_element_tests(output_elem, profile=profile)
 
     primary_datasets = {}
     if parse_discovered_datasets:

diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py
@@ -724,36 +724,29 @@ def verify_collection(output_collection_def, data_collection, verify_dataset):
             message = template % (name, expected_element_count, actual_element_count)
             raise AssertionError(message)
 
+    def get_element(elements, id):
+        for element in elements:
+            if element["element_identifier"] == id:
+                return element
+        return False
+
     def verify_elements(element_objects, element_tests):
-        sorted_test_ids = [None] * len(element_tests)
-        for element_identifier, element_test in element_tests.items():
-            if isinstance(element_test, dict):
-                element_outfile, element_attrib = None, element_test
-            else:
-                element_outfile, element_attrib = element_test
-            sorted_test_ids[element_attrib["element_index"]] = element_identifier
+        # sorted_test_ids = [None] * len(element_tests)
+        expected_sort_order = []
 
-        i = 0
-        for element_identifier in sorted_test_ids:
-            element_test = element_tests[element_identifier]
+        eo_ids = [_["element_identifier"] for _ in element_objects]
+        for element_identifier, element_test in element_tests.items():
             if isinstance(element_test, dict):
                 element_outfile, element_attrib = None, element_test
             else:
                 element_outfile, element_attrib = element_test
+            if 'expected_sort_order' in element_attrib:
+                expected_sort_order.append(element_identifier)
 
-            element = None
-            while i < len(element_objects):
-                if element_objects[i]["element_identifier"] == element_identifier:
-                    element = element_objects[i]
-                    i += 1
-                    break
-                i += 1
-
-            if element is None:
-                template = "Failed to find identifier '%s' of test collection %s in the tool generated collection elements %s (at the correct position)"
-                eo_ids = [_["element_identifier"] for _ in element_objects]
-                message = template % (element_identifier, sorted_test_ids,
-                                      eo_ids)
+            element = get_element(element_objects, element_identifier)
+            if not element:
+                template = "Failed to find identifier '%s' in the tool generated collection elements %s"
+                message = template % (element_identifier, eo_ids)
                 raise AssertionError(message)
 
             element_type = element["element_type"]
@@ -763,6 +756,21 @@ def verify_elements(element_objects, element_tests):
                 elements = element["object"]["elements"]
                 verify_elements(elements, element_attrib.get("elements", {}))
 
+        if len(expected_sort_order) > 0:
+            i = 0
+            for element_identifier in expected_sort_order:
+                element = None
+                while i < len(element_objects):
+                    if element_objects[i]["element_identifier"] == element_identifier:
+                        element = element_objects[i]
+                        i += 1
+                        break
+                    i += 1
+                if element is None:
+                    template = "Collection identifier '%s' found out of order, expected order of %s for the tool generated collection elements %s"
+                    message = template % (element_identifier, expected_sort_order, eo_ids)
+                    raise AssertionError(message)
+
     verify_elements(data_collection["elements"], output_collection_def.element_tests)
 
 

diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -1507,7 +1507,7 @@ Note that this tool uses ``assign_primary_output="true"`` for ``<discover_data_s
     <xs:annotation>
       <xs:documentation xml:lang="en"><![CDATA[
 
-Define tests for extra files corresponding to an output collection.
+Define tests for extra datasets and metadata corresponding to an output collection.
 
 ``output_collection`` directives should specify a ``name`` and ``type``
 attribute to describe the expected output collection as a whole.
@@ -1516,6 +1516,12 @@ Expectations about collection contents are described using child ``element``
 directives. For nested collections, these child ``element`` directives may
 themselves contain children.
 
+For tools marked as having profile 20.09 or newer, the order of elements within
+an ``output_collection`` declaration are meaningful. The test definition may
+omit any number of elements from a collection, but the ones that are specified
+will be checked against the actual resulting collection from the tool run and the
+order within the collection verified.
+
 ### Examples
 
 The [genetrack](https://github.com/galaxyproject/tools-iuc/blob/master/tools/genetrack/genetrack.xml)

diff --git a/test/functional/tools/discover_sort_by.xml b/test/functional/tools/discover_sort_by.xml
@@ -1,4 +1,4 @@
-<tool id="discover_sort_by" name="discover_sort_by" version="0.1.0">
+<tool id="discover_sort_by" name="discover_sort_by" version="0.1.0" profile="20.09">
     <command><![CDATA[
 for i in \$(seq 1 10);
 do

diff --git a/test/functional/tools/discover_sort_by_legacy_test.xml b/test/functional/tools/discover_sort_by_legacy_test.xml
@@ -0,0 +1,42 @@
+<tool id="discover_sort_by_legacy_test" name="discover_sort_by_legacy_test" version="0.1.0" profile="20.05">
+    <!-- same as discover_sort_by but with tests not ordered correctly, this would cause
+         failure in profile 20.09 or newer.
+    -->
+    <command><![CDATA[
+for i in \$(seq 1 10);
+do
+    echo "\$i" > \$i.txt;
+done
+]]></command>
+  <inputs/>
+  <outputs>
+    <collection name="collection_numeric_name" type="list" label="num">
+      <discover_datasets pattern="__name_and_ext__" sort_by="numeric_name"/>
+    </collection>
+    <collection name="collection_rev_numeric_name" type="list" label="num rev">
+      <discover_datasets pattern="__name_and_ext__" sort_by="reverse_numeric_name"/>
+    </collection>
+    <collection name="collection_lexical_name" type="list" label="num">
+      <discover_datasets pattern="__name_and_ext__" sort_by="lexical_name" />
+    </collection>
+    <data name="data_reverse_lexical_name">
+      <discover_datasets pattern="__name_and_ext__" format="txt" assign_primary_output="true" sort_by="reverse_lexical_name" visible="true"/>
+    </data>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="4">
+      <param name="input1" value="tinywga.fam" />
+      <output_collection name="collection_numeric_name" type="list" count="10">
+        <element name="2">
+          <assert_contents><has_text_matching expression="^.*$"/></assert_contents>
+        </element>
+        <element name="1">
+          <assert_contents><has_text_matching expression="^.*$"/></assert_contents>
+        </element>
+        <element name="10">
+          <assert_contents><has_text_matching expression="^.*$"/></assert_contents>
+        </element>
+      </output_collection>
+    </test>
+  </tests>
+</tool>