Skip to content

Commit

Permalink
Update tool build API for implicit collection conversion.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed May 15, 2018
1 parent 3d6443b commit 11a31a1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 24 deletions.
13 changes: 10 additions & 3 deletions lib/galaxy/datatypes/registry.py
Expand Up @@ -876,14 +876,21 @@ def get_converter_by_target_type(self, source_ext, target_ext):
return converters[target_ext]
return None

def find_conversion_destination_for_dataset_by_extensions(self, dataset, accepted_formats, converter_safe=True):
def find_conversion_destination_for_dataset_by_extensions(self, dataset_or_ext, accepted_formats, converter_safe=True):
"""Returns ( target_ext, existing converted dataset )"""
for convert_ext in self.get_converters_by_datatype(dataset.ext):
if hasattr(dataset_or_ext, "ext"):
ext = dataset_or_ext.ext
dataset = dataset_or_ext
else:
ext = dataset_or_ext
dataset = None

for convert_ext in self.get_converters_by_datatype(ext):
convert_ext_datatype = self.get_datatype_by_extension(convert_ext)
if convert_ext_datatype is None:
self.log.warning("Datatype class not found for extension '%s', which is used as target for conversion from datatype '%s'" % (convert_ext, dataset.ext))
elif convert_ext_datatype.matches_any(accepted_formats):
converted_dataset = dataset.get_converted_files_by_type(convert_ext)
converted_dataset = dataset and dataset.get_converted_files_by_type(convert_ext)
if converted_dataset:
ret_data = converted_dataset
elif not converter_safe:
Expand Down
33 changes: 22 additions & 11 deletions lib/galaxy/tools/parameters/basic.py
Expand Up @@ -1853,8 +1853,12 @@ def append(list, hda, name, src, keep=False):
# add dataset collections
dataset_collection_matcher = dataset_matcher_factory.dataset_collection_matcher(dataset_matcher)
for hdca in history.active_visible_dataset_collections:
if dataset_collection_matcher.hdca_match(hdca, reduction=multiple):
append(d['options']['hdca'], hdca, hdca.name, 'hdca')
match = dataset_collection_matcher.hdca_match(hdca, reduction=multiple)
if match:
name = hdca.name
if match.implicit_conversion:
name = "%s (with implicit datatype conversion)" % name
append(d['options']['hdca'], hdca, name, 'hdca')

# sort both lists
d['options']['hda'] = sorted(d['options']['hda'], key=lambda k: k['hid'], reverse=True)
def match_collections(self, trans, history, dataset_collection_matcher):
    """Yield ``(hdca, implicit_conversion)`` pairs for history dataset
    collections that directly match this parameter.

    Candidates come from the dataset collections service filtered by this
    parameter's history query; ``implicit_conversion`` is True when the
    match requires a datatype conversion.
    """
    collections_service = trans.app.dataset_collections_service
    candidates = collections_service.history_dataset_collections(history, self._history_query(trans))

    for candidate in candidates:
        hdca_match = dataset_collection_matcher.hdca_match(candidate)
        if hdca_match:
            yield candidate, hdca_match.implicit_conversion

def match_multirun_collections(self, trans, history, dataset_collection_matcher):
    """Yield ``(hdca, implicit_conversion)`` pairs for visible history
    collections whose subcollections this parameter can be mapped over.

    Collections the history query cannot map over are skipped before the
    datatype match is attempted.
    """
    for hdca in history.active_visible_dataset_collections:
        # Re-evaluate the history query per candidate, as the original did.
        if self._history_query(trans).can_map_over(hdca):
            hdca_match = dataset_collection_matcher.hdca_match(hdca)
            if hdca_match:
                yield hdca, hdca_match.implicit_conversion

def from_json(self, value, trans, other_values={}):
rval = None
Expand Down Expand Up @@ -1978,22 +1983,28 @@ def to_dict(self, trans, other_values=None):
dataset_collection_matcher = dataset_matcher_factory.dataset_collection_matcher(dataset_matcher)

# append directly matched collections
for hdca in self.match_collections(trans, history, dataset_collection_matcher):
for hdca, implicit_conversion in self.match_collections(trans, history, dataset_collection_matcher):
name = hdca.name
if implicit_conversion:
name = "%s (with implicit datatype conversion)" % name
d['options']['hdca'].append({
'id' : trans.security.encode_id(hdca.id),
'hid' : hdca.hid,
'name' : hdca.name,
'name' : name,
'src' : 'hdca',
'tags' : [t.user_tname if not t.value else "%s:%s" % (t.user_tname, t.value) for t in hdca.tags]
})

# append matching subcollections
for hdca in self.match_multirun_collections(trans, history, dataset_collection_matcher):
for hdca, implicit_conversion in self.match_multirun_collections(trans, history, dataset_collection_matcher):
subcollection_type = self._history_query(trans).can_map_over(hdca).collection_type
name = hdca.name
if implicit_conversion:
name = "%s (with implicit datatype conversion)" % name
d['options']['hdca'].append({
'id' : trans.security.encode_id(hdca.id),
'hid' : hdca.hid,
'name' : hdca.name,
'name' : name,
'src' : 'hdca',
'tags' : [t.user_tname if not t.value else "%s:%s" % (t.user_tname, t.value) for t in hdca.tags],
'map_over_type': subcollection_type
Expand Down
49 changes: 39 additions & 10 deletions lib/galaxy/tools/parameters/dataset_matcher.py
Expand Up @@ -80,9 +80,9 @@ def dataset_matcher(self, param, other_values):

def dataset_collection_matcher(self, dataset_matcher):
    """Build the collection matcher appropriate for this factory.

    Returns the summary (extension-level) matcher when summary processing
    is available, otherwise the element-by-element matcher.
    """
    if not self._can_process_summary:
        return DatasetCollectionMatcher(self._trans, dataset_matcher)
    return SummaryDatasetCollectionMatcher(self, self._trans, dataset_matcher)


class DatasetMatcher(object):
Expand Down Expand Up @@ -183,10 +183,25 @@ def implicit_conversion(self):
return True


class HdcaDirectMatch(object):
    """Truthy match result for an HDCA whose elements match the target
    formats directly, with no datatype conversion needed.

    Counterpart of ``HdcaImplicitMatch``; callers only inspect
    ``implicit_conversion`` (always False here).
    """
    # Class attribute is sufficient; the original's no-op __init__ was
    # pure boilerplate and has been dropped.
    implicit_conversion = False


class HdcaImplicitMatch(object):
    """Truthy match result for an HDCA that matches only via implicit
    datatype conversion of one or more element extensions.

    Counterpart of ``HdcaDirectMatch``; callers only inspect
    ``implicit_conversion`` (always True here).
    """
    # Class attribute is sufficient; the original's no-op __init__ was
    # pure boilerplate and has been dropped.
    implicit_conversion = True


class SummaryDatasetCollectionMatcher(object):

def __init__(self, dataset_matcher_factory, dataset_matcher):
def __init__(self, dataset_matcher_factory, trans, dataset_matcher):
self.dataset_matcher_factory = dataset_matcher_factory
self._trans = trans
self.dataset_matcher = dataset_matcher

def hdca_match(self, history_dataset_collection_association, reduction=False):
Expand All @@ -203,17 +218,26 @@ def hdca_match(self, history_dataset_collection_association, reduction=False):
return False

formats = self.dataset_matcher.param.formats
uses_implicit_conversion = False
for extension in extensions:
if not self.dataset_matcher_factory.matches_any_format(extension, formats):
if self.dataset_matcher_factory.matches_any_format(extension, formats):
continue

datatypes_registry = self._trans.app.datatypes_registry
converted_ext, _ = datatypes_registry.find_conversion_destination_for_dataset_by_extensions(extension, formats)
if not converted_ext:
return False
else:
uses_implicit_conversion = True

return True
return HdcaImplicitMatch() if uses_implicit_conversion else HdcaDirectMatch()


class DatasetCollectionMatcher(object):

def __init__(self, dataset_matcher):
def __init__(self, trans, dataset_matcher):
self.dataset_matcher = dataset_matcher
self._trans = trans

def __valid_element(self, element):
# Simplify things for now and assume these are hdas and not implicit
Expand All @@ -230,7 +254,7 @@ def __valid_element(self, element):
if not hda:
return False
hda_match = self.dataset_matcher.hda_match(hda, ensure_visible=False)
return hda_match and not hda_match.implicit_conversion
return hda_match

def hdca_match(self, history_dataset_collection_association, reduction=False):
dataset_collection = history_dataset_collection_association.collection
Expand All @@ -246,11 +270,16 @@ def dataset_collection_match(self, dataset_collection):
return False

valid = True
uses_implicit_conversion = False
for element in dataset_collection.elements:
if not self.__valid_element(element):
match_element = self.__valid_element(element)
if not match_element:
valid = False
break
return valid
elif match_element.implicit_conversion:
uses_implicit_conversion = True

return valid and (HdcaImplicitMatch() if uses_implicit_conversion else HdcaDirectMatch())


__all__ = ('DatasetMatcher', 'DatasetCollectionMatcher')
__all__ = ('get_dataset_matcher_factory', 'set_dataset_matcher_factory', 'unset_dataset_matcher_factory')

0 comments on commit 11a31a1

Please sign in to comment.