Fix Registry.get_datatype_by_extension() to return None if ext is unknown

Without this fix, the Cheetah expression `$dataset.is_of_type('unknown_ext')` in a tool command would be equivalent to `$dataset.is_of_type('txt')`, meaning that if the dataset datatype is a subclass of Text, the expression would evaluate to True without any warning. xref. galaxyproject/tools-iuc#1373

Also add the missing `xml` datatype to `test/functional/tools/sample_datatypes_conf.xml`, which is needed by 3 test tools.
galaxyproject · Jun 25, 2017 · 0bfa1df · 0bfa1df
1 parent 73faf7c
commit 0bfa1df
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 12 deletions.
diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py
@@ -493,11 +493,11 @@ def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
         return mimetype
 
     def get_datatype_by_extension( self, ext ):
-        """Returns a datatype based on an extension"""
+        """Returns a datatype object based on an extension"""
         try:
             builder = self.datatypes_by_extension[ ext ]
         except KeyError:
-            builder = data.Text()
+            builder = None
         return builder
 
     def change_datatype( self, data, ext ):
@@ -813,7 +813,10 @@ def get_converter_by_target_type( self, source_ext, target_ext ):
     def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
         """Returns ( target_ext, existing converted dataset )"""
         for convert_ext in self.get_converters_by_datatype( dataset.ext ):
-            if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
+            convert_ext_datatype = self.get_datatype_by_extension( convert_ext )
+            if convert_ext_datatype is None:
+                self.log.warning("Datatype class not found for extension '%s', which is used as target for conversion from datatype '%s'" % (convert_ext, dataset.ext))
+            elif convert_ext_datatype.matches_any( accepted_formats ):
                 converted_dataset = dataset.get_converted_files_by_type( convert_ext )
                 if converted_dataset:
                     ret_data = converted_dataset

diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
@@ -1947,7 +1947,14 @@ def extra_files_path( self ):
 
     @property
     def datatype( self ):
-        return _get_datatypes_registry().get_datatype_by_extension( self.extension )
+        extension = self.extension
+        if not extension or extension == 'auto' or extension == '_sniff_':
+            extension = 'data'
+        ret = _get_datatypes_registry().get_datatype_by_extension( extension )
+        if ret is None:
+            log.warning("Datatype class not found for extension '%s'" % extension)
+            return _get_datatypes_registry().get_datatype_by_extension( 'data' )
+        return ret
 
     def get_metadata( self ):
         # using weakref to store parent (to prevent circ ref),

diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
@@ -1428,12 +1428,16 @@ def _datatypes_registery( self, trans, tool ):
     def _parse_formats( self, trans, tool, input_source ):
         datatypes_registry = self._datatypes_registery( trans, tool )
 
-        # Build tuple of classes for supported data formats
-        formats = []
+        # Build list of classes for supported data formats
         self.extensions = input_source.get( 'format', 'data' ).split( "," )
         normalized_extensions = [extension.strip().lower() for extension in self.extensions]
+        formats = []
         for extension in normalized_extensions:
-            formats.append( datatypes_registry.get_datatype_by_extension( extension ) )
+            datatype = datatypes_registry.get_datatype_by_extension(extension)
+            if datatype is not None:
+                formats.append(datatype)
+            else:
+                log.warning("Datatype class not found for extension '%s', which is used in the 'format' attribute of parameter '%s'" % (extension, self.name))
         self.formats = formats
 
     def _parse_options( self, input_source ):
@@ -1566,10 +1570,12 @@ def __init__( self, tool, input_source, trans=None):
         self._parse_options( input_source )
         # Load conversions required for the dataset input
         self.conversions = []
-        for name, conv_extensions in input_source.parse_conversion_tuples():
-            assert None not in [ name, conv_extensions ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extensions )
-            conv_types = [ tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ) ]
-            self.conversions.append( ( name, conv_extensions, conv_types ) )
+        for name, conv_extension in input_source.parse_conversion_tuples():
+            assert None not in [ name, conv_extension ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extension )
+            conv_type = tool.app.datatypes_registry.get_datatype_by_extension( conv_extension.lower() )
+            if conv_type is None:
+                raise ValueError("Datatype class not found for extension '%s', which is used as 'type' attribute in conversion of data parameter '%s'" % (conv_extension, self.name))
+            self.conversions.append( ( name, conv_extension, [conv_type] ) )
 
     def match_collections( self, history, dataset_matcher, reduction=True ):
         dataset_collection_matcher = DatasetCollectionMatcher( dataset_matcher )

diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
@@ -246,7 +246,13 @@ def is_collection( self ):
         return False
 
     def is_of_type( self, *exts ):
-        datatypes = [ self.datatypes_registry.get_datatype_by_extension( e ) for e in exts ]
+        datatypes = []
+        for e in exts:
+            datatype = self.datatypes_registry.get_datatype_by_extension(e)
+            if datatype is not None:
+                datatypes.append(datatype)
+            else:
+                log.warning("Datatype class not found for extension '%s', which is used as parameter of 'is_of_type()' method" % (e))
         return self.dataset.datatype.matches_any( datatypes )
 
     def __str__( self ):

diff --git a/test/functional/tools/sample_datatypes_conf.xml b/test/functional/tools/sample_datatypes_conf.xml
@@ -20,5 +20,6 @@
     <datatype extension="biom1" type="galaxy.datatypes.text:Biom1" display_in_upload="True" subclass="True" mimetype="application/json"/>
 	<datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true" description="BED format provides a flexible way to define the data lines that are displayed in an annotation track. BED lines have three required columns and nine additional optional columns. The three required columns are chrom, chromStart and chromEnd." description_url="https://galaxyproject.org/learn/datatypes/#bed">
     </datatype>
+    <datatype extension="xml" type="galaxy.datatypes.xml:GenericXml" mimetype="application/xml" display_in_upload="true"/>
   </registration>
 </datatypes>