Skip to content

Commit

Permalink
Fix Registry.get_datatype_by_extension() to return None if ext is unknown
Browse files Browse the repository at this point in the history

Without this fix, the Cheetah expression:

$dataset.is_of_type('unknown_ext')

in a tool command would be equivalent to:

$dataset.is_of_type('txt')

meaning that if the dataset's datatype is a subclass of Text, the expression
would silently evaluate to True, without any warning.

xref. galaxyproject/tools-iuc#1373

Also add the missing `xml` datatype to
`test/functional/tools/sample_datatypes_conf.xml`, which is needed by 3 test
tools.
  • Loading branch information
nsoranzo committed Jun 25, 2017
1 parent 73faf7c commit 0bfa1df
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 12 deletions.
9 changes: 6 additions & 3 deletions lib/galaxy/datatypes/registry.py
Expand Up @@ -493,11 +493,11 @@ def get_mimetype_by_extension( self, ext, default='application/octet-stream' ):
return mimetype

def get_datatype_by_extension( self, ext ):
"""Returns a datatype based on an extension"""
"""Returns a datatype object based on an extension"""
try:
builder = self.datatypes_by_extension[ ext ]
except KeyError:
builder = data.Text()
builder = None
return builder

def change_datatype( self, data, ext ):
Expand Down Expand Up @@ -813,7 +813,10 @@ def get_converter_by_target_type( self, source_ext, target_ext ):
def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe=True ):
"""Returns ( target_ext, existing converted dataset )"""
for convert_ext in self.get_converters_by_datatype( dataset.ext ):
if self.get_datatype_by_extension( convert_ext ).matches_any( accepted_formats ):
convert_ext_datatype = self.get_datatype_by_extension( convert_ext )
if convert_ext_datatype is None:
self.log.warning("Datatype class not found for extension '%s', which is used as target for conversion from datatype '%s'" % (convert_ext, dataset.ext))
elif convert_ext_datatype.matches_any( accepted_formats ):
converted_dataset = dataset.get_converted_files_by_type( convert_ext )
if converted_dataset:
ret_data = converted_dataset
Expand Down
9 changes: 8 additions & 1 deletion lib/galaxy/model/__init__.py
Expand Up @@ -1947,7 +1947,14 @@ def extra_files_path( self ):

@property
def datatype( self ):
return _get_datatypes_registry().get_datatype_by_extension( self.extension )
extension = self.extension
if not extension or extension == 'auto' or extension == '_sniff_':
extension = 'data'
ret = _get_datatypes_registry().get_datatype_by_extension( extension )
if ret is None:
log.warning("Datatype class not found for extension '%s'" % extension)
return _get_datatypes_registry().get_datatype_by_extension( 'data' )
return ret

def get_metadata( self ):
# using weakref to store parent (to prevent circ ref),
Expand Down
20 changes: 13 additions & 7 deletions lib/galaxy/tools/parameters/basic.py
Expand Up @@ -1428,12 +1428,16 @@ def _datatypes_registery( self, trans, tool ):
def _parse_formats( self, trans, tool, input_source ):
datatypes_registry = self._datatypes_registery( trans, tool )

# Build tuple of classes for supported data formats
formats = []
# Build list of classes for supported data formats
self.extensions = input_source.get( 'format', 'data' ).split( "," )
normalized_extensions = [extension.strip().lower() for extension in self.extensions]
formats = []
for extension in normalized_extensions:
formats.append( datatypes_registry.get_datatype_by_extension( extension ) )
datatype = datatypes_registry.get_datatype_by_extension(extension)
if datatype is not None:
formats.append(datatype)
else:
log.warning("Datatype class not found for extension '%s', which is used in the 'format' attribute of parameter '%s'" % (extension, self.name))
self.formats = formats

def _parse_options( self, input_source ):
Expand Down Expand Up @@ -1566,10 +1570,12 @@ def __init__( self, tool, input_source, trans=None):
self._parse_options( input_source )
# Load conversions required for the dataset input
self.conversions = []
for name, conv_extensions in input_source.parse_conversion_tuples():
assert None not in [ name, conv_extensions ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extensions )
conv_types = [ tool.app.datatypes_registry.get_datatype_by_extension( conv_extensions.lower() ) ]
self.conversions.append( ( name, conv_extensions, conv_types ) )
for name, conv_extension in input_source.parse_conversion_tuples():
assert None not in [ name, conv_extension ], 'A name (%s) and type (%s) are required for explicit conversion' % ( name, conv_extension )
conv_type = tool.app.datatypes_registry.get_datatype_by_extension( conv_extension.lower() )
if conv_type is None:
raise ValueError("Datatype class not found for extension '%s', which is used as 'type' attribute in conversion of data parameter '%s'" % (conv_type, self.name))
self.conversions.append( ( name, conv_extension, [conv_type] ) )

def match_collections( self, history, dataset_matcher, reduction=True ):
dataset_collection_matcher = DatasetCollectionMatcher( dataset_matcher )
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/tools/wrappers.py
Expand Up @@ -246,7 +246,13 @@ def is_collection( self ):
return False

def is_of_type( self, *exts ):
datatypes = [ self.datatypes_registry.get_datatype_by_extension( e ) for e in exts ]
datatypes = []
for e in exts:
datatype = self.datatypes_registry.get_datatype_by_extension(e)
if datatype is not None:
datatypes.append(datatype)
else:
log.warning("Datatype class not found for extension '%s', which is used as parameter of 'is_of_type()' method" % (e))
return self.dataset.datatype.matches_any( datatypes )

def __str__( self ):
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/sample_datatypes_conf.xml
Expand Up @@ -20,5 +20,6 @@
<datatype extension="biom1" type="galaxy.datatypes.text:Biom1" display_in_upload="True" subclass="True" mimetype="application/json"/>
<datatype extension="bed" type="galaxy.datatypes.interval:Bed" display_in_upload="true" description="BED format provides a flexible way to define the data lines that are displayed in an annotation track. BED lines have three required columns and nine additional optional columns. The three required columns are chrom, chromStart and chromEnd." description_url="https://galaxyproject.org/learn/datatypes/#bed">
</datatype>
<datatype extension="xml" type="galaxy.datatypes.xml:GenericXml" mimetype="application/xml" display_in_upload="true"/>
</registration>
</datatypes>

0 comments on commit 0bfa1df

Please sign in to comment.