Skip to content

Commit

Permalink
Merge pull request #1353 from jmchilton/output_refactor
Browse files Browse the repository at this point in the history
Refactor tool output classes out of galaxy.tools and into parser module.
  • Loading branch information
bgruening committed Dec 17, 2015
2 parents 80a35c2 + 61f64e8 commit 455372e
Show file tree
Hide file tree
Showing 11 changed files with 246 additions and 233 deletions.
217 changes: 1 addition & 216 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from galaxy.tools.test import parse_tests
from galaxy.tools.parser import get_tool_source
from galaxy.tools.parser.xml import XmlPageSource
from galaxy.tools.parser import ToolOutputCollectionPart
from galaxy.tools.toolbox import AbstractToolBox
from galaxy.util import rst_to_html, string_as_bool
from galaxy.util import ExecutionTimer
Expand Down Expand Up @@ -226,222 +227,6 @@ def copy( self ):
return new_state


class ToolOutputBase( object, Dictifiable ):
    """
    Attributes shared by every kind of tool output description.

    Stores the output's name, its optional label, the filters attached to
    it and whether it is hidden.  ``collection`` starts out False here;
    ToolOutputCollection sets it to True in its own constructor.
    """

    def __init__(self, name, label=None, filters=None, hidden=False):
        super(ToolOutputBase, self).__init__()
        # Any falsy ``filters`` value (None, empty list, ...) is replaced by
        # a fresh list so instances never share a filter list by accident.
        self.filters = filters if filters else []
        self.hidden = hidden
        self.label = label
        self.name = name
        self.collection = False


class ToolOutput(ToolOutputBase):
    """
    Describes a single output dataset produced by a tool.  For backward
    compatibility an instance also behaves as if it were the tuple::

        (format, metadata_source, parent)
    """

    dict_collection_visible_keys = ('name', 'format', 'label', 'hidden')

    def __init__(self, name, format=None, format_source=None, metadata_source=None,
                 parent=None, label=None, filters=None, actions=None, hidden=False,
                 implicit=False):
        super(ToolOutput, self).__init__(name, label=label, filters=filters, hidden=hidden)
        # Values supplied by the caller.
        self.format = format
        self.format_source = format_source
        self.metadata_source = metadata_source
        self.parent = parent
        self.actions = actions
        self.implicit = implicit

        # Defaults; nothing in this constructor changes them afterwards.
        self.change_format = []
        self.from_work_dir = None

    # ---- Tuple emulation: (format, metadata_source, parent) ----

    def __len__(self):
        return 3

    def __getitem__(self, index):
        legacy_fields = (self.format, self.metadata_source, self.parent)
        # Compare against 0, 1, 2 in order rather than indexing the tuple so
        # that negative indexes and slices still raise IndexError.
        for position, value in enumerate(legacy_fields):
            if index == position:
                return value
        raise IndexError(index)

    def __iter__(self):
        legacy_fields = (self.format, self.metadata_source, self.parent)
        return iter(legacy_fields)

    def to_dict(self, view='collection', value_mapper=None, app=None):
        """
        Return the dictified output, adding an ``edam_format`` entry when
        the format is set, is not the literal "input", and ``app`` is given.
        """
        result = super(ToolOutput, self).to_dict(view=view, value_mapper=value_mapper)
        output_format = self.format
        if not (output_format and output_format != "input" and app):
            return result
        result["edam_format"] = app.datatypes_registry.edam_formats.get(self.format)
        return result


class ToolOutputCollection(ToolOutputBase):
    """
    Represents a HistoryDatasetCollectionAssociation of output datasets produced
    by a tool.

    Example tool XML::

        <outputs>
          <collection type="list" label="${tool.name} on ${on_string} fasta">
            <discover_datasets pattern="__name__" ext="fasta" visible="True" directory="outputFiles" />
          </collection>
          <collection type="paired" label="${tool.name} on ${on_string} paired reads">
            <data name="forward" format="fastqsanger" />
            <data name="reverse" format="fastqsanger"/>
          </collection>
        </outputs>
    """

    def __init__(
        self,
        name,
        structure,
        label=None,
        filters=None,
        hidden=False,
        default_format="data",
        default_format_source=None,
        default_metadata_source=None,
        inherit_format=False,
        inherit_metadata=False
    ):
        super(ToolOutputCollection, self).__init__(name, label=label, filters=filters, hidden=hidden)
        self.collection = True
        self.default_format = default_format
        self.structure = structure
        # Explicitly declared child outputs, keyed by element name.
        self.outputs = odict()

        self.inherit_format = inherit_format
        self.inherit_metadata = inherit_metadata

        self.metadata_source = default_metadata_source
        self.format_source = default_format_source
        self.change_format = []  # TODO

    def known_outputs(self, inputs, type_registry):
        """
        Return the ToolOutputCollectionPart objects for this collection.

        A collection with dynamic structure yields an empty list.  Otherwise
        the parts come from the explicitly declared ``self.outputs`` when
        there is more than one of them, or are derived by walking a
        prototype collection taken either from the input named by
        ``structure.structured_like`` or from ``type_registry``.

        :param inputs: mapping indexed by input name; the entry selected by
                       ``structured_like`` must expose ``.collection``.
        :param type_registry: object providing ``prototype( collection_type )``.
        """
        if self.dynamic_structure:
            return []

        # This line is probably not right - should verify structured_like
        # or have outputs and all outputs have name.
        if len(self.outputs) > 1:
            # items() rather than the Python 2 only iteritems().
            return [ToolOutputCollectionPart(self, k, v) for k, v in self.outputs.items()]

        # either must have specified structured_like or something worse
        if self.structure.structured_like:
            collection_prototype = inputs[self.structure.structured_like].collection
        else:
            collection_prototype = type_registry.prototype(self.structure.collection_type)

        def prototype_dataset_element_to_output(element, parent_ids=None):
            # None instead of a mutable [] default: the list ends up stored on
            # the returned part, so a shared default would be shared state.
            parent_ids = [] if parent_ids is None else parent_ids
            name = element.element_identifier
            output_format = self.default_format
            if self.inherit_format:
                output_format = element.dataset_instance.ext
            output = ToolOutput(
                name,
                format=output_format,
                format_source=self.format_source,
                metadata_source=self.metadata_source,
                implicit=True,
            )
            if self.inherit_metadata:
                output.metadata_source = element.dataset_instance
            return ToolOutputCollectionPart(
                self,
                name,
                output,
                # Each part gets its own copy so sibling parts never alias
                # the same list object.
                parent_ids=list(parent_ids),
            )

        def prototype_collection_to_output(collection_prototype, parent_ids=None):
            parent_ids = [] if parent_ids is None else parent_ids
            output_parts = []
            for element in collection_prototype.elements:
                if not element.is_collection:
                    output_parts.append(prototype_dataset_element_to_output(element, parent_ids))
                else:
                    # Descend into the nested collection, extending the
                    # identifier path with this element's identifier.
                    new_parent_ids = parent_ids[:] + [element.element_identifier]
                    output_parts.extend(prototype_collection_to_output(element.element_object, new_parent_ids))
            return output_parts

        return prototype_collection_to_output(collection_prototype)

    @property
    def dynamic_structure(self):
        """Whether the underlying structure is flagged as dynamic."""
        return self.structure.dynamic

    @property
    def dataset_collector_descriptions(self):
        """
        Collector descriptions of the underlying structure.

        Raises Exception when the collection has a static structure.
        """
        if not self.dynamic_structure:
            raise Exception("dataset_collector_descriptions called for output collection with static structure")
        return self.structure.dataset_collector_descriptions


class ToolOutputCollectionStructure(object):
    """
    Describes how the structure of an output collection is determined.

    The four constructor arguments are stored as attributes of the same
    name.  ``dynamic`` is True exactly when ``dataset_collector_descriptions``
    is not None (an empty list therefore still counts as dynamic).

    Raises ValueError when:

    * both ``collection_type`` and ``collection_type_source`` are truthy,
    * all four arguments are None, or
    * both ``dataset_collector_descriptions`` and ``structured_like`` are
      truthy.
    """

    def __init__(
        self,
        collection_type,
        collection_type_source,
        structured_like,
        dataset_collector_descriptions,
    ):
        self.collection_type = collection_type
        self.collection_type_source = collection_type_source
        self.structured_like = structured_like
        self.dataset_collector_descriptions = dataset_collector_descriptions
        if collection_type and collection_type_source:
            raise ValueError("Cannot set both type and type_source on collection output.")
        structure_sources = (
            collection_type,
            structured_like,
            dataset_collector_descriptions,
            collection_type_source,
        )
        if all(source is None for source in structure_sources):
            # The original message ("must be specify type of structured_like")
            # was garbled and named only two of the four accepted sources.
            raise ValueError(
                "Output collection must specify one of type, type_source, "
                "structured_like, or discover_datasets."
            )
        if dataset_collector_descriptions and structured_like:
            raise ValueError( "Cannot specify dynamic structure (discovered_datasets) and structured_like attribute." )
        self.dynamic = dataset_collector_descriptions is not None


class ToolOutputCollectionPart(object):
    """
    One named element of a tool output collection.

    Pairs an output definition with the collection definition it belongs
    to, the element identifier it is stored under, and the identifiers of
    any enclosing nested collections (``parent_ids``).
    """

    def __init__(self, output_collection_def, element_identifier, output_def, parent_ids=None):
        self.output_collection_def = output_collection_def
        self.element_identifier = element_identifier
        self.output_def = output_def
        # A mutable [] default would be one list shared by every instance
        # created without parent_ids; build a fresh list per instance.
        self.parent_ids = [] if parent_ids is None else parent_ids

    @property
    def effective_output_name(self):
        """Name of the form ``<collection name>|__part__|<element identifier>``."""
        name = self.output_collection_def.name
        part_name = self.element_identifier
        return "%s|__part__|%s" % (name, part_name)

    @staticmethod
    def is_named_collection_part_name(name):
        """Return True when ``name`` contains the ``|__part__|`` separator."""
        return "|__part__|" in name

    @staticmethod
    def split_output_name(name):
        """
        Split ``name`` on the ``|__part__|`` separator and return the pieces
        as a list.  Asserts that ``name`` contains the separator.
        """
        assert ToolOutputCollectionPart.is_named_collection_part_name(name)
        return name.split("|__part__|")


class Tool( object, Dictifiable ):
"""
Represents a computational tool that can be executed through Galaxy.
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ def filter_output(output, incoming):

def determine_output_format(output, parameter_context, input_datasets, input_dataset_collections, random_input_ext):
""" Determines the output format for a dataset based on an abstract
description of the output (galaxy.tools.ToolOutput), the parameter
description of the output (galaxy.tools.parser.ToolOutput), the parameter
wrappers, a map of the input datasets (name => HDA), and the last input
extensions in the tool form.
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/tools/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
collections from matched collections.
"""
import collections
import galaxy.tools
from galaxy.tools.parser import ToolOutputCollectionPart
from galaxy.util import ExecutionTimer
from galaxy.tools.actions import on_text_for_names, ToolExecutionCache
from threading import Thread
Expand Down Expand Up @@ -104,7 +104,7 @@ def record_success( self, job, outputs ):
self.successful_jobs.append( job )
self.output_datasets.extend( outputs )
for output_name, output_dataset in outputs:
if galaxy.tools.ToolOutputCollectionPart.is_named_collection_part_name( output_name ):
if ToolOutputCollectionPart.is_named_collection_part_name( output_name ):
# Skip known collection outputs, these will be covered by
# output collections.
continue
Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/tools/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
from .interface import ToolSource
from .factory import get_tool_source
from .factory import get_input_source
from .output_objects import (
ToolOutputCollectionPart,
)

__all__ = ["ToolSource", "get_tool_source", "get_input_source"]
__all__ = ["ToolSource", "get_tool_source", "get_input_source", "ToolOutputCollectionPart"]
8 changes: 4 additions & 4 deletions lib/galaxy/tools/parser/output_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def get_value( self, other_values ):
try:
if options:
return str( options[ self.offset ][ self.column ] )
except Exception, e:
except Exception as e:
log.debug( "Error in FromFileToolOutputActionOption get_value: %s" % e )
return None

Expand Down Expand Up @@ -253,7 +253,7 @@ def get_value( self, other_values ):
try:
if options:
return str( options[ self.offset ][ self.column ] )
except Exception, e:
except Exception as e:
log.debug( "Error in FromParamToolOutputActionOption get_value: %s" % e )
return None

Expand Down Expand Up @@ -288,7 +288,7 @@ def get_value( self, other_values ):
try:
if options:
return str( options[ self.offset ][ self.column ] )
except Exception, e:
except Exception as e:
log.debug( "Error in FromDataTableOutputActionOption get_value: %s" % e )
return None

Expand Down Expand Up @@ -383,7 +383,7 @@ def filter_options( self, options, other_values ):
try:
if self.keep == ( self.compare( self.cast( fields[self.column] ), value ) ):
rval.append( fields )
except Exception, e:
except Exception as e:
log.debug(e)
continue # likely a bad cast or column out of range
return rval
Expand Down
Loading

0 comments on commit 455372e

Please sign in to comment.