Merge pull request #1353 from jmchilton/output_refactor

Refactor tool output classes out of galaxy.tools and into parser module.
galaxyproject · Dec 17, 2015 · 455372e
2 parents 80a35c2 + 61f64e8
commit 455372e
Show file tree

Hide file tree

Showing 11 changed files with 246 additions and 233 deletions.
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
@@ -37,6 +37,7 @@
 from galaxy.tools.test import parse_tests
 from galaxy.tools.parser import get_tool_source
 from galaxy.tools.parser.xml import XmlPageSource
+from galaxy.tools.parser import ToolOutputCollectionPart
 from galaxy.tools.toolbox import AbstractToolBox
 from galaxy.util import rst_to_html, string_as_bool
 from galaxy.util import ExecutionTimer
@@ -226,222 +227,6 @@ def copy( self ):
         return new_state
 
 
-class ToolOutputBase( object, Dictifiable ):
-
-    def __init__( self, name, label=None, filters=None, hidden=False ):
-        super( ToolOutputBase, self ).__init__()
-        self.name = name
-        self.label = label
-        self.filters = filters or []
-        self.hidden = hidden
-        self.collection = False
-
-
-class ToolOutput( ToolOutputBase ):
-    """
-    Represents an output datasets produced by a tool. For backward
-    compatibility this behaves as if it were the tuple::
-
-      (format, metadata_source, parent)
-    """
-
-    dict_collection_visible_keys = ( 'name', 'format', 'label', 'hidden' )
-
-    def __init__( self, name, format=None, format_source=None, metadata_source=None,
-                  parent=None, label=None, filters=None, actions=None, hidden=False,
-                  implicit=False ):
-        super( ToolOutput, self ).__init__( name, label=label, filters=filters, hidden=hidden )
-        self.format = format
-        self.format_source = format_source
-        self.metadata_source = metadata_source
-        self.parent = parent
-        self.actions = actions
-
-        # Initialize default values
-        self.change_format = []
-        self.implicit = implicit
-        self.from_work_dir = None
-
-    # Tuple emulation
-
-    def __len__( self ):
-        return 3
-
-    def __getitem__( self, index ):
-        if index == 0:
-            return self.format
-        elif index == 1:
-            return self.metadata_source
-        elif index == 2:
-            return self.parent
-        else:
-            raise IndexError( index )
-
-    def __iter__( self ):
-        return iter( ( self.format, self.metadata_source, self.parent ) )
-
-    def to_dict( self, view='collection', value_mapper=None, app=None ):
-        as_dict = super( ToolOutput, self ).to_dict( view=view, value_mapper=value_mapper )
-        format = self.format
-        if format and format != "input" and app:
-            edam_format = app.datatypes_registry.edam_formats.get(self.format)
-            as_dict["edam_format"] = edam_format
-        return as_dict
-
-
-class ToolOutputCollection( ToolOutputBase ):
-    """
-    Represents a HistoryDatasetCollectionAssociation of output datasets produced
-    by a tool.
-
-    <outputs>
-      <collection type="list" label="${tool.name} on ${on_string} fasta">
-        <discover_datasets pattern="__name__" ext="fasta" visible="True" directory="outputFiles" />
-      </collection>
-      <collection type="paired" label="${tool.name} on ${on_string} paired reads">
-        <data name="forward" format="fastqsanger" />
-        <data name="reverse" format="fastqsanger"/>
-      </collection>
-    <outputs>
-    """
-
-    def __init__(
-        self,
-        name,
-        structure,
-        label=None,
-        filters=None,
-        hidden=False,
-        default_format="data",
-        default_format_source=None,
-        default_metadata_source=None,
-        inherit_format=False,
-        inherit_metadata=False
-    ):
-        super( ToolOutputCollection, self ).__init__( name, label=label, filters=filters, hidden=hidden )
-        self.collection = True
-        self.default_format = default_format
-        self.structure = structure
-        self.outputs = odict()
-
-        self.inherit_format = inherit_format
-        self.inherit_metadata = inherit_metadata
-
-        self.metadata_source = default_metadata_source
-        self.format_source = default_format_source
-        self.change_format = []  # TODO
-
-    def known_outputs( self, inputs, type_registry ):
-        if self.dynamic_structure:
-            return []
-
-        # This line is probably not right - should verify structured_like
-        # or have outputs and all outputs have name.
-        if len( self.outputs ) > 1:
-            output_parts = [ToolOutputCollectionPart(self, k, v) for k, v in self.outputs.iteritems()]
-        else:
-            # either must have specified structured_like or something worse
-            if self.structure.structured_like:
-                collection_prototype = inputs[ self.structure.structured_like ].collection
-            else:
-                collection_prototype = type_registry.prototype( self.structure.collection_type )
-
-            def prototype_dataset_element_to_output( element, parent_ids=[] ):
-                name = element.element_identifier
-                format = self.default_format
-                if self.inherit_format:
-                    format = element.dataset_instance.ext
-                output = ToolOutput(
-                    name,
-                    format=format,
-                    format_source=self.format_source,
-                    metadata_source=self.metadata_source,
-                    implicit=True,
-                )
-                if self.inherit_metadata:
-                    output.metadata_source = element.dataset_instance
-                return ToolOutputCollectionPart(
-                    self,
-                    element.element_identifier,
-                    output,
-                    parent_ids=parent_ids,
-                )
-
-            def prototype_collection_to_output( collection_prototype, parent_ids=[] ):
-                output_parts = []
-                for element in collection_prototype.elements:
-                    element_parts = []
-                    if not element.is_collection:
-                        element_parts.append(prototype_dataset_element_to_output( element, parent_ids ))
-                    else:
-                        new_parent_ids = parent_ids[:] + [element.element_identifier]
-                        element_parts.extend(prototype_collection_to_output(element.element_object, new_parent_ids))
-                    output_parts.extend(element_parts)
-
-                return output_parts
-
-            output_parts = prototype_collection_to_output( collection_prototype )
-
-        return output_parts
-
-    @property
-    def dynamic_structure(self):
-        return self.structure.dynamic
-
-    @property
-    def dataset_collector_descriptions(self):
-        if not self.dynamic_structure:
-            raise Exception("dataset_collector_descriptions called for output collection with static structure")
-        return self.structure.dataset_collector_descriptions
-
-
-class ToolOutputCollectionStructure( object ):
-
-    def __init__(
-        self,
-        collection_type,
-        collection_type_source,
-        structured_like,
-        dataset_collector_descriptions,
-    ):
-        self.collection_type = collection_type
-        self.collection_type_source = collection_type_source
-        self.structured_like = structured_like
-        self.dataset_collector_descriptions = dataset_collector_descriptions
-        if collection_type and collection_type_source:
-            raise ValueError("Cannot set both type and type_source on collection output.")
-        if collection_type is None and structured_like is None and dataset_collector_descriptions is None and collection_type_source is None:
-            raise ValueError( "Output collection types must be specify type of structured_like" )
-        if dataset_collector_descriptions and structured_like:
-            raise ValueError( "Cannot specify dynamic structure (discovered_datasets) and structured_like attribute." )
-        self.dynamic = dataset_collector_descriptions is not None
-
-
-class ToolOutputCollectionPart( object ):
-
-    def __init__( self, output_collection_def, element_identifier, output_def, parent_ids=[] ):
-        self.output_collection_def = output_collection_def
-        self.element_identifier = element_identifier
-        self.output_def = output_def
-        self.parent_ids = parent_ids
-
-    @property
-    def effective_output_name( self ):
-        name = self.output_collection_def.name
-        part_name = self.element_identifier
-        effective_output_name = "%s|__part__|%s" % ( name, part_name )
-        return effective_output_name
-
-    @staticmethod
-    def is_named_collection_part_name( name ):
-        return "|__part__|" in name
-
-    @staticmethod
-    def split_output_name( name ):
-        assert ToolOutputCollectionPart.is_named_collection_part_name( name )
-        return name.split("|__part__|")
-
-
 class Tool( object, Dictifiable ):
     """
     Represents a computational tool that can be executed through Galaxy.

diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py
@@ -660,7 +660,7 @@ def filter_output(output, incoming):
 
 def determine_output_format(output, parameter_context, input_datasets, input_dataset_collections, random_input_ext):
     """ Determines the output format for a dataset based on an abstract
-    description of the output (galaxy.tools.ToolOutput), the parameter
+    description of the output (galaxy.tools.parser.ToolOutput), the parameter
     wrappers, a map of the input datasets (name => HDA), and the last input
     extensions in the tool form.
 

diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
@@ -4,7 +4,7 @@
 collections from matched collections.
 """
 import collections
-import galaxy.tools
+from galaxy.tools.parser import ToolOutputCollectionPart
 from galaxy.util import ExecutionTimer
 from galaxy.tools.actions import on_text_for_names, ToolExecutionCache
 from threading import Thread
@@ -104,7 +104,7 @@ def record_success( self, job, outputs ):
         self.successful_jobs.append( job )
         self.output_datasets.extend( outputs )
         for output_name, output_dataset in outputs:
-            if galaxy.tools.ToolOutputCollectionPart.is_named_collection_part_name( output_name ):
+            if ToolOutputCollectionPart.is_named_collection_part_name( output_name ):
                 # Skip known collection outputs, these will be covered by
                 # output collections.
                 continue

diff --git a/lib/galaxy/tools/parser/__init__.py b/lib/galaxy/tools/parser/__init__.py
@@ -3,5 +3,8 @@
 from .interface import ToolSource
 from .factory import get_tool_source
 from .factory import get_input_source
+from .output_objects import (
+    ToolOutputCollectionPart,
+)
 
-__all__ = ["ToolSource", "get_tool_source", "get_input_source"]
+__all__ = ["ToolSource", "get_tool_source", "get_input_source", "ToolOutputCollectionPart"]
diff --git a/lib/galaxy/tools/parser/output_actions.py b/lib/galaxy/tools/parser/output_actions.py
@@ -205,7 +205,7 @@ def get_value( self, other_values ):
         try:
             if options:
                 return str( options[ self.offset ][ self.column ] )
-        except Exception, e:
+        except Exception as e:
             log.debug( "Error in FromFileToolOutputActionOption get_value: %s" % e )
         return None
 
@@ -253,7 +253,7 @@ def get_value( self, other_values ):
         try:
             if options:
                 return str( options[ self.offset ][ self.column ] )
-        except Exception, e:
+        except Exception as e:
             log.debug( "Error in FromParamToolOutputActionOption get_value: %s" % e )
         return None
 
@@ -288,7 +288,7 @@ def get_value( self, other_values ):
         try:
             if options:
                 return str( options[ self.offset ][ self.column ] )
-        except Exception, e:
+        except Exception as e:
             log.debug( "Error in FromDataTableOutputActionOption get_value: %s" % e )
         return None
 
@@ -383,7 +383,7 @@ def filter_options( self, options, other_values ):
             try:
                 if self.keep == ( self.compare( self.cast( fields[self.column] ), value ) ):
                     rval.append( fields )
-            except Exception, e:
+            except Exception as e:
                 log.debug(e)
                 continue  # likely a bad cast or column out of range
         return rval