Navigation Menu

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for recursively discovering output datasets #5240

Merged
merged 3 commits on Jan 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
50 changes: 31 additions & 19 deletions lib/galaxy/tools/parameters/output_collect.py
Expand Up @@ -474,43 +474,54 @@ def discover_files(output_name, tool_provided_metadata, extra_file_collectors, j
# just load entries from tool provided metadata...
assert len(extra_file_collectors) == 1
extra_file_collector = extra_file_collectors[0]
target_directory = discover_target_directory(extra_file_collector, job_working_directory)
target_directory = discover_target_directory(extra_file_collector.directory, job_working_directory)
for dataset in tool_provided_metadata.get_new_datasets(output_name):
filename = dataset["filename"]
path = os.path.join(target_directory, filename)
yield DiscoveredFile(path, extra_file_collector, JsonCollectedDatasetMatch(dataset, extra_file_collector, filename, path=path))
else:
for (match, collector) in walk_over_extra_files(extra_file_collectors, job_working_directory, matchable):
for (match, collector) in walk_over_file_collectors(extra_file_collectors, job_working_directory, matchable):
yield DiscoveredFile(match.path, collector, match)


def discover_target_directory(extra_file_collector, job_working_directory):
directory = job_working_directory
if extra_file_collector.directory:
directory = os.path.join(directory, extra_file_collector.directory)
def discover_target_directory(dir_name, job_working_directory):
if dir_name:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't equivalent to the previous version, and I guess this is why collecting outputs from tool provided metadata fails (see https://jenkins.galaxyproject.org/job/docker-framework/9815/).
In line 477 this function is called. I suppose changing the call in 477 to pass in extra_file_collector.directory instead of extra_file_collector may fix the failing tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mvdbeek Thanks for catching that - I'd missed your comment and I've just pushed in the fix as suggested. I changed it around a few times and must have forgotten to update the parameter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems to me this method may still have problems xref: #5560

directory = os.path.join(job_working_directory, dir_name)
if not util.in_directory(directory, job_working_directory):
raise Exception("Problem with tool configuration, attempting to pull in datasets from outside working directory.")
return directory

return directory
else:
return job_working_directory

def walk_over_extra_files(extra_file_collectors, job_working_directory, matchable):

def walk_over_file_collectors(extra_file_collectors, job_working_directory, matchable):
for extra_file_collector in extra_file_collectors:
assert extra_file_collector.discover_via == "pattern"
matches = []
directory = discover_target_directory(extra_file_collector, job_working_directory)
if not os.path.isdir(directory):
continue
for filename in os.listdir(directory):
path = os.path.join(directory, filename)
if not os.path.isfile(path):
continue
for match in walk_over_extra_files(extra_file_collector.directory, extra_file_collector, job_working_directory, matchable):
yield match, extra_file_collector


def walk_over_extra_files(target_dir, extra_file_collector, job_working_directory, matchable):
"""
Walks through all files in a given directory, and returns all files that
match the given collector's match criteria. If the collector has the
recurse flag enabled, will also recursively descend into child folders.
"""
matches = []
directory = discover_target_directory(target_dir, job_working_directory)
for filename in os.listdir(directory):
path = os.path.join(directory, filename)
if os.path.isdir(path) and extra_file_collector.recurse:
# The current directory is already validated, so use that as the next job_working_directory when recursing
for match in walk_over_extra_files(filename, extra_file_collector, directory, matchable):
yield match
else:
match = extra_file_collector.match(matchable, filename, path=path)
if match:
matches.append(match)

for match in extra_file_collector.sort(matches):
yield match, extra_file_collector
for match in extra_file_collector.sort(matches):
yield match


def dataset_collector(dataset_collection_description):
Expand Down Expand Up @@ -551,6 +562,7 @@ def __init__(self, dataset_collection_description):
self.default_visible = dataset_collection_description.default_visible
self.directory = dataset_collection_description.directory
self.assign_primary_output = dataset_collection_description.assign_primary_output
self.recurse = dataset_collection_description.recurse

def _pattern_for_dataset(self, dataset_instance=None):
token_replacement = r'\d+'
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/tools/parser/output_collection_def.py
Expand Up @@ -63,6 +63,7 @@ def __init__(self, **kwargs):
self.default_visible = asbool(kwargs.get("visible", None))
self.assign_primary_output = asbool(kwargs.get('assign_primary_output', False))
self.directory = kwargs.get("directory", None)
self.recurse = False


class ToolProvidedMetadataDatasetCollection(DatasetCollectionDescription):
Expand All @@ -77,6 +78,7 @@ class FilePatternDatasetCollectionDescription(DatasetCollectionDescription):
def __init__(self, **kwargs):
super(FilePatternDatasetCollectionDescription, self).__init__(**kwargs)
pattern = kwargs.get("pattern", "__default__")
self.recurse = asbool(kwargs.get("recurse", False))
if pattern in NAMED_PATTERNS:
pattern = NAMED_PATTERNS.get(pattern)
self.pattern = pattern
Expand Down
5 changes: 5 additions & 0 deletions lib/galaxy/tools/xsd/galaxy.xsd
Expand Up @@ -3929,6 +3929,11 @@ More information can be found on Planemo's documentation for
<xs:documentation xml:lang="en">Directory (relative to working directory) to search for files.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="recurse" type="xs:boolean" use="optional">
<xs:annotation>
<xs:documentation xml:lang="en">Indicates that the specified directory should be searched recursively for matching files.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="format" type="xs:string" use="optional">
<xs:annotation>
<xs:documentation xml:lang="en">Format (or datatype) of discovered datasets (an alias with ``ext``).</xs:documentation>
Expand Down
65 changes: 65 additions & 0 deletions test/functional/tools/multi_output_recurse.xml
@@ -0,0 +1,65 @@
<tool id="multi_output_recurse" name="multi_output_recurse" version="0.1.0">
<!-- Functional test tool: the command writes files both at the working-directory
     root and inside nested subdirectories (subdir1, subdir2, subdir3/nested1) so
     that recurse="true" on <discover_datasets> can be exercised end to end. -->
<command>
echo "Hello" > $report;
mkdir subdir1;
echo "This" > subdir1/this.txt;
echo "That" > subdir1/that.txt;
mkdir subdir2;
echo "1" > subdir2/CUSTOM_1.txt;
echo "2" > subdir2/CUSTOM_2.txt;
mkdir subdir3;
echo "Foo" > subdir3/Foo;
<!-- nested1 is two levels deep: only reachable when recursion is enabled. -->
mkdir subdir3/nested1;
echo "Bar" > subdir3/nested1/bar.txt;
echo "Hello" > subdir3/nested1/hello;
echo "1" > sample1.report.txt;
echo "2" > sample2.report.txt;
</command>
<inputs>
<param name="num_param" type="integer" value="7" />
<param name="input" type="data" />
</inputs>
<outputs>
<data auto_format="true" name="report">
<!-- directory="." plus recurse="true": discovery starts at the working
     directory and descends into every child folder created above. -->
<discover_datasets pattern="__designation__" directory="." visible="true" recurse="true" />
</data>
</outputs>
<tests>
<test>
<param name="num_param" value="7" />
<param name="input" ftype="txt" value="simple_line.txt"/>
<output name="report">
<assert_contents>
<has_line line="Hello" />
</assert_contents>
<!-- One <discovered_dataset> per file written by the command, including the
     deeply nested bar.txt and hello; designations are bare filenames, so
     discovery is expected to flatten the directory structure. -->
<discovered_dataset designation="this.txt">
<assert_contents><has_line line="This" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="that.txt">
<assert_contents><has_line line="That" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="CUSTOM_1.txt">
<assert_contents><has_line line="1" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="CUSTOM_2.txt">
<assert_contents><has_line line="2" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="Foo">
<assert_contents><has_line line="Foo" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="bar.txt">
<assert_contents><has_line line="Bar" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="hello">
<assert_contents><has_line line="Hello" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="sample1.report.txt">
<assert_contents><has_line line="1" /></assert_contents>
</discovered_dataset>
<discovered_dataset designation="sample2.report.txt">
<assert_contents><has_line line="2" /></assert_contents>
</discovered_dataset>
</output>
</test>
</tests>
</tool>
1 change: 1 addition & 0 deletions test/functional/tools/samples_tool_conf.xml
Expand Up @@ -17,6 +17,7 @@
<tool file="multi_output.xml" />
<tool file="multi_output_configured.xml" />
<tool file="multi_output_assign_primary.xml" />
<tool file="multi_output_recurse.xml" />
<tool file="tool_provided_metadata_1.xml" />
<tool file="tool_provided_metadata_2.xml" />
<tool file="tool_provided_metadata_3.xml" />
Expand Down
25 changes: 25 additions & 0 deletions test/unit/tools/test_collect_primary_datasets.py
Expand Up @@ -57,6 +57,31 @@ def test_collect_multiple(self):
# didn't result in a dbkey being set.
assert created_hda_1.dbkey == "?"

def test_collect_multiple_recurse(self):
    """Discovered datasets are collected from nested folders when recurse="true"."""
    self._replace_output_collectors('''<output>
<discover_datasets pattern="__name__" directory="subdir1" recurse="true" ext="txt" />
<discover_datasets pattern="__name__" directory="subdir2" recurse="true" ext="txt" />
</output>''')
    path1 = self._setup_extra_file(filename="test1", subdir="subdir1")
    # test2 sits one level below the configured directory; only the
    # recurse flag makes it discoverable.
    path2 = self._setup_extra_file(filename="test2", subdir="subdir2/nested1/")
    path3 = self._setup_extra_file(filename="test3", subdir="subdir2")

    datasets = self._collect()
    assert DEFAULT_TOOL_OUTPUT in datasets
    self.assertEquals(len(datasets[DEFAULT_TOOL_OUTPUT]), 3)

    # Test default order of collection.
    assert list(datasets[DEFAULT_TOOL_OUTPUT].keys()) == ["test1", "test2", "test3"]

    # Each HDA must be backed by the exact file created above, regardless
    # of how deep in the directory tree it was found.
    created_hda_1 = datasets[DEFAULT_TOOL_OUTPUT]["test1"]
    self.app.object_store.assert_created_with_path(created_hda_1.dataset, path1)

    created_hda_2 = datasets[DEFAULT_TOOL_OUTPUT]["test2"]
    self.app.object_store.assert_created_with_path(created_hda_2.dataset, path2)

    created_hda_3 = datasets[DEFAULT_TOOL_OUTPUT]["test3"]
    self.app.object_store.assert_created_with_path(created_hda_3.dataset, path3)

def test_collect_sorted_reverse(self):
self._replace_output_collectors('''<output>
<discover_datasets pattern="__name__" directory="subdir_for_name_discovery" sort_by="reverse_filename" ext="txt" />
Expand Down