Merge pull request #5462 from mvdbeek/tag_collection_from_files

Tool that adds/sets tags for collection elements from a file
galaxyproject · Feb 12, 2018 · 6d00958 · 6d00958
2 parents 0367a35 + d14bae7
commit 6d00958
Show file tree

Hide file tree

Showing 8 changed files with 160 additions and 3 deletions.
diff --git a/config/tool_conf.xml.sample b/config/tool_conf.xml.sample
@@ -39,6 +39,7 @@
     <tool file="${model_tools_path}/relabel_from_file.xml" />
     <tool file="${model_tools_path}/filter_from_file.xml" />
     <tool file="${model_tools_path}/sort_collection_list.xml" />
+    <tool file="${model_tools_path}/tag_collection_from_file.xml" />
   </section>
   <section id="liftOver" name="Lift-Over">
     <tool file="extract/liftOver_wrapper.xml" />

diff --git a/lib/galaxy/managers/taggable.py b/lib/galaxy/managers/taggable.py
@@ -24,7 +24,8 @@ def _tag_str_gen(item):
 def _tags_to_strings(item):
+    """Return the item's tags as a sorted list of strings, or None if it has no ``tags`` attribute."""
     if not hasattr(item, 'tags'):
         return None
-    return list(_tag_str_gen(item))
+    return sorted(_tag_str_gen(item))
 
 
 def _tags_from_strings(item, tag_handler, new_tags_list, user=None):

diff --git a/lib/galaxy/managers/tags.py b/lib/galaxy/managers/tags.py
@@ -169,6 +169,8 @@ def apply_item_tag(self, user, item, name, value=None):
         item_tag_assoc.user_tname = name
         item_tag_assoc.user_value = value
         item_tag_assoc.value = lc_value
+        # Need to flush to get an ID. We need an ID to apply multiple tags with the same tname to an object.
+        self.sa_session.flush()
         return item_tag_assoc
 
     def apply_item_tags(self, user, item, tags_str):

diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
@@ -28,6 +28,7 @@
 from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper
 from galaxy.managers import histories
 from galaxy.managers.jobs import JobSearch
+from galaxy.managers.tags import GalaxyTagManager
 from galaxy.queue_worker import send_control_task
 from galaxy.tools.actions import DefaultToolAction
 from galaxy.tools.actions.data_manager import DataManagerToolAction
@@ -2605,6 +2606,86 @@ def add_copied_value_to_new_elements(new_label, dce_object):
         )
 
 
+class TagFromFileTool(DatabaseOperationTool):
+    """Copy a collection and tag the copied elements from a tabular dataset.
+
+    Each row of the tabular dataset holds an element identifier in the first
+    column and tags in the remaining columns. The ``how`` input selects
+    whether those tags are merged into ('add') or subtracted from ('remove')
+    the tags of the original element, or passed on unchanged ('set').
+    """
+    tool_type = 'tag_from_file'
+
+    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
+        """Create the "output" collection from tagged copies of the input elements.
+
+        Reads ``incoming["input"]`` (the collection to copy), ``incoming["tags"]``
+        (the tabular dataset) and ``incoming["how"]`` ('add', 'set' or 'remove').
+        Tags are applied to copies of the elements, not to the original elements.
+        """
+        hdca = incoming["input"]
+        how = incoming['how']
+        new_tags_dataset_assoc = incoming["tags"]
+        new_elements = odict()
+        tags_manager = GalaxyTagManager(trans.app.model.context)
+
+        def merge_with_original_tags(original, new_tags):
+            """Return the tag set for a copy of ``original`` in 'add'/'remove' mode."""
+            original_tags = set()
+            if original.tags:
+                original_tags.update(tag for tag in tags_manager.get_tags_str(original.tags).split(',') if tag)
+            if new_tags:
+                if how == 'add':
+                    original_tags.update(new_tags)
+                elif how == 'remove':
+                    original_tags.difference_update(new_tags)
+            return original_tags
+
+        def add_copied_value_to_new_elements(new_tags_dict, dce):
+            """Copy ``dce``'s element, tag the copy and register it in ``new_elements``."""
+            if getattr(dce.element_object, "history_content_type", None) == "dataset":
+                copied_value = dce.element_object.copy()
+                # copy should never be visible, since part of a collection
+                copied_value.visible = False
+                history.add_dataset(copied_value, copied_value, set_hid=False)
+                new_tags = new_tags_dict.get(dce.element_identifier)
+                if new_tags:
+                    if how in ('add', 'remove'):
+                        # Start from the original's tags, even when it has none, so that
+                        # 'remove' never ends up adding the tags it was asked to remove.
+                        new_tags = merge_with_original_tags(dce.element_object, new_tags)
+                    tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags)
+            else:
+                # We have a collection, and we copy the elements so that we don't manipulate the original tags
+                copied_value = dce.element_object.copy(element_destination=history)
+                for new_element, old_element in zip(copied_value.dataset_elements, dce.element_object.dataset_elements):
+                    # TODO: This should be eliminated, but collections created by the collection builder
+                    # don't set `visible` to `False` if you don't hide the original elements.
+                    new_element.element_object.visible = False
+                    new_tags = new_tags_dict.get(new_element.element_identifier)
+                    if how in ('add', 'remove'):
+                        new_tags = merge_with_original_tags(old_element.element_object, new_tags)
+                    elif new_tags is None:
+                        # Not 'add'/'remove' and the file has no row for this element: nothing to apply.
+                        continue
+                    tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags)
+            new_elements[dce.element_identifier] = copied_value
+
+        new_tags_path = new_tags_dataset_assoc.file_name
+        # Close the file handle as soon as the lines are read; the size hint caps how much is read.
+        with open(new_tags_path, "r") as new_tags_file:
+            tag_lines = new_tags_file.readlines(1024 * 1000000)
+        # We have a tabular file, where the first column is an existing element identifier,
+        # and the remaining columns represent new tags.
+        source_new_tags = (line.strip().split('\t') for line in tag_lines)
+        new_tags_dict = {item[0]: item[1:] for item in source_new_tags}
+        for dce in hdca.collection.elements:
+            add_copied_value_to_new_elements(new_tags_dict, dce)
+        output_collections.create_collection(
+            next(iter(self.outputs.values())), "output", elements=new_elements
+        )
+
+
 class FilterFromFileTool(DatabaseOperationTool):
     tool_type = 'filter_from_file'
 

diff --git a/lib/galaxy/tools/tag_collection_from_file.xml b/lib/galaxy/tools/tag_collection_from_file.xml
@@ -0,0 +1,94 @@
+<tool id="__TAG_FROM_FILE__"
+      name="Tag elements from file"
+      version="1.0.0"
+      tool_type="tag_from_file">
+    <description>from contents of a file</description>
+    <type class="TagFromFileTool" module="galaxy.tools" />
+    <action module="galaxy.tools.actions.model_operations"
+            class="ModelOperationToolAction"/>
+    <!-- "tags" is a tab-separated file: element identifier first, then one tag per remaining column. -->
+    <inputs>
+        <param type="data_collection" name="input" label="Input Collection"/>
+        <param type="data" name="tags" format="tabular" label="Tag collection elements according to this file" help="A tabular file indicating how to tag collection elements."/>
+        <param name="how" type="select" label="How should the tags be updated">
+            <option value="add">New tags will be added, existing tags will be kept</option>
+            <option value="set">New tags will be added, existing tags will be removed</option>
+            <option value="remove">The tags listed will be removed</option>
+        </param>
+    </inputs>
+    <outputs>
+        <collection name="output" format_source="input" type_source="input" label="${on_string} (Tagged)" >
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Flat list: the tags from the file are added to the matching elements. -->
+        <test>
+            <param name="input">
+                <collection type="list">
+                    <element name="forward" value="simple_line.txt" />
+                    <element name="reverse" value="simple_line_alternative.txt" />
+                </collection>
+            </param>
+            <param name="tags" value="new_tags_1.txt" ftype="txt" />
+            <param name="how" value="add"/>
+            <output_collection name="output" type="list">
+                <element name="forward">
+                    <assert_contents>
+                        <has_text_matching expression="^This is a line of text.\n$" />
+                    </assert_contents>
+                    <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
+                </element>
+                <element name="reverse">
+                    <assert_contents>
+                        <has_text_matching expression="^This is a different line of text.\n$" />
+                    </assert_contents>
+                    <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
+                </element>
+            </output_collection>
+        </test>
+        <!-- list:paired: rows are matched against the identifiers of the elements inside the pair. -->
+        <test>
+            <param name="input">
+                <collection type="list:paired">
+                    <element name="i1">
+                        <collection type="paired">
+                            <element name="forward" value="simple_line.txt" />
+                            <element name="reverse" value="simple_line_alternative.txt" />
+                        </collection>
+                    </element>
+                </collection>
+            </param>
+            <param name="how" value="set"/>
+            <param name="tags" value="new_tags_1.txt" ftype="txt" />
+            <output_collection name="output" type="list:paired">
+                <element name="i1">
+                    <element name="forward">
+                        <assert_contents>
+                            <has_text_matching expression="^This is a line of text.\n$" />
+                        </assert_contents>
+                        <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
+                    </element>
+                    <element name="reverse">
+                        <assert_contents>
+                            <has_text_matching expression="^This is a different line of text.\n$" />
+                        </assert_contents>
+                        <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
+                    </element>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+        This tool will take an input collection and a tabular file,
+        where the first column indicates an element identifier and the
+        remaining columns contain the new tags. This file may contain
+        fewer entries than elements in the collection.
+        In that case only matching list identifiers will be tagged.
+
+        This tool will create new history datasets from your collection
+        but your quota usage will not increase.
+    ]]></help>
+</tool>
diff --git a/test-data/new_tags_1.txt b/test-data/new_tags_1.txt
@@ -0,0 +1,2 @@
+forward	orientation:forward	alias:r1	alias:f
+reverse	orientation:reverse	alias:r2	alias:r
diff --git a/test/base/interactor.py b/test/base/interactor.py
@@ -107,12 +107,12 @@ def _verify_metadata(self, history_id, hid, attributes):
         """Check dataset metadata.
 
         ftype on output maps to `file_ext` on the hda's API description, `name`, `info`,
-        and `dbkey` all map to the API description directly. Other metadata attributes
+        `dbkey` and `tags` all map to the API description directly. Other metadata attributes
         are assumed to be datatype-specific and mapped with a prefix of `metadata_`.
         """
         metadata = attributes.get('metadata', {}).copy()
         for key, value in metadata.copy().items():
-            if key not in ['name', 'info']:
+            if key not in ['name', 'info', 'tags']:
                 new_key = "metadata_%s" % key
                 metadata[new_key] = metadata[key]
                 del metadata[key]

diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml
@@ -183,5 +183,6 @@
   <tool file="${model_tools_path}/merge_collection.xml" />
   <tool file="${model_tools_path}/relabel_from_file.xml" />
   <tool file="${model_tools_path}/filter_from_file.xml" />
+  <tool file="${model_tools_path}/tag_collection_from_file.xml" />
 
 </toolbox>