Skip to content

Commit

Permalink
Add tool that tags collection elements from a file
Browse files Browse the repository at this point in the history
This tags collection elements (but not collections).
  • Loading branch information
mvdbeek committed Feb 6, 2018
1 parent 3864a17 commit b114f3f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 0 deletions.
1 change: 1 addition & 0 deletions config/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
Expand Down
54 changes: 54 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper
from galaxy.managers import histories
from galaxy.managers.jobs import JobSearch
from galaxy.managers.tags import GalaxyTagManager
from galaxy.queue_worker import send_control_task
from galaxy.tools.actions import DefaultToolAction
from galaxy.tools.actions.data_manager import DataManagerToolAction
Expand Down Expand Up @@ -2605,6 +2606,59 @@ def add_copied_value_to_new_elements(new_label, dce_object):
)


class TagFromFileTool(DatabaseOperationTool):
    """Copy a collection and tag the copied elements from a tabular file.

    The first column of the tag file is an element identifier, the remaining
    columns are the tags to apply to the element with that identifier.
    The original collection and its elements are not modified; tags are
    applied to copies.
    """
    tool_type = 'tag_from_file'

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Create the output collection with tagged copies of the input elements.

        Reads three values from ``incoming``:

        * ``input`` - the collection whose elements are copied,
        * ``tags`` - the dataset holding the tab-separated tag table,
        * ``set_tags`` - if true the tags from the file are used on their own,
          otherwise they are merged with the tags of the original element.

        Elements without a row in the tag file are still copied into the
        output collection.
        """
        hdca = incoming["input"]
        set_tags = incoming['set_tags']
        new_tags_dataset_assoc = incoming["tags"]
        new_elements = odict()
        tags_manager = GalaxyTagManager(trans.app.model.context)

        def existing_tags(item):
            # Tags currently attached to ``item`` as a set; empty strings
            # produced by splitting an empty tag string are dropped.
            return set(tag for tag in tags_manager.get_tags_str(item.tags).split(',') if tag)

        def add_copied_value_to_new_elements(new_tags_dict, dce):
            if getattr(dce.element_object, "history_content_type", None) == "dataset":
                copied_value = dce.element_object.copy()
                # The copy should never be visible, since it is part of a collection.
                # (This used to assign to the misspelled attribute `visble`,
                # which left the copy's real `visible` flag untouched.)
                copied_value.visible = False
                history.add_dataset(copied_value, copied_value, set_hid=False)
                new_tags = new_tags_dict.get(dce.element_identifier)
                if new_tags:
                    if not set_tags and dce.element_object.tags:
                        # Merge the original element's tags with the new tags.
                        merged_tags = existing_tags(dce.element_object)
                        merged_tags.update(new_tags)
                        new_tags = merged_tags
                    tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags)
            else:
                # We have a collection, and we copy the elements so that we don't manipulate the original tags
                copied_value = dce.element_object.copy(element_destination=history)
                for new_element, old_element in zip(copied_value.dataset_elements, dce.element_object.dataset_elements):
                    # TODO: This should be eliminated, but collections created by the collection builder
                    # don't set `visible` to `False` if you don't hide the original elements.
                    new_element.element_object.visible = False
                    new_tags = new_tags_dict.get(new_element.element_identifier)
                    if not set_tags:
                        merged_tags = existing_tags(old_element.element_object)
                        if new_tags:
                            merged_tags.update(new_tags)
                        new_tags = merged_tags
                    if new_tags is None:
                        # No row for this identifier while replacing tags: hand the
                        # tag manager an empty list rather than None.
                        # NOTE(review): assumes add_tags_from_list iterates its
                        # argument - confirm against GalaxyTagManager.
                        new_tags = []
                    tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags)
            new_elements[dce.element_identifier] = copied_value

        # Use a context manager so the file handle is released even if reading fails.
        with open(new_tags_dataset_assoc.file_name, "r") as tags_file:
            tag_lines = tags_file.readlines(1024 * 1000000)
        # We have a tabular file, where the first column is an existing element identifier,
        # and the remaining columns represent new tags.
        tag_rows = (line.strip().split('\t') for line in tag_lines)
        new_tags_dict = {row[0]: row[1:] for row in tag_rows}
        for dce in hdca.collection.elements:
            add_copied_value_to_new_elements(new_tags_dict, dce)
        output_collections.create_collection(
            next(iter(self.outputs.values())), "output", elements=new_elements
        )


class FilterFromFileTool(DatabaseOperationTool):
tool_type = 'filter_from_file'

Expand Down
85 changes: 85 additions & 0 deletions lib/galaxy/tools/tag_collection_from_file.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<tool id="__TAG_FROM_FILE__"
name="Tag elements from file"
version="1.0.0"
tool_type="tag_from_file">
<description>from contents of a file</description>
<type class="TagFromFileTool" module="galaxy.tools" />
<action module="galaxy.tools.actions.model_operations"
class="ModelOperationToolAction"/>
<inputs>
<param type="data_collection" name="input" label="Input Collection" help="The collection whose elements will be tagged."/>
<param type="data" name="tags" format="tabular" label="Add tags from this file" help="A tabular file indicating how to tag collection elements."/>
<param name="set_tags" type="boolean" label="Set tags instead of updating tags?" help="If you select yes existing tags will be removed, otherwise new tags and existing tags will be merged."/>
</inputs>
<outputs>
<collection name="output" format_source="input" type_source="input" label="${on_string} (Tagged)" >
</collection>
</outputs>
<tests>
<test>
<param name="input">
<collection type="list">
<element name="forward" value="simple_line.txt" />
<element name="reverse" value="simple_line_alternative.txt" />
</collection>
</param>
<param name="tags" value="new_tags_1.txt" ftype="txt" />
<output_collection name="output" type="list">
<element name="forward">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
<metadata name="tags" value="alias:r1,orientation:forward" />
</element>
<element name="reverse">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
<metadata name="tags" value="alias:r2,orientation:reverse" />
</element>
</output_collection>
</test>
<test>
<param name="input">
<collection type="list:paired">
<element name="i1">
<collection type="paired">
<element name="forward" value="simple_line.txt" />
<element name="reverse" value="simple_line_alternative.txt" />
</collection>
</element>
</collection>
</param>
<param name="tags" value="new_tags_1.txt" ftype="txt" />
<output_collection name="output" type="list:paired">
<element name="i1">
<element name="forward">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
<metadata name="tags" value="alias:r1,orientation:forward" />
</element>
<element name="reverse">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
<metadata name="tags" value="alias:r2,orientation:reverse" />
</element>
</element>
</output_collection>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
This tool will take an input collection and a tabular file,
where the first column indicates an element identifier and the
remaining columns contain the new tags. This file may contain
fewer entries than elements in the collection.
In that case only matching list identifiers will be tagged.
This tool will create new history datasets from your collection
but your quota usage will not increase.
]]></help>
</tool>
2 changes: 2 additions & 0 deletions test-data/new_tags_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
forward orientation:forward alias:r1
reverse orientation:reverse alias:r2

0 comments on commit b114f3f

Please sign in to comment.