Skip to content

Commit

Permalink
Merge pull request #5462 from mvdbeek/tag_collection_from_files
Browse files Browse the repository at this point in the history
Tool that adds/sets tags for collection elements from a file
  • Loading branch information
jmchilton committed Feb 12, 2018
2 parents 0367a35 + d14bae7 commit 6d00958
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 3 deletions.
1 change: 1 addition & 0 deletions config/tool_conf.xml.sample
Expand Up @@ -39,6 +39,7 @@
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />
</section>
<section id="liftOver" name="Lift-Over">
<tool file="extract/liftOver_wrapper.xml" />
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/managers/taggable.py
Expand Up @@ -24,7 +24,7 @@ def _tag_str_gen(item):
def _tags_to_strings(item):
    """Return the tag strings of ``item`` as a sorted list.

    :param item: any object; only its ``tags`` attribute is inspected here.
    :returns: ``None`` when ``item`` has no ``tags`` attribute, otherwise a
        sorted list of whatever ``_tag_str_gen(item)`` yields.
    """
    if not hasattr(item, 'tags'):
        return None
    # Sort so the result has a deterministic order; ``sorted`` accepts the
    # generator directly, so no intermediate ``list`` is needed.
    return sorted(_tag_str_gen(item))


def _tags_from_strings(item, tag_handler, new_tags_list, user=None):
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/managers/tags.py
Expand Up @@ -169,6 +169,8 @@ def apply_item_tag(self, user, item, name, value=None):
item_tag_assoc.user_tname = name
item_tag_assoc.user_value = value
item_tag_assoc.value = lc_value
# Need to flush to get an ID. We need an ID to apply multiple tags with the same tname to an object.
self.sa_session.flush()
return item_tag_assoc

def apply_item_tags(self, user, item, tags_str):
Expand Down
60 changes: 60 additions & 0 deletions lib/galaxy/tools/__init__.py
Expand Up @@ -28,6 +28,7 @@
from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper
from galaxy.managers import histories
from galaxy.managers.jobs import JobSearch
from galaxy.managers.tags import GalaxyTagManager
from galaxy.queue_worker import send_control_task
from galaxy.tools.actions import DefaultToolAction
from galaxy.tools.actions.data_manager import DataManagerToolAction
Expand Down Expand Up @@ -2605,6 +2606,65 @@ def add_copied_value_to_new_elements(new_label, dce_object):
)


class TagFromFileTool(DatabaseOperationTool):
    """Database operation tool that tags copies of collection elements from a file.

    The ``tags`` input is read as a tab-separated file: the first column is an
    element identifier and the remaining columns are tags.  The ``how`` input
    controls how the listed tags are combined with the tags already present on
    the original element: ``add`` merges them, ``remove`` subtracts them, and
    any other value (the tool form offers ``set``) passes the listed tags on
    unchanged.
    """
    tool_type = 'tag_from_file'

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Copy every element of the input collection, tag the copies and emit a new collection.

        :param trans: transaction object; ``trans.app.model.context`` is handed to the tag manager.
        :param out_data: not used by this tool.
        :param output_collections: receives the new collection via ``create_collection``.
        :param incoming: tool state providing ``input`` (the collection),
            ``tags`` (the tabular dataset) and ``how`` (``add``/``set``/``remove``).
        :param history: history the copied datasets are added to; its user owns the new tags.
        """
        hdca = incoming["input"]
        how = incoming['how']
        new_tags_dataset_assoc = incoming["tags"]
        new_elements = odict()
        tags_manager = GalaxyTagManager(trans.app.model.context)

        def existing_tags(item):
            # Tags currently attached to ``item`` as a set of strings; empty when untagged.
            if not item.tags:
                return set()
            return set(tag for tag in tags_manager.get_tags_str(item.tags).split(',') if tag)

        def add_copied_value_to_new_elements(new_tags_dict, dce):
            if getattr(dce.element_object, "history_content_type", None) == "dataset":
                copied_value = dce.element_object.copy()
                # copy should never be visible, since part of a collection
                copied_value.visible = False
                history.add_dataset(copied_value, copied_value, set_hid=False)
                new_tags = new_tags_dict.get(dce.element_identifier)
                if new_tags:
                    if how in ('add', 'remove'):
                        # Combine with the original tags.  This is also done when the
                        # original has no tags, so that 'remove' never ends up *adding*
                        # the tags that were meant to be removed.
                        old_tags = existing_tags(dce.element_object)
                        if how == 'add':
                            old_tags.update(set(new_tags))
                        elif how == 'remove':
                            old_tags = old_tags - set(new_tags)
                        new_tags = old_tags
                    tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags)
            else:
                # We have a collection, and we copy the elements so that we don't manipulate the original tags
                copied_value = dce.element_object.copy(element_destination=history)
                for new_element, old_element in zip(copied_value.dataset_elements, dce.element_object.dataset_elements):
                    # TODO: This should be eliminated, but collections created by the collection builder
                    # don't set `visible` to `False` if you don't hide the original elements.
                    new_element.element_object.visible = False
                    new_tags = new_tags_dict.get(new_element.element_identifier)
                    if how in ('add', 'remove'):
                        old_tags = existing_tags(old_element.element_object)
                        if new_tags:
                            if how == 'add':
                                old_tags.update(set(new_tags))
                            elif how == 'remove':
                                old_tags = old_tags - set(new_tags)
                        new_tags = old_tags
                    # NOTE(review): with how == 'set' and an identifier missing from the file,
                    # ``new_tags`` is None here -- confirm add_tags_from_list tolerates that.
                    tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags)
            new_elements[dce.element_identifier] = copied_value

        new_tags_path = new_tags_dataset_assoc.file_name
        # Use a context manager so the file handle is closed once the lines are read.
        with open(new_tags_path, "r") as new_tags_fh:
            new_tags = new_tags_fh.readlines(1024 * 1000000)
        # We have a tabular file, where the first column is an existing element identifier,
        # and the remaining columns represent new tags.
        source_new_tags = (line.strip().split('\t') for line in new_tags)
        new_tags_dict = {item[0]: item[1:] for item in source_new_tags}
        for dce in hdca.collection.elements:
            add_copied_value_to_new_elements(new_tags_dict, dce)
        output_collections.create_collection(
            next(iter(self.outputs.values())), "output", elements=new_elements
        )


class FilterFromFileTool(DatabaseOperationTool):
tool_type = 'filter_from_file'

Expand Down
91 changes: 91 additions & 0 deletions lib/galaxy/tools/tag_collection_from_file.xml
@@ -0,0 +1,91 @@
<tool id="__TAG_FROM_FILE__"
      name="Tag elements from file"
      version="1.0.0"
      tool_type="tag_from_file">
    <!-- Model operation tool: copies the elements of a collection and tags the
         copies according to a tabular file (element identifier, then tags). -->
    <description>from contents of a file</description>
    <type class="TagFromFileTool" module="galaxy.tools" />
    <action module="galaxy.tools.actions.model_operations"
            class="ModelOperationToolAction"/>
    <inputs>
        <param type="data_collection" name="input" label="Input Collection" help="Collection whose elements will be copied and tagged."/>
        <param type="data" name="tags" format="tabular" label="Tag collection elements according to this file" help="A tabular file indicating how to tag collection elements."/>
        <param name="how" type="select" label="How should the tags be updated">
            <option value="add">New tags will be added, existing tags will be kept</option>
            <option value="set">New tags will be added, existing tags will be removed</option>
            <option value="remove">The tags listed will be removed</option>
        </param>
    </inputs>
    <outputs>
        <collection name="output" format_source="input" type_source="input" label="${on_string} (Tagged)" >
        </collection>
    </outputs>
    <tests>
        <!-- Flat list, tags added to untagged elements. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="forward" value="simple_line.txt" />
                    <element name="reverse" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <param name="tags" value="new_tags_1.txt" ftype="txt" />
            <param name="how" value="add"/>
            <output_collection name="output" type="list">
                <element name="forward">
                    <assert_contents>
                        <has_text_matching expression="^This is a line of text.\n$" />
                    </assert_contents>
                    <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
                </element>
                <element name="reverse">
                    <assert_contents>
                        <has_text_matching expression="^This is a different line of text.\n$" />
                    </assert_contents>
                    <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
                </element>
            </output_collection>
        </test>
        <!-- Nested list:paired collection, tags set on the inner elements. -->
        <test>
            <param name="input">
                <collection type="list:paired">
                    <element name="i1">
                        <collection type="paired">
                            <element name="forward" value="simple_line.txt" />
                            <element name="reverse" value="simple_line_alternative.txt" />
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="how" value="set"/>
            <param name="tags" value="new_tags_1.txt" ftype="txt" />
            <output_collection name="output" type="list:paired">
                <element name="i1">
                    <element name="forward">
                        <assert_contents>
                            <has_text_matching expression="^This is a line of text.\n$" />
                        </assert_contents>
                        <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_text_matching expression="^This is a different line of text.\n$" />
                        </assert_contents>
                        <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

This tool will take an input collection and a tabular file,
where the first column indicates an element identifier and the
remaining columns contain the new tags. This file may contain
fewer entries than elements in the collection.
In that case only matching list identifiers will be tagged.

This tool will create new history datasets from your collection
but your quota usage will not increase.

    ]]></help>
</tool>
2 changes: 2 additions & 0 deletions test-data/new_tags_1.txt
@@ -0,0 +1,2 @@
forward orientation:forward alias:r1 alias:f
reverse orientation:reverse alias:r2 alias:r
4 changes: 2 additions & 2 deletions test/base/interactor.py
Expand Up @@ -107,12 +107,12 @@ def _verify_metadata(self, history_id, hid, attributes):
"""Check dataset metadata.
ftype on output maps to `file_ext` on the hda's API description, `name`, `info`,
and `dbkey` all map to the API description directly. Other metadata attributes
`dbkey` and `tags` all map to the API description directly. Other metadata attributes
are assumed to be datatype-specific and mapped with a prefix of `metadata_`.
"""
metadata = attributes.get('metadata', {}).copy()
for key, value in metadata.copy().items():
if key not in ['name', 'info']:
if key not in ['name', 'info', 'tags']:
new_key = "metadata_%s" % key
metadata[new_key] = metadata[key]
del metadata[key]
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/samples_tool_conf.xml
Expand Up @@ -183,5 +183,6 @@
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />

</toolbox>

0 comments on commit 6d00958

Please sign in to comment.