diff --git a/config/tool_conf.xml.sample b/config/tool_conf.xml.sample index 3e19d92f2658..2f0399853748 100644 --- a/config/tool_conf.xml.sample +++ b/config/tool_conf.xml.sample @@ -36,6 +36,7 @@ +
# diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
# index b84709615f6a..e442812a3e94 100755
# --- a/lib/galaxy/tools/__init__.py
# +++ b/lib/galaxy/tools/__init__.py
# @@ -2506,11 +2506,60 @@ def add_elements(collection, prefix=""):


class RelabelFromFileTool(DatabaseOperationTool):
    """Build a new collection whose element identifiers are read from a dataset.

    Two modes are selected by ``incoming["how"]["how_select"]``:

    * ``"tabular"``: the labels dataset has two tab-separated columns, the
      current element identifier and the new one.
    * anything else: line *i* of the labels dataset is the new identifier
      of element *i* of the input collection.
    """
    tool_type = 'relabel_from_file'

    def produce_outputs(self, trans, out_data, output_collections, incoming, history):
        """Copy every element of the input collection under its new identifier.

        :param trans: not used by this implementation.
        :param out_data: not used by this implementation.
        :param output_collections: object whose ``create_collection`` receives
            the relabelled elements.
        :param incoming: tool state; reads ``incoming["input"]`` and the
            ``how_select``, ``labels`` and ``strict`` keys of ``incoming["how"]``.
        :param history: history that copied datasets are added to.
        :raises Exception: if the mapping file has the wrong number of entries
            (strict mode), a label is missing, duplicated or malformed, or a
            resulting identifier contains characters outside ``[\\w-]``.
        """
        hdca = incoming["input"]
        how_type = incoming["how"]["how_select"]
        new_labels_dataset_assoc = incoming["how"]["labels"]
        strict = string_as_bool(incoming["how"]["strict"])
        elements = hdca.collection.elements
        new_elements = odict()

        def add_copied_value_to_new_elements(new_label, dce_object):
            # Identifiers are compared after stripping surrounding whitespace
            # (including the trailing newline kept by readlines()).
            new_label = new_label.strip()
            if new_label in new_elements:
                raise Exception("New identifier [%s] appears twice in resulting collection, these values must be unique." % new_label)
            copied_value = dce_object.copy()
            if getattr(copied_value, "history_content_type", None) == "dataset":
                history.add_dataset(copied_value, set_hid=False)
            new_elements[new_label] = copied_value

        # Use a context manager so the file handle is always released.
        with open(new_labels_dataset_assoc.file_name, "r") as labels_file:
            new_labels = labels_file.readlines(1024 * 1000000)

        if strict and len(elements) != len(new_labels):
            raise Exception("Relabel mapping file contains incorrect number of identifiers")

        if how_type == "tabular":
            # We have a tabular file, where the first column is an existing element identifier,
            # and the second column is the new element identifier.
            new_labels_dict = {}
            for line in new_labels:
                fields = line.strip().split('\t')
                if fields == ['']:
                    # Tolerate blank lines instead of failing on tuple unpacking.
                    continue
                if len(fields) != 2:
                    raise Exception("Relabel mapping line [%s] must contain exactly two tab-separated columns" % line.strip())
                source, new_label = fields
                new_labels_dict[source] = new_label
            for dce in elements:
                element_identifier = dce.element_identifier
                # Strict mode requires every identifier to be mapped; otherwise
                # unmapped elements keep their current identifier.
                default = None if strict else element_identifier
                new_label = new_labels_dict.get(element_identifier, default)
                if not new_label:
                    raise Exception("Failed to find new label for identifier [%s]" % element_identifier)
                add_copied_value_to_new_elements(new_label, dce.element_object)
        else:
            # If new_labels_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset
            if len(new_labels) < len(elements):
                # Fail with a clear message rather than an IndexError below.
                raise Exception("Relabel mapping file contains incorrect number of identifiers")
            for i, dce in enumerate(elements):
                add_copied_value_to_new_elements(new_labels[i], dce.element_object)

        # Reject identifiers containing anything but word characters and dashes.
        valid_identifier = re.compile(r"^[\w\-_]+$")
        for key in new_elements:
            if not valid_identifier.match(key):
                raise Exception("Invalid new collection identifier [%s]" % key)

        output_collections.create_collection(
            next(iter(self.outputs.values())), "output", elements=new_elements
        )


# Populate tool_type to ToolClass mappings
tool_types = {}
for tool_class in [ Tool, SetMetadataTool, OutputParameterJSONTool,
                    DataManagerTool, DataSourceTool, AsyncDataSourceTool,
                    UnzipCollectionTool, ZipCollectionTool, MergeCollectionTool, RelabelFromFileTool,
                    DataDestinationTool ]:
    tool_types[ tool_class.tool_type ] = tool_class

# diff --git a/lib/galaxy/tools/relabel_from_file.xml b/lib/galaxy/tools/relabel_from_file.xml
# new file mode 100644
# index 000000000000..ca806b3b5ec5
# --- /dev/null
# +++ b/lib/galaxy/tools/relabel_from_file.xml
# @@ -0,0 +1,149 @@
# (XML markup is not present in this view; visible tool description: "from contents of a file")
This tool will take an input list and a text file with new identifiers + and build a new list with the same datasets but these new identifiers. + The order and number of entries in the text file must match the order + of the items you want to rename in your dataset collection. + + Alternatively, a tabular file may be supplied, where the first column + is the current identifier that should be renamed, and the second column + contains the new label. This file may contain fewer entries than items + in the collection. In that case only matching list identifiers will be + relabeled. + + This tool will create new history datasets from your collection + but your quota usage will not increase. + + diff --git a/test-data/new_labels_1.txt b/test-data/new_labels_1.txt new file mode 100644 index 000000000000..3fe0afa80c6f --- /dev/null +++ b/test-data/new_labels_1.txt @@ -0,0 +1 @@ +new_i1 \ No newline at end of file diff --git a/test-data/new_labels_2.txt b/test-data/new_labels_2.txt new file mode 100644 index 000000000000..7f11cbd1aafa --- /dev/null +++ b/test-data/new_labels_2.txt @@ -0,0 +1 @@ +i1 new_i1 diff --git a/test-data/new_labels_bad_1.txt b/test-data/new_labels_bad_1.txt new file mode 100644 index 000000000000..5177cef38e05 --- /dev/null +++ b/test-data/new_labels_bad_1.txt @@ -0,0 +1 @@ +new_i; rm -rf \ No newline at end of file diff --git a/test-data/new_labels_bad_2.txt b/test-data/new_labels_bad_2.txt new file mode 100644 index 000000000000..591022662d72 --- /dev/null +++ b/test-data/new_labels_bad_2.txt @@ -0,0 +1,2 @@ +newi1 +newi1 \ No newline at end of file diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml index 59f76c37d1d1..35a1480f8172 100644 --- a/test/functional/tools/samples_tool_conf.xml +++ b/test/functional/tools/samples_tool_conf.xml @@ -146,5 +146,6 @@ +