Merge pull request #6844 from jmchilton/compressed_types_and_extra_files
Infrastructure/API Support for Composite Uploads/Downloads
jmchilton committed Nov 1, 2018
2 parents c312c10 + e3bc005 commit f035d64
Showing 15 changed files with 244 additions and 32 deletions.
6 changes: 5 additions & 1 deletion config/datatypes_conf.xml.sample
@@ -197,7 +197,11 @@
<datatype extension="nhdr" type="galaxy.datatypes.images:Nrrd" subclass="true"/>
<datatype extension="rna_eps" type="galaxy.datatypes.sequence:RNADotPlotMatrix" mimetype="image/eps" display_in_upload="true"/>
<datatype extension="zip" type="galaxy.datatypes.binary:CompressedZipArchive" display_in_upload="true"/>
<datatype extension="tar" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true"/>
<datatype extension="tar" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true">
<converter file="tar_to_directory.xml" target_datatype="directory"/>
</datatype>
<datatype extension="directory" type="galaxy.datatypes.data:Directory">
</datatype>
<!-- Proteomics Datatypes -->
<datatype extension="pepxml" type="galaxy.datatypes.proteomics:PepXml" mimetype="application/xml" display_in_upload="true"/>
<datatype extension="raw_pepxml" type="galaxy.datatypes.proteomics:PepXml" mimetype="application/xml" subclass="true"/>
16 changes: 16 additions & 0 deletions lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -0,0 +1,16 @@
<tool id="CONVERTER_tar_to_directory" name="Convert tar to directory" version="1.0.0" profile="17.05">
<!-- Don't use tar directly so we can verify safety of results - tar -xzf '$input1'; -->
<command>
mkdir '$output1.files_path';
cd '$output1.files_path';
python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.');"
</command>
<inputs>
<param format="tar" name="input1" type="data"/>
</inputs>
<outputs>
<data format="directory" name="output1"/>
</outputs>
<help>
</help>
</tool>
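For reference, the converter's <command> block amounts to the following standalone Python (a sketch only; the literal paths stand in for Galaxy's $input1 and $output1.files_path template values, which are substituted at job runtime):

    import os

    from galaxy.util.compression_utils import CompressedFile

    input_tar = "/tmp/input.tar"        # stands in for $input1
    files_path = "/tmp/output1_files"   # stands in for $output1.files_path

    os.mkdir(files_path)                # mkdir '$output1.files_path';
    os.chdir(files_path)                # cd '$output1.files_path';
    # Extract via CompressedFile rather than `tar -xf` so member paths can be
    # checked for safety before they are written.
    CompressedFile(input_tar).extract(".")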
4 changes: 4 additions & 0 deletions lib/galaxy/datatypes/data.py
@@ -927,6 +927,10 @@ def regex_line_dataprovider(self, dataset, **settings):
return dataproviders.line.RegexLineDataProvider(dataset_source, **settings)


class Directory(Data):
"""Class representing a directory of files."""


class GenericAsn1(Text):
"""Class for generic ASN.1 text format"""
edam_data = "data_0849"
1 change: 1 addition & 0 deletions lib/galaxy/tools/__init__.py
@@ -138,6 +138,7 @@
"CONVERTER_maf_to_fasta_0",
"CONVERTER_maf_to_interval_0",
"CONVERTER_wiggle_to_interval_0",
"CONVERTER_tar_to_directory",
# Tools improperly migrated to the tool shed (devteam)
"qualityFilter",
"winSplitter",
19 changes: 13 additions & 6 deletions lib/galaxy/tools/parameters/grouping.py
@@ -16,6 +16,7 @@
ConfigDoesNotAllowException,
)
from galaxy.util import (
asbool,
inflector,
relpath,
sanitize_for_filename
@@ -217,7 +218,7 @@ def get_composite_dataset_name(self, context):
filenames = list()
for composite_file in context.get('files', []):
if not composite_file.get('ftp_files', ''):
filenames.append(composite_file.get('file_data', {}).get('filename', ''))
filenames.append((composite_file.get('file_data') or {}).get('filename', ''))
else:
filenames.append(composite_file.get('ftp_files', [])[0])
dataset_name = os.path.commonprefix(filenames).rstrip('.') or None
@@ -543,13 +544,14 @@ def get_filenames(context):
d_type = self.get_datatype(trans, context)
dbkey = self.get_dbkey(context)
tag_using_filenames = context.get('tag_using_filenames', False)
force_composite = asbool(context.get('force_composite', 'False'))
writable_files = d_type.writable_files
writable_files_offset = 0
groups_incoming = [None for _ in range(file_count)]
for group_incoming in context.get(self.name, []):
i = int(group_incoming['__index__'])
groups_incoming[i] = group_incoming
if d_type.composite_type is not None:
if d_type.composite_type is not None or force_composite:
# handle uploading of composite datatypes
# Only one Dataset can be created
dataset = Bunch()
@@ -594,18 +596,23 @@ def get_filenames(context):
dataset.warnings.extend(warnings)
if dataset.primary_file is None: # remove this before finish, this should create an empty dataset
raise Exception('No primary dataset file was available for composite upload')
keys = [value.name for value in writable_files.values()]
for i, group_incoming in enumerate(groups_incoming[writable_files_offset :]):
if not force_composite:
keys = [value.name for value in writable_files.values()]
else:
keys = [str(index) for index in range(file_count)]
for i, group_incoming in enumerate(groups_incoming[writable_files_offset:]):
key = keys[i + writable_files_offset]
if group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
if not force_composite and group_incoming is None and not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
dataset.warnings.append("A required composite file (%s) was not specified." % (key))
dataset.composite_files[key] = None
else:
file_bunch, warnings = get_one_filename(group_incoming)
dataset.warnings.extend(warnings)
if file_bunch.path:
if force_composite:
key = group_incoming.get("NAME") or i
dataset.composite_files[key] = file_bunch.__dict__
else:
elif not force_composite:
dataset.composite_files[key] = None
if not writable_files[list(writable_files.keys())[keys.index(key)]].optional:
dataset.warnings.append("A required composite file (%s) was not specified." % (key))
4 changes: 4 additions & 0 deletions lib/galaxy/util/compression_utils.py
@@ -65,6 +65,10 @@ def get_fileobj_raw(filename, mode="r", compressed_formats=None):

class CompressedFile(object):

@staticmethod
def can_decompress(file_path):
return tarfile.is_tarfile(file_path) or zipfile.is_zipfile(file_path)

def __init__(self, file_path, mode='r'):
if tarfile.is_tarfile(file_path):
self.file_type = 'tar'
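A minimal usage sketch for the new helper, assuming a hypothetical local archive path; can_decompress simply reports whether CompressedFile will accept the file, so callers can guard extraction:

    from galaxy.util.compression_utils import CompressedFile

    archive_path = "/tmp/testdir.tar"  # hypothetical path to a tar or zip archive

    if CompressedFile.can_decompress(archive_path):
        # True for anything tarfile or zipfile recognizes.
        CompressedFile(archive_path).extract("/tmp/extracted")
    else:
        print("%s is neither a tar nor a zip archive" % archive_path)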
23 changes: 23 additions & 0 deletions lib/galaxy/webapps/galaxy/api/datasets.py
@@ -2,6 +2,7 @@
API operations on the contents of a history dataset.
"""
import logging
import os

from six import string_types

@@ -13,6 +14,9 @@
web
)
from galaxy.datatypes import dataproviders
from galaxy.util.path import (
safe_walk
)
from galaxy.visualization.data_providers.genome import (
BamDataProvider,
FeatureLocationIndexDataProvider,
@@ -302,6 +306,25 @@ def _raw_data(self, trans, dataset, provider=None, **kwargs):

return data

@web.expose_api_anonymous
def extra_files(self, trans, history_content_id, history_id, **kwd):
"""
GET /api/histories/{encoded_history_id}/contents/{encoded_content_id}/extra_files
Generate list of extra files.
"""
decoded_content_id = self.decode_id(history_content_id)

hda = self.hda_manager.get_accessible(decoded_content_id, trans.user)
extra_files_path = hda.extra_files_path
rval = []
for root, directories, files in safe_walk(extra_files_path):
for directory in directories:
rval.append({"class": "Directory", "path": os.path.relpath(os.path.join(root, directory), extra_files_path)})
for file in files:
rval.append({"class": "File", "path": os.path.relpath(os.path.join(root, file), extra_files_path)})

return rval

@web.expose_api_raw_anonymous
def display(self, trans, history_content_id, history_id,
preview=False, filename=None, to_ext=None, raw=False, **kwd):
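A sketch of calling the new endpoint with the requests library; the Galaxy URL, API key, and encoded ids below are placeholders. The response mirrors the safe_walk loop above: one entry per file or directory, with paths relative to the dataset's extra_files_path.

    import requests

    galaxy_url = "https://galaxy.example.org"  # placeholder instance URL
    history_id = "f2db41e1fa331b3e"            # placeholder encoded history id
    content_id = "5a1cff6882ddb5b2"            # placeholder encoded dataset id

    response = requests.get(
        "%s/api/histories/%s/contents/%s/extra_files" % (galaxy_url, history_id, content_id),
        params={"key": "YOUR_API_KEY"},
    )
    response.raise_for_status()
    # Expected shape, for example:
    # [{"class": "Directory", "path": "testdir"},
    #  {"class": "File", "path": "testdir/a"}]
    print(response.json())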
5 changes: 5 additions & 0 deletions lib/galaxy/webapps/galaxy/buildapp.py
@@ -223,6 +223,11 @@ def populate_api_routes(webapp, app):
controller="history_contents",
action="update_permissions",
conditions=dict(method=["PUT"]))
webapp.mapper.connect("history_contents_extra_files",
"/api/histories/{history_id}/contents/{history_content_id}/extra_files",
controller="datasets",
action="extra_files",
conditions=dict(method=["GET"]))
webapp.mapper.connect("history_contents_metadata_file",
"/api/histories/{history_id}/contents/{history_content_id}/metadata_file",
controller="datasets",
53 changes: 53 additions & 0 deletions test/api/test_tools.py
@@ -221,6 +221,59 @@ def test_test_data_yaml_tools(self):
test_data = test_data_response.json()
assert len(test_data) == 3

@uses_test_history(require_new=False)
def test_upload_composite_as_tar(self, history_id):
tar_path = self.test_data_resolver.get_filename("testdir.tar")
with open(tar_path, "rb") as tar_f:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
extra_inputs={
"files_1|file_data": tar_f,
"files_1|NAME": "composite",
"file_count": "2",
"force_composite": "True",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
dataset = run_response.json()["outputs"][0]
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset)
assert content.strip() == "Test123"
extra_files = self.dataset_populator.get_history_dataset_extra_files(history_id, dataset_id=dataset["id"])
assert len(extra_files) == 5, extra_files
expected_contents = {
"testdir": "Directory",
"testdir/c": "Directory",
"testdir/a": "File",
"testdir/b": "File",
"testdir/c/d": "File",
}
found_files = set()
for extra_file in extra_files:
path = extra_file["path"]
assert path in expected_contents
assert extra_file["class"] == expected_contents[path]
found_files.add(path)

assert len(found_files) == 5, found_files

@uses_test_history(require_new=False)
def test_upload_composite_from_bad_tar(self, history_id):
tar_path = self.test_data_resolver.get_filename("unsafe.tar")
with open(tar_path, "rb") as tar_f:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
extra_inputs={
"files_1|file_data": tar_f,
"files_1|NAME": "composite",
"file_count": "2",
"force_composite": "True",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response, assert_ok=False)
dataset = run_response.json()["outputs"][0]
details = self.dataset_populator.get_history_dataset_details(history_id, dataset=dataset, assert_ok=False)
assert details["state"] == "error"

def test_unzip_collection(self):
with self.dataset_populator.test_history() as history_id:
hdca_id = self.__build_pair(history_id, ["123", "456"])
21 changes: 21 additions & 0 deletions test/api/test_tools_upload.py
@@ -463,6 +463,27 @@ def test_multiple_files_posix_lines(self):
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[2])
assert content == ONE_TO_SIX_WITH_TABS

def test_upload_force_composite(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
extra_inputs={
"files_1|url_paste": "CompositeContent",
"files_1|NAME": "composite",
"file_count": "2",
"force_composite": "True",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
dataset = run_response.json()["outputs"][0]
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset)
assert content.strip() == "Test123"
extra_files = self.dataset_populator.get_history_dataset_extra_files(history_id, dataset_id=dataset["id"])
assert len(extra_files) == 1, extra_files # [{u'path': u'1', u'class': u'File'}]
extra_file = extra_files[0]
assert extra_file["path"] == "composite"
assert extra_file["class"] == "File"

def test_upload_from_invalid_url(self):
history_id, new_dataset = self._upload('https://usegalaxy.org/bla123', assert_ok=False)
dataset_details = self.dataset_populator.get_history_dataset_details(history_id, dataset_id=new_dataset["id"], assert_ok=False)
6 changes: 6 additions & 0 deletions test/base/populators.py
@@ -372,6 +372,12 @@ def get_history_dataset_details(self, history_id, **kwds):
assert details_response.status_code == 200
return details_response.json()

def get_history_dataset_extra_files(self, history_id, **kwds):
dataset_id = self.__history_content_id(history_id, **kwds)
details_response = self._get_contents_request(history_id, "/%s/extra_files" % dataset_id)
assert details_response.status_code == 200, details_response.content
return details_response.json()

def get_history_collection_details(self, history_id, **kwds):
hdca_id = self.__history_content_id(history_id, **kwds)
details_response = self._get_contents_request(history_id, "/dataset_collections/%s" % hdca_id)
7 changes: 7 additions & 0 deletions test/functional/tools/sample_datatypes_conf.xml
@@ -42,5 +42,12 @@
<datatype extension="data_manager_json" type="galaxy.datatypes.text:Json" mimetype="application/json" subclass="true" display_in_upload="false"/>
<datatype extension="data" type="galaxy.datatypes.data:Data" mimetype="application/octet-stream" max_optional_metadata_filesize="1048576" />
<datatype extension="binary" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" max_optional_metadata_filesize="1048576" />
<datatype extension="zip" type="galaxy.datatypes.binary:CompressedZipArchive" display_in_upload="true">
</datatype>
<datatype extension="tar" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true">
<converter file="tar_to_directory.xml" target_datatype="directory"/>
</datatype>
<datatype extension="directory" type="galaxy.datatypes.data:Directory">
</datatype>
</registration>
</datatypes>
22 changes: 22 additions & 0 deletions test/integration/test_upload_configuration_options.py
@@ -839,3 +839,25 @@ def test_fetch_recursive_archive_to_library(self):
library = matching[0]
dataset = self.library_populator.get_library_contents_with_path(library["id"], "/file1")
assert dataset["file_size"] == 6, dataset


class TestDirectoryAndCompressedTypes(BaseUploadContentConfigurationTestCase):

require_admin_user = True

@classmethod
def handle_galaxy_config_kwds(cls, config):
config["allow_path_paste"] = True

def test_tar_to_directory(self):
dataset = self.dataset_populator.new_dataset(
self.history_id, 'file://%s/testdir.tar' % TEST_DATA_DIRECTORY, file_type="tar", auto_decompress=False, wait=True
)
dataset = self.dataset_populator.get_history_dataset_details(self.history_id, dataset=dataset)
assert dataset["file_ext"] == "tar", dataset
response = self.dataset_populator.run_tool(
tool_id="CONVERTER_tar_to_directory",
inputs={"input1": {"src": "hda", "id": dataset["id"]}},
history_id=self.history_id,
)
self.dataset_populator.wait_for_job(response["jobs"][0]["id"])
