Allow multiple simultaneous uploads via single POST. #4563

Merged
merged 1 commit on Oct 4, 2017
7 changes: 3 additions & 4 deletions lib/galaxy/tools/parameters/__init__.py
@@ -298,11 +298,10 @@ def populate_state(request_context, inputs, incoming, state, errors={}, prefix='
elif input.type == 'section':
populate_state(request_context, input.inputs, incoming, group_state, errors, prefix=group_prefix, context=context, check=check)
elif input.type == 'upload_dataset':
d_type = input.get_datatype(request_context, context=context)
writable_files = d_type.writable_files
while len(group_state) > len(writable_files):
file_count = input.get_file_count(request_context, context)
while len(group_state) > file_count:
del group_state[-1]
while len(writable_files) > len(group_state):
while file_count > len(group_state):
new_state = {'__index__' : len(group_state)}
for upload_item in input.inputs.values():
new_state[upload_item.name] = upload_item.get_initial_value(request_context, context)
87 changes: 68 additions & 19 deletions lib/galaxy/tools/parameters/grouping.py
@@ -219,17 +219,30 @@ def get_file_base_name(self, context):
fd = context.get('files_metadata|base_name', 'Galaxy_Composite_file')
return fd

def get_file_type(self, context):
return context.get(self.file_type_name, self.default_file_type)

def get_datatype_ext(self, trans, context):
ext = self.get_file_type(context)
def get_file_type(self, context, parent_context=None):
file_type = context.get(self.file_type_name, None)
if file_type == "":
if parent_context:
file_type = parent_context.get(self.file_type_name, self.default_file_type)
else:
file_type = self.default_file_type
return file_type

def get_dbkey(self, context, parent_context=None):
dbkey = context.get("dbkey", None)
if dbkey == "":
if parent_context:
dbkey = parent_context.get("dbkey", dbkey)
return dbkey

Contributor:
I might be missing something, but why are we not just returning parent_context.get("dbkey", context.get("dbkey"))?

Member Author:
So the inner dbkey - the per-file one - is just a hidden field in the tool form with a default of ""; the outer one being checked is the top-level dbkey param in the tool, which I think defaults to "?". So if the API request sends a dbkey for all files and then one for a specific file, I think we should use the base dbkey for all files that don't specify an explicit one and use the specific keys where they are set. I tried to use the same strategy for the file extensions as well. This implementation is crap because we are mapping API requests to a tool that was never really designed to do this well (these frustrations led me to create #4734).

So I implemented API tests to verify all of this behavior with respect to a per-file dbkey versus a dbkey for all files - if you can rework the implementation to be cleaner in a way that doesn't break the API calls, I'm totally on board. I'm quite frustrated with a lot of grouping.py. There are more test cases in #4746 that should also help verify that cleanups to the implementation don't break FTP uploads and such.
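
A minimal sketch of the fallback being discussed, for readers following the thread (illustrative only, not the PR's exact code; the resolve_dbkey name is made up). The per-file dbkey is a hidden field that defaults to "", so an empty string means "not set for this file" and the value sent for all files wins:

```python
def resolve_dbkey(file_context, parent_context=None):
    # Per-file value first; "" means the hidden per-file field was left unset.
    dbkey = file_context.get("dbkey", "")
    if dbkey == "" and parent_context is not None:
        # Fall back to the dbkey sent for all files (which may itself be "?").
        dbkey = parent_context.get("dbkey", "?")
    return dbkey

# resolve_dbkey({"dbkey": ""}, {"dbkey": "hg19"})     -> "hg19"
# resolve_dbkey({"dbkey": "hg18"}, {"dbkey": "hg19"}) -> "hg18"
```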


def get_datatype_ext(self, trans, context, parent_context=None):
ext = self.get_file_type(context, parent_context=parent_context)
if ext in self.file_type_to_ext:
ext = self.file_type_to_ext[ext] # when using autodetect, we will use composite info from 'text', i.e. only the main file
return ext

def get_datatype(self, trans, context):
ext = self.get_datatype_ext(trans, context)
def get_datatype(self, trans, context, parent_context=None):
ext = self.get_datatype_ext(trans, context, parent_context=parent_context)
return trans.app.datatypes_registry.get_datatype_by_extension(ext)

@property
@@ -249,6 +262,8 @@ def title_by_index(self, trans, index, context):
if composite_file.optional:
rval = "%s [optional]" % rval
return rval
if index < self.get_file_count(trans, context):
return "Extra primary file"
return None

def value_to_basic(self, value, app):
@@ -279,10 +294,14 @@ def value_from_basic(self, value, app, ignore_errors=False):
rval.append(rval_dict)
return rval

def get_file_count(self, trans, context):
file_count = context.get("file_count", "auto")
return len(self.get_datatype(trans, context).writable_files) if file_count == "auto" else int(file_count)

def get_initial_value(self, trans, context):
d_type = self.get_datatype(trans, context)
file_count = self.get_file_count(trans, context)
rval = []
for i, (composite_name, composite_file) in enumerate(d_type.writable_files.items()):
for i in range(file_count):
rval_dict = {}
rval_dict['__index__'] = i # create __index__
for input in self.inputs.values():
@@ -356,6 +375,8 @@ def get_one_filename(context):
name = context.get('NAME', None)
info = context.get('INFO', None)
uuid = context.get('uuid', None) or None # Turn '' to None
file_type = context.get('file_type', None)
dbkey = self.get_dbkey(context)
warnings = []
to_posix_lines = False
if context.get('to_posix_lines', None) not in ["None", None, False]:
@@ -405,6 +426,10 @@ def get_one_filename(context):
file_bunch.auto_decompress = auto_decompress
file_bunch.space_to_tab = space_to_tab
file_bunch.uuid = uuid
if file_type is not None:
file_bunch.file_type = file_type
if dbkey is not None:
file_bunch.dbkey = dbkey
return file_bunch, warnings

def get_filenames(context):
@@ -414,6 +439,8 @@ def get_filenames(context):
uuid = context.get('uuid', None) or None # Turn '' to None
name = context.get('NAME', None)
info = context.get('INFO', None)
file_type = context.get('file_type', None)
dbkey = self.get_dbkey(context)
to_posix_lines = False
if context.get('to_posix_lines', None) not in ["None", None, False]:
to_posix_lines = True
@@ -429,13 +456,23 @@ def get_filenames(context):
file_bunch.to_posix_lines = to_posix_lines
file_bunch.auto_decompress = auto_decompress
file_bunch.space_to_tab = space_to_tab
if file_type is not None:
file_bunch.file_type = file_type
if dbkey is not None:
file_bunch.dbkey = dbkey

rval.append(file_bunch)
for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
if file_bunch.path:
file_bunch.uuid = uuid
file_bunch.to_posix_lines = to_posix_lines
file_bunch.auto_decompress = auto_decompress
file_bunch.space_to_tab = space_to_tab
if file_type is not None:
file_bunch.file_type = file_type
if dbkey is not None:
file_bunch.dbkey = dbkey

rval.append(file_bunch)
# look for files uploaded via FTP
valid_files = []
@@ -474,15 +511,20 @@ def get_filenames(context):
file_bunch.to_posix_lines = to_posix_lines
file_bunch.auto_decompress = auto_decompress
file_bunch.space_to_tab = space_to_tab
if file_type is not None:
file_bunch.file_type = file_type
if dbkey is not None:
file_bunch.dbkey = dbkey
rval.append(file_bunch)
return rval
file_type = self.get_file_type(context)
file_count = self.get_file_count(trans, context)
d_type = self.get_datatype(trans, context)
dbkey = context.get('dbkey', None)
dbkey = self.get_dbkey(context)
tag_using_filenames = context.get('tag_using_filenames', False)
writable_files = d_type.writable_files
writable_files_offset = 0
groups_incoming = [None for _ in writable_files]
groups_incoming = [None for _ in range(file_count)]
for group_incoming in context.get(self.name, []):
i = int(group_incoming['__index__'])
groups_incoming[i] = group_incoming
@@ -524,6 +566,10 @@ def get_filenames(context):
dataset.to_posix_lines = file_bunch.to_posix_lines
dataset.auto_decompress = file_bunch.auto_decompress
dataset.space_to_tab = file_bunch.space_to_tab
if file_bunch.file_type:
dataset.file_type = file_type
if file_bunch.dbkey:
dataset.dbkey = dbkey
dataset.warnings.extend(warnings)
if dataset.primary_file is None: # remove this before finish, this should create an empty dataset
raise Exception('No primary dataset file was available for composite upload')
@@ -544,15 +590,18 @@ def get_filenames(context):
dataset.warnings.append("A required composite file (%s) was not specified." % (key))
return [dataset]
else:
datasets = get_filenames(context[self.name][0])
rval = []
for dataset in datasets:
dataset.file_type = file_type
dataset.datatype = d_type
dataset.ext = self.get_datatype_ext(trans, context)
dataset.dbkey = dbkey
dataset.tag_using_filenames = tag_using_filenames
rval.append(dataset)
for i, file_contexts in enumerate(context[self.name]):
datasets = get_filenames(file_contexts)
for dataset in datasets:
override_file_type = self.get_file_type(context[self.name][i], parent_context=context)
d_type = self.get_datatype(trans, context[self.name][i], parent_context=context)
dataset.file_type = override_file_type
dataset.datatype = d_type
dataset.ext = self.get_datatype_ext(trans, context[self.name][i], parent_context=context)
dataset.dbkey = self.get_dbkey(context[self.name][i], parent_context=context)
dataset.tag_using_filenames = tag_using_filenames
rval.append(dataset)
return rval


93 changes: 93 additions & 0 deletions test/api/test_tools.py
@@ -185,6 +185,99 @@ def _get_roadmaps_content(self, history_id, dataset):
roadmaps_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset, filename="Roadmaps")
return roadmaps_content

def test_upload_dbkey(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
datasets = run_response.json()["outputs"]
assert datasets[0].get("genome_build") == "hg19", datasets[0]

def test_upload_multiple_files_1(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
dbkey="hg19",
extra_inputs={
"files_1|url_paste": "SecondOutputContent",
"files_1|NAME": "SecondOutputName",
"files_1|file_type": "tabular",
"files_1|dbkey": "hg18",
"file_count": "2",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
datasets = run_response.json()["outputs"]

assert len(datasets) == 2, datasets
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
assert content.strip() == "Test123"
assert datasets[0]["file_ext"] == "txt"
assert datasets[0]["genome_build"] == "hg19", datasets

content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
assert content.strip() == "SecondOutputContent"
assert datasets[1]["file_ext"] == "tabular"
assert datasets[1]["genome_build"] == "hg18", datasets

def test_upload_multiple_files_2(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
file_type="tabular",
dbkey="hg19",
extra_inputs={
"files_1|url_paste": "SecondOutputContent",
"files_1|NAME": "SecondOutputName",
"files_1|file_type": "txt",
"files_1|dbkey": "hg18",
"file_count": "2",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
datasets = run_response.json()["outputs"]

assert len(datasets) == 2, datasets
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
assert content.strip() == "Test123"
assert datasets[0]["file_ext"] == "tabular", datasets
assert datasets[0]["genome_build"] == "hg19", datasets

content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
assert content.strip() == "SecondOutputContent"
assert datasets[1]["file_ext"] == "txt"
assert datasets[1]["genome_build"] == "hg18", datasets

def test_upload_multiple_files_3(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123",
file_type="tabular",
dbkey="hg19",
extra_inputs={
"files_0|file_type": "txt",
"files_0|dbkey": "hg18",
"files_1|url_paste": "SecondOutputContent",
"files_1|NAME": "SecondOutputName",
"files_1|file_type": "txt",
"files_1|dbkey": "hg18",
"file_count": "2",
}
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
datasets = run_response.json()["outputs"]

assert len(datasets) == 2, datasets
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[0])
assert content.strip() == "Test123"
assert datasets[0]["file_ext"] == "txt", datasets
assert datasets[0]["genome_build"] == "hg18", datasets

content = self.dataset_populator.get_history_dataset_content(history_id, dataset=datasets[1])
assert content.strip() == "SecondOutputContent"
assert datasets[1]["file_ext"] == "txt"
assert datasets[1]["genome_build"] == "hg18", datasets

def test_unzip_collection(self):
with self.dataset_populator.test_history() as history_id:
hdca_id = self.__build_pair(history_id, ["123", "456"])
4 changes: 2 additions & 2 deletions tools/data_source/upload.py
@@ -408,7 +408,7 @@ def __main__():
dataset = util.bunch.Bunch(**safe_dict(dataset))
try:
output_path = output_paths[int(dataset.dataset_id)][0]
except:
except Exception:
print('Output path for dataset %s not found on command line' % dataset.dataset_id, file=sys.stderr)
sys.exit(1)
if dataset.type == 'composite':
@@ -422,7 +422,7 @@ def __main__():
# parent directory is writable by the user.
try:
os.remove(sys.argv[3])
except:
except Exception:
pass


5 changes: 5 additions & 0 deletions tools/data_source/upload.xml
@@ -32,6 +32,7 @@
</options>
</param>
<param name="async_datasets" type="hidden" value="None"/>
<param name="file_count" type="hidden" value="auto" />
<upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata">
<param name="file_data" type="file" size="30" label="File" ajax-upload="true" help="TIP: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail. To upload large files, use the URL method (below) or FTP (if enabled by the site administrator).">
</param>
@@ -43,6 +44,10 @@
<param name="uuid" type="hidden" required="False" />
<param name="to_posix_lines" type="hidden" value="Yes" />
<param name="auto_decompress" type="hidden" value="Yes" />
<!-- allow per-file override of file_type and dbkey -->
<param name="file_type" type="hidden" value="" />
<param name="dbkey" type="hidden" value="" />

<!--
<param name="to_posix_lines" type="select" display="checkboxes" multiple="True" label="Convert universal line endings to Posix line endings" help="Turn this option off if you upload a gzip, bz2 or zip archive which contains a binary file." value="Yes">
<option value="Yes" selected="true">Yes</option>
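
As a usage illustration, this is the shape of a single-POST multi-file upload these changes enable, mirroring test_upload_multiple_files_1 above (a sketch meant to sit inside one of the API test methods; upload_payload and tools_post are the test helpers already used in test_tools.py, and the values are illustrative):

```python
# One POST, two uploads: the top-level dbkey applies to any file that does not
# override it, while the files_1|* keys set per-file values for the second file.
payload = self.dataset_populator.upload_payload(
    history_id, "Test123",
    dbkey="hg19",
    extra_inputs={
        "files_1|url_paste": "SecondOutputContent",
        "files_1|NAME": "SecondOutputName",
        "files_1|file_type": "tabular",  # per-file override of the file type
        "files_1|dbkey": "hg18",         # per-file override of the genome build
        "file_count": "2",               # two upload slots in this single POST
    },
)
run_response = self.dataset_populator.tools_post(payload)
self.dataset_populator.wait_for_tool_run(history_id, run_response)
```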