Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4d6ab0d
fetching of whole directories
sjanssen2 Nov 6, 2025
dae7765
adding a test for directory fetching
sjanssen2 Nov 6, 2025
296e138
using a local path
sjanssen2 Nov 6, 2025
821d5a1
debug
sjanssen2 Nov 6, 2025
bf327a7
more debugging
sjanssen2 Nov 6, 2025
74a8dff
more debug
sjanssen2 Nov 6, 2025
138d6a3
remove infos
sjanssen2 Nov 6, 2025
3c086c8
explicit file names
sjanssen2 Nov 6, 2025
9b54be3
avoid access to PluginTestCase
sjanssen2 Nov 6, 2025
bfe86f8
test with prefix
sjanssen2 Nov 6, 2025
f70bd86
remove ./
sjanssen2 Nov 6, 2025
d3709d9
debug
sjanssen2 Nov 6, 2025
405f52e
check nginx
sjanssen2 Nov 6, 2025
09a43ca
manually switch to tornado
sjanssen2 Nov 6, 2025
038db34
check nginx logs
sjanssen2 Nov 7, 2025
a2919cc
pull correct branch
sjanssen2 Nov 7, 2025
c48fe15
proper renaming
sjanssen2 Nov 7, 2025
c3163fb
create dir first
sjanssen2 Nov 7, 2025
d36f4b8
clean up test
sjanssen2 Nov 7, 2025
4005690
debug
sjanssen2 Nov 7, 2025
34ff8b0
more debug
sjanssen2 Nov 7, 2025
9406453
adapt fp
sjanssen2 Nov 7, 2025
5d968a8
clean up
sjanssen2 Nov 7, 2025
d226ab5
clean up workflow file
sjanssen2 Nov 7, 2025
18dbaa8
prepare to clean up filepath mess
sjanssen2 Nov 7, 2025
e21171b
get files
sjanssen2 Nov 7, 2025
7cd6f16
more debug
sjanssen2 Nov 7, 2025
9e2baf0
forcer cat
sjanssen2 Nov 7, 2025
c3cc119
read log here
sjanssen2 Nov 7, 2025
7b11c64
debug in client
sjanssen2 Nov 7, 2025
13b226a
remove cat
sjanssen2 Nov 7, 2025
c8eaae6
trigger build
sjanssen2 Nov 7, 2025
c560eff
trigger
sjanssen2 Nov 7, 2025
86f86fb
trigger
sjanssen2 Nov 7, 2025
119e14a
clean debug infos
sjanssen2 Nov 7, 2025
fa819be
remove debug infos
sjanssen2 Nov 7, 2025
eed36ed
Merge pull request #3 from jlab/fetch_directories_clean
sjanssen2 Nov 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/qiita-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ jobs:

# we need to download qiita directly so we have "easy" access to
# all config files
wget https://github.com/biocore/qiita/archive/dev.zip
unzip dev.zip
# wget https://github.com/biocore/qiita/archive/dev.zip
# unzip dev.zip
wget https://github.com/jlab/qiita/archive/refs/heads/tornado_FetchFileFromCentralHandler_alsoDirs_debug.zip
unzip tornado_FetchFileFromCentralHandler_alsoDirs_debug.zip
mv qiita-tornado_FetchFileFromCentralHandler_alsoDirs_debug qiita-dev

# pull out the port so we can modify the configuration file easily
pgport=${{ job.services.postgres.ports[5432] }}
Expand Down
63 changes: 39 additions & 24 deletions qiita_client/qiita_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from json import dumps
from random import randint
import fnmatch
from io import BytesIO
from zipfile import ZipFile


try:
from itertools import zip_longest
Expand Down Expand Up @@ -269,7 +272,7 @@ def _request_oauth2(self, req, rettype, *args, **kwargs):
The request to execute
rettype : string
The return type of the function, either "json" or
if e.g. files are transferred "content"
"object" for the response object itself
args : tuple
The request args
kwargs : dict
Expand Down Expand Up @@ -328,15 +331,15 @@ def _request_retry(self, req, url, rettype='json', **kwargs):
The request to execute
rettype : string
The return type of the function, either "json" (default) or
if e.g. files are transferred "content"
"object" for the response object itself
url : str
The url to access in the server
kwargs : dict
The request kwargs

Returns
-------
dict or None or plain content IF rettype='content'
dict or None or response object IF rettype='object'
The JSON information in the request response, if any

Raises
Expand Down Expand Up @@ -391,13 +394,13 @@ def _request_retry(self, req, url, rettype='json', **kwargs):
if rettype is None or rettype == 'json':
return r.json()
else:
if rettype == 'content':
return r.content
if rettype == 'object':
return r
else:
raise ValueError(
("return type rettype='%s' cannot be "
"understand. Choose from 'json' (default) "
"or 'content!") % rettype)
"or 'object!") % rettype)
except ValueError:
return None
stime = randint(MIN_TIME_SLEEP, MAX_TIME_SLEEP)
Expand All @@ -418,7 +421,7 @@ def get(self, url, rettype='json', **kwargs):
The url to access in the server
rettype : string
The return type of the function, either "json" (default) or
if e.g. files are transferred "content"
"object" for the response object itself
kwargs : dict
The request kwargs

Expand Down Expand Up @@ -746,8 +749,8 @@ def _process_files_per_sample_fastq(self, files, prep_info,
return sample_names, prep_info

def fetch_file_from_central(self, filepath, prefix=None):
"""Moves content of a file from Qiita's central BASE_DATA_DIR to a
local plugin file-system.
"""Moves content of a file or directory from Qiita's central
BASE_DATA_DIR to a local plugin file-system.

By default, this is exactly the same location, i.e. the return
filepath is identical to the requested one and nothing is moved /
Expand All @@ -760,22 +763,24 @@ def fetch_file_from_central(self, filepath, prefix=None):
----------
filepath : str
The filepath in Qiita's central BASE_DATA_DIR to the requested
file content
file or directory content
prefix : str
Primarily for testing: prefix the target filepath with this
filepath prefix to
a) in 'filesystem' mode: create an actual file copy (for testing)
a) in 'filesystem' mode: create an actual file/directory copy
(for testing)
If prefix=None, nothing will be copied/moved
b) in 'https' mode: flexibility to locate files differently in
plugin local file system.
b) in 'https' mode: flexibility to locate files/directories
differently in plugin local file system.

Returns
-------
str : the filepath of the requested file within the local file system
str : the filepath of the requested file or directory within the local
file system
"""
target_filepath = filepath
logger.debug(
'Fetching file "%s" via protocol=%s from Qiita main.' % (
'Fetching file/directory "%s" via protocol=%s from Qiita main.' % (
filepath, self._plugincoupling))

if (prefix is not None) and (prefix != ""):
Expand All @@ -792,7 +797,10 @@ def fetch_file_from_central(self, filepath, prefix=None):
if not os.path.exists(os.path.dirname(target_filepath)):
os.makedirs(os.path.dirname(target_filepath))

shutil.copyfile(filepath, target_filepath)
if os.path.isdir(filepath):
shutil.copytree(filepath, target_filepath)
else:
shutil.copyfile(filepath, target_filepath)

return target_filepath

Expand All @@ -802,17 +810,24 @@ def fetch_file_from_central(self, filepath, prefix=None):
filepath = filepath[len(os.path.abspath(os.sep)):]

# actual call to Qiita central to obtain file content
content = self.get(
response = self.get(
'/cloud/fetch_file_from_central/' + filepath,
rettype='content')
rettype='object')

# check if requested filepath is a single file OR a whole directory
if 'Is-Qiita-Directory' in response.headers.keys():
with ZipFile(BytesIO(response.content)) as zf:
zf.extractall(path=target_filepath)
else:
content = response.content

# create necessary directory locally
if not os.path.exists(os.path.dirname(target_filepath)):
os.makedirs(os.path.dirname(target_filepath))
# create necessary directory locally
if not os.path.exists(os.path.dirname(target_filepath)):
os.makedirs(os.path.dirname(target_filepath))

# write retrieved file content
with open(target_filepath, 'wb') as f:
f.write(content)
# write retrieved file content
with open(target_filepath, 'wb') as f:
f.write(content)

return target_filepath

Expand Down
26 changes: 26 additions & 0 deletions qiita_client/tests/test_qiita_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,32 @@ def test_delete_file_from_central(self):
# qiita main filepath, returned by delete_file_from_central
self.assertTrue(exists(fp_deleted))

def test_fetch_directory(self):
    """Push a test directory to Qiita main and fetch it back again.

    The directory is set up through ``_create_test_dir``, pushed with the
    plugin coupling switched to 'https', and then fetched into a location
    below a different prefix. One file of the fetched directory is read
    to confirm that it carries the expected content.
    """
    # A bit hacky: BASE_DATA_DIR is recovered by cutting the known
    # relative path off the first raw forward filepath of artifact 1.
    # This holds as long as the test database does not change.
    known_suffix = 'raw_data/1_s_G1_L001_sequences.fastq.gz'
    artifact_info = self.qclient.get('/qiita_db/artifacts/1/')
    forward_fp = artifact_info['files']['raw_forward_seqs'][0]['filepath']
    base_data_dir = forward_fp[:-len(known_suffix)]

    # Set up a LOCAL test directory below base_data_dir; "job" is the
    # according mountpoint.
    fp_test = join(base_data_dir, 'job', '2_test_folder')
    self._create_test_dir(prefix=fp_test)

    # Transmit the test directory to qiita main (remote).
    self.tester._plugincoupling = 'https'
    self.tester.push_file_to_central(fp_test)

    # Fetch the directory from qiita main into a different location
    # (=prefix) than the one it was generated in.
    local_prefix = join(expanduser("~"), 'localFetch')
    fp_obs = self.tester.fetch_file_from_central(
        fp_test, prefix=local_prefix)

    # A file of the freshly transferred directory must hold the expected
    # content.
    fetched_file = join(fp_obs, 'testdir', 'fileA.txt')
    with open(fetched_file, 'r') as handle:
        observed = '\n'.join(handle.readlines())
    self.assertIn('contentA', observed)


if __name__ == '__main__':
main()
Loading