Skip to content

Commit

Permalink
Merge pull request #6745 from adswa/mnt-export-archive
Browse files (browse the repository at this point in the history)
MNT: Modernize export-archive
  • Loading branch information
yarikoptic committed Jun 8, 2022
2 parents 40897ef + df977b6 commit 8e49548
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 44 deletions.
70 changes: 27 additions & 43 deletions datalad/local/export_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@

from datalad.interface.base import Interface
from datalad.interface.base import build_doc
from datalad.support import path

from pathlib import Path

@build_doc
class ExportArchive(Interface):
Expand Down Expand Up @@ -77,12 +76,9 @@ def __call__(filename=None,
archivetype='tar',
compression='gz',
missing_content='error'):
import os
import tarfile
import zipfile
from unittest.mock import patch
from os.path import join as opj, dirname, normpath, isabs
import os.path as op

from datalad.distribution.dataset import require_dataset
from datalad.utils import file_basename
Expand Down Expand Up @@ -115,15 +111,17 @@ def _filter_tarinfo(ti):
compression) if archivetype == 'tar' else '')

default_filename = "datalad_{.id}".format(dataset)
if filename is not None:
filename = Path(filename)
if filename is None:
filename = default_filename # in current directory
elif path.exists(filename) and path.isdir(filename):
filename = path.join(filename, default_filename) # under given directory
if not filename.endswith(file_extension):
filename += file_extension
filename = Path(default_filename) # in current directory
elif filename.exists() and filename.is_dir():
filename = filename / default_filename # under given directory
if filename.suffix != file_extension:
filename = filename.with_suffix(file_extension)

root = dataset.path
# use dir inside matching the output filename
# use dir inside matching the output filename without suffix(es)
# TODO: could be an option to the export plugin allowing empty value
# for no leading dir
leading_dir = file_basename(filename)
Expand All @@ -137,46 +135,32 @@ def _filter_tarinfo(ti):
zipfile.ZIP_STORED if not compression else zipfile.ZIP_DEFLATED) \
as archive:
add_method = archive.add if archivetype == 'tar' else archive.write
repo_files = sorted(repo.get_indexed_files())


repo_files = repo.get_content_info(ref='HEAD', untracked='no')
if isinstance(repo, AnnexRepo):
annexed = repo.is_under_annex(
repo_files, allow_quick=True, batch=True)
# remember: returns False for files in Git!
has_content = repo.file_has_content(
repo_files, allow_quick=True, batch=True)
else:
annexed = [False] * len(repo_files)
has_content = [True] * len(repo_files)
for i, rpath in enumerate(repo_files):
fpath = opj(root, rpath)
if annexed[i]:
if not has_content[i]:
if missing_content in ('ignore', 'continue'):
(lgr.warning if missing_content == 'continue' else lgr.debug)(
'File %s has no content available, skipped', fpath)
continue
else:
raise IOError('File %s has no content available' % fpath)

# resolve to possible link target
if op.islink(fpath):
link_target = os.readlink(fpath)
if not isabs(link_target):
link_target = normpath(opj(dirname(fpath), link_target))
fpath = link_target
# add availability (has_content) info
repo_files = repo.get_content_annexinfo(ref='HEAD',
init=repo_files,
eval_availability=True)
for p, props in repo_files.items():
if 'key' in props and not props.get('has_content', False):
if missing_content in ('ignore', 'continue'):
(lgr.warning if missing_content == 'continue' else lgr.debug)(
'File %s has no content available, skipped', p)
continue
else:
raise IOError('File %s has no content available' % p)
# name in the archive
aname = normpath(opj(leading_dir, rpath))
aname = Path(leading_dir) / p.relative_to(repo.pathobj)
add_method(
fpath,
p if 'key' not in props else props['objloc'],
arcname=aname,
**(tar_args if archivetype == 'tar' else {}))

if not isabs(filename):
filename = opj(os.getcwd(), filename)

yield dict(
status='ok',
path=filename,
path=filename.resolve(),
type='file',
action='export_archive',
logger=lgr)
2 changes: 1 addition & 1 deletion datalad/support/gitrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1468,7 +1468,7 @@ def commit(self, msg=None, options=None, _datalad_msg=False, careless=True,
env=env,
)

# TODO usage is primarily in the tests, consider making a test helper and
# TODO: usage is only in the tests; consider making this a test helper and
# removing it from the GitRepo API
def get_indexed_files(self):
"""Get a list of files in git's index
Expand Down

0 comments on commit 8e49548

Please sign in to comment.