Skip to content

Commit

Permalink
Merge pull request #806 from mih/bf-797
Browse files Browse the repository at this point in the history
ENH: Exit `save()` faster, if clean (fixes gh-797)
  • Loading branch information
mih committed Sep 13, 2016
2 parents f30733f + 1800480 commit 3f48269
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 33 deletions.
88 changes: 64 additions & 24 deletions datalad/distribution/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,45 +250,85 @@ def __call__(
return_values = []
for dspath in calls:
ds = Dataset(dspath)
if calls[dspath]['g_add']:

lgr.info("Processing dataset %s ..." % ds)

# check every (sub-)dataset for annex once, since we can't add or
# addurl anything, if there is no annex:
# TODO: Q: Alternatively, just call git-annex-init if there's no
# annex yet and we have an annex-add/annex-addurl request?
_is_annex = isinstance(ds.repo, AnnexRepo)

if calls[ds.path]['g_add']:
return_values.extend(ds.repo.add(calls[dspath]['g_add'],
git=True,
git_options=git_opts))
if calls[ds.path]['a_add']:
# TODO: annex required or call git-annex-init if there's no annex yet?
assert isinstance(ds.repo, AnnexRepo)
return_values.extend(
ds.repo.add(calls[dspath]['a_add'],
git=False,
git_options=git_opts,
annex_options=annex_opts,
options=annex_add_opts))
if _is_annex:
return_values.extend(
ds.repo.add(calls[dspath]['a_add'],
git=False,
git_options=git_opts,
annex_options=annex_opts,
options=annex_add_opts
)
)
else:
lgr.debug("{0} is no annex. Skip 'annex-add' for "
"files {1}".format(ds, calls[dspath]['a_add']))
return_values.extend(
[{'file': f,
'success': False,
'note': "no annex at %s" % ds.path}
for f in calls[dspath]['a_add']]
)

# TODO: AnnexRepo.add_urls' return value doesn't contain the created
# file name but the url
if calls[ds.path]['addurl_s']:
if to_git:
raise NotImplementedError("Can't add a remote source "
"directly to git.")
assert isinstance(ds.repo, AnnexRepo)
return_values.extend(
ds.repo.add_urls(calls[ds.path]['addurl_s'],
options=annex_add_opts,
# TODO: extra parameter for addurl?
git_options=git_opts,
annex_options=annex_opts))
if _is_annex:
return_values.extend(
ds.repo.add_urls(calls[ds.path]['addurl_s'],
options=annex_add_opts,
# TODO: extra parameter for addurl?
git_options=git_opts,
annex_options=annex_opts
)
)
else:
lgr.debug("{0} is no annex. Skip 'annex-addurl' for "
"files {1}".format(ds, calls[dspath]['addurl_s']))
return_values.extend(
[{'file': f,
'success': False,
'note': "no annex at %s" % ds.path}
for f in calls[dspath]['addurl_s']]
)

if calls[ds.path]['addurl_f']:
if to_git:
raise NotImplementedError("Can't add a remote source "
"directly to git.")
assert isinstance(ds.repo, AnnexRepo)
for f, u in calls[ds.path]['addurl_f']:
return_values.append(
ds.repo.add_url_to_file(f, u,
options=annex_add_opts, # TODO: see above
git_options=git_opts,
annex_options=annex_opts,
batch=True))
if _is_annex:
for f, u in calls[ds.path]['addurl_f']:
return_values.append(
ds.repo.add_url_to_file(f, u,
options=annex_add_opts, # TODO: see above
git_options=git_opts,
annex_options=annex_opts,
batch=True))
else:
lgr.debug("{0} is no annex. Skip 'annex-addurl' for "
"files {1}".format(ds, calls[dspath]['addurl_f']))
return_values.extend(
[{'file': f,
'success': False,
'note': "no annex at %s" % ds.path}
for f in calls[dspath]['addurl_f']]
)

if save and len(return_values):
# we got something added -> save
Expand Down
8 changes: 8 additions & 0 deletions datalad/distribution/tests/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,14 @@ def test_add_recursive(path):
ds.add(opj('dir', 'testindir2'), recursive=True, to_git=True)
assert_in('testindir2', Dataset(opj(path, 'dir')).repo.get_indexed_files())

subds = ds.create_subdataset('git-sub', no_annex=True)
with open(opj(subds.path, 'somefile.txt'), "w") as f:
f.write("bla bla")
result = ds.add(opj('git-sub', 'somefile.txt'), recursive=True, to_git=False)
eq_(result, [{'file': opj(subds.path, 'somefile.txt'),
'note': "no annex at %s" % subds.path,
'success': False}])


@with_tree(**tree_arg)
def test_relpath_add(path):
Expand Down
42 changes: 33 additions & 9 deletions datalad/interface/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import logging

from os.path import abspath, join as opj, isdir, realpath, relpath
from os.path import join as opj, isdir, realpath, relpath

from datalad.support.constraints import EnsureStr
from datalad.support.constraints import EnsureNone
Expand Down Expand Up @@ -68,6 +68,13 @@ class Save(Interface):
Optionally, an additional tag, such as a version, can be assigned to the
saved state. Such tag enables straightforward retrieval of past versions
at a later point in time.
|| PYTHON >>
Returns
-------
commit or None
`None` if nothing was saved, the resulting commit otherwise.
<< PYTHON ||
"""

_params_ = dict(
Expand Down Expand Up @@ -108,7 +115,26 @@ class Save(Interface):
def __call__(message=None, files=None, dataset=None,
auto_add_changes=False, version_tag=None,
recursive=False, recursion_limit=None):
# XXX path resolution needs to come before dataset resolution!
# import locally to avoid circularity in API
from datalad.distribution.add import Add
# shortcut
ds = require_dataset(dataset, check_installed=True,
purpose='saving')

if not ds.repo.repo.is_dirty(
index=True,
working_tree=True,
untracked_files=True,
submodules=True):
# if we cannot see anything dirty at all, the only thing we could
# do is tag
if version_tag:
ds.repo.tag(version_tag)
# take the easy one out
return

# XXX path resolution needs to happen on the input argument, not the
# resolved dataset!
# otherwise we will not be able to figure out, whether there was an
# explicit dataset provided, or just a matching one resolved
# automatically.
Expand All @@ -118,10 +144,6 @@ def __call__(message=None, files=None, dataset=None,
# make sure we apply the usual path interpretation logic
files = [resolve_path(p, dataset) for p in files]

# shortcut
ds = require_dataset(dataset, check_installed=True,
purpose='saving')

# use the dataset's base path to indicate that everything
# should be saved
if auto_add_changes:
Expand Down Expand Up @@ -180,8 +202,10 @@ def __call__(message=None, files=None, dataset=None,
if ds.get_containing_subdataset(f, recursion_limit=1) == ds]
if len(absf):
# XXX Is there a better way to handle files in mixed repos?
ds.repo.add(absf)
ds.repo.add(absf, git=True)
Add.__call__(dataset=ds, path=absf, recursive=False, save=False,
to_git=False)
Add.__call__(dataset=ds, path=absf, recursive=False, save=False,
to_git=True)

_datalad_msg = False
if not message:
Expand All @@ -207,7 +231,7 @@ def __call__(message=None, files=None, dataset=None,
if version_tag:
ds.repo.tag(version_tag)

return ds.repo.repo.head.commit if _modified_flag else False
return ds.repo.repo.head.commit if _modified_flag else None

@staticmethod
def result_renderer_cmdline(res, args):
Expand Down

0 comments on commit 3f48269

Please sign in to comment.