Skip to content

Commit

Permalink
Merge pull request #340 from yarikoptic/enh-batched-annex
Browse files Browse the repository at this point in the history
ENH+BF: annex compatibility, use of --json output of addurl, minor cookies tune up etc
  • Loading branch information
yarikoptic committed Jan 28, 2016
2 parents 1ad42d6 + a4691ef commit dc57d31
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 15 deletions.
7 changes: 3 additions & 4 deletions datalad/crawler/nodes/tests/test_annex.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,13 @@ def _test_add_archive_content_tar(direct, repo_path):
annex.add_archive_content(
existing='archive-suffix',
strip_leading_dirs=True,)(output_add[0]))
# http://git-annex.branchable.com/bugs/addurl_--batch_from_url_from_a_custom_special_remote_adds_to_annex_disregarding_largefiles___40__on_first_run__41__/?updated
# TODO: largefiles instruction seems to be ignored on the first run, but works in direct mode
assert_equal(output_addarchive,
[{'datalad_stats': ActivityStats(add_annex=1 + int(not direct), add_git=int(direct), files=3, renamed=2), 'filename': '1.tar'}])
[{'datalad_stats': ActivityStats(add_annex=1, add_git=1, files=3, renamed=2),
'filename': '1.tar'}])
if not direct: # Notimplemented otherwise
assert_true(annex.repo.dirty)
annex.repo.commit("added")
ok_file_under_git(repo_path, 'file.txt', annexed=True)
ok_file_under_git(repo_path, 'file.txt', annexed=False)
ok_file_under_git(repo_path, '1.dat', annexed=True)
assert_false(lexists(opj(repo_path, '1.tar')))
if not direct: # Notimplemented otherwise
Expand Down
2 changes: 1 addition & 1 deletion datalad/dochelpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def exc_str(exc=None, limit=None):
out = str(exc)
if limit is None:
# TODO: config logging.exceptions.traceback_levels = 1
limit = 1
limit = int(os.environ.get('DATALAD_EXC_STR_TBLIMIT', '1'))
try:
exctype, value, tb = sys.exc_info()
if not exc:
Expand Down
4 changes: 2 additions & 2 deletions datalad/interface/add_archive_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,9 @@ def __call__(self, archive, annex=None, strip_leading_dirs=False,
lgr.debug("Adding %s to annex pointing to %s and with options %r",
target_file, url, annex_options)

annex.annex_addurl_to_file(target_file, url, options=annex_options, batch=True)
out_json = annex.annex_addurl_to_file(target_file, url, options=annex_options, batch=True)

if annex.is_under_annex(target_file, batch=True):
if 'key' in out_json: # annex.is_under_annex(target_file, batch=True):
stats.add_annex += 1
else:
lgr.debug("File {} was added to git, not adding url".format(target_file))
Expand Down
20 changes: 18 additions & 2 deletions datalad/support/annexrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

from functools import wraps

from six import string_types, PY3
from six import string_types
from six.moves import filter
from six.moves.configparser import NoOptionError
from six.moves.urllib.parse import quote as urlquote

Expand Down Expand Up @@ -444,7 +445,15 @@ def get_file_key(self, file_):
# Not sure, whether or not this can actually happen
raise e

return out.rstrip(linesep).splitlines()[0]
entries = out.rstrip(linesep).splitlines()
# filter out the ones which start with (: http://git-annex.branchable.com/bugs/lookupkey_started_to_spit_out___34__debug__34___messages_to_stdout/?updated
entries = list(filter(lambda x: not x.startswith('('), entries))
if len(entries) > 1:
lgr.warning("Got multiple entries in reply asking for a key of a file: %s"
% (str(entries)))
elif not entries:
raise FileNotInAnnexError("Could not get a key for a file %s -- empty output" % file_)
return entries[0]

@normalize_paths
def file_has_content(self, files):
Expand Down Expand Up @@ -619,6 +628,12 @@ def annex_addurl_to_file(self, file_, url, options=None, backend=None,
batch: bool, optional
initiate or continue with a batched run of annex addurl, instead of just
calling a single git annex addurl command
Returns
-------
dict
In batch mode only ATM returns dict representation of json output returned
by annex
"""
options = options[:] if options else []
git_options = []
Expand Down Expand Up @@ -660,6 +675,7 @@ def annex_addurl_to_file(self, file_, url, options=None, backend=None,
cmd="addurl",
msg="Error, annex reported failure for addurl: %s"
% str(out_json))
return out_json


def annex_addurls(self, urls, options=None, backend=None, cwd=None):
Expand Down
4 changes: 2 additions & 2 deletions datalad/support/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ def _load(self):
cookies_dir = os.path.dirname(filename)
else:
cookies_dir = os.path.join(appdirs.user_config_dir(), 'datalad') # FIXME prolly shouldn't hardcode 'datalad'
filename = os.path.join(cookies_dir, 'cookies.db')
filename = os.path.join(cookies_dir, 'cookies')

# TODO: guarantee restricted permissions

if not os.path.exists(cookies_dir):
os.makedirs(cookies_dir)

db = self._cookies_db = shelve.open(filename, writeback=True)
atexit.register(lambda : db.close())
atexit.register(db.close)

def _get_provider(self, url):
if self._cookies_db is None:
Expand Down
6 changes: 4 additions & 2 deletions datalad/tests/test_annexrepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ def _test_AnnexRepo_get_contentlocation(batch, path):
with swallow_outputs() as cmo:
annex.annex_get(fname)
key_location = annex.get_contentlocation(key, batch=batch)
assert(key_location)
# they both should point to the same location eventually
eq_(os.path.realpath(opj(annex.path, fname)),
os.path.realpath(opj(annex.path, key_location)))
Expand All @@ -682,9 +683,10 @@ def _test_AnnexRepo_get_contentlocation(batch, path):
eq_(os.path.realpath(opj(annex.path, fname)),
os.path.realpath(opj(annex.path, key_location)))


def test_AnnexRepo_get_contentlocation():
yield _test_AnnexRepo_get_contentlocation, False
yield _test_AnnexRepo_get_contentlocation, True
for batch in (False, True):
yield _test_AnnexRepo_get_contentlocation, batch


@with_tree(tree=(('about.txt', 'Lots of abouts'),
Expand Down
14 changes: 12 additions & 2 deletions datalad/tests/test_dochelpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
"""Tests for dochelpers (largely copied from PyMVPA, the same copyright)
"""

import os
from mock import patch

from ..dochelpers import single_or_plural, borrowdoc, borrowkwargs
from ..dochelpers import exc_str

Expand Down Expand Up @@ -142,9 +145,16 @@ def f2():
try:
f()
except Exception as e:
estr_ = exc_str()
# default one:
estr2 = exc_str(e, 2)
estr1 = exc_str(e)
estr1 = exc_str(e, 1)
# and we can control it via environ by default
with patch.dict('os.environ', {'DATALAD_EXC_STR_TBLIMIT': '3'}):
estr3 = exc_str(e)
with patch.dict('os.environ', {}, clear=True):
estr_ = exc_str()

assert_re_in("my bad again \[test_dochelpers.py:test_exc_str:...,test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]", estr3)
assert_re_in("my bad again \[test_dochelpers.py:f:...,test_dochelpers.py:f2:...\]", estr2)
assert_re_in("my bad again \[test_dochelpers.py:f2:...\]", estr1)
assert_equal(estr_, estr1)

0 comments on commit dc57d31

Please sign in to comment.