Merge remote-tracking branch 'origin/maint'
* origin/maint: (24 commits)
  BF(TEMP): use git-annex from neurodebian -devel to gain fix for bug detected with datalad-crawler
  BF(TST): use introduced _p instead of _path_ and enable test_path_prefix testing on windows
  BF: f-prefix the f-string
  BF: use .as_posix to convert to posix, name posix_paths for clarity
  BF+RF(TST): mark path_prefix back to skip, use custom _p helper for paths
  BF(TST): apparently description is getting set to target_url
  BF: convert submodule relative path to PurePosixPath before giving to get_parent_paths
  ENH+BF: get_parent_paths - make / into sep option and consistently use / as path separator
  Safety-net to prevent data-loss by drop from symlink'ed annex
  BF(TST): prevent auto-upgrade of "remote" test sibling, do not use local path for URL
  [DATALAD RUNCMD] DOC: allow codespell to fix all typos it finds
  RF: replace 2 letter abbrevs with some meaningful words
  Remove no longer needed import
  BF(TST): make tests use _path_ helper for Windows "friendliness" of the tests
  Avoid unicode in test that does not strictly require this complication
  TST: Add a testcase for #6950
  BF: Remove duplicate ds key from result record
  NF: tools/eval_under_nfs to help running tests under NFS partition
  Acknowledge git-config comment chars
  DOC: Datalad -> DataLad
  ...
yarikoptic committed Aug 20, 2022
2 parents 25bda4b + 496e5e3 commit 80aa7b5
Showing 28 changed files with 274 additions and 86 deletions.
1 change: 1 addition & 0 deletions .codespell-ignorewords
@@ -1,3 +1,4 @@
 commitish
 ba
 froms
+ro
1 change: 1 addition & 0 deletions .github/workflows/test_extensions.yml
@@ -23,6 +23,7 @@ jobs:
       shell: bash
       run: |
         bash <(wget -q -O- http://neuro.debian.net/_files/neurodebian-travis.sh)
+        sudo sed -i-devel.list -e 's,/debian ,/debian-devel ,g' /etc/apt/sources.list.d/neurodebian.sources.list
         sudo apt-get update -qq
         sudo apt-get install eatmydata
         sudo eatmydata apt-get install git-annex-standalone
4 changes: 3 additions & 1 deletion datalad/config.py
@@ -1114,12 +1114,14 @@ def quote_config(v):
       To-be-quoted string
     """
     white = (' ', '\t')
+    comment = ('#', ';')
     # backslashes need to be quoted in any case
     v = v.replace('\\', '\\\\')
     # must not have additional unquoted quotes
     v = v.replace('"', '\\"')
-    if v[0] in white or v[-1] in white:
+    if v[0] in white or v[-1] in white or any(c in v for c in comment):
         # quoting the value due to leading/trailing whitespace
+        # or occurrence of comment char
         v = '"{}"'.format(v)
     return v

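For illustration, the new quoting rule as a self-contained sketch; the sample values are invented, but the logic mirrors quote_config above:

def quote_config(v):
    """Quote a value for git config if whitespace or comment chars require it"""
    white = (' ', '\t')
    comment = ('#', ';')
    v = v.replace('\\', '\\\\')   # backslashes always escaped
    v = v.replace('"', '\\"')     # no unquoted quotes inside
    if v[0] in white or v[-1] in white or any(c in v for c in comment):
        v = '"{}"'.format(v)
    return v

assert quote_config('plain') == 'plain'
assert quote_config('see #6950') == '"see #6950"'  # '#' starts a git-config comment
assert quote_config('a;b') == '"a;b"'              # ';' does as well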
2 changes: 1 addition & 1 deletion datalad/core/local/diff.py
@@ -77,7 +77,7 @@ class Diff(Interface):
         path=Parameter(
             args=("path",),
             metavar="PATH",
-            doc="""path to contrain the report to""",
+            doc="""path to constrain the report to""",
             nargs="*",
             constraints=EnsureStr() | EnsureNone()),
         fr=Parameter(
2 changes: 1 addition & 1 deletion datalad/core/local/run.py
@@ -751,7 +751,7 @@ def _create_record(run_info, sidecar_flag, ds):
     Returns
     -------
     str or None, str or None
-      The first value is either the full run record in JSON serialzied form,
+      The first value is either the full run record in JSON serialized form,
       or content-based ID hash, if the record was written to a file. In that
       latter case, the second value is the path to the record sidecar file,
       or None otherwise.
2 changes: 1 addition & 1 deletion datalad/core/local/tests/test_diff.py
@@ -131,7 +131,7 @@ def test_repo_diff(path=None, norepo=None):
              'state': 'untracked',
              'type': 'directory'}})
 
-    # again a unmatching path constrainted will give an empty report
+    # again a unmatching path constrained will give an empty report
     eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
     # perfect match and anything underneath will do
     eq_(ds.repo.diff(fr='HEAD', to=None, paths=['deep']),
2 changes: 1 addition & 1 deletion datalad/core/local/tests/test_save.py
@@ -317,7 +317,7 @@ def test_bf1886(path=None):
         op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
     parent.save(op.join('subdir', 'subsubdir', 'upup'))
     assert_repo_status(parent.path)
-    # simulatenously add a subds and a symlink pointing to it
+    # simultaneously add a subds and a symlink pointing to it
     # create subds, but don't register it
     create(op.join(parent.path, 'sub2'))
     os.symlink(
5 changes: 5 additions & 0 deletions datalad/distributed/drop.py
@@ -727,6 +727,9 @@ def _kill_dataset(ds):
 def _drop_allkeys(ds, repo, force=False, jobs=None):
     """
     """
+    assert not (repo.dot_git / 'annex').is_symlink(), \
+        "Dropping from a symlinked annex is unsupported to prevent data-loss"
+
     cmd = ['drop', '--all']
     if force:
         cmd.append('--force')
@@ -757,6 +760,8 @@ def _drop_files(ds, repo, paths, force=False, jobs=None):
     ------
     dict
     """
+    assert not (repo.dot_git / 'annex').is_symlink(), \
+        "Dropping from a symlinked annex is unsupported to prevent data-loss"
     cmd = ['drop']
     if force:
         cmd.append('--force')
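The guard itself is plain pathlib; a minimal standalone sketch (the function name and path are illustrative, not DataLad API):

from pathlib import Path

def ensure_droppable(dot_git: Path) -> None:
    # A symlinked .git/annex (e.g. from a reckless 'ephemeral' clone) shares
    # its object store with the origin, so dropping here would also delete
    # the origin's only copy.
    if (dot_git / 'annex').is_symlink():
        raise AssertionError(
            "Dropping from a symlinked annex is unsupported to prevent data-loss")

# usage sketch: ensure_droppable(Path('/some/clone/.git'))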
47 changes: 47 additions & 0 deletions datalad/distributed/tests/test_drop.py
@@ -9,6 +9,7 @@
 
 import os
 import os.path as op
+from pathlib import Path
 from unittest.mock import patch
 
 from datalad.api import (
@@ -45,6 +46,9 @@
 from datalad.utils import chpwd
 
 
+ckwa = dict(result_renderer='disabled')
+
+
 @with_tempfile
 @with_tempfile
 def test_drop_file_content(path=None, outside_path=None):
@@ -567,3 +571,46 @@ def test_drop_allkeys_result_contains_annex_error_messages(path=None):
         ds.drop(what='allkeys', on_failure='ignore'),
         error_message='git-annex error message here',
     )
+
+
+# https://github.com/datalad/datalad/issues/6948
+@with_tempfile
+@with_tempfile
+def test_nodrop_symlinked_annex(origpath=None, clonepath=None):
+    # create a dataset with a key
+    ds = Dataset(origpath).create(**ckwa)
+    testfile = ds.pathobj / 'file1'
+    testcontent = 'precious'
+    testfile.write_text(testcontent)
+    ds.save(**ckwa)
+    rec = ds.status(testfile, annex='availability',
+                    return_type='item-or-list', **ckwa)
+    eq_(testcontent, Path(rec['objloc']).read_text())
+
+    def _droptest(_ds):
+        # drop refuses to drop from a symlinked annex
+        if (_ds.repo.dot_git / 'annex').is_symlink():
+            assert_raises(AssertionError, _ds.drop, **ckwa)
+            for what in ('all', 'allkeys', 'filecontent', 'datasets'):
+                assert_raises(AssertionError, _ds.drop, what=what, **ckwa)
+        # but a reckless kill works without crashing
+        _ds.drop(what='all', recursive=True, reckless='kill', **ckwa)
+        nok_(_ds.is_installed())
+        # nothing has made the original file content vanish
+        _rec = ds.status(
+            testfile, annex='availability',
+            return_type='item-or-list', **ckwa)
+        eq_(testcontent, Path(_rec['objloc']).read_text())
+
+    # test on a clone that does not know it has key access
+    dsclone1 = clone(ds.path, clonepath, reckless='ephemeral', **ckwa)
+    _droptest(dsclone1)
+
+    # test again on a clone that does think it has a key copy
+    dsclone2 = clone(ds.path, clonepath, reckless='ephemeral', **ckwa)
+    if not dsclone2.repo.is_managed_branch():
+        # it really should not matter, but 'origin' is set to annex.ignore=true
+        # on crippledFS
+        # https://github.com/datalad/datalad/issues/6960
+        dsclone2.get('.')
+    _droptest(dsclone2)
44 changes: 30 additions & 14 deletions datalad/distribution/tests/test_create_sibling.py
@@ -194,14 +194,15 @@ def test_target_ssh_simple(origin=None, src_path=None, target_rootpath=None):
             ui=have_webui())
         assert_not_in('enableremote local_target failed', cml.out)
 
-    GitRepo(target_path, create=False)  # raises if not a git repo
+    target_gitrepo = GitRepo(target_path, create=False)  # raises if not a git repo
     assert_in("local_target", source.repo.get_remotes())
     # Both must be annex or git repositories
     src_is_annex = AnnexRepo.is_valid_repo(src_path)
     eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
     # And target one should be known to have a known UUID within the source if annex
     if src_is_annex:
         lclcfg = AnnexRepo(src_path).config
+        target_aversion = target_gitrepo.config['annex.version']
         # basic config in place
         eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
         ok_(lclcfg.get('remote.local_target.annex-uuid'))
@@ -217,14 +218,26 @@ def test_target_ssh_simple(origin=None, src_path=None, target_rootpath=None):
         ok_(str(cm.value).startswith(
             "Target path %s already exists." % target_path))
     if src_is_annex:
-        target_description = AnnexRepo(target_path, create=False).get_description()
-        assert_not_equal(target_description, None)
-        assert_not_equal(target_description, target_path)
-        # on yoh's laptop TMPDIR is under HOME, so things start to become
-        # tricky since then target_path is shortened and we would need to know
-        # remote $HOME. To not over-complicate and still test, test only for
-        # the basename of the target_path
-        ok_endswith(target_description, basename(target_path))
+        # Before we "talk" to it with git-annex directly -- we must prevent auto-upgrades
+        # since the git-annex on "remote server" (e.g. docker container) could be outdated
+        # and not support new git-annex repo version
+        target_gitrepo.config.set('annex.autoupgraderepository', "false", scope='local')
+        try:
+            target_description = AnnexRepo(target_path, create=False).get_description()
+        except CommandError as e:
+            if 'is at unsupported version' not in str(e.stderr):
+                raise
+            # we just would skip this part of the test and avoid future similar query
+            target_description = None
+        else:
+            assert target_gitrepo.config['annex.version'] == target_aversion
+            assert_not_equal(target_description, None)
+            assert_not_equal(target_description, target_path)
+            # on yoh's laptop TMPDIR is under HOME, so things start to become
+            # tricky since then target_path is shortened and we would need to know
+            # remote $HOME. To not over-complicate and still test, test only for
+            # the basename of the target_path
+            ok_endswith(target_description, basename(target_path))
     # now, with force and correct url, which is also used to determine
     # target_dir
     # Note: on windows absolute path is not url conform. But this way it's easy
@@ -261,13 +274,16 @@ def interactive_assert_create_sshwebserver():
     eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)
 
     # again, by explicitly passing urls. Since we are on datalad-test, the
-    # local path should work:
+    # could use local path, but then it would not use "remote" git-annex
+    # and thus potentially lead to incongruent result. So make URLs a bit
+    # different by adding trailing /. to regular target_url
+    target_url = "ssh://datalad-test" + target_path + "/."
     cpkwargs = dict(
         dataset=source,
         name="local_target",
         sshurl="ssh://datalad-test",
         target_dir=target_path,
-        target_url=target_path,
+        target_url=target_url,
         target_pushurl="ssh://datalad-test" + target_path,
         ui=have_webui(),
     )
@@ -277,12 +293,12 @@ def interactive_assert_create_sshwebserver():
     assert_create_sshwebserver(existing='replace', **cpkwargs)
     interactive_assert_create_sshwebserver()
 
-    if src_is_annex:
+    if src_is_annex and target_description:
         target_description = AnnexRepo(target_path,
                                        create=False).get_description()
-        eq_(target_description, target_path)
+        eq_(target_description, target_url)
 
-    eq_(target_path,
+    eq_(target_url,
         source.repo.get_remote_url("local_target"))
     eq_("ssh://datalad-test" + target_path,
         source.repo.get_remote_url("local_target", push=True))
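The auto-upgrade guard used in this test reduces to a single git config setting; a hedged sketch using plain git instead of DataLad's config wrapper (the helper name and path are assumptions):

import subprocess

def disable_annex_autoupgrade(repo_path):
    # keep an older git-annex on the target from bumping the repository to a
    # version that the git-annex doing the querying may not support
    subprocess.run(
        ['git', '-C', repo_path, 'config', '--local',
         'annex.autoupgraderepository', 'false'],
        check=True)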
2 changes: 1 addition & 1 deletion datalad/dochelpers.py
@@ -254,7 +254,7 @@ def borrowkwargs(cls=None, methodname=None, exclude=None):
     In the simplest scenario -- just grab all arguments from parent class::
 
       @borrowkwargs(A)
-      def met1(self, bu, **kwargs):
+      def met1(self, desc, **kwargs):
          pass
 
     Parameters
2 changes: 1 addition & 1 deletion datalad/local/add_archive_content.py
@@ -630,7 +630,7 @@ def __call__(
 
             if delete_after:
                 # we count the removal here, but don't yet perform it
-                # to not interfer with batched processes - any pure Git
+                # to not interfere with batched processes - any pure Git
                 # action invokes precommit which closes batched processes.
                 stats.removed += 1
 
2 changes: 1 addition & 1 deletion datalad/local/addurls.py
@@ -1528,7 +1528,7 @@ def addurls_to_ds(args):
     # We need to group by dataset since otherwise we will initiate
     # batched annex process per each subdataset, which might be infeasible
     # in any use-case with a considerable number of subdatasets.
-    # Also groupping allows us for parallelization across datasets, and avoids
+    # Also grouping allows us for parallelization across datasets, and avoids
     # proliferation of commit messages upon creation of each individual subdataset.
 
     def keyfn(d):
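The grouping that this comment describes can be sketched with itertools.groupby; the rows and key below are illustrative stand-ins for the addurls records:

from itertools import groupby

rows = [
    {'ds': 'sub-01', 'url': 'https://example.com/a'},
    {'ds': 'sub-02', 'url': 'https://example.com/b'},
    {'ds': 'sub-01', 'url': 'https://example.com/c'},
]

def keyfn(d):
    return d['ds']

# groupby requires input sorted by the same key; this yields one batch
# (and thus one batched annex process) per dataset, not per row
for ds_path, ds_rows in groupby(sorted(rows, key=keyfn), key=keyfn):
    print(ds_path, [r['url'] for r in ds_rows])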
7 changes: 7 additions & 0 deletions datalad/local/tests/test_wtf.py
@@ -37,6 +37,8 @@
 )
 from datalad.utils import ensure_unicode
 
+from datalad.support.external_versions import external_versions
+
 
 @with_tree({OBSCURE_FILENAME: {}})
 def test_wtf(topdir=None):
@@ -77,6 +79,11 @@ def test_wtf(topdir=None):
         wtf(dataset=ds.path, sensitive='all')
         assert_not_in(_HIDDEN, cmo.out)  # all is shown
         assert_in('user.name: ', cmo.out)
+        if external_versions['psutil']:
+            # filesystems detail should be reported
+            assert_in('max_pathlength:', cmo.out)
+        else:
+            assert_in("Hint: install psutil", cmo.out)
 
     # Sections selection
     #
7 changes: 6 additions & 1 deletion datalad/local/wtf.py
@@ -146,7 +146,7 @@ def _describe_system():
     }
 
 
-def _get_fs_type(loc, path):
+def _get_fs_type(loc, path, _was_warned=[]):
     """Return file system info for given Paths. Provide pathlib path as input"""
     res = {'path': path}
     try:
@@ -169,6 +169,11 @@ def _get_fs_type(loc, path):
         ce = CapturedException(exc)
         # if an exception occurs, leave out the fs type. The result renderer can
         # display a hint based on its lack in the report
+        if not _was_warned:
+            (lgr.debug if isinstance(exc, ImportError) else lgr.warning) (
+                "Could not determine filesystem types due to %s", ce)
+            # Rely on side-effect of [] as default arg
+            _was_warned.append("warned")
     return res


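The _was_warned=[] default is the warn-once idiom: Python evaluates a default argument once, at function definition, so the same list persists across calls. A standalone sketch, with a fake failure standing in for the real filesystem probe:

import logging

logging.basicConfig()
lgr = logging.getLogger('demo')

def get_fs_type(path, _was_warned=[]):
    try:
        raise OSError('probe failed')  # stand-in for the real lookup
    except Exception as exc:
        if not _was_warned:
            lgr.warning("Could not determine filesystem types due to %s", exc)
            _was_warned.append('warned')  # mutates the shared default list
    return {'path': path}

get_fs_type('/tmp')   # logs the warning
get_fs_type('/home')  # silent -- the sentinel is already in the list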
2 changes: 1 addition & 1 deletion datalad/metadata/search.py
@@ -1272,7 +1272,7 @@ class Search(Interface):
             documenttype configuration has changed."""),
         max_nresults=Parameter(
             args=("--max-nresults",),
-            doc="""maxmimum number of search results to report. Setting this
+            doc="""maximum number of search results to report. Setting this
             to 0 will report all search matches. Depending on the mode this
             can search substantially slower. If not specified, a
             mode-specific default setting will be used.""",
6 changes: 3 additions & 3 deletions datalad/metadata/tests/test_search.py
@@ -70,7 +70,7 @@ def test_search_outside1_noninteractive_ui(tdir=None):
     # we should raise an informative exception
     with chpwd(tdir):
         with assert_raises(NoDatasetFound) as cme:
-            list(search("bu"))
+            list(search("irrelevant"))
         assert_in('run interactively', str(cme.value))
 
 
@@ -81,13 +81,13 @@ def test_search_outside1(tdir=None, newhome=None):
     # should fail since directory exists, but not a dataset
     # should not even waste our response ;)
     with patch_config({'datalad.locations.default-dataset': newhome}):
-        gen = search("bu", return_type='generator')
+        gen = search("irrelevant", return_type='generator')
         assert_is_generator(gen)
         assert_raises(NoDatasetFound, next, gen)
 
     # and if we point to some non-existing dataset
     with assert_raises(ValueError):
-        next(search("bu", dataset=newhome))
+        next(search("irrelevant", dataset=newhome))
 
 
 @with_testsui(responses='yes')
2 changes: 1 addition & 1 deletion datalad/support/due_utils.py
@@ -103,7 +103,7 @@ def get_field(struct, field):
     desc = get_field(metadata, desc_field) if desc_field else None
     desc = desc or "DataLad dataset %s" % dataset.id
 
-    # DueCredit's path defines groupping of entries, so with
+    # DueCredit's path defines grouping of entries, so with
     # "datalad." we bring them all under datalad's roof!
     # And as for unique suffix, there is no better one but the ID,
     # but that one is too long so let's take the first part of UUID
2 changes: 1 addition & 1 deletion datalad/support/exceptions.py
@@ -78,7 +78,7 @@ def format_standard(self):
         str
         """
         # TODO: Intended for introducing a decent debug mode later when this
-        #       can be used fromm within log formatter / result renderer.
+        #       can be used from within log formatter / result renderer.
         # For now: a one-liner is free
         return ''.join(self.tb.format())

14 changes: 8 additions & 6 deletions datalad/support/gitrepo.py
@@ -2720,21 +2720,23 @@ def get_content_info(self, paths=None, ref=None, untracked='all'):
             # any incoming path has to be relative already, so we can simply
             # convert unconditionally
             # note: will be list-ified below
-            paths = map(ut.PurePosixPath, paths)
+            posix_paths = [ut.PurePath(p).as_posix() for p in paths]
         elif paths is not None:
             return info
+        else:
+            posix_paths = None
 
-        path_strs = list(map(str, paths)) if paths else None
-        if path_strs and (not ref or external_versions["cmd:git"] >= "2.29.0"):
+        if posix_paths and (not ref or external_versions["cmd:git"] >= "2.29.0"):
             # If a path points within a submodule, we need to map it to the
             # containing submodule before feeding it to ls-files or ls-tree.
             #
             # Before Git 2.29.0, ls-tree and ls-files differed in how they
             # reported paths within submodules: ls-files provided no output,
             # and ls-tree listed the submodule. Now they both return no output.
-            submodules = [str(s["path"].relative_to(self.pathobj))
+            submodules = [s["path"].relative_to(self.pathobj).as_posix()
                           for s in self.get_submodules_()]
-            path_strs = get_parent_paths(path_strs, submodules)
+            # `paths` get normalized into PurePosixPath above, submodules are POSIX as well
+            posix_paths = get_parent_paths(posix_paths, submodules)
 
         # this will not work in direct mode, but everything else should be
         # just fine
@@ -2768,7 +2770,7 @@ def get_content_info(self, paths=None, ref=None, untracked='all'):
         try:
             stdout = self.call_git(
                 cmd,
-                files=path_strs,
+                files=posix_paths,
                 expect_fail=True,
                 read_only=True)
         except CommandError as exc:
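The path normalization in this change can be demonstrated with pathlib alone, independent of GitRepo and get_parent_paths:

from pathlib import PurePath, PurePosixPath, PureWindowsPath

# as_posix() yields /-separated strings regardless of the input flavor
assert PureWindowsPath(r'subdir\file.dat').as_posix() == 'subdir/file.dat'
assert [PurePath(p).as_posix() for p in ('a/b', 'c')] == ['a/b', 'c']

# submodule paths relative to the repo root, also /-separated
repo = PurePosixPath('/repo')
sub = PurePosixPath('/repo/sub/module')
assert sub.relative_to(repo).as_posix() == 'sub/module'

Feeding such /-separated strings to git keeps ls-files and ls-tree behavior consistent across platforms, which is the point of the Windows "friendliness" fixes in this merge.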
