Skip to content

Commit

Permalink
Merge pull request #673 from mih/bf-670
Browse files Browse the repository at this point in the history
Enable running `iter_annexworktree()` on just-Git repos
  • Loading branch information
mih committed Apr 26, 2024
2 parents 20176ab + 06ff688 commit 57d73ca
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 1 deletion.
15 changes: 15 additions & 0 deletions changelog.d/20240426_082448_michael.hanke_bf_670.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### 🐛 Bug Fixes

- `iter_annexworktree()` can now also be used on plain Git repos,
and behaves exactly as if reporting on non-annexed files
in a git-annex repo. Previously, a cryptic `iterable did not yield
matching item for route-in item, cardinality mismatch?` error was
issued in this case.
Fixes https://github.com/datalad/datalad-next/issues/670 via
https://github.com/datalad/datalad-next/pull/673 (by @mih)

### 💫 Enhancements and new features

- A new `has_initialized_annex()` helper function is provided to
test for a locally initialized annex in a repo.
Via https://github.com/datalad/datalad-next/pull/673 (by @mih)
15 changes: 15 additions & 0 deletions datalad_next/iter_collections/annexworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
route_out,
StoreOnly,
)
from datalad_next.repo_utils import has_initialized_annex
from datalad_next.runners import iter_git_subproc

from .gitworktree import (
Expand Down Expand Up @@ -146,6 +147,20 @@ def iter_annexworktree(
recursive=recursive,
)

if not has_initialized_annex(path):
# this is not an annex repo.
# we just yield the items from the gitworktree iterator.
# we funnel them through the standard result item prep
# function for type equality.
# when a recursive-mode other than 'repository' will be
# implemented, this implementation needs to be double-checked
# to avoid decision making on submodules just based on
# the nature of the toplevel repo.
for item in glsf:
yield _get_worktree_item(
path, get_fs_info=link_target, git_item=item)
return

git_fileinfo_store: list[Any] = list()
# this is a technical helper that will just store a bunch of `None`s
# for aligning item-results between git-ls-files and git-annex-find
Expand Down
15 changes: 14 additions & 1 deletion datalad_next/iter_collections/tests/test_iterannexworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
from datalad_next.datasets import Dataset
from datalad_next.utils import check_symlink_capability

from ..gitworktree import GitTreeItemType
from ..gitworktree import (
GitTreeItemType,
iter_gitworktree,
)
from ..annexworktree import iter_annexworktree

from .test_itergitworktree import prep_fp_tester
Expand Down Expand Up @@ -117,3 +120,13 @@ def test_iter_annexworktree_nonrecursive(existing_dataset):
dirs = [i for i in all_items if i.gittype == GitTreeItemType.directory]
assert len(dirs) == 1
dirs[0].name == PurePath('.datalad')


def test_iter_annexworktree_noannex(existing_noannex_dataset):
    """Smoke test: ``iter_annexworktree()`` must run on a repo without annex.

    The annex-aware iterator is compared against the plain Git worktree
    iterator: both must report the same number of items, and items at
    matching positions must carry the same name.
    """
    repo_path = existing_noannex_dataset.pathobj
    annex_report = list(iter_annexworktree(repo_path))
    git_report = list(iter_gitworktree(repo_path))
    # same cardinality first, so the pairwise comparison below cannot
    # silently skip trailing items of the longer report
    assert len(annex_report) == len(git_report)
    for annex_item, git_item in zip(annex_report, git_report):
        assert annex_item.name == git_item.name
4 changes: 4 additions & 0 deletions datalad_next/repo_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
:toctree: generated
get_worktree_head
has_initialized_annex
"""

from .annex import (
has_initialized_annex,
)
from .worktree import (
get_worktree_head,
)
34 changes: 34 additions & 0 deletions datalad_next/repo_utils/annex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pathlib import Path

from datalad_next.runners import call_git_success


def has_initialized_annex(
    path: Path,
) -> bool:
    """Return whether there is an initialized annex for ``path``

    The given ``path`` can be any directory, inside or outside a Git
    repository. ``True`` is returned when the path is found to be
    within a (locally) initialized git-annex repository.

    When this test returns ``True`` it can be expected that no subsequent
    call to an annex command fails with

        `git-annex: First run: git-annex init`

    for this ``path``.

    Parameters
    ----------
    path: Path
      Directory used as the working directory for the Git call that
      performs the test.

    Returns
    -------
    bool
      The outcome reported by ``call_git_success()`` for a query of the
      ``annex.uuid`` setting in the repository-local Git configuration.
    """
    # Timing: this test takes about 3ms on MIH's test system.
    #
    # datalad-core tests for a Git repo first and then for `.git/annex`.
    # The single Git call below achieves both in one step. The datalad-core
    # test is likely still faster, because it only inspects the filesystem
    # for a few key members of a Git repo. However, in order for that test
    # to work, it has to traverse the filesystem to find a repo root -- if
    # there even is any.
    #
    # also see https://git-annex.branchable.com/forum/Cheapest_test_for_an_initialized_annex__63__/
    uuid_query = ['config', '--local', 'annex.uuid']
    return call_git_success(uuid_query, cwd=path, capture_output=True)
18 changes: 18 additions & 0 deletions datalad_next/repo_utils/tests/test_annex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from ..annex import has_initialized_annex


def test_has_initialized_annex(existing_dataset):
    """An annex must be detected at the dataset root and in a subdirectory."""
    ds_root = existing_dataset.pathobj
    locations = (
        # the root
        ds_root,
        # a subdir
        ds_root / '.datalad',
    )
    for location in locations:
        assert has_initialized_annex(location)


def test_no_initialized_annex(existing_noannex_dataset, tmp_path):
    """No annex must be reported for a no-annex dataset or a random dir."""
    ds_root = existing_noannex_dataset.pathobj
    locations = (
        # the root
        ds_root,
        # a subdir
        ds_root / '.datalad',
        # a random directory
        tmp_path,
    )
    for location in locations:
        assert not has_initialized_annex(location)

0 comments on commit 57d73ca

Please sign in to comment.