Skip to content

Commit

Permalink
Merge pull request #6706 from adswa/mnt-6644
Browse files Browse the repository at this point in the history
MNT: Factor out helper function used in core and next from ria functionality
  • Loading branch information
bpoldrack committed Jun 13, 2022
2 parents 029f839 + 56b8e79 commit 0654290
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 54 deletions.
71 changes: 18 additions & 53 deletions datalad/distributed/create_sibling_ria.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
EnsureDataset,
require_dataset,
)
from datalad.distribution.utils import _yield_ds_w_matching_siblings
from datalad.distributed.ora_remote import (
LocalIO,
RIARemoteError,
Expand Down Expand Up @@ -342,62 +343,26 @@ def __call__(url,
# local remotes with existence of the actual remote sibling
# in wording
if existing == 'error':
# in recursive mode this check could take a substantial amount of
# time: employ a progress bar (or rather a counter, because we don't
# know the total in advance
pbar_id = 'check-siblings-{}'.format(id(ds))
log_progress(
lgr.info, pbar_id,
'Start checking pre-existing sibling configuration %s', ds,
label='Query siblings',
unit=' Siblings',
)
# even if we have to fail, let's report all conflicting siblings
# in subdatasets
failed = False
for r in ds.siblings(result_renderer='disabled',
return_type='generator',
recursive=recursive,
recursion_limit=recursion_limit):
log_progress(
lgr.info, pbar_id,
'Discovered sibling %s in dataset at %s',
r['name'], r['path'],
update=1,
increment=True)
if not r['type'] == 'sibling' or r['status'] != 'ok':
# this is an internal status query that has not consequence
# for the outside world. Be silent unless something useful
# can be said
#yield r
continue
if r['name'] == name:
res = get_status_dict(
status='error',
message="a sibling '{}' is already configured in "
"dataset {}".format(name, r['path']),
**res_kwargs,
)
failed = True
yield res
continue
if storage_name and r['name'] == storage_name:
res = get_status_dict(
status='error',
message="a sibling '{}' is already configured in "
"dataset {}".format(storage_name, r['path']),
**res_kwargs,
)
failed = True
yield res
continue
log_progress(
lgr.info, pbar_id,
'Finished checking pre-existing sibling configuration %s', ds,
)
for dpath, sname in _yield_ds_w_matching_siblings(
ds,
(name, storage_name),
recursive=recursive,
recursion_limit=recursion_limit):
res = get_status_dict(
status='error',
message=(
"a sibling %r is already configured in dataset %r",
sname, dpath),
type='sibling',
name=sname,
ds=ds,
**res_kwargs,
)
failed = True
yield res
if failed:
return

# TODO: - URL parsing + store creation needs to be RF'ed based on
# command abstractions
# - more generally consider store creation a dedicated command or
Expand Down
86 changes: 85 additions & 1 deletion datalad/distribution/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
normpath,
)
import posixpath

from datalad.log import log_progress
from datalad.support.annexrepo import AnnexRepo
from datalad.support.network import (
PathRI,
RI,
Expand Down Expand Up @@ -80,3 +81,86 @@ def _get_flexible_source_candidates(src, base_url=None, alternate_suffix=True):
'{0}/.git'.format(src.rstrip('/')))

return candidates



def _yield_ds_w_matching_siblings(
        ds, names, recursive=False, recursion_limit=None):
    """(Recursively) inspect a dataset for siblings with particular name(s)

    Parameters
    ----------
    ds: Dataset
      The dataset to be inspected.
    names: iterable
      Sibling names (str) to test for.
    recursive: bool, optional
      Whether to recurse into subdatasets.
    recursion_limit: int, optional
      Recursion depth limit.

    Yields
    ------
    str, str
      Path to the dataset with a matching sibling, and name of the matching
      sibling in that dataset.
    """

    def _discover_all_remotes(ds, refds, **kwargs):
        """Helper to be run on all relevant datasets via foreach_dataset

        Returns the list of all remote names (git remotes plus, for annex
        repos, special remotes) known to the dataset's repository.

        ``refds`` and ``**kwargs`` are part of the ``foreach_dataset``
        callable contract and intentionally unused here.
        """
        # Note, that `siblings` doesn't tell us about not enabled special
        # remotes. There could still be conflicting names we need to know
        # about in order to properly deal with the `existing` switch.

        repo = ds.repo
        # list of known git remotes
        if isinstance(repo, AnnexRepo):
            remotes = repo.get_remotes(exclude_special_remotes=True)
            # special remotes are reported separately; only their names
            # matter for conflict detection
            remotes.extend(
                v['name'] for v in repo.get_special_remotes().values())
        else:
            remotes = repo.get_remotes()
        return remotes

    if not recursive:
        # fast path: inspect only the given dataset itself
        for name in _discover_all_remotes(ds, ds):
            if name in names:
                yield ds.path, name
        return

    # in recursive mode this check could take a substantial amount of
    # time: employ a progress bar (or rather a counter, because we don't
    # know the total in advance
    pbar_id = 'check-siblings-{}'.format(id(ds))
    log_progress(
        lgr.info, pbar_id,
        'Start checking pre-existing sibling configuration %s', ds,
        label='Query siblings',
        unit=' Siblings',
    )

    for res in ds.foreach_dataset(
            _discover_all_remotes,
            recursive=recursive,
            recursion_limit=recursion_limit,
            return_type='generator',
            result_renderer='disabled',
    ):
        # unwind result generator; datasets that failed the query carry no
        # 'result' key and are silently skipped
        if 'result' in res:
            for name in res['result']:
                log_progress(
                    lgr.info, pbar_id,
                    'Discovered sibling %s in dataset at %s',
                    name, res['path'],
                    update=1,
                    increment=True)
                if name in names:
                    yield res['path'], name

    log_progress(
        lgr.info, pbar_id,
        'Finished checking pre-existing sibling configuration %s', ds,
    )

0 comments on commit 0654290

Please sign in to comment.