Skip to content

Commit

Permalink
RF: Move clone-internal helper to clone.py
Browse files Browse the repository at this point in the history
Not used elsewhere, not actually useful in a generic context. Also
move the associated tests to improve modularity.
  • Loading branch information
mih committed Oct 14, 2019
1 parent 9d8cf69 commit 8f9ae55
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 158 deletions.
139 changes: 134 additions & 5 deletions datalad/distribution/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os
import re
from collections import OrderedDict
from urllib.parse import unquote as urlunquote

from datalad.interface.base import Interface
from datalad.interface.utils import eval_results
Expand All @@ -26,17 +27,28 @@
GitRepo,
GitCommandError,
)
from datalad.support.annexrepo import AnnexRepo
from datalad.support.constraints import (
EnsureNone,
EnsureStr,
EnsureKeyChoice,
)
from datalad.support.param import Parameter
from datalad.support.network import get_local_file_url
from datalad.dochelpers import exc_str
from datalad.support.network import (
get_local_file_url,
URL,
RI,
DataLadRI,
)
from datalad.dochelpers import (
exc_str,
single_or_plural,
)
from datalad.utils import (
rmtree,
assure_list,
assure_bool,
knows_annex,
)

from datalad.distribution.dataset import (
Expand All @@ -48,10 +60,7 @@
)
from datalad.distribution.utils import (
_get_git_url_from_source,
_get_tracking_source,
_get_flexible_source_candidates,
_handle_possible_annex_dataset,
_get_installationpath_from_url,
)

__docformat__ = 'restructuredtext'
Expand Down Expand Up @@ -309,3 +318,123 @@ def __call__(
# subdataset clone down below will not alter the Git-state of the
# parent
yield get_status_dict(status='ok', **status_kwargs)


def _handle_possible_annex_dataset(dataset, reckless, description=None):
    """Annex-init a freshly obtained dataset if it "knows annex".

    Provides additional tune-up for what is possibly an annex repo, e.g.
    "enables" reckless mode and sets up a description. Afterwards the
    special remotes known to the annex are inspected and the user is
    informed about those that have no matching configured remote yet.

    Parameters
    ----------
    dataset : Dataset
      Dataset to process; its `path` and `config` attributes are used.
    reckless : bool
      If true, `annex.hardlink` is set to 'true' in the local config and
      the repository is marked as untrusted ('untrust here').
    description : str, optional
      Description to pass on to the annex initialization.

    Returns
    -------
    None
    """
    # in any case check whether we need to annex-init the installed thing:
    if not knows_annex(dataset.path):
        # not for us
        return

    # init annex when traces of a remote annex can be detected
    if reckless:
        lgr.debug(
            "Instruct annex to hardlink content in %s from local "
            "sources, if possible (reckless)", dataset.path)
        dataset.config.add(
            'annex.hardlink', 'true', where='local', reload=True)
    lgr.debug("Initializing annex repo at %s", dataset.path)
    # XXX this is rather convoluted, init does init, but cannot
    # set a description without `create=True`
    repo = AnnexRepo(dataset.path, init=True)
    # so do manually see #1403
    if description:
        repo._init(description=description)
    if reckless:
        repo._run_annex_command('untrust', annex_options=['here'])

    srs = {True: [], False: []}  # special remotes by "autoenable" key
    remote_uuids = None  # might be necessary to discover known UUIDs

    for uuid, config in repo.get_special_remotes().items():
        sr_name = config.get('name', None)
        if sr_name is None:
            # Without a name the remote cannot be referred to in the
            # messages below, and joining a None into the message would
            # raise TypeError. This is only about informing the user, so
            # skipping the entry is harmless.
            lgr.warning(
                'Ignoring special remote %s because it does not have a name. '
                'Known information: %s',
                uuid, config)
            continue
        sr_autoenable = config.get('autoenable', False)
        try:
            sr_autoenable = assure_bool(sr_autoenable)
        except ValueError:
            # Be resilient against misconfiguration.  Here it is only about
            # informing the user, so no harm would be done
            lgr.warning(
                'Failed to process "autoenable" value %r for sibling %s in '
                'dataset %s as bool.  You might need to enable it later '
                'manually and/or fix it up to avoid this message in the future.',
                sr_autoenable, sr_name, dataset.path)
            continue

        # determine either there is a registered remote with matching UUID
        if uuid:
            if remote_uuids is None:
                # computed lazily, and only once, on first need
                remote_uuids = {
                    repo.config.get('remote.%s.annex-uuid' % r)
                    for r in repo.get_remotes()
                }
            if uuid not in remote_uuids:
                srs[sr_autoenable].append(sr_name)

    if srs[True]:
        lgr.debug(
            "configuration for %s %s added because of autoenable,"
            " but no UUIDs for them yet known for dataset %s",
            # since we are only at debug level, we could call things their
            # proper names
            single_or_plural("special remote", "special remotes", len(srs[True]), True),
            ", ".join(srs[True]),
            dataset.path
        )

    if srs[False]:
        # if has no auto-enable special remotes
        lgr.info(
            'access to %s %s not auto-enabled, enable with:\n\t\tdatalad siblings -d "%s" enable -s %s',
            # but since humans might read it, we better confuse them with our
            # own terms!
            single_or_plural("dataset sibling", "dataset siblings", len(srs[False]), True),
            ", ".join(srs[False]),
            dataset.path,
            srs[False][0] if len(srs[False]) == 1 else "SIBLING",
        )


def _get_tracking_source(ds):
    """Return name and URL of a potentially configured tracking remote.

    The repository of `ds` is asked for its tracking branch. If that
    reports a remote name, the (non-push) URL of this remote is looked up.

    Returns
    -------
    tuple
      `(remote_name, remote_url)`; `remote_url` is the empty string when
      no remote name was reported.
    """
    repo = ds.repo
    # only the remote matters here, the tracking branch itself is not used
    remote, _ = repo.get_tracking_branch()
    if not remote:
        # TODO: better default `None`? Check where we might rely on '':
        return remote, ''
    # if we have a remote, report the location of that remote
    return remote, repo.get_remote_url(remote, push=False)


def _get_installationpath_from_url(url):
    """Derive a relative path from the trailing end of a URL.

    This can be used to determine an installation path of a Dataset
    from a URL, analog to what `git clone` does: the last path component
    is taken, and a trailing '.git' (either as a suffix or as a separate
    final component) is dropped. If a URL has no path, its hostname is
    used instead.
    """
    parsed = RI(url)
    if not isinstance(parsed, (URL, DataLadRI)):
        # anything that is not a URL is taken verbatim
        candidate = url
    else:
        # decode only if URL
        stripped = parsed.path.rstrip('/')
        candidate = urlunquote(stripped) if stripped else parsed.hostname
    candidate = candidate.rstrip('/')
    if '/' in candidate:
        pieces = candidate.split('/')
        # for 'something/.git' the component before '.git' names the thing
        candidate = pieces[-2] if pieces[-1] == '.git' else pieces[-1]
    return candidate[:-4] if candidate.endswith('.git') else candidate
27 changes: 27 additions & 0 deletions datalad/distribution/tests/test_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from datalad.tests.utils import use_cassette
from datalad.tests.utils import skip_if_no_network
from datalad.tests.utils import skip_if
from ..clone import _get_installationpath_from_url

from ..dataset import Dataset

Expand Down Expand Up @@ -367,3 +368,29 @@ def test_autoenabled_remote_msg(path):
res = clone('///repronim/containers', path)
assert_status('ok', res)
assert_not_in("not auto-enabled", cml.out)


def test_installationpath_from_url():
    # different spellings of one and the same trailing component
    plain_cases = (
        'lastbit',
        'lastbit/',
        '/lastbit',
        'lastbit.git',
        'lastbit.git/',
        'http://example.com/lastbit',
        'http://example.com/lastbit.git',
        'http://lastbit:8000',
    )
    for candidate in plain_cases:
        eq_(_get_installationpath_from_url(candidate), 'lastbit')

    # we need to deal with quoted urls
    quoted_cases = (
        # although some docs say that space could've been replaced with +
        'http://localhost:8000/+last%20bit',
        'http://localhost:8000/%2Blast%20bit',
        '///%2Blast%20bit',
        '///d1/%2Blast%20bit',
        '///d1/+last bit',
    )
    for candidate in quoted_cases:
        eq_(_get_installationpath_from_url(candidate), '+last bit')

    # and the hostname alone
    for candidate in ("http://hostname", "http://hostname/"):
        eq_(_get_installationpath_from_url(candidate), 'hostname')
27 changes: 0 additions & 27 deletions datalad/distribution/tests/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,40 +75,13 @@
from datalad.utils import rmtree

from ..dataset import Dataset
from ..utils import _get_installationpath_from_url
from ..utils import _get_git_url_from_source

###############
# Test helpers:
###############


def test_installationpath_from_url():
    # different spellings of one and the same trailing component
    plain_cases = (
        'lastbit',
        'lastbit/',
        '/lastbit',
        'lastbit.git',
        'lastbit.git/',
        'http://example.com/lastbit',
        'http://example.com/lastbit.git',
        'http://lastbit:8000',
    )
    for candidate in plain_cases:
        eq_(_get_installationpath_from_url(candidate), 'lastbit')

    # we need to deal with quoted urls
    quoted_cases = (
        # although some docs say that space could've been replaced with +
        'http://localhost:8000/+last%20bit',
        'http://localhost:8000/%2Blast%20bit',
        '///%2Blast%20bit',
        '///d1/%2Blast%20bit',
        '///d1/+last bit',
    )
    for candidate in quoted_cases:
        eq_(_get_installationpath_from_url(candidate), '+last bit')

    # and the hostname alone
    for candidate in ("http://hostname", "http://hostname/"):
        eq_(_get_installationpath_from_url(candidate), 'hostname')


def test_get_git_url_from_source():

# resolves datalad RIs:
Expand Down
Loading

0 comments on commit 8f9ae55

Please sign in to comment.