Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NF: Factory function to get Repo instances from a path #4273

Merged
merged 1 commit into from Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
73 changes: 73 additions & 0 deletions datalad/core/local/repo.py
@@ -0,0 +1,73 @@
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
""" Core repository-related functionality

"""

from datalad.support.exceptions import (
InvalidGitRepositoryError,
InvalidAnnexRepositoryError,
NoSuchPathError,
)

import logging
# Module-level logger, named after this module's dotted import path.
lgr = logging.getLogger('datalad.core.local.repo')

# Names exported by `from datalad.core.local.repo import *`.
__all__ = ["repo_from_path"]


def repo_from_path(path):
    """Build the Repo object that represents the repository at `path`.

    The candidate classes are probed in a fixed order: `AnnexRepo` first,
    then `GitRepo`.  The first class whose `is_valid_repo()` check accepts
    the path and whose constructor succeeds provides the result.

    Parameters
    ----------
    path : path-like
      Root path of the repository.

    Returns
    -------
    Repo
      Instance of the first candidate class that could be instantiated
      for `path`.

    Raises
    ------
    ValueError
      If none of the candidate classes yielded an instance for `path`.
    """
    # Imports stay function-local for now, until the module layout is settled.
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo

    # (class, kwargs for the validity check, kwargs for the constructor)
    candidates = (
        # A non-initialized annex is acceptable: the goal is only to pick
        # the class that matches what is on disk, not to set anything up.
        (AnnexRepo, {'allow_noninitialized': True}, {'init': False}),
        (GitRepo, {}, {}),
    )
    # Errors that indicate the type guess was wrong; the next candidate
    # is tried when one of these is raised.
    wrong_guess_errors = (
        InvalidGitRepositoryError,
        NoSuchPathError,
        InvalidAnnexRepositoryError,
    )

    found = None
    for repo_cls, check_kwargs, init_kwargs in candidates:
        if cls_accepts := repo_cls.is_valid_repo(path, **check_kwargs):
            try:
                lgr.log(5, "Detected %s at %s", repo_cls, path)
                found = repo_cls(path, create=False, **init_kwargs)
            except wrong_guess_errors as exc:
                lgr.log(
                    5,
                    "Ignore exception after inappropriate repository type guess: "
                    "%s", exc)
            else:
                # Instantiation worked; no need to probe further classes.
                break

    if found is not None:
        return found
    raise ValueError('No repository at {}'.format(path))
40 changes: 9 additions & 31 deletions datalad/distribution/dataset.py
Expand Up @@ -22,23 +22,19 @@

from datalad import cfg
from datalad.config import ConfigManager
from datalad.dochelpers import exc_str
from datalad.core.local.repo import repo_from_path
from datalad.support.annexrepo import AnnexRepo
from datalad.support.constraints import Constraint
# DueCredit
from datalad.support.due import due
from datalad.support.due_utils import duecredit_dataset
from datalad.support.exceptions import (
InvalidAnnexRepositoryError,
InvalidGitRepositoryError,
NoDatasetArgumentFound,
NoSuchPathError,
)
from datalad.support.gitrepo import (
GitRepo,
)
from datalad.support.repo import PathBasedFlyweight
from datalad.support.network import RI
from datalad.support import path as op

import datalad.utils as ut
Expand Down Expand Up @@ -284,34 +280,16 @@ def repo(self):
# be the last reference, which would lead to those objects being
# destroyed and therefore the constructor call would result in an
# actually new instance. This is unnecessarily costly.
valid = False
for cls, ckw, kw in (
# Non-initialized is okay. We want to figure the correct instance to represent what's there - that's it.
(AnnexRepo, {'allow_noninitialized': True}, {'init': False}),
(GitRepo, {}, {})
):
if cls.is_valid_repo(self._path, **ckw):
try:
lgr.log(5, "Detected %s at %s", cls, self._path)
self._repo = cls(self._path, create=False, **kw)
valid = True
break
except (InvalidGitRepositoryError, NoSuchPathError,
InvalidAnnexRepositoryError) as exc:
lgr.log(5,
"Oops -- guess on repo type was wrong?: %s",
exc_str(exc))

if not valid:
try:
self._repo = repo_from_path(self._path)
except ValueError:
lgr.log(5, "Failed to detect a valid repo at %s", self.path)
self._repo = None
return

if self._repo is None:
# Often .repo is requested to 'sense' if anything is installed
# under, and if so -- to proceed forward. Thus log here only
# at DEBUG level and if necessary "complaint upstairs"
lgr.log(5, "Failed to detect a valid repo at %s", self.path)
elif due.active:
# TODO: Figure out, when exactly this is needed. Don't think it makes sense to do this for every dataset,
if due.active:
# TODO: Figure out, when exactly this is needed. Don't think it
# makes sense to do this for every dataset,
# no matter what => we want .repo to be as cheap as it gets.
# Makes sense only on installed dataset - @never_fail'ed
duecredit_dataset(self)
Expand Down