diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 8566c080b9..058c9e3938 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -47,7 +47,9 @@ def _make_repo(self, *, locked=True): d = self.def_repo rev = (d.get("rev_lock") if locked else None) or d.get("rev") - return external_repo(d["url"], rev=rev) + return external_repo( + d["url"], path=os.path.dirname(self.def_path), rev=rev + ) def _get_checksum(self, locked=True): from dvc.repo.tree import RepoTree @@ -56,7 +58,7 @@ def _get_checksum(self, locked=True): try: return repo.find_out_by_relpath(self.def_path).info["md5"] except OutputNotFoundError: - path = PathInfo(os.path.join(repo.root_dir, self.def_path)) + path = PathInfo(os.path.join(repo.scm.root_dir, self.def_path)) # we want stream but not fetch, so DVC out directories are # walked, but dir contents is not fetched diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 5f22321473..93bf8fe89a 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -27,15 +27,22 @@ @contextmanager -def external_repo(url, rev=None, for_write=False): +def external_repo(url, rev=None, path=None, for_write=False): logger.debug("Creating external repo %s@%s", url, rev) - path = _cached_clone(url, rev, for_write=for_write) + cloned_path = _cached_clone(url, rev, for_write=for_write) + # DVCRepo can find the path recursively + path = ( + os.path.join(cloned_path, path) + if path and not os.path.isabs(path) + else cloned_path + ) + if not rev: rev = "HEAD" try: - repo = ExternalRepo(path, url, rev, for_write=for_write) + repo = ExternalRepo(cloned_path, path, url, rev, for_write=for_write) except NotDvcRepoError: - repo = ExternalGitRepo(path, url, rev) + repo = ExternalGitRepo(cloned_path, url, rev) try: yield repo @@ -116,7 +123,7 @@ def fetch_external(self, paths: Iterable, **kwargs): download_results = [] failed = 0 - paths = [PathInfo(self.root_dir) / path for path in paths] + paths = [PathInfo(self.scm.root_dir) / path for path in paths] def download_update(result): download_results.append(result) @@ -145,19 +152,19 @@ def get_external(self, path, dest): else: # git-only erepo with no cache, just copy files directly # to dest - path = PathInfo(self.root_dir) / path + path = PathInfo(self.scm.root_dir) / path if not self.repo_tree.exists(path): raise PathMissingError(path, self.url) self.repo_tree.copytree(path, dest) class ExternalRepo(Repo, BaseExternalRepo): - def __init__(self, root_dir, url, rev, for_write=False): + def __init__(self, root_dir, path, url, rev, for_write=False): if for_write: super().__init__(root_dir) else: root_dir = os.path.realpath(root_dir) - super().__init__(root_dir, scm=Git(root_dir), rev=rev) + super().__init__(path, scm=Git(root_dir), rev=rev) self.url = url self._set_cache_dir() self._fix_upstream() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 685f179eeb..fadb71e21f 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -153,18 +153,20 @@ def __repr__(self): def find_root(cls, root=None, tree=None): root_dir = os.path.realpath(root or os.curdir) - if tree: - if tree.isdir(os.path.join(root_dir, cls.DVC_DIR)): - return root_dir - raise NotDvcRepoError(f"'{root}' does not contain DVC directory") - - if not os.path.isdir(root_dir): - raise NotDvcRepoError(f"directory '{root}' does not exist") + is_dir = tree.isdir if tree else os.path.isdir while True: dvc_dir = os.path.join(root_dir, cls.DVC_DIR) - if os.path.isdir(dvc_dir): + if is_dir(dvc_dir): return root_dir + if ( + tree + and os.path.dirname(os.path.abspath(tree.tree_root)) + == root_dir + ): + raise NotDvcRepoError( + f"'{root}' does not contain DVC directory" + ) if os.path.ismount(root_dir): break root_dir = os.path.dirname(root_dir) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 1141b00772..7bc885825c 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -36,7 +36,9 @@ def get(url, path, out=None, rev=None): dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) try: - with external_repo(url=url, rev=rev) as repo: + with external_repo( + url=url, rev=rev, path=os.path.dirname(path) + ) as repo: if hasattr(repo, "cache"): repo.cache.local.cache_dir = tmp_dir diff --git a/dvc/repo/ls.py b/dvc/repo/ls.py index e02d52116a..ab5006f7ab 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -1,3 +1,5 @@ +import os + from dvc.exceptions import PathMissingError from dvc.path_info import PathInfo @@ -29,8 +31,10 @@ def ls( """ from dvc.external_repo import external_repo - with external_repo(url, rev) as repo: - path_info = PathInfo(repo.root_dir) + d_path = os.path.dirname(path) if path else None + + with external_repo(url, rev, path=d_path if d_path else None) as repo: + path_info = PathInfo(repo.scm.root_dir) if path: path_info /= path