Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions dvc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
from contextlib import _GeneratorContextManager as GCM
from contextlib import contextmanager

from dvc.exceptions import DvcException, NotDvcRepoError
from dvc.external_repo import external_repo
from dvc.exceptions import (
DvcException,
FileMissingError,
NotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import ExternalDVCRepo, ExternalGitRepo, external_repo
from dvc.repo import Repo


Expand All @@ -26,10 +31,14 @@ def get_url(path, repo=None, rev=None, remote=None):
directory in the remote storage.
"""
with _make_repo(repo, rev=rev) as _repo:
if not isinstance(_repo, Repo):
raise UrlNotDvcRepoError(_repo.url) # pylint: disable=no-member
out = _repo.find_out_by_relpath(path)
remote_obj = _repo.cloud.get_remote(remote)
# pylint: disable=no-member
path = os.path.join(_repo.root_dir, path)
is_erepo = isinstance(_repo, (ExternalDVCRepo, ExternalGitRepo))
r = _repo.in_repo(path) if is_erepo else _repo
if is_erepo and not r:
raise UrlNotDvcRepoError(_repo.url)
out = r.find_out_by_relpath(path)
remote_obj = r.cloud.get_remote(remote)
return str(remote_obj.tree.hash_to_path_info(out.checksum))


Expand Down Expand Up @@ -74,10 +83,17 @@ def __getattr__(self, name):

def _open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
with _make_repo(repo, rev=rev) as _repo:
with _repo.open_by_relpath(
path, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd
is_erepo = not isinstance(_repo, Repo)
try:
with _repo.repo_tree.open_by_relpath(
path, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd
except FileNotFoundError as exc:
if is_erepo:
# pylint: disable=no-member
raise PathMissingError(path, _repo.url) from exc
raise FileMissingError(path) from exc


def read(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
Expand All @@ -101,5 +117,5 @@ def _make_repo(repo_url=None, rev=None):
return
except NotDvcRepoError:
pass # fallthrough to external_repo
with external_repo(url=repo_url, rev=rev) as repo:
with external_repo(url=repo_url, rev=rev, stream=True) as repo:
yield repo
27 changes: 6 additions & 21 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from voluptuous import Required

from dvc.exceptions import OutputNotFoundError
from dvc.path_info import PathInfo

from .local import LocalDependency
Expand Down Expand Up @@ -42,30 +41,17 @@ def repo_pair(self):
def __str__(self):
return "{} ({})".format(self.def_path, self.def_repo[self.PARAM_URL])

def _make_repo(self, *, locked=True):
def _make_repo(self, *, locked=True, **kwargs):
from dvc.external_repo import external_repo

d = self.def_repo
rev = (d.get("rev_lock") if locked else None) or d.get("rev")
return external_repo(d["url"], rev=rev)
return external_repo(d["url"], rev=rev, **kwargs)

def _get_checksum(self, locked=True):
from dvc.repo.tree import RepoTree

with self._make_repo(locked=locked) as repo:
try:
return repo.find_out_by_relpath(self.def_path).info["md5"]
except OutputNotFoundError:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))

# we want stream but not fetch, so DVC out directories are
# walked, but dir contents is not fetched
tree = RepoTree(repo, stream=True)

# We are polluting our repo cache with some dir listing here
if tree.isdir(path):
return self.repo.cache.local.tree.get_hash(path, tree=tree)
return tree.get_file_hash(path)
with self._make_repo(locked=locked, stream=True) as repo:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))
return repo.get_checksum(path, self.repo.cache.local)

def status(self):
current_checksum = self._get_checksum(locked=True)
Expand All @@ -88,8 +74,7 @@ def download(self, to):
self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()

cache = self.repo.cache.local
with repo.use_cache(cache):
_, _, cache_infos = repo.fetch_external([self.def_path])
_, _, cache_infos = repo.fetch_external([self.def_path], cache)
cache.checkout(to.path_info, cache_infos[0])

def update(self, rev=None):
Expand Down
Loading