diff --git a/dvc/exceptions.py b/dvc/exceptions.py
index a352ada150..bca505c1a9 100644
--- a/dvc/exceptions.py
+++ b/dvc/exceptions.py
@@ -259,10 +259,11 @@ def __init__(self, etag, cached_etag):
         )
 
 
-class OutputFileMissingError(DvcException):
-    def __init__(self, path):
-        super(OutputFileMissingError, self).__init__(
-            "Can't find {} neither locally nor on remote".format(path)
+class FileMissingError(DvcException):
+    def __init__(self, path, cause=None):
+        super(FileMissingError, self).__init__(
+            "Can't find '{}' neither locally nor on remote".format(path),
+            cause=cause,
         )
 
 
diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py
index fed4935317..060f32cff9 100644
--- a/dvc/repo/__init__.py
+++ b/dvc/repo/__init__.py
@@ -12,7 +12,7 @@
 from dvc.exceptions import (
     NotDvcRepoError,
     OutputNotFoundError,
-    OutputFileMissingError,
+    FileMissingError,
 )
 from dvc.ignore import DvcIgnoreFilter
 from dvc.path_info import PathInfo
@@ -454,14 +454,29 @@ def is_dvc_internal(self, path):
     @contextmanager
     def open(self, path, remote=None, mode="r", encoding=None):
         """Opens a specified resource as a file descriptor"""
+        cause = None
         try:
-            with self._open(path, remote, mode, encoding) as fd:
+            out, = self.find_outs_by_path(path)
+        except OutputNotFoundError as e:
+            out = None
+            cause = e
+
+        if out and out.use_cache:
+            try:
+                with self._open_cached(out, remote, mode, encoding) as fd:
+                    yield fd
+                return
+            except FileNotFoundError as e:
+                raise FileMissingError(relpath(path, self.root_dir), cause=e)
+
+        if self.tree.exists(path):
+            with self.tree.open(path, mode, encoding) as fd:
                 yield fd
-        except FileNotFoundError:
-            raise OutputFileMissingError(relpath(path, self.root_dir))
+            return
 
-    def _open(self, path, remote=None, mode="r", encoding=None):
-        out, = self.find_outs_by_path(path)
+        raise FileMissingError(relpath(path, self.root_dir), cause=cause)
+
+    def _open_cached(self, out, remote=None, mode="r", encoding=None):
         if out.isdir():
             raise ValueError("Can't open a dir")
 
diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py
index 1493f40303..23653b2511 100644
--- a/dvc/scm/git/tree.py
+++ b/dvc/scm/git/tree.py
@@ -45,7 +45,8 @@ def __init__(self, git, rev):
     def tree_root(self):
         return self.git.working_dir
 
-    def open(self, path, binary=False):
+    def open(self, path, mode="r", encoding="utf-8"):
+        assert mode in {"r", "rb"}
 
         relative_path = relpath(path, self.git.working_dir)
 
@@ -61,9 +62,9 @@ def open(self, path, binary=False):
         # the `open()` behavior (since data_stream.read() returns bytes,
         # and `open` with default "r" mode returns str)
         data = obj.data_stream.read()
-        if binary:
+        if mode == "rb":
             return BytesIO(data)
-        return StringIO(data.decode("utf-8"))
+        return StringIO(data.decode(encoding))
 
     def exists(self, path):
         return self.git_object_by_path(path) is not None
diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py
index a90306f769..0931a59d0a 100644
--- a/dvc/scm/tree.py
+++ b/dvc/scm/tree.py
@@ -11,7 +11,7 @@ class BaseTree(object):
     def tree_root(self):
         pass
 
-    def open(self, path, binary=False):
+    def open(self, path, mode="r", encoding="utf-8"):
         """Open file and return a stream."""
 
     def exists(self, path):
@@ -42,11 +42,13 @@ def __init__(self, repo_root=os.getcwd()):
     def tree_root(self):
         return self.repo_root
 
-    def open(self, path, binary=False):
+    def open(self, path, mode="r", encoding="utf-8"):
         """Open file and return a stream."""
-        if binary:
-            return open(path, "rb")
-        return open(path, encoding="utf-8")
+        if "b" in mode:
+            # Binary streams carry no text encoding; passing one to open()
+            # raises ValueError, so drop the default "utf-8" here.
+            encoding = None
+        return open(path, mode=mode, encoding=encoding)
 
     def exists(self, path):
         """Test whether a path exists."""
diff --git a/tests/func/test_api.py b/tests/func/test_api.py
index 2bba711b1b..70e36c3ecb 100644
--- a/tests/func/test_api.py
+++ b/tests/func/test_api.py
@@ -1,8 +1,10 @@
+import os
+
 import pytest
 import shutil
 
 from dvc import api
-from dvc.exceptions import OutputFileMissingError
+from dvc.exceptions import FileMissingError
 from dvc.main import main
 from dvc.path_info import URLInfo
 from dvc.remote.config import RemoteConfig
@@ -134,7 +136,7 @@ def test_missing(repo_dir, dvc_repo, remote_url):
     # Remove cache to make foo missing
     shutil.rmtree(dvc_repo.cache.local.cache_dir)
 
-    with pytest.raises(OutputFileMissingError):
+    with pytest.raises(FileMissingError):
         api.read(repo_dir.FOO)
 
 
@@ -143,3 +145,30 @@ def _set_remote_url_and_commit(repo, remote_url):
     rconfig.modify("upstream", "url", remote_url)
     repo.scm.add([repo.config.config_file])
     repo.scm.commit("modify remote")
+
+
+def test_open_scm_controlled(dvc_repo, repo_dir):
+    stage, = dvc_repo.add(repo_dir.FOO)
+
+    stage_content = open(stage.path, "r").read()
+    with api.open(stage.path) as fd:
+        assert fd.read() == stage_content
+
+
+def test_open_not_cached(dvc_repo):
+    metric_file = "metric.txt"
+    metric_content = "0.6"
+    metric_code = "open('{}', 'w').write('{}')".format(
+        metric_file, metric_content
+    )
+    dvc_repo.run(
+        metrics_no_cache=[metric_file],
+        cmd=('python -c "{}"'.format(metric_code)),
+    )
+
+    with api.open(metric_file) as fd:
+        assert fd.read() == metric_content
+
+    os.remove(metric_file)
+    with pytest.raises(FileMissingError):
+        api.read(metric_file)