Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,11 @@ def __init__(self, etag, cached_etag):
)


class OutputFileMissingError(DvcException):
def __init__(self, path):
super(OutputFileMissingError, self).__init__(
"Can't find {} neither locally nor on remote".format(path)
class FileMissingError(DvcException):
def __init__(self, path, cause=None):
super(FileMissingError, self).__init__(
"Can't find '{}' neither locally nor on remote".format(path),
cause=cause,
)


Expand Down
27 changes: 21 additions & 6 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dvc.exceptions import (
NotDvcRepoError,
OutputNotFoundError,
OutputFileMissingError,
FileMissingError,
)
from dvc.ignore import DvcIgnoreFilter
from dvc.path_info import PathInfo
Expand Down Expand Up @@ -454,14 +454,29 @@ def is_dvc_internal(self, path):
@contextmanager
def open(self, path, remote=None, mode="r", encoding=None):
"""Opens a specified resource as a file descriptor"""
cause = None
try:
with self._open(path, remote, mode, encoding) as fd:
out, = self.find_outs_by_path(path)
except OutputNotFoundError as e:
out = None
cause = e

if out and out.use_cache:
try:
with self._open_cached(out, remote, mode, encoding) as fd:
yield fd
return
except FileNotFoundError as e:
raise FileMissingError(relpath(path, self.root_dir), cause=e)

if self.tree.exists(path):
with self.tree.open(path, mode, encoding) as fd:
yield fd
except FileNotFoundError:
raise OutputFileMissingError(relpath(path, self.root_dir))
return

def _open(self, path, remote=None, mode="r", encoding=None):
out, = self.find_outs_by_path(path)
raise FileMissingError(relpath(path, self.root_dir), cause=cause)

def _open_cached(self, out, remote=None, mode="r", encoding=None):
if out.isdir():
raise ValueError("Can't open a dir")

Expand Down
7 changes: 4 additions & 3 deletions dvc/scm/git/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __init__(self, git, rev):
def tree_root(self):
return self.git.working_dir

def open(self, path, binary=False):
def open(self, path, mode="r", encoding="utf-8"):
assert mode in {"r", "rb"}

relative_path = relpath(path, self.git.working_dir)

Expand All @@ -61,9 +62,9 @@ def open(self, path, binary=False):
# the `open()` behavior (since data_stream.read() returns bytes,
# and `open` with default "r" mode returns str)
data = obj.data_stream.read()
if binary:
if mode == "rb":
return BytesIO(data)
return StringIO(data.decode("utf-8"))
return StringIO(data.decode(encoding))

def exists(self, path):
return self.git_object_by_path(path) is not None
Expand Down
8 changes: 3 additions & 5 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class BaseTree(object):
def tree_root(self):
pass

def open(self, path, binary=False):
def open(self, path, mode="r", encoding="utf-8"):
"""Open file and return a stream."""

def exists(self, path):
Expand Down Expand Up @@ -42,11 +42,9 @@ def __init__(self, repo_root=os.getcwd()):
def tree_root(self):
return self.repo_root

def open(self, path, binary=False):
def open(self, path, mode="r", encoding="utf-8"):
"""Open file and return a stream."""
if binary:
return open(path, "rb")
return open(path, encoding="utf-8")
return open(path, mode=mode, encoding=encoding)

def exists(self, path):
"""Test whether a path exists."""
Expand Down
33 changes: 31 additions & 2 deletions tests/func/test_api.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os

import pytest
import shutil

from dvc import api
from dvc.exceptions import OutputFileMissingError
from dvc.exceptions import FileMissingError
from dvc.main import main
from dvc.path_info import URLInfo
from dvc.remote.config import RemoteConfig
Expand Down Expand Up @@ -134,7 +136,7 @@ def test_missing(repo_dir, dvc_repo, remote_url):
# Remove cache to make foo missing
shutil.rmtree(dvc_repo.cache.local.cache_dir)

with pytest.raises(OutputFileMissingError):
with pytest.raises(FileMissingError):
api.read(repo_dir.FOO)


Expand All @@ -143,3 +145,30 @@ def _set_remote_url_and_commit(repo, remote_url):
rconfig.modify("upstream", "url", remote_url)
repo.scm.add([repo.config.config_file])
repo.scm.commit("modify remote")


def test_open_scm_controlled(dvc_repo, repo_dir):
stage, = dvc_repo.add(repo_dir.FOO)

stage_content = open(stage.path, "r").read()
with api.open(stage.path) as fd:
assert fd.read() == stage_content


def test_open_not_cached(dvc_repo):
metric_file = "metric.txt"
metric_content = "0.6"
metric_code = "open('{}', 'w').write('{}')".format(
metric_file, metric_content
)
dvc_repo.run(
metrics_no_cache=[metric_file],
cmd=('python -c "{}"'.format(metric_code)),
)

with api.open(metric_file) as fd:
assert fd.read() == metric_content

os.remove(metric_file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I don't think there is another way to test it. We want to test whether dvc will inform us that some output cannot be checked out. If the file is in a repo, it seems natural that it will be used, instead of checking whether it is in the cache, don't you think?

with pytest.raises(FileMissingError):
api.read(metric_file)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two tests are almost the same. I would say simply move:

    os.remove(metric_file)
    with pytest.raises(NotCachedOutputFileMissingError):
        api.read(metric_file)

to the end of the first one and remove the second test.

Also, why do you create new code file, while we already have code.py?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New code is required due to issue regarding original code described in #2574