Skip to content

Commit

Permalink
Merge branch 'master' into opt-validate
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Nov 20, 2019
2 parents 3bffe9d + f563892 commit a1433ba
Show file tree
Hide file tree
Showing 21 changed files with 184 additions and 74 deletions.
15 changes: 13 additions & 2 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ class OutputNotFoundError(DvcException):
output (unicode): path to the file/directory.
"""

def __init__(self, output):
def __init__(self, output, repo=None):
self.output = output
self.repo = repo
super(OutputNotFoundError, self).__init__(
"unable to find DVC-file with output '{path}'".format(
path=relpath(output)
path=relpath(self.output)
)
)

Expand Down Expand Up @@ -338,3 +340,12 @@ def __init__(self, url, cause=None):
),
cause=cause,
)


class NoOutputInExternalRepoError(DvcException):
    """Error for an output that cannot be found in an external repository.

    Args:
        path: path to the missing output.
        external_repo_path: base path that ``path`` is shown relative to in
            the error message.
        external_repo_url: URL of the external repository, included in the
            error message.
    """

    def __init__(self, path, external_repo_path, external_repo_url):
        # Report the output relative to the external repo, not to the cwd.
        output = relpath(path, external_repo_path)
        message = "Output '{}' not found in target repository '{}'".format(
            output, external_repo_url
        )
        super(NoOutputInExternalRepoError, self).__init__(message)
8 changes: 7 additions & 1 deletion dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

from dvc.config import NoRemoteError
from dvc.exceptions import RemoteNotSpecifiedInExternalRepoError
from dvc.utils import remove
from dvc.exceptions import NoOutputInExternalRepoError
from dvc.exceptions import OutputNotFoundError
from dvc.utils.fs import remove


REPO_CACHE = {}
Expand All @@ -25,6 +27,10 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
yield repo
except NoRemoteError as exc:
raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc)
except OutputNotFoundError as exc:
if exc.repo is repo:
raise NoOutputInExternalRepoError(exc.output, repo.root_dir, url)
raise
repo.close()


Expand Down
2 changes: 1 addition & 1 deletion dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@
from dvc.utils import file_md5
from dvc.utils import makedirs
from dvc.utils import relpath
from dvc.utils import remove
from dvc.utils import tmp_fname
from dvc.utils import walk_files
from dvc.utils.compat import fspath_py35
from dvc.utils.compat import open
from dvc.utils.compat import str
from dvc.utils.fs import move
from dvc.utils.fs import remove

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def func(out):

matched = list(filter(func, outs))
if not matched:
raise OutputNotFoundError(path)
raise OutputNotFoundError(path, self)

return matched

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/destroy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dvc.utils import remove
from dvc.utils.fs import remove


def destroy(self):
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from dvc.path_info import PathInfo
from dvc.stage import Stage
from dvc.state import StateNoop
from dvc.utils import remove
from dvc.utils import resolve_output
from dvc.utils.fs import remove

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dvc.scm import SCM
from dvc.utils import boxify
from dvc.utils import relpath
from dvc.utils import remove
from dvc.utils.fs import remove

logger = logging.getLogger(__name__)

Expand Down
40 changes: 27 additions & 13 deletions dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from dvc.utils.collections import apply_diff
from dvc.utils.fs import contains_symlink_up_to
from dvc.utils.stage import dump_stage_file
from dvc.utils.stage import load_stage_fd
from dvc.utils.stage import parse_stage
from dvc.utils.stage import parse_stage_for_update


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -166,8 +167,8 @@ def __init__(
md5=None,
locked=False,
tag=None,
state=None,
always_changed=False,
stage_text=None,
):
if deps is None:
deps = []
Expand All @@ -184,7 +185,7 @@ def __init__(
self.locked = locked
self.tag = tag
self.always_changed = always_changed
self._state = state or {}
self._stage_text = stage_text

def __repr__(self):
return "Stage: '{path}'".format(
Expand Down Expand Up @@ -355,7 +356,7 @@ def update(self):

@staticmethod
def validate(d, fname=None):
from dvc.utils import convert_to_unicode
from dvc.utils.compat import convert_to_unicode

try:
Stage.COMPILED_SCHEMA(convert_to_unicode(d))
Expand Down Expand Up @@ -609,10 +610,8 @@ def load(repo, fname):
Stage._check_isfile(repo, fname)

with repo.tree.open(fname) as fd:
d = load_stage_fd(fd, fname)
# Making a deepcopy since the original structure
# loses keys in deps and outs load
state = copy.deepcopy(d)
stage_text = fd.read()
d = parse_stage(stage_text, fname)

Stage.validate(d, fname=relpath(fname))
path = os.path.abspath(fname)
Expand All @@ -630,7 +629,8 @@ def load(repo, fname):
locked=d.get(Stage.PARAM_LOCKED, False),
tag=tag,
always_changed=d.get(Stage.PARAM_ALWAYS_CHANGED, False),
state=state,
# We store stage text to apply updates to the same structure
stage_text=stage_text,
)

stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
Expand All @@ -653,7 +653,6 @@ def dumpd(self):
Stage.PARAM_LOCKED: self.locked,
Stage.PARAM_DEPS: [d.dumpd() for d in self.deps],
Stage.PARAM_OUTS: [o.dumpd() for o in self.outs],
Stage.PARAM_META: self._state.get("meta"),
Stage.PARAM_ALWAYS_CHANGED: self.always_changed,
}.items()
if value
Expand All @@ -667,9 +666,24 @@ def dump(self):
logger.debug(
"Saving information to '{file}'.".format(file=relpath(fname))
)
d = self.dumpd()
apply_diff(d, self._state)
dump_stage_file(fname, self._state)
state = self.dumpd()

# When we load a stage we parse yaml with a fast parser, which strips
# off all the comments and formatting. To retain those on update we do
# a trick here:
# - reparse the same yaml text with a slow but smart ruamel yaml parser
# - apply changes to a returned structure
# - serialize it
if self._stage_text is not None:
saved_state = parse_stage_for_update(self._stage_text, fname)
# Stage doesn't work with meta in any way, so .dumpd() doesn't
# have it. We simply copy it over.
if "meta" in saved_state:
state["meta"] = saved_state["meta"]
apply_diff(state, saved_state)
state = saved_state

dump_stage_file(fname, state)

self.repo.scm.track_file(relpath(fname))

Expand Down
2 changes: 1 addition & 1 deletion dvc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
from dvc.exceptions import DvcException
from dvc.utils import current_timestamp
from dvc.utils import relpath
from dvc.utils import remove
from dvc.utils import to_chunks
from dvc.utils.compat import fspath_py35
from dvc.utils.compat import is_py2
from dvc.utils.compat import urlencode
from dvc.utils.compat import urlunparse
from dvc.utils.fs import get_inode
from dvc.utils.fs import get_mtime_and_size
from dvc.utils.fs import remove


SQLITE_MAX_VARIABLES_NUMBER = 999
Expand Down
47 changes: 5 additions & 42 deletions dvc/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
"""Helpers for other modules."""
from __future__ import unicode_literals

import errno
import hashlib
import json
import logging
import math
import os
import re
import shutil
import stat
import sys
import time

Expand Down Expand Up @@ -152,40 +149,16 @@ def makedirs(path, exist_ok=False, mode=None):
_makedirs(path, exist_ok=exist_ok)
return

umask = os.umask(0)
# Use umask to set the proper permissions: since Python 3.7 the `mode`
# argument of `makedirs` no longer affects the file permission bits of
# newly-created intermediate-level directories.
umask = os.umask(0o777 - mode)
try:
_makedirs(path, exist_ok=exist_ok, mode=mode)
_makedirs(path, exist_ok=exist_ok)
finally:
os.umask(umask)


def _chmod(func, p, excinfo):
    """Add the write bit to ``p`` and then call ``func(p)``.

    The three-argument signature is the one ``shutil.rmtree`` uses for its
    ``onerror`` callback; ``excinfo`` is accepted but never read.

    Raises:
        OSError: from ``os.lstat``, from ``os.chmod`` (unless the errno is
            ENOENT or EPERM), or from ``func`` itself.
    """
    writable_mode = os.lstat(p).st_mode | stat.S_IWRITE

    try:
        os.chmod(p, writable_mode)
    except OSError as exc:
        # broken symlink or file is not owned by us
        tolerated = (errno.ENOENT, errno.EPERM)
        if exc.errno not in tolerated:
            raise

    func(p)


def remove(path):
    """Remove a file, a symlink or a whole directory tree.

    A path that does not exist is silently ignored. Files and symlinks are
    passed through ``_chmod`` before being unlinked; for directory trees
    ``_chmod`` is installed as the ``shutil.rmtree`` error handler.

    Args:
        path: path to remove.

    Raises:
        OSError: for any failure other than ``errno.ENOENT``.
    """
    logger.debug("Removing '{}'".format(relpath(path)))

    try:
        # os.path.isdir() follows symlinks, but shutil.rmtree() refuses to
        # operate on a symlink to a directory and reports that through
        # `onerror`, where `_chmod` would merely re-run the failed
        # `os.path.islink` check and leave the link in place. Such links are
        # therefore unlinked like regular files instead.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, onerror=_chmod)
        else:
            _chmod(os.unlink, path, None)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise


def _split(list_to_split, chunk_size):
return [
list_to_split[i : i + chunk_size]
Expand Down Expand Up @@ -292,16 +265,6 @@ def fix_env(env=None):
return env


def convert_to_unicode(data):
    """Recursively pass every ``builtin_str`` found in ``data`` through ``str``.

    Dicts (keys and values), lists and tuples are rebuilt with converted
    contents, keeping the container type; any other value is returned as is.
    """
    if isinstance(data, builtin_str):
        return str(data)
    if isinstance(data, dict):
        return dict(
            (convert_to_unicode(key), convert_to_unicode(value))
            for key, value in data.items()
        )
    if isinstance(data, (list, tuple)):
        container = type(data)
        return container(convert_to_unicode(item) for item in data)
    return data


def tmp_fname(fname):
""" Temporary name for a partial download """
return fspath(fname) + "." + str(uuid()) + ".tmp"
Expand Down
14 changes: 12 additions & 2 deletions dvc/utils/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ def _makedirs(name, mode=0o777, exist_ok=False):
if e.errno != errno.EEXIST:
raise
cdir = os.curdir
if isinstance(tail, bytes):
cdir = bytes(os.curdir, "ASCII")
if tail == cdir:
return
try:
Expand Down Expand Up @@ -143,6 +141,15 @@ def __enter__(self):
def __exit__(self, *args):
self.close()

def convert_to_unicode(data):
    """Return ``data`` with each nested ``builtin_str`` converted via ``str``.

    Walks dicts (both keys and values), lists and tuples recursively and
    rebuilds them with the same container type; other values pass through
    untouched.
    """
    if isinstance(data, builtin_str):
        return str(data)

    if isinstance(data, dict):
        converted_items = (
            (convert_to_unicode(k), convert_to_unicode(v))
            for k, v in data.items()
        )
        return dict(converted_items)

    if isinstance(data, (list, tuple)):
        return type(data)(convert_to_unicode(element) for element in data)

    return data


elif is_py3:
import pathlib # noqa: F401
Expand Down Expand Up @@ -170,6 +177,9 @@ def __exit__(self, *args):
range = range # noqa: F821
FileNotFoundError = FileNotFoundError

def convert_to_unicode(data):
    """Return ``data`` unchanged; this variant performs no conversion."""
    return data


# Backport os.fspath() from Python 3.6
try:
Expand Down
31 changes: 31 additions & 0 deletions dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import shutil
import stat

import nanotime
from shortuuid import uuid
Expand All @@ -13,6 +14,7 @@
from dvc.utils import dict_md5
from dvc.utils import fspath
from dvc.utils import fspath_py35
from dvc.utils import relpath
from dvc.utils import walk_files
from dvc.utils.compat import str

Expand Down Expand Up @@ -103,3 +105,32 @@ def move(src, dst, mode=None):
os.chmod(tmp, mode)

shutil.move(tmp, dst)


def _chmod(func, p, excinfo):
    """Make ``p`` writable, then invoke ``func`` on it.

    Takes the ``(function, path, excinfo)`` arguments that ``shutil.rmtree``
    hands to an ``onerror`` callback; ``excinfo`` is ignored.

    Raises:
        OSError: from ``os.lstat``, from ``os.chmod`` (except for ENOENT and
            EPERM, which are tolerated), or from ``func``.
    """
    current_mode = os.lstat(p).st_mode
    new_mode = current_mode | stat.S_IWRITE

    try:
        os.chmod(p, new_mode)
    except OSError as exc:
        # broken symlink or file is not owned by us
        if exc.errno != errno.ENOENT and exc.errno != errno.EPERM:
            raise

    func(p)


def remove(path):
    """Remove a file, a symlink or a whole directory tree.

    A path that does not exist is silently ignored. Files and symlinks are
    passed through ``_chmod`` before being unlinked; for directory trees
    ``_chmod`` is installed as the ``shutil.rmtree`` error handler.

    Args:
        path: path to remove; it is converted with ``fspath_py35`` first.

    Raises:
        OSError: for any failure other than ``errno.ENOENT``.
    """
    path = fspath_py35(path)

    logger.debug("Removing '{}'".format(relpath(path)))

    try:
        # os.path.isdir() follows symlinks, but shutil.rmtree() refuses to
        # operate on a symlink to a directory and reports that through
        # `onerror`, where `_chmod` would merely re-run the failed
        # `os.path.islink` check and leave the link in place. Such links are
        # therefore unlinked like regular files instead.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, onerror=_chmod)
        else:
            _chmod(os.unlink, path, None)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise
Loading

0 comments on commit a1433ba

Please sign in to comment.