Skip to content

Commit

Permalink
ignore: CleanTree should always check ignores, not just for walk() (#…
Browse files Browse the repository at this point in the history
…3876)

* tests: test that CleanTree works outside of walk()

* ignore: CleanTree should always check ignores, not just for walk()

* CleanTree: handle broken symlinks

* handle case for linking from shared local cache dirs

* Fix windows relpath/different drive issue
  • Loading branch information
pmrowla committed May 27, 2020
1 parent 2fecc66 commit 10f2bb9
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 6 deletions.
76 changes: 70 additions & 6 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern

from dvc.path_info import PathInfo
from dvc.scm.tree import BaseTree
from dvc.utils import relpath

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -125,19 +127,79 @@ def tree_root(self):
return self.tree.tree_root

def open(self, path, mode="r", encoding="utf-8"):
return self.tree.open(path, mode, encoding)
if self.isfile(path):
return self.tree.open(path, mode, encoding)
raise FileNotFoundError

def exists(self, path):
return self.tree.exists(path)
if self.tree.exists(path) and self._parents_exist(path):
if self.tree.isdir(path):
return self._valid_dirname(path)
return self._valid_filename(path)
return False

def isdir(self, path):
return self.tree.isdir(path)
return (
self.tree.isdir(path)
and self._parents_exist(path)
and self._valid_dirname(path)
)

def _valid_dirname(self, path):
dirname, basename = os.path.split(os.path.normpath(path))
dirs, _ = self.dvcignore(os.path.abspath(dirname), [basename], [])
if dirs:
return True
return False

def isfile(self, path):
return self.tree.isfile(path)
return (
self.tree.isfile(path)
and self._parents_exist(path)
and self._valid_filename(path)
)

def _valid_filename(self, path):
dirname, basename = os.path.split(os.path.normpath(path))
_, files = self.dvcignore(os.path.abspath(dirname), [], [basename])
if files:
return True
return False

def isexec(self, path):
return self.tree.isexec(path)
return self.exists(path) and self.tree.isexec(path)

def _parents_exist(self, path):
from dvc.repo import Repo

path = PathInfo(path)

# if parent is tree_root or inside a .dvc dir we can skip this check
if path.parent == self.tree_root or Repo.DVC_DIR in path.parts:
return True

# if path is outside of tree, assume this is a local remote/local cache
# link/move operation where we do not need to filter ignores
path = relpath(path, self.tree_root)
if path.startswith("..") or (
os.name == "nt"
and not os.path.commonprefix(
[os.path.abspath(path), self.tree_root]
)
):
return True

# check if parent directories are in our ignores, starting from
# tree_root
for parent_dir in reversed(PathInfo(path).parents):
dirname, basename = os.path.split(parent_dir)
if basename == ".":
# parent_dir == tree_root
continue
dirs, _ = self.dvcignore(os.path.abspath(dirname), [basename], [])
if not dirs:
return False
return True

def walk(self, top, topdown=True):
for root, dirs, files in self.tree.walk(top, topdown):
Expand All @@ -148,4 +210,6 @@ def walk(self, top, topdown=True):
yield root, dirs, files

def stat(self, path):
return self.tree.stat(path)
if self.exists(path):
return self.tree.stat(path)
raise FileNotFoundError
21 changes: 21 additions & 0 deletions tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from dvc.ignore import CleanTree
from dvc.path_info import PathInfo
from dvc.repo import Repo
from dvc.repo.tree import RepoTree
from dvc.scm import SCM
from dvc.scm.git import GitTree
Expand Down Expand Up @@ -220,3 +221,23 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote):
cache.save(PathInfo(erepo_dir / "dir"), None, tree=tree)
for checksum in expected:
assert os.path.exists(cache.checksum_to_path_info(checksum))


def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
tmp_dir.gen({"subdir": {}})
subrepo_dir = tmp_dir / "subdir"
with subrepo_dir.chdir():
subrepo = Repo.init(subdir=True)
subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})

assert isinstance(dvc.tree, CleanTree)
assert not dvc.tree.exists(subrepo_dir / "foo")
assert not dvc.tree.isfile(subrepo_dir / "foo")
assert not dvc.tree.exists(subrepo_dir / "dir")
assert not dvc.tree.isdir(subrepo_dir / "dir")

assert isinstance(subrepo.tree, CleanTree)
assert subrepo.tree.exists(subrepo_dir / "foo")
assert subrepo.tree.isfile(subrepo_dir / "foo")
assert subrepo.tree.exists(subrepo_dir / "dir")
assert subrepo.tree.isdir(subrepo_dir / "dir")

0 comments on commit 10f2bb9

Please sign in to comment.