diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 6b38af4ead..da96fd0f9f 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -270,3 +270,11 @@ def __init__(self, path): super(OutputFileMissingError, self).__init__( "Can't find {} neither locally nor on remote".format(path) ) + + +class DvcIgnoreInCollectedDirError(DvcException): + def __init__(self, ignore_dirname): + super(DvcIgnoreInCollectedDirError, self).__init__( + ".dvcignore file should not be in collected dir path: " + "'{}'".format(ignore_dirname) + ) diff --git a/dvc/ignore.py b/dvc/ignore.py index 5c91d302ba..b24c7824fe 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -1,23 +1,15 @@ from __future__ import unicode_literals +import logging import os from pathspec import PathSpec from pathspec.patterns import GitWildMatchPattern from dvc.utils import relpath -from dvc.utils.fs import get_parent_dirs_up_to +from dvc.utils.compat import open -class DvcIgnoreFileHandler(object): - def __init__(self, tree): - self.tree = tree - - def read_patterns(self, path): - with self.tree.open(path) as fobj: - return PathSpec.from_lines(GitWildMatchPattern, fobj) - - def get_repo_root(self): - return self.tree.tree_root +logger = logging.getLogger(__name__) class DvcIgnore(object): @@ -27,12 +19,15 @@ def __call__(self, root, dirs, files): raise NotImplementedError -class DvcIgnoreFromFile(DvcIgnore): - def __init__(self, ignore_file_path, ignore_handler): +class DvcIgnorePatterns(DvcIgnore): + def __init__(self, ignore_file_path): + assert os.path.isabs(ignore_file_path) + self.ignore_file_path = ignore_file_path self.dirname = os.path.normpath(os.path.dirname(ignore_file_path)) - self.ignore_spec = ignore_handler.read_patterns(ignore_file_path) + with open(ignore_file_path, encoding="utf-8") as fobj: + self.ignore_spec = PathSpec.from_lines(GitWildMatchPattern, fobj) def __call__(self, root, dirs, files): files = [f for f in files if not self.matches(root, f)] @@ -42,67 +37,43 @@ def __call__(self, root, dirs, files): def matches(self, dirname, basename): abs_path = os.path.join(dirname, basename) - relative_path = relpath(abs_path, self.dirname) - if os.name == "nt": - relative_path = relative_path.replace("\\", "/") + rel_path = relpath(abs_path, self.dirname) - return self.ignore_spec.match_file(relative_path) + if os.pardir + os.sep in rel_path: + return False + return self.ignore_spec.match_file(rel_path) def __hash__(self): return hash(self.ignore_file_path) + def __eq__(self, other): + return self.ignore_file_path == other.ignore_file_path -class DvcIgnoreConstant(DvcIgnore): - def __init__(self, basename): - self.basename = basename +class DvcIgnoreDirs(DvcIgnore): + def __init__(self, basenames): + self.basenames = set(basenames) -class DvcIgnoreDir(DvcIgnoreConstant): def __call__(self, root, dirs, files): - dirs = [d for d in dirs if not d == self.basename] - - return dirs, files - - -class DvcIgnoreFile(DvcIgnoreConstant): - def __call__(self, root, dirs, files): - files = [f for f in files if not f == self.basename] + dirs = [d for d in dirs if d not in self.basenames] return dirs, files class DvcIgnoreFilter(object): - def __init__(self, wdir, ignore_file_handler=None): - self.ignores = [ - DvcIgnoreDir(".git"), - DvcIgnoreDir(".hg"), - DvcIgnoreDir(".dvc"), - DvcIgnoreFile(".dvcignore"), - ] - - self.ignore_file_handler = ignore_file_handler - self._process_ignores_in_parent_dirs(wdir) - - def _process_ignores_in_parent_dirs(self, wdir): - if self.ignore_file_handler: - wdir = os.path.normpath(os.path.abspath(wdir)) - ignore_search_end_dir = self.ignore_file_handler.get_repo_root() - parent_dirs = get_parent_dirs_up_to(wdir, ignore_search_end_dir) - for d in parent_dirs: - self.update(d) - - def update(self, wdir): - ignore_file_path = os.path.join(wdir, DvcIgnore.DVCIGNORE_FILE) + def __init__(self, root_dir): + self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} + self._update(root_dir) + for root, dirs, _ in os.walk(root_dir): + for d in dirs: + self._update(os.path.join(root, d)) + + def _update(self, dirname): + ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE) if os.path.exists(ignore_file_path): - file_ignore = DvcIgnoreFromFile( - ignore_file_path, ignore_handler=self.ignore_file_handler - ) - self.ignores.append(file_ignore) + self.ignores.add(DvcIgnorePatterns(ignore_file_path)) def __call__(self, root, dirs, files): - if self.ignore_file_handler: - self.update(root) - for ignore in self.ignores: dirs, files = ignore(root, dirs, files) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index e56171d0ec..c4a88467d8 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from dvc.ignore import DvcIgnore from dvc.utils.compat import str, basestring, urlparse, fspath_py35, makedirs import os @@ -14,7 +15,11 @@ import dvc.prompt as prompt from dvc.config import Config -from dvc.exceptions import DvcException, ConfirmRemoveError +from dvc.exceptions import ( + DvcException, + ConfirmRemoveError, + DvcIgnoreInCollectedDirError, +) from dvc.progress import progress, ProgressCallback from dvc.utils import LARGE_DIR_SIZE, tmp_fname, to_chunks, move, relpath from dvc.state import StateBase @@ -149,6 +154,10 @@ def _collect_dir(self, path_info): root_info = path_info / root for fname in files: + + if fname == DvcIgnore.DVCIGNORE_FILE: + raise DvcIgnoreInCollectedDirError(root) + file_info = root_info / fname relative_path = file_info.relative_to(path_info) checksum = executor.submit( diff --git a/dvc/remote/local/__init__.py b/dvc/remote/local/__init__.py index 1f82690157..fa72369d5b 100644 --- a/dvc/remote/local/__init__.py +++ b/dvc/remote/local/__init__.py @@ -29,6 +29,7 @@ file_md5, walk_files, relpath, + dvc_walk, ) from dvc.config import Config from dvc.exceptions import DvcException @@ -222,7 +223,7 @@ def isdir(self, path_info): return os.path.isdir(fspath_py35(path_info)) def walk(self, path_info): - return os.walk(fspath_py35(path_info)) + return dvc_walk(path_info, self.repo.dvcignore) def get_file_checksum(self, path_info): return file_md5(fspath_py35(path_info))[0] @@ -477,13 +478,11 @@ def _unprotect_file(path): os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE) - @staticmethod - def _unprotect_dir(path): - for path in walk_files(path): + def _unprotect_dir(self, path): + for path in walk_files(path, self.repo.dvcignore): RemoteLOCAL._unprotect_file(path) - @staticmethod - def unprotect(path_info): + def unprotect(self, path_info): path = path_info.fspath if not os.path.exists(path): raise DvcException( @@ -491,7 +490,7 @@ def unprotect(path_info): ) if os.path.isdir(path): - RemoteLOCAL._unprotect_dir(path) + self._unprotect_dir(path) else: RemoteLOCAL._unprotect_file(path) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index fc90f0ed71..4ec38c0535 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -5,6 +5,8 @@ from itertools import chain +from funcy import cached_property + from dvc.config import Config from dvc.exceptions import ( NotDvcRepoError, @@ -12,7 +14,7 @@ TargetNotDirectoryError, OutputFileMissingError, ) -from dvc.ignore import DvcIgnoreFileHandler +from dvc.ignore import DvcIgnoreFilter from dvc.path_info import PathInfo from dvc.utils.compat import open as _open, fspath_py35 from dvc.utils import relpath @@ -356,6 +358,17 @@ def pipelines(self, from_directory=None): G.subgraph(c).copy() for c in nx.weakly_connected_components(G) ] + @staticmethod + def _filter_out_dirs(dirs, outs, root_dir): + def filter_dirs(dname): + path = os.path.join(root_dir, dname) + for out in outs: + if path == os.path.normpath(out): + return False + return True + + return list(filter(filter_dirs, dirs)) + def stages(self, from_directory=None, check_dag=True): """ Walks down the root directory looking for Dvcfiles, @@ -376,9 +389,8 @@ def stages(self, from_directory=None, check_dag=True): stages = [] outs = [] - ignore_file_handler = DvcIgnoreFileHandler(self.tree) for root, dirs, files in self.tree.walk( - from_directory, ignore_file_handler=ignore_file_handler + from_directory, dvcignore=self.dvcignore ): for fname in files: path = os.path.join(root, fname) @@ -390,16 +402,7 @@ def stages(self, from_directory=None, check_dag=True): outs.append(out.fspath + out.sep) stages.append(stage) - def filter_dirs(dname, root=root): - path = os.path.join(root, dname) - if path in (self.dvc_dir, self.scm.dir): - return False - for out in outs: - if path == os.path.normpath(out) or path.startswith(out): - return False - return True - - dirs[:] = list(filter(filter_dirs, dirs)) + dirs[:] = self._filter_out_dirs(dirs, outs, root) if check_dag: self.check_dag(stages) @@ -467,3 +470,7 @@ def open(self, path, remote=None, mode="r", encoding=None): raise OutputFileMissingError(relpath(path, self.root_dir)) return _open(cache_file, mode=mode, encoding=encoding) + + @cached_property + def dvcignore(self): + return DvcIgnoreFilter(self.root_dir) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 0b02a19aba..0092fcc58b 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -44,7 +44,7 @@ def _find_all_targets(repo, target, recursive): if os.path.isdir(target) and recursive: return [ fname - for fname in walk_files(target) + for fname in walk_files(target, repo.dvcignore) if not repo.is_dvc_internal(fname) if not Stage.is_stage_file(fname) if not repo.scm.belongs_to_scm(fname) diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index 89c593c54d..5716984d2e 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -1,7 +1,6 @@ import errno import os -from dvc.ignore import DvcIgnoreFilter from dvc.utils import relpath from dvc.utils.compat import StringIO, BytesIO from dvc.exceptions import DvcException @@ -103,13 +102,7 @@ def git_object_by_path(self, path): tree = tree[i] return tree - def _walk( - self, - tree, - topdown=True, - ignore_file_handler=None, - dvc_ignore_filter=None, - ): + def _walk(self, tree, topdown=True): dirs, nondirs = [], [] for i in tree: if i.mode == GIT_MODE_DIR: @@ -118,26 +111,16 @@ def _walk( nondirs.append(i.name) if topdown: - if not dvc_ignore_filter: - dvc_ignore_filter = DvcIgnoreFilter( - tree.abspath, ignore_file_handler=ignore_file_handler - ) - dirs, nondirs = dvc_ignore_filter(tree.path, dirs, nondirs) yield os.path.normpath(tree.abspath), dirs, nondirs for i in dirs: - for x in self._walk( - tree[i], - topdown=True, - ignore_file_handler=ignore_file_handler, - dvc_ignore_filter=dvc_ignore_filter, - ): + for x in self._walk(tree[i], topdown=True): yield x if not topdown: yield os.path.normpath(tree.abspath), dirs, nondirs - def walk(self, top, topdown=True, ignore_file_handler=None): + def walk(self, top, topdown=True, dvcignore=None): """Directory tree generator. See `os.walk` for the docs. Differences: diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 8a555902f9..a90306f769 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -23,7 +23,7 @@ def isdir(self, path): def isfile(self, path): """Test whether a path is a regular file""" - def walk(self, top, topdown=True, ignore_file_handler=None): + def walk(self, top, topdown=True, dvcignore=None): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -60,7 +60,7 @@ def isfile(self, path): """Test whether a path is a regular file""" return os.path.isfile(path) - def walk(self, top, topdown=True, ignore_file_handler=None): + def walk(self, top, topdown=True, dvcignore=None): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -68,13 +68,12 @@ def walk(self, top, topdown=True, ignore_file_handler=None): - it could raise exceptions, there is no onerror argument """ + assert dvcignore + def onerror(e): raise e for root, dirs, files in dvc_walk( - os.path.abspath(top), - topdown=topdown, - onerror=onerror, - ignore_file_handler=ignore_file_handler, + os.path.abspath(top), dvcignore, topdown=topdown, onerror=onerror ): yield os.path.normpath(root), dirs, files diff --git a/dvc/state.py b/dvc/state.py index 3f03d084e2..211ed26832 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -355,7 +355,9 @@ def save(self, path_info, checksum): path = fspath_py35(path_info) assert os.path.exists(path) - actual_mtime, actual_size = get_mtime_and_size(path) + actual_mtime, actual_size = get_mtime_and_size( + path, self.repo.dvcignore + ) actual_inode = get_inode(path) existing_record = self.get_state_record_for_inode(actual_inode) @@ -386,7 +388,9 @@ def get(self, path_info): if not os.path.exists(path): return None - actual_mtime, actual_size = get_mtime_and_size(path) + actual_mtime, actual_size = get_mtime_and_size( + path, self.repo.dvcignore + ) actual_inode = get_inode(path) existing_record = self.get_state_record_for_inode(actual_inode) @@ -413,7 +417,7 @@ def save_link(self, path_info): if not os.path.exists(path): return - mtime, _ = get_mtime_and_size(path) + mtime, _ = get_mtime_and_size(path, self.repo.dvcignore) inode = get_inode(path) relative_path = relpath(path, self.root_dir) @@ -443,7 +447,7 @@ def remove_unused_links(self, used): continue actual_inode = get_inode(path) - actual_mtime, _ = get_mtime_and_size(path) + actual_mtime, _ = get_mtime_and_size(path, self.repo.dvcignore) if inode == actual_inode and mtime == actual_mtime: logger.debug("Removing '{}' as unused link.".format(path)) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index a372d90d4e..ed04b8ace7 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -2,7 +2,14 @@ from __future__ import unicode_literals -from dvc.utils.compat import str, builtin_str, open, cast_bytes_py2, StringIO +from dvc.utils.compat import ( + str, + builtin_str, + open, + cast_bytes_py2, + StringIO, + fspath_py35, +) from dvc.utils.compat import fspath import os @@ -272,39 +279,25 @@ def to_yaml_string(data): return stream.getvalue() -def dvc_walk( - top, - topdown=True, - onerror=None, - followlinks=False, - ignore_file_handler=None, -): +def dvc_walk(top, dvcignore, topdown=True, onerror=None, followlinks=False): """ Proxy for `os.walk` directory tree generator. Utilizes DvcIgnoreFilter functionality. """ - ignore_filter = None - if topdown: - from dvc.ignore import DvcIgnoreFilter - - ignore_filter = DvcIgnoreFilter( - top, ignore_file_handler=ignore_file_handler - ) + top = fspath_py35(top) for root, dirs, files in os.walk( top, topdown=topdown, onerror=onerror, followlinks=followlinks ): - if ignore_filter: - dirs[:], files[:] = ignore_filter(root, dirs, files) + if dvcignore: + dirs[:], files[:] = dvcignore(root, dirs, files) yield root, dirs, files -def walk_files(directory, ignore_file_handler=None): - for root, _, files in dvc_walk( - directory, ignore_file_handler=ignore_file_handler - ): +def walk_files(directory, dvcignore): + for root, _, files in dvc_walk(directory, dvcignore): for f in files: yield os.path.join(root, f) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 8538aee1b6..811b4195a9 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -7,7 +7,7 @@ from dvc.exceptions import DvcException from dvc.system import System -from dvc.utils import dvc_walk +from dvc.utils import dict_md5, walk_files from dvc.utils.compat import str @@ -20,30 +20,33 @@ def get_inode(path): return inode -def get_mtime_and_size(path): - stat = os.stat(path) - size = stat.st_size - mtime = stat.st_mtime - +def get_mtime_and_size(path, dvcignore): if os.path.isdir(path): - for root, dirs, files in dvc_walk(path): - for name in dirs + files: - entry = os.path.join(root, name) - try: - stat = os.stat(entry) - except OSError as exc: - # NOTE: broken symlink case. - if exc.errno != errno.ENOENT: - raise - continue - size += stat.st_size - entry_mtime = stat.st_mtime - if entry_mtime > mtime: - mtime = entry_mtime + size = 0 + files_mtimes = {} + for file_path in walk_files(path, dvcignore): + try: + stat = os.stat(file_path) + except OSError as exc: + # NOTE: broken symlink case. + if exc.errno != errno.ENOENT: + raise + continue + size += stat.st_size + files_mtimes[file_path] = stat.st_mtime + + # We track file changes and moves, which cannot be detected with simply + # max(mtime(f) for f in non_ignored_files) + mtime = dict_md5(files_mtimes) + else: + base_stat = os.stat(path) + size = base_stat.st_size + mtime = base_stat.st_mtime + mtime = int(nanotime.timestamp(mtime)) # State of files handled by dvc is stored in db as TEXT. # We cast results to string for later comparisons with stored values. - return str(int(nanotime.timestamp(mtime))), str(size) + return str(mtime), str(size) class BasePathNotInCheckedPathException(DvcException): diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index c984ff4100..1aa7a83702 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -136,7 +136,7 @@ def outs_info(self, stage): paths = [ path for output in stage["outs"] - for path in walk_files(output["path"]) + for path in walk_files(output["path"], self.dvc.dvcignore) ] return [ diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index a646d4afb7..2b5fec7e91 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -1,21 +1,25 @@ import os +import shutil -from dvc.ignore import DvcIgnore, DvcIgnoreFileHandler +import pytest + +from dvc.exceptions import DvcIgnoreInCollectedDirError +from dvc.ignore import DvcIgnore from dvc.utils.compat import cast_bytes +from dvc.utils.fs import get_mtime_and_size from tests.basic_env import TestDvc +from tests.utils import to_posixpath class TestDvcIgnore(TestDvc): def setUp(self): super(TestDvcIgnore, self).setUp() - self.ignore_file_handler = DvcIgnoreFileHandler(self.dvc.tree) def _get_all_paths(self): paths = [] - ignore_file_handler = DvcIgnoreFileHandler(self.dvc.tree) for root, dirs, files in self.dvc.tree.walk( - self.dvc.root_dir, ignore_file_handler=ignore_file_handler + self.dvc.root_dir, dvcignore=self.dvc.dvcignore ): for dname in dirs: paths.append(os.path.join(root, dname)) @@ -56,3 +60,74 @@ def test_ignore_in_parent_dir(self): all_paths = self._get_all_paths() self.assertNotIn(forbidden_path, all_paths) + + +def test_metadata_unchanged_when_moving_ignored_file(dvc_repo, repo_dir): + new_data_path = repo_dir.DATA_SUB + "_new" + + ignore_file = os.path.join(dvc_repo.root_dir, DvcIgnore.DVCIGNORE_FILE) + repo_dir.create( + ignore_file, + "\n".join( + [to_posixpath(repo_dir.DATA_SUB), to_posixpath(new_data_path)] + ), + ) + + mtime_sig, size = get_mtime_and_size(repo_dir.DATA_DIR, dvc_repo.dvcignore) + + shutil.move(repo_dir.DATA_SUB, new_data_path) + + new_mtime_sig, new_size = get_mtime_and_size( + repo_dir.DATA_DIR, dvc_repo.dvcignore + ) + + assert new_mtime_sig == mtime_sig + assert new_size == size + + +def test_mtime_changed_when_moving_non_ignored_file(dvc_repo, repo_dir): + new_data_path = repo_dir.DATA_SUB + "_new" + mtime, size = get_mtime_and_size(repo_dir.DATA_DIR, dvc_repo.dvcignore) + + shutil.move(repo_dir.DATA_SUB, new_data_path) + new_mtime, new_size = get_mtime_and_size( + repo_dir.DATA_DIR, dvc_repo.dvcignore + ) + + assert new_mtime != mtime + assert new_size == size + + +def test_metadata_unchanged_on_ignored_file_deletion(dvc_repo, repo_dir): + ignore_file = os.path.join(dvc_repo.root_dir, DvcIgnore.DVCIGNORE_FILE) + repo_dir.create(ignore_file, to_posixpath(repo_dir.DATA_SUB)) + + mtime, size = get_mtime_and_size(repo_dir.DATA_DIR, dvc_repo.dvcignore) + + os.remove(repo_dir.DATA_SUB) + new_mtime, new_size = get_mtime_and_size( + repo_dir.DATA_DIR, dvc_repo.dvcignore + ) + + assert new_mtime == mtime + assert new_size == size + + +def test_metadata_changed_on_non_ignored_file_deletion(dvc_repo, repo_dir): + mtime, size = get_mtime_and_size(repo_dir.DATA_DIR, dvc_repo.dvcignore) + + os.remove(repo_dir.DATA_SUB) + new_mtime_sig, new_size = get_mtime_and_size( + repo_dir.DATA_DIR, dvc_repo.dvcignore + ) + + assert new_mtime_sig != mtime + assert new_size != size + + +def test_should_raise_on_dvcignore_in_out_dir(dvc_repo, repo_dir): + ignore_file = os.path.join(repo_dir.DATA_DIR, DvcIgnore.DVCIGNORE_FILE) + repo_dir.create(ignore_file, "") + + with pytest.raises(DvcIgnoreInCollectedDirError): + dvc_repo.add(repo_dir.DATA_DIR) diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index abd8ed3805..8da5082ddb 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -1,4 +1,6 @@ +from dvc.ignore import DvcIgnore from dvc.main import main +from dvc.repo import Repo from dvc.stage import Stage from tests.basic_env import TestDvcGit @@ -50,16 +52,16 @@ class TestIgnore(TestDvcGit): def _stage_name(self, file): return file + Stage.STAGE_FILE_SUFFIX - def test_should_not_gather_stage_files_from_ignored_d(self): + def test_should_not_gather_stage_files_from_ignored_dir(self): ret = main(["add", self.FOO, self.BAR, self.DATA, self.DATA_SUB]) self.assertEqual(0, ret) stages = self.dvc.stages() self.assertEqual(4, len(stages)) - with open(".dvcignore", "w") as fobj: - fobj.write("data_dir") + self.create(DvcIgnore.DVCIGNORE_FILE, self.DATA_DIR) + self.dvc = Repo(self.dvc.root_dir) stages = self.dvc.stages() self.assertEqual(2, len(stages)) diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 8e937d4161..7bec4ce71d 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -4,6 +4,7 @@ from os.path import join +from dvc.ignore import DvcIgnoreFilter from dvc.scm import SCM from dvc.scm.git import GitTree from dvc.scm.tree import WorkingTree @@ -97,9 +98,10 @@ def convert_to_sets(walk_results): class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir): def test(self): + dvcignore = DvcIgnoreFilter(self.root_dir) tree = WorkingTree(self._root_dir) self.assertWalkEqual( - tree.walk(self._root_dir), + tree.walk(self._root_dir, dvcignore=dvcignore), [ ( self._root_dir, @@ -116,9 +118,10 @@ def test(self): ) def test_subdir(self): + dvcignore = DvcIgnoreFilter(self.root_dir) tree = WorkingTree(self._root_dir) self.assertWalkEqual( - tree.walk(join("data_dir", "data_sub_dir")), + tree.walk(join("data_dir", "data_sub_dir"), dvcignore=dvcignore), [ ( join(self._root_dir, "data_dir", "data_sub_dir"), @@ -132,8 +135,9 @@ def test_subdir(self): class TestWalkInGit(AssertWalkEqualMixin, TestGit): def test_nobranch(self): tree = WorkingTree(self._root_dir) + dvcignore = DvcIgnoreFilter(self._root_dir) self.assertWalkEqual( - tree.walk("."), + tree.walk(".", dvcignore=dvcignore), [ ( self._root_dir, @@ -149,7 +153,7 @@ def test_nobranch(self): ], ) self.assertWalkEqual( - tree.walk(join("data_dir", "data_sub_dir")), + tree.walk(join("data_dir", "data_sub_dir"), dvcignore=dvcignore), [ ( join(self._root_dir, "data_dir", "data_sub_dir"), diff --git a/tests/unit/test_ignore.py b/tests/unit/test_ignore.py index aa0b8109b5..d27c9f1ec8 100644 --- a/tests/unit/test_ignore.py +++ b/tests/unit/test_ignore.py @@ -1,24 +1,20 @@ import os +import dvc import pytest -from pathspec import PathSpec -from pathspec.patterns import GitWildMatchPattern +from mock import mock_open, patch -from dvc.ignore import DvcIgnoreFromFile, DvcIgnoreDir, DvcIgnoreFile -from mock import patch, Mock +from dvc.ignore import DvcIgnorePatterns, DvcIgnoreDirs def mock_dvcignore(dvcignore_path, patterns): - mock_ignore_file_handler = Mock() + with patch.object( - mock_ignore_file_handler, - "read_patterns", - return_value=PathSpec.from_lines(GitWildMatchPattern, patterns), + dvc.ignore, "open", mock_open(read_data="\n".join(patterns)) ): - ignore_file = DvcIgnoreFromFile( - dvcignore_path, mock_ignore_file_handler - ) - return ignore_file + ignore_patterns = DvcIgnorePatterns(dvcignore_path) + + return ignore_patterns def test_ignore_from_file_should_filter_dirs_and_files(): @@ -76,6 +72,7 @@ def test_ignore_from_file_should_filter_dirs_and_files(): ), ("dont_ignore.txt", ["dont_ignore"], False), ("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False), + ("../../../something.txt", ["**/something.txt"], False), ], ) def test_match_ignore_from_file( @@ -97,7 +94,7 @@ def test_match_ignore_from_file( @pytest.mark.parametrize("omit_dir", [".git", ".hg", ".dvc"]) def test_should_ignore_dir(omit_dir): - ignore = DvcIgnoreDir(omit_dir) + ignore = DvcIgnoreDirs([".git", ".hg", ".dvc"]) root = os.path.join(os.path.sep, "walk", "dir", "root") dirs = [omit_dir, "dir1", "dir2"] @@ -106,16 +103,3 @@ def test_should_ignore_dir(omit_dir): new_dirs, _ = ignore(root, dirs, files) assert set(new_dirs) == {"dir1", "dir2"} - - -def test_should_ignore_file(): - dvcignore = ".dvcignore" - ignore = DvcIgnoreFile(dvcignore) - - root = os.path.join(os.path.sep, "walk", "dir", "root") - dirs = [] - files = ["file1", "file2", dvcignore] - - _, new_files = ignore(root, dirs, files) - - assert set(new_files) == {"file1", "file2"} diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index f34a914151..30a96b63fc 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -3,6 +3,8 @@ import dvc import pytest + +from dvc.ignore import DvcIgnoreFilter from dvc.system import System from dvc.path_info import PathInfo from dvc.utils import relpath @@ -20,15 +22,13 @@ class TestMtimeAndSize(TestDir): def test(self): - file_time, file_size = get_mtime_and_size(self.DATA) - dir_time, dir_size = get_mtime_and_size(self.DATA_DIR) + dvcignore = DvcIgnoreFilter(self.root_dir) + file_time, file_size = get_mtime_and_size(self.DATA, dvcignore) + dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, dvcignore) actual_file_size = os.path.getsize(self.DATA) - actual_dir_size = ( - os.path.getsize(self.DATA_DIR) - + os.path.getsize(self.DATA) - + os.path.getsize(self.DATA_SUB_DIR) - + os.path.getsize(self.DATA_SUB) + actual_dir_size = os.path.getsize(self.DATA) + os.path.getsize( + self.DATA_SUB ) self.assertIs(type(file_time), str) diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index b43af8d066..cd50069c98 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -40,3 +40,7 @@ def trees_equal(dir_path_1, dir_path_2): for d in comparison.common_dirs: trees_equal(os.path.join(dir_path_1, d), os.path.join(dir_path_2, d)) + + +def to_posixpath(path): + return path.replace("\\", "/")