Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,11 @@ def __init__(self, path):
super(OutputFileMissingError, self).__init__(
"Can't find {} neither locally nor on remote".format(path)
)


class DvcIgnoreInCollectedDirError(DvcException):
    """Error for a .dvcignore file located inside a collected directory.

    Args:
        ignore_dirname: directory path that is interpolated into the
            error message.
    """

    def __init__(self, ignore_dirname):
        # Adjacent literals are concatenated before ``format`` is applied,
        # so the placeholder in the second literal is filled as expected.
        message = (
            ".dvcignore file should not be in collected dir path: "
            "'{}'".format(ignore_dirname)
        )
        super(DvcIgnoreInCollectedDirError, self).__init__(message)
87 changes: 29 additions & 58 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
from __future__ import unicode_literals

import logging
import os
from pathspec import PathSpec
from pathspec.patterns import GitWildMatchPattern

from dvc.utils import relpath
from dvc.utils.fs import get_parent_dirs_up_to

from dvc.utils.compat import open

class DvcIgnoreFileHandler(object):
def __init__(self, tree):
self.tree = tree

def read_patterns(self, path):
with self.tree.open(path) as fobj:
return PathSpec.from_lines(GitWildMatchPattern, fobj)

def get_repo_root(self):
return self.tree.tree_root
logger = logging.getLogger(__name__)


class DvcIgnore(object):
Expand All @@ -27,12 +19,15 @@ def __call__(self, root, dirs, files):
raise NotImplementedError


class DvcIgnoreFromFile(DvcIgnore):
def __init__(self, ignore_file_path, ignore_handler):
class DvcIgnorePatterns(DvcIgnore):
def __init__(self, ignore_file_path):
assert os.path.isabs(ignore_file_path)

self.ignore_file_path = ignore_file_path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Judging by `__eq__`, we expect only an absolute path here, right? Should we add a check/assert for that? Otherwise it might get nasty, with `__eq__` returning True for two relative paths that actually point to different files.

self.dirname = os.path.normpath(os.path.dirname(ignore_file_path))

self.ignore_spec = ignore_handler.read_patterns(ignore_file_path)
with open(ignore_file_path, encoding="utf-8") as fobj:
self.ignore_spec = PathSpec.from_lines(GitWildMatchPattern, fobj)

def __call__(self, root, dirs, files):
files = [f for f in files if not self.matches(root, f)]
Expand All @@ -42,67 +37,43 @@ def __call__(self, root, dirs, files):

def matches(self, dirname, basename):
abs_path = os.path.join(dirname, basename)
relative_path = relpath(abs_path, self.dirname)
if os.name == "nt":
relative_path = relative_path.replace("\\", "/")
rel_path = relpath(abs_path, self.dirname)

return self.ignore_spec.match_file(relative_path)
if os.pardir + os.sep in rel_path:
return False
return self.ignore_spec.match_file(rel_path)

def __hash__(self):
return hash(self.ignore_file_path)

def __eq__(self, other):
return self.ignore_file_path == other.ignore_file_path

class DvcIgnoreConstant(DvcIgnore):
def __init__(self, basename):
self.basename = basename

class DvcIgnoreDirs(DvcIgnore):
def __init__(self, basenames):
self.basenames = set(basenames)

class DvcIgnoreDir(DvcIgnoreConstant):
def __call__(self, root, dirs, files):
dirs = [d for d in dirs if not d == self.basename]

return dirs, files


class DvcIgnoreFile(DvcIgnoreConstant):
def __call__(self, root, dirs, files):
files = [f for f in files if not f == self.basename]
dirs = [d for d in dirs if d not in self.basenames]

return dirs, files


class DvcIgnoreFilter(object):
def __init__(self, wdir, ignore_file_handler=None):
self.ignores = [
DvcIgnoreDir(".git"),
DvcIgnoreDir(".hg"),
DvcIgnoreDir(".dvc"),
DvcIgnoreFile(".dvcignore"),
]

self.ignore_file_handler = ignore_file_handler
self._process_ignores_in_parent_dirs(wdir)

def _process_ignores_in_parent_dirs(self, wdir):
if self.ignore_file_handler:
wdir = os.path.normpath(os.path.abspath(wdir))
ignore_search_end_dir = self.ignore_file_handler.get_repo_root()
parent_dirs = get_parent_dirs_up_to(wdir, ignore_search_end_dir)
for d in parent_dirs:
self.update(d)

def update(self, wdir):
ignore_file_path = os.path.join(wdir, DvcIgnore.DVCIGNORE_FILE)
def __init__(self, root_dir):
self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])}
self._update(root_dir)
for root, dirs, _ in os.walk(root_dir):
for d in dirs:
self._update(os.path.join(root, d))

def _update(self, dirname):
ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE)
if os.path.exists(ignore_file_path):
file_ignore = DvcIgnoreFromFile(
ignore_file_path, ignore_handler=self.ignore_file_handler
)
self.ignores.append(file_ignore)
self.ignores.add(DvcIgnorePatterns(ignore_file_path))

def __call__(self, root, dirs, files):
if self.ignore_file_handler:
self.update(root)

for ignore in self.ignores:
dirs, files = ignore(root, dirs, files)

Expand Down
11 changes: 10 additions & 1 deletion dvc/remote/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals

from dvc.ignore import DvcIgnore
from dvc.utils.compat import str, basestring, urlparse, fspath_py35, makedirs

import os
Expand All @@ -14,7 +15,11 @@

import dvc.prompt as prompt
from dvc.config import Config
from dvc.exceptions import DvcException, ConfirmRemoveError
from dvc.exceptions import (
DvcException,
ConfirmRemoveError,
DvcIgnoreInCollectedDirError,
)
from dvc.progress import progress, ProgressCallback
from dvc.utils import LARGE_DIR_SIZE, tmp_fname, to_chunks, move, relpath
from dvc.state import StateBase
Expand Down Expand Up @@ -149,6 +154,10 @@ def _collect_dir(self, path_info):
root_info = path_info / root

for fname in files:

if fname == DvcIgnore.DVCIGNORE_FILE:
raise DvcIgnoreInCollectedDirError(root)

file_info = root_info / fname
relative_path = file_info.relative_to(path_info)
checksum = executor.submit(
Expand Down
13 changes: 6 additions & 7 deletions dvc/remote/local/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
file_md5,
walk_files,
relpath,
dvc_walk,
)
from dvc.config import Config
from dvc.exceptions import DvcException
Expand Down Expand Up @@ -222,7 +223,7 @@ def isdir(self, path_info):
return os.path.isdir(fspath_py35(path_info))

def walk(self, path_info):
return os.walk(fspath_py35(path_info))
return dvc_walk(path_info, self.repo.dvcignore)

def get_file_checksum(self, path_info):
return file_md5(fspath_py35(path_info))[0]
Expand Down Expand Up @@ -477,21 +478,19 @@ def _unprotect_file(path):

os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)

@staticmethod
def _unprotect_dir(path):
for path in walk_files(path):
def _unprotect_dir(self, path):
for path in walk_files(path, self.repo.dvcignore):
RemoteLOCAL._unprotect_file(path)

@staticmethod
def unprotect(path_info):
def unprotect(self, path_info):
path = path_info.fspath
if not os.path.exists(path):
raise DvcException(
"can't unprotect non-existing data '{}'".format(path)
)

if os.path.isdir(path):
RemoteLOCAL._unprotect_dir(path)
self._unprotect_dir(path)
else:
RemoteLOCAL._unprotect_file(path)

Expand Down
33 changes: 20 additions & 13 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@

from itertools import chain

from funcy import cached_property

from dvc.config import Config
from dvc.exceptions import (
NotDvcRepoError,
OutputNotFoundError,
TargetNotDirectoryError,
OutputFileMissingError,
)
from dvc.ignore import DvcIgnoreFileHandler
from dvc.ignore import DvcIgnoreFilter
from dvc.path_info import PathInfo
from dvc.utils.compat import open as _open, fspath_py35
from dvc.utils import relpath
Expand Down Expand Up @@ -356,6 +358,17 @@ def pipelines(self, from_directory=None):
G.subgraph(c).copy() for c in nx.weakly_connected_components(G)
]

@staticmethod
def _filter_out_dirs(dirs, outs, root_dir):
def filter_dirs(dname):
path = os.path.join(root_dir, dname)
for out in outs:
if path == os.path.normpath(out):
return False
return True

return list(filter(filter_dirs, dirs))

def stages(self, from_directory=None, check_dag=True):
"""
Walks down the root directory looking for Dvcfiles,
Expand All @@ -376,9 +389,8 @@ def stages(self, from_directory=None, check_dag=True):
stages = []
outs = []

ignore_file_handler = DvcIgnoreFileHandler(self.tree)
for root, dirs, files in self.tree.walk(
from_directory, ignore_file_handler=ignore_file_handler
from_directory, dvcignore=self.dvcignore
):
for fname in files:
path = os.path.join(root, fname)
Expand All @@ -390,16 +402,7 @@ def stages(self, from_directory=None, check_dag=True):
outs.append(out.fspath + out.sep)
stages.append(stage)

def filter_dirs(dname, root=root):
path = os.path.join(root, dname)
if path in (self.dvc_dir, self.scm.dir):
return False
for out in outs:
if path == os.path.normpath(out) or path.startswith(out):
return False
return True

dirs[:] = list(filter(filter_dirs, dirs))
dirs[:] = self._filter_out_dirs(dirs, outs, root)

if check_dag:
self.check_dag(stages)
Expand Down Expand Up @@ -467,3 +470,7 @@ def open(self, path, remote=None, mode="r", encoding=None):
raise OutputFileMissingError(relpath(path, self.root_dir))

return _open(cache_file, mode=mode, encoding=encoding)

@cached_property
def dvcignore(self):
    """Create the ``DvcIgnoreFilter`` for this repo's ``root_dir``.

    Exposed through ``cached_property``.
    """
    ignore_filter = DvcIgnoreFilter(self.root_dir)
    return ignore_filter
2 changes: 1 addition & 1 deletion dvc/repo/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def _find_all_targets(repo, target, recursive):
if os.path.isdir(target) and recursive:
return [
fname
for fname in walk_files(target)
for fname in walk_files(target, repo.dvcignore)
if not repo.is_dvc_internal(fname)
if not Stage.is_stage_file(fname)
if not repo.scm.belongs_to_scm(fname)
Expand Down
23 changes: 3 additions & 20 deletions dvc/scm/git/tree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import errno
import os

from dvc.ignore import DvcIgnoreFilter
from dvc.utils import relpath
from dvc.utils.compat import StringIO, BytesIO
from dvc.exceptions import DvcException
Expand Down Expand Up @@ -103,13 +102,7 @@ def git_object_by_path(self, path):
tree = tree[i]
return tree

def _walk(
self,
tree,
topdown=True,
ignore_file_handler=None,
dvc_ignore_filter=None,
):
def _walk(self, tree, topdown=True):
dirs, nondirs = [], []
for i in tree:
if i.mode == GIT_MODE_DIR:
Expand All @@ -118,26 +111,16 @@ def _walk(
nondirs.append(i.name)

if topdown:
if not dvc_ignore_filter:
dvc_ignore_filter = DvcIgnoreFilter(
tree.abspath, ignore_file_handler=ignore_file_handler
)
dirs, nondirs = dvc_ignore_filter(tree.path, dirs, nondirs)
yield os.path.normpath(tree.abspath), dirs, nondirs

for i in dirs:
for x in self._walk(
tree[i],
topdown=True,
ignore_file_handler=ignore_file_handler,
dvc_ignore_filter=dvc_ignore_filter,
):
for x in self._walk(tree[i], topdown=True):
yield x

if not topdown:
yield os.path.normpath(tree.abspath), dirs, nondirs

def walk(self, top, topdown=True, ignore_file_handler=None):
def walk(self, top, topdown=True, dvcignore=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dvcignore is never used here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but the parameter is required by the base method's signature.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pared Does the base method actually use it, though?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@efiop, no, only `WorkingTree` uses it. Should I pass it some other way?

"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand Down
11 changes: 5 additions & 6 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def isdir(self, path):
def isfile(self, path):
"""Test whether a path is a regular file"""

def walk(self, top, topdown=True, ignore_file_handler=None):
def walk(self, top, topdown=True, dvcignore=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand Down Expand Up @@ -60,21 +60,20 @@ def isfile(self, path):
"""Test whether a path is a regular file"""
return os.path.isfile(path)

def walk(self, top, topdown=True, ignore_file_handler=None):
def walk(self, top, topdown=True, dvcignore=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
- no support for symlinks
- it could raise exceptions, there is no onerror argument
"""

assert dvcignore

def onerror(e):
raise e

for root, dirs, files in dvc_walk(
os.path.abspath(top),
topdown=topdown,
onerror=onerror,
ignore_file_handler=ignore_file_handler,
os.path.abspath(top), dvcignore, topdown=topdown, onerror=onerror
):
yield os.path.normpath(root), dirs, files
Loading