Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 38 additions & 12 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
import re
from itertools import groupby
from itertools import groupby, takewhile

from pathspec.patterns import GitWildMatchPattern
from pathspec.util import normalize_file
Expand Down Expand Up @@ -131,20 +131,19 @@ def __init__(self, tree, root_dir):
self.ignores_trie_tree[root_dir] = DvcIgnorePatterns(
default_ignore_patterns, root_dir
)
for root, dirs, _ in self.tree.walk(
self.root_dir, use_dvcignore=False
):
self._update(root)
self._update_sub_repo(root, dirs)
dirs[:], _ = self(root, dirs, [])
self._update(self.root_dir)

def _update(self, dirname):
old_pattern = self.ignores_trie_tree.longest_prefix(dirname).value
Copy link
Contributor

@karajan1001 karajan1001 Jul 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we also need the following check here:

        ignore_pattern = self.ignores_trie_tree.get(dirname)
        if ignore_pattern:
            return

to prevent it from running multiple times. But if nothing other than _get_trie_pattern calls it, this is not necessary.

It seems nothing else calls it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! Indeed, for now _update is only called in _get_trie_pattern, so that is already handled for us.

matches = old_pattern.matches(dirname, DvcIgnore.DVCIGNORE_FILE, False)

ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE)
if self.tree.exists(ignore_file_path, use_dvcignore=False):
if not matches and self.tree.exists(
ignore_file_path, use_dvcignore=False
):
new_pattern = DvcIgnorePatterns.from_files(
ignore_file_path, self.tree
)
old_pattern = self._get_trie_pattern(dirname)
if old_pattern:
self.ignores_trie_tree[dirname] = DvcIgnorePatterns(
*merge_patterns(
Expand All @@ -156,11 +155,18 @@ def _update(self, dirname):
)
else:
self.ignores_trie_tree[dirname] = new_pattern
elif old_pattern:
self.ignores_trie_tree[dirname] = old_pattern

# NOTE: using `walk` + `break` because tree doesn't have `listdir()`
for root, dirs, _ in self.tree.walk(dirname, use_dvcignore=False):
self._update_sub_repo(root, dirs)
break

def _update_sub_repo(self, root, dirs):
for d in dirs:
if self._is_dvc_repo(root, d):
old_pattern = self._get_trie_pattern(root)
old_pattern = self.ignores_trie_tree.longest_prefix(root).value
if old_pattern:
self.ignores_trie_tree[root] = DvcIgnorePatterns(
*merge_patterns(
Expand All @@ -183,8 +189,28 @@ def __call__(self, root, dirs, files):
return dirs, files

def _get_trie_pattern(self, dirname):
ignore_pattern = self.ignores_trie_tree.longest_prefix(dirname).value
return ignore_pattern
ignore_pattern = self.ignores_trie_tree.get(dirname)
if ignore_pattern:
return ignore_pattern

prefix = self.ignores_trie_tree.longest_prefix(dirname).key
if not prefix:
# outside of the repo
return None

dirs = list(
takewhile(
lambda path: path != prefix,
(parent.fspath for parent in PathInfo(dirname).parents),
)
)
dirs.reverse()
dirs.append(dirname)

for parent in dirs:
self._update(parent)

return self.ignores_trie_tree.get(dirname)

def _is_ignored(self, path, is_dir=False):
if self._outside_repo(path):
Expand Down