Skip to content

Commit

Permalink
35% faster than pathspec \o/. #24 cpburnz/python-pathspec#38
Browse files Browse the repository at this point in the history
  • Loading branch information
excitoon committed Aug 28, 2022
1 parent cfa6678 commit d96ef52
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 14 deletions.
41 changes: 27 additions & 14 deletions gitignorefile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __call__(self, path, is_dir=None):

else:
for plain_path in plain_paths:
# assert plain_path.parts not in self.__gitignores
self.__gitignores[plain_path.parts] = []

if add_to_children:
Expand All @@ -59,6 +60,7 @@ def __call__(self, path, is_dir=None):
return False

for parent, (_, parent_plain_paths) in reversed(list(add_to_children.items())):
# assert parent.parts not in self.__gitignores
self.__gitignores[parent.parts] = self.__gitignores[parent.parts[:-1]].copy()
for parent_to_add, (gitignore_to_add, _) in reversed(list(add_to_children.items())):
self.__gitignores[parent.parts].append(gitignore_to_add)
Expand All @@ -68,10 +70,12 @@ def __call__(self, path, is_dir=None):
self.__gitignores[parent.parts].reverse()

for plain_path in parent_plain_paths:
# assert plain_path.parts not in self.__gitignores
self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]

# This parent comes either from first or second loop.
for plain_path in plain_paths:
# assert plain_path.parts not in self.__gitignores
self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]

return any((m(path, is_dir=is_dir) for m in self.__gitignores[parent.parts]))
Expand All @@ -98,7 +102,7 @@ def join(self, name):
return _Path(self.__parts + (name,))

def relpath(self, base_path):
assert self.__parts[: len(base_path.__parts)] == base_path.__parts
# assert self.__parts[: len(base_path.__parts)] == base_path.__parts
return "/".join(self.__parts[len(base_path.__parts) :])

def parents(self):
Expand Down Expand Up @@ -198,11 +202,7 @@ def _rule_from_pattern(pattern):
pattern = pattern[:i]
i -= 1

regexp = _fnmatch_pathname_to_regexp(pattern, directory_only)

if anchored:
regexp = f"^{regexp}"

regexp = _fnmatch_pathname_to_regexp(pattern, anchored, directory_only)
return _IgnoreRule(regexp, negation, directory_only)


Expand Down Expand Up @@ -244,19 +244,24 @@ def __init__(self, regexp, negation, directory_only):
self.__regexp = re.compile(regexp)
self.__negation = negation
self.__directory_only = directory_only
self.__match = self.__regexp.match

# Expose the compiled regular expression stored by `__init__` (`re.compile(regexp)`).
@property
def regexp(self):
return self.__regexp

# Expose the `negation` value this rule was constructed with.
@property
def negation(self):
return self.__negation

def match(self, rel_path, is_dir):
match = self.__regexp.search(rel_path)
m = self.__match(rel_path)

# If we need a directory, check whether there is something after the slash; if there is not, the target itself must be a directory.
# If there is something after the slash, the matched component is a directory regardless of the target's type.
# `self.directory_only` implies we have group number 1.
# N.B. Question mark inside a group without a name can shift indices. :(
return match and (not self.__directory_only or match.group(1) is not None or is_dir)
return m and (not self.__directory_only or m.group(1) is not None or is_dir)


if os.altsep is not None:
Expand All @@ -269,14 +274,22 @@ def match(self, rel_path, is_dir):

# Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
# option that `.gitignore`'s behavior depends on.
def _fnmatch_pathname_to_regexp(pattern, directory_only):
def _fnmatch_pathname_to_regexp(pattern, anchored, directory_only):
"""
Implements `fnmatch` style-behavior, as though with `FNM_PATHNAME` flagged;
the path separator will not match shell-style `*` and `?` wildcards.
"""

if not pattern:
if directory_only:
return "[^/]+(/.+)?$" # Empty name means no path fragment.

else:
return ".*"

i, n = 0, len(pattern)

res = ["(?:^|/)"] if pattern else [] # Empty name means no path fragment.
res = ["(?:^|.+/)" if not anchored else ""]
while i < n:
c = pattern[i]
i += 1
Expand All @@ -290,10 +303,10 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only):
res.append("/?")

else:
res.append(f"[^/]*")
res.append("[^/]*")

except IndexError:
res.append(f"[^/]*")
res.append("[^/]*")

elif c == "?":
res.append("[^/]")
Expand Down Expand Up @@ -322,9 +335,9 @@ def _fnmatch_pathname_to_regexp(pattern, directory_only):
res.append(re.escape(c))

if directory_only: # In this case we are interested if there is something after slash.
res.append(f"(/.+)?$")
res.append("(/.+)?$")

else:
res.append(f"(?:/|$)")
res.append("(?:/.+)?$")

return "".join(res)
18 changes: 18 additions & 0 deletions tests/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ def test_second_level_directories_unchained(self):
self.assertTrue(matches("/home/michael/a/doc/frotz", is_dir=True))
self.assertFalse(matches("/home/michael/a/b/doc/frotz", is_dir=False))
self.assertTrue(matches("/home/michael/a/b/doc/frotz", is_dir=True))
for is_dir in (False, True):
with self.subTest(i=is_dir):
self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=False))
self.assertTrue(matches("/home/michael/doc/frotz/file", is_dir=True))
self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=False))
self.assertTrue(matches("/home/michael/a/doc/frotz/file", is_dir=True))
self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=False))
self.assertTrue(matches("/home/michael/a/b/doc/frotz/file", is_dir=True))

def test_second_level_files(self):
matches = self.__parse_gitignore_string(["doc/frotz"], fake_base_dir="/home/michael")
Expand All @@ -124,6 +132,16 @@ def test_ignore_file(self):
self.assertTrue(matches("/home/michael/.venv/folder", is_dir=is_dir))
self.assertTrue(matches("/home/michael/.venv/file.txt", is_dir=is_dir))

# Checks the rule pair "core" followed by "!core/": the second rule negates the first for directories only.
def test_ignore_core_file(self):
matches = self.__parse_gitignore_string(["core", "!core/"], fake_base_dir="/home/michael")
for is_dir in (False, True):
with self.subTest(i=is_dir):
# Anything below a `core` directory is not ignored, whether it is a file or a directory.
self.assertFalse(matches("/home/michael/core/a", is_dir=is_dir))
# A file named `core` is ignored; a directory named `core` is not.
self.assertTrue(matches("/home/michael/core", is_dir=False))
self.assertFalse(matches("/home/michael/core", is_dir=True))
# The same holds one level down, since the pattern contains no slash that would anchor it.
self.assertTrue(matches("/home/michael/a/core", is_dir=False))
self.assertFalse(matches("/home/michael/a/core", is_dir=True))

def test_ignore_directory(self):
matches = self.__parse_gitignore_string([".venv/"], fake_base_dir="/home/michael")
for is_dir in (False, True):
Expand Down

0 comments on commit d96ef52

Please sign in to comment.