Skip to content

Commit

Permalink
Merge pull request #108 from jaraco/feature/glob-alignment
Browse files Browse the repository at this point in the history
Generalize glob functionality.
  • Loading branch information
jaraco committed Dec 16, 2023
2 parents ce7c685 + 1a9313d commit e8044c1
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 25 deletions.
1 change: 1 addition & 0 deletions newsfragments/108.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactored glob functionality into a more generalized solution that supports platform-specific path separators.
5 changes: 3 additions & 2 deletions zipp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re

from .py310compat import text_encoding
from .glob import translate
from .glob import Translator


__all__ = ['Path']
Expand Down Expand Up @@ -397,7 +397,8 @@ def glob(self, pattern):
raise ValueError(f"Unacceptable pattern: {pattern!r}")

prefix = re.escape(self.at)
matches = re.compile(prefix + translate(pattern)).fullmatch
tr = Translator(seps='/')
matches = re.compile(prefix + tr.translate(pattern)).fullmatch
return map(self._next, filter(matches, self.root.namelist()))

def rglob(self, pattern):
Expand Down
112 changes: 89 additions & 23 deletions zipp/glob.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,97 @@
import os
import re


def translate(pattern):
r"""
Given a glob pattern, produce a regex that matches it.
_default_seps = os.sep + str(os.altsep) * bool(os.altsep)

>>> translate('*.txt')
'[^/]*\\.txt'
>>> translate('a?txt')
'a[^/]txt'
>>> translate('**/*')
'.*/[^/]*'

class Translator:
"""
>>> Translator('xyz')
Traceback (most recent call last):
...
AssertionError: Invalid separators
>>> Translator('')
Traceback (most recent call last):
...
AssertionError: Invalid separators
"""
return ''.join(map(replace, separate(pattern)))

seps: str

def __init__(self, seps: str = _default_seps):
    """
    Create a translator for the given path separators.

    ``seps`` is the string of separator characters to honor. It must be
    non-empty and contain only characters found in ``_default_seps``.

    Raises AssertionError("Invalid separators") otherwise.
    """
    # Raise explicitly instead of using an ``assert`` statement so the
    # validation still runs under ``python -O``; the exception type and
    # message are the same as before.
    if not seps or not set(seps) <= set(_default_seps):
        raise AssertionError("Invalid separators")
    self.seps = seps

def translate(self, pattern):
    """
    Convert a glob pattern into a complete regular expression.

    The pattern is converted by :meth:`translate_core` and the
    result is then wrapped by :meth:`extend`.
    """
    core = self.translate_core(pattern)
    return self.extend(core)

def extend(self, pattern):
    r"""
    Wrap a translated regex with pattern-wide settings.

    The regex is placed inside '(?s:...)', a non-capturing group
    with the DOTALL flag, so that '.' also matches newlines
    (valid on Unix).
    '\Z' is appended so the expression behaves like fullmatch
    even when only match is used.
    """
    return r'(?s:{})\Z'.format(pattern)

def translate_core(self, pattern):
    r"""
    Given a glob pattern, produce a regex that matches it.

    The use of ``**`` is validated by :meth:`restrict_rglob` (which
    raises ValueError for an invalid pattern), lone ``*`` segments are
    rewritten by :meth:`star_not_empty`, and each piece yielded by
    ``separate`` is converted by :meth:`replace`.

    The doctests strip escaped backslashes so the expected output is
    the same whether or not a backslash is among the separators.

    >>> t = Translator()
    >>> t.translate_core('*.txt').replace('\\\\', '')
    '[^/]*\\.txt'
    >>> t.translate_core('a?txt').replace('\\\\', '')
    'a[^/]txt'
    >>> t.translate_core('**/*').replace('\\\\', '')
    '.*/[^/][^/]*'
    """
    self.restrict_rglob(pattern)
    return ''.join(map(self.replace, separate(self.star_not_empty(pattern))))

def replace(self, match):
    """
    Perform the replacements for a match from :func:`separate`.

    A match of the ``set`` group (a bracketed character set) is
    returned unchanged. Any other text is regex-escaped, after which
    ``**`` becomes ``.*``, ``*`` becomes "any run of non-separator
    characters" and ``?`` becomes "exactly one non-separator
    character".
    """
    # Both '*' and '?' must exclude every configured separator, not
    # just '/', so that neither wildcard crosses a path segment.
    not_sep = rf'[^{re.escape(self.seps)}]'
    return match.group('set') or (
        re.escape(match.group(0))
        .replace('\\*\\*', r'.*')
        .replace('\\*', not_sep + '*')
        .replace('\\?', not_sep)
    )

def restrict_rglob(self, pattern):
    """
    Reject patterns where ** shares a path segment with other text.

    The pattern is split on runs of the configured separators; a
    segment that contains ** but is not exactly ** triggers a
    ValueError.

    >>> Translator().translate('**foo')
    Traceback (most recent call last):
    ...
    ValueError: ** must appear alone in a path segment
    """
    splitter = '[' + re.escape(self.seps) + ']+'
    for part in re.split(splitter, pattern):
        if part != '**' and '**' in part:
            raise ValueError("** must appear alone in a path segment")

def star_not_empty(self, pattern):
    """
    Rewrite each path segment that is exactly * as ?*.

    This keeps a lone * from matching an empty segment. Segments
    are the maximal runs of non-separator characters; any segment
    other than a lone * is left as it is.
    """
    segment_rx = '[^' + re.escape(self.seps) + ']+'
    return re.sub(
        segment_rx,
        lambda found: '?*' if found.group(0) == '*' else found.group(0),
        pattern,
    )


def separate(pattern):
Expand All @@ -25,16 +104,3 @@ def separate(pattern):
['a', '[?]', 'txt']
"""
return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)


def replace(match):
    """
    Convert one match produced by :func:`separate` into regex text.

    A bracketed character set (the ``set`` group) is passed through
    untouched. Anything else is regex-escaped and then the escaped
    glob wildcards are swapped for their regex equivalents.
    """
    char_set = match.group('set')
    if char_set:
        return char_set

    translated = re.escape(match.group(0))
    # Order matters: '**' has to be handled before the single '*'.
    substitutions = (
        ('\\*\\*', r'.*'),
        ('\\*', r'[^/]*'),
        ('\\?', r'[^/]'),
    )
    for escaped_glob, regex in substitutions:
        translated = translated.replace(escaped_glob, regex)
    return translated

0 comments on commit e8044c1

Please sign in to comment.