From 1f0243e96b1e21cf2eb62449777b21cded248cb9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 Oct 2023 18:07:06 +0300 Subject: [PATCH] gh-111259: Optimize recursive wildcards in pathlib (GH-111303) Regular expression pattern `(?s:.)` is much faster than `[\s\S]`. --- Lib/pathlib.py | 6 +++--- .../Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5c1c71ecec28053..e3eecc3b6d73e3e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -124,13 +124,13 @@ def _compile_pattern_lines(pattern_lines, case_sensitive): elif part == '*': part = r'.+' elif part == '**\n': - # '**/' component: we use '[\s\S]' rather than '.' so that path + # '**/' component: we use '(?s:.)' rather than '.' so that path # separators (i.e. newlines) are matched. The trailing '^' ensures # we terminate after a path separator (i.e. on a new line). - part = r'[\s\S]*^' + part = r'(?s:.)*^' elif part == '**': # '**' component. - part = r'[\s\S]*' + part = r'(?s:.)*' elif '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") else: diff --git a/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst new file mode 100644 index 000000000000000..4b597f51a955a70 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-25-11-13-35.gh-issue-111259.z7ndeA.rst @@ -0,0 +1 @@ +Optimize recursive wildcards in :mod:`pathlib`.