Skip to content

Commit

Permalink
Merge branch 'cb/grep-pcre-ucp'
Browse files Browse the repository at this point in the history
"grep -P" learned to use Unicode Character Property to grok
character classes when processing \b and \w etc.

* cb/grep-pcre-ucp:
  grep: correctly identify utf-8 characters with \{b,w} in -P
  • Loading branch information
gitster committed Jan 27, 2023
2 parents 3e64176 + acabd20 commit 557d93a
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
2 changes: 1 addition & 1 deletion grep.c
Expand Up @@ -293,7 +293,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
options |= PCRE2_CASELESS;
}
if (!opt->ignore_locale && is_utf8_locale() && !literal)
-options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
+options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);

#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
/* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
Expand Down
42 changes: 42 additions & 0 deletions t/perf/p7822-grep-perl-character.sh
@@ -0,0 +1,42 @@
#!/bin/sh
#
# Perf script for "git grep" with Perl-compatible regular expressions.
# The pattern list and the timed commands follow this setup section.

test_description="git-grep's perl regex
If GIT_PERF_GREP_THREADS is set to a list of threads (e.g. '1 4 8'
etc.) we will test the patterns under those numbers of threads.
"

# Pull in the perf test helpers used throughout the rest of the script.
. ./perf-lib.sh

# Repository/worktree setup provided by perf-lib.sh.
test_perf_large_repo
test_checkout_worktree

# A non-empty GIT_PERF_GREP_THREADS switches the script to its
# per-thread-count mode; remember that choice as a prerequisite.
test -z "$GIT_PERF_GREP_THREADS" ||
	test_set_prereq PERF_GREP_ENGINES_THREADS

# Time "git grep -P" for a set of patterns that use \b, \w and \d, several
# of them adjacent to non-ASCII characters.
#
# The patterns are spelled with a single backslash because they are written
# to the pattern file verbatim (see the printf below) and read back by
# "git grep -f", so no layer of shell or echo unescaping is involved.
for pattern in \
	'\bhow' \
	'\bÆvar' \
	'\d+ \bÆvar' \
	'\bBelón\b' \
	'\w{12}\b'
do
	# Double-quote the expansion so the actual pattern (not the literal
	# text "$pattern") lands in the file, and use printf rather than echo
	# because echo's treatment of backslashes differs between shells.
	printf '%s\n' "$pattern" >pat
	if ! test_have_prereq PERF_GREP_ENGINES_THREADS
	then
		# -P must follow "grep": "git -P" is the top-level --no-pager
		# option and would leave grep using its default regex engine.
		# "|| :" keeps a no-match exit status from failing the test.
		test_perf "grep -P '$pattern'" --prereq PCRE "
			git grep -P -f pat || :
		"
	else
		# Unquoted on purpose: GIT_PERF_GREP_THREADS is a
		# whitespace-separated list of thread counts.
		for threads in $GIT_PERF_GREP_THREADS
		do
			test_perf "grep -P '$pattern' with $threads threads" --prereq PTHREADS,PCRE "
				git -c grep.threads=$threads grep -P -f pat || :
			"
		done
	fi
done

test_done

0 comments on commit 557d93a

Please sign in to comment.