Skip to content

Commit

Permalink
grep: fix multibyte regex handling under macOS
Browse files Browse the repository at this point in the history
The commit 29de205 (Makefile: fix default regex settings on
Darwin, 2013-05-11) fixed t0070-fundamental.sh under Darwin (macOS) by
adopting Git's regex library.  However, this library is compiled with
NO_MBSUPPORT, which causes git-grep to work incorrectly on multibyte
(e.g. UTF-8) files.  Current macOS versions pass t0070-fundamental.sh
with the native macOS regex library, which also supports multibyte
characters.

Adjust the Makefile to use the native regex library, and call
setlocale(3) to set CTYPE according to the user's preference.
The setlocale call is required on all platforms, but on platforms
supporting gettext(3), setlocale was called as a side-effect of
initializing gettext.  Therefore, move the CTYPE setlocale call from
gettext.c to common-main.c and the corresponding locale.h include
into git-compat-util.h.

Thanks to the global initialization of CTYPE setlocale, the test-tool
regex command now works correctly with supported multibyte regexes, and
is used to set the MB_REGEX test prerequisite by assessing a platform's
support for them.

Signed-off-by: Diomidis Spinellis <dds@aueb.gr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
dspinellis authored and gitster committed Aug 26, 2022
1 parent 795ea87 commit 1819ad3
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1434,7 +1434,6 @@ ifeq ($(uname_S),Darwin)
APPLE_COMMON_CRYPTO = YesPlease
COMPAT_CFLAGS += -DAPPLE_COMMON_CRYPTO
endif
NO_REGEX = YesPlease
PTHREAD_LIBS =
endif

Expand Down Expand Up @@ -2979,6 +2978,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
@echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
@echo NO_REGEX=\''$(subst ','\'',$(subst ','\'',$(NO_REGEX)))'\' >>$@+
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
@echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+
@echo DC_SHA1=\''$(subst ','\'',$(subst ','\'',$(DC_SHA1)))'\' >>$@+
Expand Down
1 change: 1 addition & 0 deletions common-main.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ int main(int argc, const char **argv)

git_resolve_executable_dir(argv[0]);

setlocale(LC_CTYPE, "");
git_setup_gettext();

initialize_the_repository();
Expand Down
2 changes: 0 additions & 2 deletions gettext.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "config.h"

#ifndef NO_GETTEXT
# include <locale.h>
# include <libintl.h>
# ifdef GIT_WINDOWS_NATIVE

Expand Down Expand Up @@ -80,7 +79,6 @@ static int test_vsnprintf(const char *fmt, ...)

static void init_gettext_charset(const char *domain)
{
setlocale(LC_CTYPE, "");
charset = locale_charset();
bind_textdomain_codeset(domain, charset);

Expand Down
1 change: 1 addition & 0 deletions git-compat-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ struct strbuf;
#endif
#include <errno.h>
#include <limits.h>
#include <locale.h>
#ifdef NEEDS_SYS_PARAM_H
#include <sys/param.h>
#endif
Expand Down
15 changes: 15 additions & 0 deletions t/t7810-grep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ test_invalid_grep_expression() {
'
}

LC_ALL=en_US.UTF-8 test-tool regex '^.$' '¿' &&
test_set_prereq MB_REGEX

cat >hello.c <<EOF
#include <assert.h>
#include <stdio.h>
Expand Down Expand Up @@ -88,6 +91,10 @@ test_expect_success setup '
echo unusual >"\"unusual\" pathname" &&
echo unusual >"t/nested \"unusual\" pathname"
fi &&
if test_have_prereq MB_REGEX
then
echo "¿" >reverse-question-mark
fi &&
git add . &&
test_tick &&
git commit -m initial
Expand Down Expand Up @@ -569,6 +576,14 @@ do
'
done

test_expect_success MB_REGEX 'grep exactly one char in single-char multibyte file' '
LC_ALL=en_US.UTF-8 git grep "^.$" reverse-question-mark
'

test_expect_success MB_REGEX 'grep two chars in single-char multibyte file' '
LC_ALL=en_US.UTF-8 test_expect_code 1 git grep ".." reverse-question-mark
'

cat >expected <<EOF
file
EOF
Expand Down

0 comments on commit 1819ad3

Please sign in to comment.