Navigation Menu

Skip to content

Commit

Permalink
Merge branch 'ab/grep-preparatory-cleanup'
Browse files Browse the repository at this point in the history
The internal implementation of "git grep" has seen some clean-up.

* ab/grep-preparatory-cleanup: (31 commits)
  grep: assert that threading is enabled when calling grep_{lock,unlock}
  grep: given --threads with NO_PTHREADS=YesPlease, warn
  pack-objects: fix buggy warning about threads
  pack-objects & index-pack: add test for --threads warning
  test-lib: add a PTHREADS prerequisite
  grep: move is_fixed() earlier to avoid forward declaration
  grep: change internal *pcre* variable & function names to be *pcre1*
  grep: change the internal PCRE macro names to be PCRE1
  grep: factor test for \0 in grep patterns into a function
  grep: remove redundant regflags assignments
  grep: catch a missing enum in switch statement
  perf: add a comparison test of log --grep regex engines with -F
  perf: add a comparison test of log --grep regex engines
  perf: add a comparison test of grep regex engines with -F
  perf: add a comparison test of grep regex engines
  perf: emit progress output when unpacking & building
  perf: add a GIT_PERF_MAKE_COMMAND for when *_MAKE_OPTS won't do
  grep: add tests to fix blind spots with \0 patterns
  grep: prepare for testing binary regexes containing rx metacharacters
  grep: add a test helper function for less verbose -f \0 tests
  ...
  • Loading branch information
gitster committed Jun 2, 2017
2 parents 7ef0d04 + 8df4c29 commit 36dcb57
Show file tree
Hide file tree
Showing 24 changed files with 843 additions and 239 deletions.
7 changes: 5 additions & 2 deletions Documentation/git-grep.txt
Expand Up @@ -161,8 +161,11 @@ OPTIONS

-P::
--perl-regexp::
Use Perl-compatible regexp for patterns. Requires libpcre to be
compiled in.
Use Perl-compatible regular expressions for patterns.
+
Support for these types of regular expressions is an optional
compile-time dependency. If Git wasn't compiled with support for them,
providing this option will cause it to die.

-F::
--fixed-strings::
Expand Down
8 changes: 6 additions & 2 deletions Documentation/rev-list-options.txt
Expand Up @@ -92,8 +92,12 @@ endif::git-rev-list[]
pattern as a regular expression).

--perl-regexp::
Consider the limiting patterns to be Perl-compatible regular expressions.
Requires libpcre to be compiled in.
Consider the limiting patterns to be Perl-compatible regular
expressions.
+
Support for these types of regular expressions is an optional
compile-time dependency. If Git wasn't compiled with support for them,
providing this option will cause it to die.

--remove-empty::
Stop when a given path disappears from the tree.
Expand Down
14 changes: 10 additions & 4 deletions Makefile
Expand Up @@ -24,8 +24,10 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
# Define USE_LIBPCRE if you have and want to use libpcre. Various
# commands such as log and grep offer runtime options to use
# Perl-compatible regular expressions instead of standard or extended
# POSIX regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
Expand Down Expand Up @@ -1087,7 +1089,7 @@ ifdef NO_LIBGEN_H
endif

ifdef USE_LIBPCRE
BASIC_CFLAGS += -DUSE_LIBPCRE
BASIC_CFLAGS += -DUSE_LIBPCRE1
ifdef LIBPCREDIR
BASIC_CFLAGS += -I$(LIBPCREDIR)/include
EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
Expand Down Expand Up @@ -2239,8 +2241,9 @@ GIT-BUILD-OPTIONS: FORCE
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@+
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@+
@echo NO_EXPAT=\''$(subst ','\'',$(subst ','\'',$(NO_EXPAT)))'\' >>$@+
@echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+
@echo USE_LIBPCRE1=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@+
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+
@echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+
@echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+
Expand Down Expand Up @@ -2271,6 +2274,9 @@ endif
ifdef GIT_PERF_MAKE_OPTS
@echo GIT_PERF_MAKE_OPTS=\''$(subst ','\'',$(subst ','\'',$(GIT_PERF_MAKE_OPTS)))'\' >>$@+
endif
ifdef GIT_PERF_MAKE_COMMAND
@echo GIT_PERF_MAKE_COMMAND=\''$(subst ','\'',$(subst ','\'',$(GIT_PERF_MAKE_COMMAND)))'\' >>$@+
endif
ifdef GIT_INTEROP_MAKE_OPTS
@echo GIT_INTEROP_MAKE_OPTS=\''$(subst ','\'',$(subst ','\'',$(GIT_INTEROP_MAKE_OPTS)))'\' >>$@+
endif
Expand Down
23 changes: 19 additions & 4 deletions builtin/grep.c
Expand Up @@ -73,14 +73,14 @@ static pthread_mutex_t grep_mutex;

static inline void grep_lock(void)
{
if (num_threads)
pthread_mutex_lock(&grep_mutex);
assert(num_threads);
pthread_mutex_lock(&grep_mutex);
}

static inline void grep_unlock(void)
{
if (num_threads)
pthread_mutex_unlock(&grep_mutex);
assert(num_threads);
pthread_mutex_unlock(&grep_mutex);
}

/* Signalled when a new work_item is added to todo. */
Expand Down Expand Up @@ -289,6 +289,17 @@ static int grep_cmd_config(const char *var, const char *value, void *cb)
if (num_threads < 0)
die(_("invalid number of threads specified (%d) for %s"),
num_threads, var);
#ifdef NO_PTHREADS
else if (num_threads && num_threads != 1) {
/*
* TRANSLATORS: %s is the configuration
* variable for tweaking threads, currently
* grep.threads
*/
warning(_("no threads support, ignoring %s"), var);
num_threads = 0;
}
#endif
}

return st;
Expand Down Expand Up @@ -495,6 +506,8 @@ static void compile_submodule_options(const struct grep_opt *opt,
break;
case GREP_PATTERN_TYPE_UNSPECIFIED:
break;
default:
die("BUG: Added a new grep pattern type without updating switch statement");
}

for (pattern = opt->pattern_list; pattern != NULL;
Expand Down Expand Up @@ -1229,6 +1242,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
else if (num_threads < 0)
die(_("invalid number of threads specified (%d)"), num_threads);
#else
if (num_threads)
warning(_("no threads support, ignoring --threads"));
num_threads = 0;
#endif

Expand Down
4 changes: 3 additions & 1 deletion builtin/pack-objects.c
Expand Up @@ -2483,8 +2483,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
die("invalid number of threads specified (%d)",
delta_search_threads);
#ifdef NO_PTHREADS
if (delta_search_threads != 1)
if (delta_search_threads != 1) {
warning("no threads support, ignoring %s", k);
delta_search_threads = 0;
}
#endif
return 0;
}
Expand Down
12 changes: 8 additions & 4 deletions configure.ac
Expand Up @@ -250,8 +250,10 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)])
AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),
GIT_PARSE_WITH([openssl]))

# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
# Define USE_LIBPCRE if you have and want to use libpcre. Various
# commands such as log and grep offer runtime options to use
# Perl-compatible regular expressions instead of standard or extended
# POSIX regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
Expand Down Expand Up @@ -499,8 +501,10 @@ GIT_CONF_SUBST([NEEDS_SSL_WITH_CRYPTO])
GIT_CONF_SUBST([NO_OPENSSL])

#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
# Define USE_LIBPCRE if you have and want to use libpcre. Various
# commands such as log and grep offer runtime options to use
# Perl-compatible regular expressions instead of standard or extended
# POSIX regular expressions.
#

if test -n "$USE_LIBPCRE"; then
Expand Down
110 changes: 57 additions & 53 deletions grep.c
Expand Up @@ -178,26 +178,23 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st

case GREP_PATTERN_TYPE_BRE:
opt->fixed = 0;
opt->pcre = 0;
opt->regflags &= ~REG_EXTENDED;
opt->pcre1 = 0;
break;

case GREP_PATTERN_TYPE_ERE:
opt->fixed = 0;
opt->pcre = 0;
opt->pcre1 = 0;
opt->regflags |= REG_EXTENDED;
break;

case GREP_PATTERN_TYPE_FIXED:
opt->fixed = 1;
opt->pcre = 0;
opt->regflags &= ~REG_EXTENDED;
opt->pcre1 = 0;
break;

case GREP_PATTERN_TYPE_PCRE:
opt->fixed = 0;
opt->pcre = 1;
opt->regflags &= ~REG_EXTENDED;
opt->pcre1 = 1;
break;
}
}
Expand Down Expand Up @@ -324,40 +321,64 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
die("%s'%s': %s", where, p->pattern, error);
}

#ifdef USE_LIBPCRE
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
/*
 * Report whether the first `len` bytes of `s` are free of regex
 * metacharacters, as classified by is_regex_special().
 *
 * Returns 0 as soon as a special byte is seen, and 1 when none is
 * found (which includes the len == 0 case).
 */
static int is_fixed(const char *s, size_t len)
{
	const char *cursor = s;
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		if (is_regex_special(*cursor))
			return 0;
		cursor++;
	}

	return 1;
}

/*
 * Tell whether the `len`-byte pattern `s` contains a NUL byte.
 *
 * regcomp cannot accept patterns with NULs, so when it is in use any
 * pattern containing a NUL is considered fixed.
 *
 * Returns 1 if a NUL byte is present in the first `len` bytes, 0 otherwise.
 */
static int has_null(const char *s, size_t len)
{
	return memchr(s, 0, len) != NULL;
}

#ifdef USE_LIBPCRE1
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
const char *error;
int erroffset;
int options = PCRE_MULTILINE;

if (opt->ignore_case) {
if (has_non_ascii(p->pattern))
p->pcre_tables = pcre_maketables();
p->pcre1_tables = pcre_maketables();
options |= PCRE_CASELESS;
}
if (is_utf8_locale() && has_non_ascii(p->pattern))
options |= PCRE_UTF8;

p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
p->pcre_tables);
if (!p->pcre_regexp)
p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
p->pcre1_tables);
if (!p->pcre1_regexp)
compile_regexp_failed(p, error);

p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
if (!p->pcre_extra_info && error)
p->pcre1_extra_info = pcre_study(p->pcre1_regexp, 0, &error);
if (!p->pcre1_extra_info && error)
die("%s", error);
}

static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
int ovector[30], ret, flags = 0;

if (eflags & REG_NOTBOL)
flags |= PCRE_NOTBOL;

ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, eol - line,
0, flags, ovector, ARRAY_SIZE(ovector));
if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
die("pcre_exec failed with error code %d", ret);
Expand All @@ -370,55 +391,36 @@ static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
return ret;
}

static void free_pcre_regexp(struct grep_pat *p)
static void free_pcre1_regexp(struct grep_pat *p)
{
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
pcre_free((void *)p->pcre_tables);
pcre_free(p->pcre1_regexp);
pcre_free(p->pcre1_extra_info);
pcre_free((void *)p->pcre1_tables);
}
#else /* !USE_LIBPCRE */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
#else /* !USE_LIBPCRE1 */
static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}

static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
return 1;
}

static void free_pcre_regexp(struct grep_pat *p)
static void free_pcre1_regexp(struct grep_pat *p)
{
}
#endif /* !USE_LIBPCRE */

/*
 * Decide whether the `len`-byte pattern `s` can be treated as a fixed
 * string.
 *
 * regcomp cannot accept patterns with NULs, so any pattern containing a
 * NUL is considered fixed. Otherwise the pattern is fixed only when none
 * of its bytes is a regex metacharacter per is_regex_special().
 *
 * Returns 1 for a fixed pattern, 0 otherwise.
 */
static int is_fixed(const char *s, size_t len)
{
	size_t pos = 0;

	if (memchr(s, 0, len) != NULL)
		return 1;

	while (pos < len) {
		if (is_regex_special(s[pos]))
			return 0;
		pos++;
	}

	return 1;
}
#endif /* !USE_LIBPCRE1 */

static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
{
struct strbuf sb = STRBUF_INIT;
int err;
int regflags;
int regflags = opt->regflags;

basic_regex_quote_buf(&sb, p->pattern);
regflags = opt->regflags & ~REG_EXTENDED;
if (opt->ignore_case)
regflags |= REG_ICASE;
err = regcomp(&p->regexp, sb.buf, regflags);
Expand Down Expand Up @@ -455,7 +457,9 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
* simple string match using kws. p->fixed tells us if we
* want to use kws.
*/
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
if (opt->fixed ||
has_null(p->pattern, p->patternlen) ||
is_fixed(p->pattern, p->patternlen))
p->fixed = !icase || ascii_only;
else
p->fixed = 0;
Expand All @@ -475,8 +479,8 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
return;
}

if (opt->pcre) {
compile_pcre_regexp(p, opt);
if (opt->pcre1) {
compile_pcre1_regexp(p, opt);
return;
}

Expand Down Expand Up @@ -832,8 +836,8 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN_BODY:
if (p->kws)
kwsfree(p->kws);
else if (p->pcre_regexp)
free_pcre_regexp(p);
else if (p->pcre1_regexp)
free_pcre1_regexp(p);
else
regfree(&p->regexp);
free(p->pattern);
Expand Down Expand Up @@ -912,8 +916,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,

if (p->fixed)
hit = !fixmatch(p, line, eol, match);
else if (p->pcre_regexp)
hit = !pcrematch(p, line, eol, match, eflags);
else if (p->pcre1_regexp)
hit = !pcre1match(p, line, eol, match, eflags);
else
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
eflags);
Expand Down

0 comments on commit 36dcb57

Please sign in to comment.