From 579cae458ad9e47a689c7063e600d7bc492cb176 Mon Sep 17 00:00:00 2001 From: Dave Mielke Date: Tue, 9 Oct 2018 13:12:09 -0400 Subject: [PATCH] Convert rgx.[ch] to use pcre2 (rather than regex). (dm) --- Headers/rgx.h | 54 ++++++++++--- Programs/Makefile.in | 6 +- Programs/rgx.c | 186 ++++++++++++++++++++++++++++++++----------- config.h.in | 6 ++ config.mk.in | 3 + configure.ac | 15 ++++ 6 files changed, 209 insertions(+), 61 deletions(-) diff --git a/Headers/rgx.h b/Headers/rgx.h index 00b8fec46a..4a2fe02d2b 100644 --- a/Headers/rgx.h +++ b/Headers/rgx.h @@ -29,36 +29,64 @@ extern RegularExpressionObject *newRegularExpressionObject (void *data); extern void destroyRegularExpressionObject (RegularExpressionObject *rgx); typedef struct { - const char *string; - const char *expression; + struct { + const wchar_t *characters; + void *internal; + size_t length; + } string; + + struct { + const wchar_t *characters; + size_t length; + } expression; struct { - const void *array; - size_t size; + void *internal; + size_t count; } matches; - void *objectData; - void *patternData; - void *matchData; + struct { + void *object; + void *pattern; + void *match; + } data; } RegularExpressionHandlerParameters; #define REGULAR_EXPRESSION_HANDLER(name) void name (const RegularExpressionHandlerParameters *parameters) typedef REGULAR_EXPRESSION_HANDLER(RegularExpressionHandler); -extern int addRegularExpression ( +extern int addRegularExpressionCharacters ( + RegularExpressionObject *rgx, + const wchar_t *characters, size_t length, + RegularExpressionHandler *handler, void *data +); + +extern int +addRegularExpressionString ( + RegularExpressionObject *rgx, + const wchar_t *string, + RegularExpressionHandler *handler, void *data +); + +extern int matchRegularExpressionsCharacters ( RegularExpressionObject *rgx, - const char *expression, - size_t submatches, - RegularExpressionHandler *handler, + const wchar_t *characters, + size_t length, void *data ); -extern int 
matchRegularExpressions ( +extern int +matchRegularExpressionsString ( RegularExpressionObject *rgx, - const char *string, + const wchar_t *string, void *data ); +extern unsigned int +getRegularExpressionMatchCount ( + const RegularExpressionHandlerParameters *parameters +); + extern int getRegularExpressionMatch ( const RegularExpressionHandlerParameters *parameters, unsigned int index, int *start, int *end diff --git a/Programs/Makefile.in b/Programs/Makefile.in index 495c870c2b..297e975ec5 100644 --- a/Programs/Makefile.in +++ b/Programs/Makefile.in @@ -305,7 +305,7 @@ queue.$O: $(CC) $(LIBCFLAGS) -c $(SRC_DIR)/queue.c rgx.$O: - $(CC) $(LIBCFLAGS) -c $(SRC_DIR)/rgx.c + $(CC) $(LIBCFLAGS) $(PCRE_INCLUDES) -c $(SRC_DIR)/rgx.c datafile.$O: $(CC) $(LIBCFLAGS) -c $(SRC_DIR)/datafile.c @@ -676,7 +676,7 @@ cldr.$O: ############################################################################### -CORE_OBJECTS = core.$O $(PROGRAM_OBJECTS) revision.$O report.$O config.$O $(SERVICE_OBJECTS) activity.$O $(PREFS_OBJECTS) profile.$O menu.$O menu_prefs.$O ses.$O status.$O update.$O blink.$O dataarea.$O $(CMD_OBJECTS) pipe.$O $(TTB_OBJECTS) $(ATB_OBJECTS) $(CTB_OBJECTS) $(KTB_OBJECTS) ktb_keyboard.$O $(KBD_OBJECTS) kbd_keycodes.$O $(BELL_OBJECTS) $(LEDS_OBJECTS) $(ALERT_OBJECTS) hidkeys.$O drivers.$O driver.$O $(SCREEN_OBJECTS) $(SPECIAL_SCREEN_OBJECTS) $(BRAILLE_OBJECTS) $(SPEECH_OBJECTS) spk_input.$O api_control.$O $(API_SERVER_OBJECTS) +CORE_OBJECTS = core.$O $(PROGRAM_OBJECTS) revision.$O report.$O config.$O rgx.$O $(SERVICE_OBJECTS) activity.$O $(PREFS_OBJECTS) profile.$O menu.$O menu_prefs.$O ses.$O status.$O update.$O blink.$O dataarea.$O $(CMD_OBJECTS) pipe.$O $(TTB_OBJECTS) $(ATB_OBJECTS) $(CTB_OBJECTS) $(KTB_OBJECTS) ktb_keyboard.$O $(KBD_OBJECTS) kbd_keycodes.$O $(BELL_OBJECTS) $(LEDS_OBJECTS) $(ALERT_OBJECTS) hidkeys.$O drivers.$O driver.$O $(SCREEN_OBJECTS) $(SPECIAL_SCREEN_OBJECTS) $(BRAILLE_OBJECTS) $(SPEECH_OBJECTS) spk_input.$O api_control.$O 
$(API_SERVER_OBJECTS) CORE_NAME = brltty brltty-core: $(CORE_OBJECTS) @@ -775,7 +775,7 @@ learn.$O: ############################################################################### BRLTTY_OBJECTS = brltty.$O $(CORE_OBJECTS) -BRLTTY_LIBRARIES = $(BRAILLE_DRIVER_LIBRARIES) $(SPEECH_DRIVER_LIBRARIES) $(SCREEN_DRIVER_LIBRARIES) $(SERVICE_LIBS) $(PCM_LIBS) $(MIDI_LIBS) $(USB_LIBS) $(BLUETOOTH_LIBS) $(LOUIS_LIBS) $(EXPAT_LIBS) $(POLKIT_LIBS) $(LDLIBS) +BRLTTY_LIBRARIES = $(BRAILLE_DRIVER_LIBRARIES) $(SPEECH_DRIVER_LIBRARIES) $(SCREEN_DRIVER_LIBRARIES) $(SERVICE_LIBS) $(PCM_LIBS) $(MIDI_LIBS) $(USB_LIBS) $(BLUETOOTH_LIBS) $(LOUIS_LIBS) $(EXPAT_LIBS) $(PCRE_LIBS) $(POLKIT_LIBS) $(LDLIBS) brltty$X: $(BRLTTY_OBJECTS) $(CC) $(LDFLAGS) -o $@ $(BRLTTY_OBJECTS) $(BRLTTY_LIBRARIES) diff --git a/Programs/rgx.c b/Programs/rgx.c index be0896a1c2..f15b97198d 100644 --- a/Programs/rgx.c +++ b/Programs/rgx.c @@ -19,72 +19,127 @@ #include "prologue.h" #include -#include + +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 32 +#include +#else /* Unicode regular expression support */ +#warning Unicode regular expression support has not been included +#endif /* Unicode regular expression support */ #include "log.h" #include "rgx.h" #include "queue.h" struct RegularExpressionObjectStruct { + uint32_t options; Queue *patterns; - int compileFlags; void *data; }; typedef struct { - char *expression; - regex_t matcher; - size_t submatches; + struct { + wchar_t *characters; + size_t length; + } expression; + + struct { + pcre2_code *code; + pcre2_match_data *matches; + } compiled; + + uint32_t options; RegularExpressionHandler *handler; void *data; } RegularExpressionPattern; static void logRegularExpressionError (const RegularExpressionPattern *pattern, int error) { - char message[0X100]; - regerror(error, &pattern->matcher, message, sizeof(message)); - logMessage(LOG_WARNING, "regular expression error: %s", message); + size_t size = 0X100; + PCRE2_UCHAR message[size]; + 
pcre2_get_error_message(error, message, size); + wchar_t characters[size]; + + { + const PCRE2_UCHAR *from = message; + wchar_t *to = characters; + while ((*to++ = *from++)); + } + + logMessage(LOG_WARNING, + "regular expression error %d: %"PRIws, + error, characters + ); } static void deallocateRegularExpressionPattern (void *item, void *data) { RegularExpressionPattern *pattern = item; - regfree(&pattern->matcher); - free(pattern->expression); + pcre2_match_data_free(pattern->compiled.matches); + pcre2_code_free(pattern->compiled.code); + free(pattern->expression.characters); free(pattern); } int -addRegularExpression ( +addRegularExpressionCharacters ( RegularExpressionObject *rgx, - const char *expression, - size_t submatches, - RegularExpressionHandler *handler, - void *data + const wchar_t *characters, size_t length, + RegularExpressionHandler *handler, void *data ) { RegularExpressionPattern *pattern; if ((pattern = malloc(sizeof(*pattern)))) { memset(pattern, 0, sizeof(*pattern)); - pattern->submatches = submatches; + + pattern->options = 0; pattern->handler = handler; pattern->data = data; - if ((pattern->expression = strdup(expression))) { - int error = regcomp(&pattern->matcher, expression, rgx->compileFlags); + pattern->expression.characters = calloc( + (pattern->expression.length = length), + sizeof(*pattern->expression.characters) + ); - if (!error) { - if (enqueueItem(rgx->patterns, pattern)) { - return 1; + if (pattern->expression.characters) { + PCRE2_UCHAR internal[length]; + + for (unsigned int index=0; indexexpression.characters[index] = character; + } + + int error; + PCRE2_SIZE offset; + + pattern->compiled.code = pcre2_compile( + internal, length, rgx->options, + &error, &offset, NULL + ); + + if (pattern->compiled.code) { + pattern->compiled.matches = pcre2_match_data_create_from_pattern( + pattern->compiled.code, NULL + ); + + if (pattern->compiled.matches) { + if (enqueueItem(rgx->patterns, pattern)) { + return 1; + } + + 
pcre2_match_data_free(pattern->compiled.matches); + } else { + logMallocError(); } - regfree(&pattern->matcher); + pcre2_code_free(pattern->compiled.code); } else { logRegularExpressionError(pattern, error); } - free(pattern->expression); + free(pattern->expression.characters); } else { logMallocError(); } @@ -97,58 +152,101 @@ addRegularExpression ( return 0; } +int +addRegularExpressionString ( + RegularExpressionObject *rgx, + const wchar_t *string, + RegularExpressionHandler *handler, void *data +) { + return addRegularExpressionCharacters(rgx, string, wcslen(string), handler, data); +} + static int testRegularExpressionPattern (const void *item, void *data) { const RegularExpressionPattern *pattern = item; RegularExpressionHandlerParameters *parameters = data; - size_t matches = pattern->submatches + 1; - regmatch_t match[matches]; - int error = regexec(&pattern->matcher, parameters->string, matches, match, 0); + int matches = pcre2_match( + pattern->compiled.code, + parameters->string.internal, parameters->string.length, + 0, pattern->options, pattern->compiled.matches, NULL + ); + + if (matches > 0) { + parameters->data.pattern = pattern->data; - if (!error) { - parameters->expression = pattern->expression; - parameters->patternData = pattern->data; + parameters->expression.characters = pattern->expression.characters; + parameters->expression.length = pattern->expression.length; - parameters->matches.array = match; - parameters->matches.size = matches; + parameters->matches.internal = pattern->compiled.matches; + parameters->matches.count = matches - 1; pattern->handler(parameters); return 1; } - if (error != REG_NOMATCH) logRegularExpressionError(pattern, error); + if (matches != PCRE2_ERROR_NOMATCH) logRegularExpressionError(pattern, matches); return 0; } int -matchRegularExpressions ( +matchRegularExpressionsCharacters ( RegularExpressionObject *rgx, - const char *string, + const wchar_t *characters, + size_t length, void *data ) { + PCRE2_UCHAR 
internal[length]; + + for (unsigned int index=0; indexdata, - .matchData = data + .string = { + .characters = characters, + .internal = internal, + .length = length + }, + + .data = { + .object = rgx->data, + .match = data + } }; return !!findElement(rgx->patterns, testRegularExpressionPattern, &parameters); } +int +matchRegularExpressionsString ( + RegularExpressionObject *rgx, + const wchar_t *string, + void *data +) { + return matchRegularExpressionsCharacters(rgx, string, wcslen(string), data); +} + +unsigned int +getRegularExpressionMatchCount ( + const RegularExpressionHandlerParameters *parameters +) { + return parameters->matches.count; +} + int getRegularExpressionMatch ( const RegularExpressionHandlerParameters *parameters, unsigned int index, int *start, int *end ) { if (index < 0) return 0; - if (index >= parameters->matches.size) return 0; + if (index > parameters->matches.count) return 0; - const regmatch_t *matches = parameters->matches.array; - const regmatch_t *match = &matches[index]; + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(parameters->matches.internal); + matches += index * 2; - if ((*start = match->rm_so) == -1) return 0; - if ((*end = match->rm_eo) == -1) return 0; + if ((*start = matches[0]) == -1) return 0; + if ((*end = matches[1]) == -1) return 0; return 1; } @@ -159,9 +257,7 @@ newRegularExpressionObject (void *data) { if ((rgx = malloc(sizeof(*rgx)))) { memset(rgx, 0, sizeof(*rgx)); rgx->data = data; - - rgx->compileFlags = 0; - rgx->compileFlags |= REG_EXTENDED; + rgx->options = 0; if ((rgx->patterns = newQueue(deallocateRegularExpressionPattern, NULL))) { return rgx; diff --git a/config.h.in b/config.h.in index 51b217abf1..e78885f38c 100644 --- a/config.h.in +++ b/config.h.in @@ -86,6 +86,12 @@ extern "C" { /* Define this if XML parsing support is to be included. */ #undef HAVE_EXPAT +/* Define this if PCRE support is to be included. */ +#undef HAVE_PCRE + +/* Define this if PCRE2 support is to be included. 
*/ +#undef HAVE_PCRE2 + /* Define this if the header file pwd.h exists. */ #undef HAVE_PWD_H diff --git a/config.mk.in b/config.mk.in index 0c1449bbc7..47e4f46246 100644 --- a/config.mk.in +++ b/config.mk.in @@ -109,6 +109,9 @@ POLKIT_LIBS = @polkit_libs@ EXPAT_INCLUDES = @expat_includes@ EXPAT_LIBS = @expat_libs@ +PCRE_INCLUDES = @pcre_includes@ +PCRE_LIBS = @pcre_libs@ + LOUIS_INCLUDES = @louis_includes@ LOUIS_LIBS = @louis_libs@ diff --git a/configure.ac b/configure.ac index 1e2774abf7..74c48701e2 100644 --- a/configure.ac +++ b/configure.ac @@ -1120,6 +1120,21 @@ BRLTTY_ARG_DISABLE( AC_SUBST([expat_includes]) AC_SUBST([expat_libs]) +pcre_includes="" +pcre_libs="" +BRLTTY_ARG_DISABLE( + [pcre], + [support for Perl-compatible regular expressions], + [], +[dnl + BRLTTY_HAVE_PACKAGE([pcre], [libpcre2-32], [dnl + AC_DEFINE_UNQUOTED([HAVE_PCRE2], [1], + [Define this if PCRE2 support is to be included.]) + ]) +]) +AC_SUBST([pcre_includes]) +AC_SUBST([pcre_libs]) + BRLTTY_ARG_DISABLE( [contracted-braille], [in-line contracted braille],