Skip to content

Commit

Permalink
Convert rgx.[ch] to use pcre2 (rather than regex). (dm)
Browse files Browse the repository at this point in the history
  • Loading branch information
DaveMielke committed Oct 9, 2018
1 parent b3c195d commit 579cae4
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 61 deletions.
54 changes: 41 additions & 13 deletions Headers/rgx.h
Expand Up @@ -29,36 +29,64 @@ extern RegularExpressionObject *newRegularExpressionObject (void *data);
extern void destroyRegularExpressionObject (RegularExpressionObject *rgx);

/*
 * The argument block passed to every regular expression handler.
 *
 * NOTE(review): this text is rendered from a diff, so members of both the
 * earlier and the later layout appear side by side. The flat members
 * (string, expression, matches.array, matches.size, objectData, patternData,
 * matchData) look like the removed layout and the nested structures like the
 * replacement - confirm against the repository.
 */
typedef struct {
const char *string;
const char *expression;
/* the text being matched */
struct {
const wchar_t *characters; /* the caller's wide characters */
void *internal; /* the copy of those characters that is handed to pcre2_match() */
size_t length; /* number of characters */
} string;

/* the source text of the pattern that matched */
struct {
const wchar_t *characters;
size_t length;
} expression;

/* the result of the match */
struct {
const void *array;
size_t size;
void *internal; /* the pattern's pcre2 match data */
size_t count; /* the value returned by pcre2_match() minus one */
} matches;

void *objectData;
void *patternData;
void *matchData;
/* caller supplied pointers */
struct {
void *object; /* given when the regular expression object was created */
void *pattern; /* given when the pattern was added */
void *match; /* given to the match function */
} data;
} RegularExpressionHandlerParameters;

/*
 * REGULAR_EXPRESSION_HANDLER(name) expands to the signature of a handler
 * called "name": a function that returns nothing and receives a pointer to
 * the read-only handler parameters. The typedef uses the same macro to name
 * the handler function type.
 */
#define REGULAR_EXPRESSION_HANDLER(name) void name (const RegularExpressionHandlerParameters *parameters)
typedef REGULAR_EXPRESSION_HANDLER(RegularExpressionHandler);

/*
 * Add a pattern, given as an array of wide characters plus its length, to a
 * regular expression object, together with the handler to call when it
 * matches and a data pointer for that handler. The definition returns 1 once
 * the pattern has been queued.
 *
 * NOTE(review): this text is rendered from a diff; the first "extern int"
 * line looks like the removed addRegularExpression declaration - confirm.
 */
extern int addRegularExpression (
extern int addRegularExpressionCharacters (
RegularExpressionObject *rgx,
const wchar_t *characters, size_t length,
RegularExpressionHandler *handler, void *data
);

/*
 * Add a pattern given as a NUL-terminated wide string. The definition
 * measures the string with wcslen() and passes it on to
 * addRegularExpressionCharacters(), whose result it returns.
 */
extern int
addRegularExpressionString (
RegularExpressionObject *rgx,
const wchar_t *string,
RegularExpressionHandler *handler, void *data
);

/*
 * Test an array of wide characters against the patterns that have been added
 * to a regular expression object. The data pointer is made available to the
 * handler as data.match. The definition returns 1 if the pattern search
 * yields an element and 0 otherwise.
 *
 * NOTE(review): this text is rendered from a diff; the expression,
 * submatches and handler parameters look like removed lines of the earlier
 * addRegularExpression declaration - confirm.
 */
extern int matchRegularExpressionsCharacters (
RegularExpressionObject *rgx,
const char *expression,
size_t submatches,
RegularExpressionHandler *handler,
const wchar_t *characters,
size_t length,
void *data
);

/*
 * Test a NUL-terminated wide string against the patterns of a regular
 * expression object. The definition measures the string with wcslen() and
 * passes it on to matchRegularExpressionsCharacters().
 *
 * NOTE(review): this text is rendered from a diff; the first "extern int"
 * line and the "const char *string" parameter look like the removed
 * matchRegularExpressions declaration - confirm.
 */
extern int matchRegularExpressions (
extern int
matchRegularExpressionsString (
RegularExpressionObject *rgx,
const char *string,
const wchar_t *string,
void *data
);

/*
 * Return the match count that was stored in the handler parameters
 * (matches.count).
 */
extern unsigned int
getRegularExpressionMatchCount (
const RegularExpressionHandlerParameters *parameters
);

extern int getRegularExpressionMatch (
const RegularExpressionHandlerParameters *parameters,
unsigned int index, int *start, int *end
Expand Down
6 changes: 3 additions & 3 deletions Programs/Makefile.in
Expand Up @@ -305,7 +305,7 @@ queue.$O:
$(CC) $(LIBCFLAGS) -c $(SRC_DIR)/queue.c

# Compile rgx.c into its object file.
# NOTE(review): two recipe lines are shown because this text is rendered from
# a diff; the one that passes PCRE_INCLUDES looks like the added line - confirm.
rgx.$O:
$(CC) $(LIBCFLAGS) -c $(SRC_DIR)/rgx.c
$(CC) $(LIBCFLAGS) $(PCRE_INCLUDES) -c $(SRC_DIR)/rgx.c

datafile.$O:
$(CC) $(LIBCFLAGS) -c $(SRC_DIR)/datafile.c
Expand Down Expand Up @@ -676,7 +676,7 @@ cldr.$O:

###############################################################################

CORE_OBJECTS = core.$O $(PROGRAM_OBJECTS) revision.$O report.$O config.$O $(SERVICE_OBJECTS) activity.$O $(PREFS_OBJECTS) profile.$O menu.$O menu_prefs.$O ses.$O status.$O update.$O blink.$O dataarea.$O $(CMD_OBJECTS) pipe.$O $(TTB_OBJECTS) $(ATB_OBJECTS) $(CTB_OBJECTS) $(KTB_OBJECTS) ktb_keyboard.$O $(KBD_OBJECTS) kbd_keycodes.$O $(BELL_OBJECTS) $(LEDS_OBJECTS) $(ALERT_OBJECTS) hidkeys.$O drivers.$O driver.$O $(SCREEN_OBJECTS) $(SPECIAL_SCREEN_OBJECTS) $(BRAILLE_OBJECTS) $(SPEECH_OBJECTS) spk_input.$O api_control.$O $(API_SERVER_OBJECTS)
CORE_OBJECTS = core.$O $(PROGRAM_OBJECTS) revision.$O report.$O config.$O rgx.$O $(SERVICE_OBJECTS) activity.$O $(PREFS_OBJECTS) profile.$O menu.$O menu_prefs.$O ses.$O status.$O update.$O blink.$O dataarea.$O $(CMD_OBJECTS) pipe.$O $(TTB_OBJECTS) $(ATB_OBJECTS) $(CTB_OBJECTS) $(KTB_OBJECTS) ktb_keyboard.$O $(KBD_OBJECTS) kbd_keycodes.$O $(BELL_OBJECTS) $(LEDS_OBJECTS) $(ALERT_OBJECTS) hidkeys.$O drivers.$O driver.$O $(SCREEN_OBJECTS) $(SPECIAL_SCREEN_OBJECTS) $(BRAILLE_OBJECTS) $(SPEECH_OBJECTS) spk_input.$O api_control.$O $(API_SERVER_OBJECTS)
CORE_NAME = brltty

brltty-core: $(CORE_OBJECTS)
Expand Down Expand Up @@ -775,7 +775,7 @@ learn.$O:
###############################################################################

BRLTTY_OBJECTS = brltty.$O $(CORE_OBJECTS)
BRLTTY_LIBRARIES = $(BRAILLE_DRIVER_LIBRARIES) $(SPEECH_DRIVER_LIBRARIES) $(SCREEN_DRIVER_LIBRARIES) $(SERVICE_LIBS) $(PCM_LIBS) $(MIDI_LIBS) $(USB_LIBS) $(BLUETOOTH_LIBS) $(LOUIS_LIBS) $(EXPAT_LIBS) $(POLKIT_LIBS) $(LDLIBS)
BRLTTY_LIBRARIES = $(BRAILLE_DRIVER_LIBRARIES) $(SPEECH_DRIVER_LIBRARIES) $(SCREEN_DRIVER_LIBRARIES) $(SERVICE_LIBS) $(PCM_LIBS) $(MIDI_LIBS) $(USB_LIBS) $(BLUETOOTH_LIBS) $(LOUIS_LIBS) $(EXPAT_LIBS) $(PCRE_LIBS) $(POLKIT_LIBS) $(LDLIBS)

brltty$X: $(BRLTTY_OBJECTS)
$(CC) $(LDFLAGS) -o $@ $(BRLTTY_OBJECTS) $(BRLTTY_LIBRARIES)
Expand Down
186 changes: 141 additions & 45 deletions Programs/rgx.c
Expand Up @@ -19,72 +19,127 @@
#include "prologue.h"

#include <string.h>
#include <regex.h>

#ifdef HAVE_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 32
#include <pcre2.h>
#else /* Unicode regular expression support */
#warning Unicode regular expression support has not been included
#endif /* Unicode regular expression support */

#include "log.h"
#include "rgx.h"
#include "queue.h"

/*
 * The state behind a RegularExpressionObject.
 *
 * NOTE(review): this text is rendered from a diff; compileFlags looks like
 * the removed member (it is only used with regcomp()) and options like its
 * replacement - confirm.
 */
struct RegularExpressionObjectStruct {
uint32_t options; /* passed to pcre2_compile() for every pattern */
Queue *patterns; /* the patterns that have been added */
int compileFlags;
void *data; /* given to handlers as data.object */
};

/*
 * One pattern held in a regular expression object's queue.
 *
 * NOTE(review): this text is rendered from a diff; the first three members
 * (char *expression, matcher, submatches) look like the removed regex based
 * layout and the rest like the pcre2 based replacement - confirm.
 */
typedef struct {
char *expression;
regex_t matcher;
size_t submatches;
/* a private copy of the pattern's source text */
struct {
wchar_t *characters;
size_t length;
} expression;

/* what pcre2 produced for the pattern */
struct {
pcre2_code *code; /* from pcre2_compile() */
pcre2_match_data *matches; /* from pcre2_match_data_create_from_pattern() */
} compiled;

uint32_t options; /* passed to pcre2_match() */
RegularExpressionHandler *handler; /* called when the pattern matches */
void *data; /* given to the handler as data.pattern */
} RegularExpressionPattern;

static void
logRegularExpressionError (const RegularExpressionPattern *pattern, int error) {
char message[0X100];
regerror(error, &pattern->matcher, message, sizeof(message));
logMessage(LOG_WARNING, "regular expression error: %s", message);
size_t size = 0X100;
PCRE2_UCHAR message[size];
pcre2_get_error_message(error, message, size);
wchar_t characters[size];

{
const PCRE2_UCHAR *from = message;
wchar_t *to = characters;
while ((*to++ = *from++));
}

logMessage(LOG_WARNING,
"regular expression error %d: %"PRIws,
error, characters
);
}

/*
 * Release a pattern and everything that it owns. This function is given to
 * newQueue() when the pattern queue is created; the data argument isn't used.
 *
 * NOTE(review): this text is rendered from a diff; the regfree() call and the
 * free(pattern->expression) call look like removed lines of the regex based
 * version - confirm.
 */
static void
deallocateRegularExpressionPattern (void *item, void *data) {
RegularExpressionPattern *pattern = item;

regfree(&pattern->matcher);
free(pattern->expression);
pcre2_match_data_free(pattern->compiled.matches);
pcre2_code_free(pattern->compiled.code);
free(pattern->expression.characters);
free(pattern);
}

int
addRegularExpression (
addRegularExpressionCharacters (
RegularExpressionObject *rgx,
const char *expression,
size_t submatches,
RegularExpressionHandler *handler,
void *data
const wchar_t *characters, size_t length,
RegularExpressionHandler *handler, void *data
) {
RegularExpressionPattern *pattern;

if ((pattern = malloc(sizeof(*pattern)))) {
memset(pattern, 0, sizeof(*pattern));
pattern->submatches = submatches;

pattern->options = 0;
pattern->handler = handler;
pattern->data = data;

if ((pattern->expression = strdup(expression))) {
int error = regcomp(&pattern->matcher, expression, rgx->compileFlags);
pattern->expression.characters = calloc(
(pattern->expression.length = length),
sizeof(*pattern->expression.characters)
);

if (!error) {
if (enqueueItem(rgx->patterns, pattern)) {
return 1;
if (pattern->expression.characters) {
PCRE2_UCHAR internal[length];

for (unsigned int index=0; index<length; index+=1) {
wchar_t character = characters[index];
internal[index] = character;
pattern->expression.characters[index] = character;
}

int error;
PCRE2_SIZE offset;

pattern->compiled.code = pcre2_compile(
internal, length, rgx->options,
&error, &offset, NULL
);

if (pattern->compiled.code) {
pattern->compiled.matches = pcre2_match_data_create_from_pattern(
pattern->compiled.code, NULL
);

if (pattern->compiled.matches) {
if (enqueueItem(rgx->patterns, pattern)) {
return 1;
}

pcre2_match_data_free(pattern->compiled.matches);
} else {
logMallocError();
}

regfree(&pattern->matcher);
pcre2_code_free(pattern->compiled.code);
} else {
logRegularExpressionError(pattern, error);
}

free(pattern->expression);
free(pattern->expression.characters);
} else {
logMallocError();
}
Expand All @@ -97,58 +152,101 @@ addRegularExpression (
return 0;
}

/*
 * Add a pattern that is supplied as a NUL-terminated wide string.
 * The string is measured and then handed, with the handler and its data,
 * to addRegularExpressionCharacters(), whose result is returned.
 */
int
addRegularExpressionString (
  RegularExpressionObject *rgx,
  const wchar_t *string,
  RegularExpressionHandler *handler, void *data
) {
  const size_t length = wcslen(string);

  return addRegularExpressionCharacters(rgx, string, length, handler, data);
}

/*
 * The callback that is given to findElement() for each queued pattern: item
 * is the pattern and data is the handler parameters for the current match
 * attempt. When the pattern matches, the parameters are completed, the
 * pattern's handler is called, and 1 is returned. Otherwise 0 is returned,
 * and any result other than "no match" is logged.
 *
 * NOTE(review): this text is rendered from a diff; the statements that use
 * regmatch_t, regexec(), error, REG_NOMATCH, patternData, matches.array and
 * matches.size look like removed lines of the regex based version - confirm.
 *
 * NOTE(review): a pcre2_match() result of 0 is neither positive nor
 * PCRE2_ERROR_NOMATCH, so it would be logged as error 0 - confirm that this
 * can't happen with match data created from the pattern.
 */
static int
testRegularExpressionPattern (const void *item, void *data) {
const RegularExpressionPattern *pattern = item;
RegularExpressionHandlerParameters *parameters = data;

size_t matches = pattern->submatches + 1;
regmatch_t match[matches];
int error = regexec(&pattern->matcher, parameters->string, matches, match, 0);
int matches = pcre2_match(
pattern->compiled.code,
parameters->string.internal, parameters->string.length,
0, pattern->options, pattern->compiled.matches, NULL
);

if (matches > 0) {
parameters->data.pattern = pattern->data;

if (!error) {
parameters->expression = pattern->expression;
parameters->patternData = pattern->data;
parameters->expression.characters = pattern->expression.characters;
parameters->expression.length = pattern->expression.length;

parameters->matches.array = match;
parameters->matches.size = matches;
parameters->matches.internal = pattern->compiled.matches;
/* the positive result is one more than the count that is stored */
parameters->matches.count = matches - 1;

pattern->handler(parameters);
return 1;
}

if (error != REG_NOMATCH) logRegularExpressionError(pattern, error);
if (matches != PCRE2_ERROR_NOMATCH) logRegularExpressionError(pattern, matches);
return 0;
}

int
matchRegularExpressions (
matchRegularExpressionsCharacters (
RegularExpressionObject *rgx,
const char *string,
const wchar_t *characters,
size_t length,
void *data
) {
PCRE2_UCHAR internal[length];

for (unsigned int index=0; index<length; index+=1) {
internal[index] = characters[index];
}

RegularExpressionHandlerParameters parameters = {
.string = string,
.objectData = rgx->data,
.matchData = data
.string = {
.characters = characters,
.internal = internal,
.length = length
},

.data = {
.object = rgx->data,
.match = data
}
};

return !!findElement(rgx->patterns, testRegularExpressionPattern, &parameters);
}

/*
 * Test a NUL-terminated wide string against the queued patterns.
 * The string is measured and then handed, with the caller's data pointer,
 * to matchRegularExpressionsCharacters(), whose result is returned.
 */
int
matchRegularExpressionsString (
  RegularExpressionObject *rgx,
  const wchar_t *string,
  void *data
) {
  const size_t length = wcslen(string);

  return matchRegularExpressionsCharacters(rgx, string, length, data);
}

/*
 * Report the match count that was stored in the handler parameters.
 */
unsigned int
getRegularExpressionMatchCount (
  const RegularExpressionHandlerParameters *parameters
) {
  const unsigned int count = parameters->matches.count;

  return count;
}

int
getRegularExpressionMatch (
const RegularExpressionHandlerParameters *parameters,
unsigned int index, int *start, int *end
) {
if (index < 0) return 0;
if (index >= parameters->matches.size) return 0;
if (index > parameters->matches.count) return 0;

const regmatch_t *matches = parameters->matches.array;
const regmatch_t *match = &matches[index];
PCRE2_SIZE *matches = pcre2_get_ovector_pointer(parameters->matches.internal);
matches += index * 2;

if ((*start = match->rm_so) == -1) return 0;
if ((*end = match->rm_eo) == -1) return 0;
if ((*start = matches[0]) == -1) return 0;
if ((*end = matches[1]) == -1) return 0;
return 1;
}

Expand All @@ -159,9 +257,7 @@ newRegularExpressionObject (void *data) {
if ((rgx = malloc(sizeof(*rgx)))) {
memset(rgx, 0, sizeof(*rgx));
rgx->data = data;

rgx->compileFlags = 0;
rgx->compileFlags |= REG_EXTENDED;
rgx->options = 0;

if ((rgx->patterns = newQueue(deallocateRegularExpressionPattern, NULL))) {
return rgx;
Expand Down

0 comments on commit 579cae4

Please sign in to comment.