Skip to content

Commit

Permalink
Don't heap-allocate result buffer in rx operator (envoyproxy#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
anuraaga committed Sep 21, 2022
1 parent 0df5526 commit 71aac7a
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 22 deletions.
50 changes: 47 additions & 3 deletions buildtools/re2/cre2.patch
Original file line number Diff line number Diff line change
@@ -1,16 +1,44 @@
diff --git a/src/cre2.cpp b/src/cre2.cpp
index 5a63b93..12bc2ed 100644
index 5a63b93..54152ff 100644
--- a/src/cre2.cpp
+++ b/src/cre2.cpp
@@ -462,7 +462,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN)
@@ -288,6 +288,27 @@ cre2_match (const cre2_regexp_t *re , const char *text,
}
return (retval)? 1 : 0;
}
+
+int
+cre2_match8 (const cre2_regexp_t *re , const char *text,
+ int textlen, int startpos, int endpos, cre2_anchor_t anchor,
+ cre2_string_t *match, int nmatch)
+{
+ re2::StringPiece text_re2(text, textlen);
+ re2::StringPiece match_re2[8]; // fixed-size local array of 8 submatch slots
+ RE2::Anchor anchor_re2 = to_cre2_anchor(anchor);
+ bool retval; // false: no match; true: successful match (returned to the caller as 0 / 1)
+ // NOTE: nmatch is never read below; Match is always asked for exactly 8 submatches.
+ retval = TO_CONST_RE2(re)->Match(text_re2, startpos, endpos, anchor_re2, match_re2, 8);
+ if (retval) {
+ for (int i=0; i<8; i++) { // copies all 8 slots, so match must have room for at least 8 entries
+ match[i].data = match_re2[i].data();
+ match[i].length = match_re2[i].length();
+ }
+ }
+ return (retval)? 1 : 0;
+}
+
int
cre2_easy_match (const char * pattern, int pattern_len,
const char *text, int text_len,
@@ -462,7 +483,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN)
int
cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string_t * rewrite)
{
- try {
std::string S(text_and_target->data, text_and_target->length);
re2::StringPiece R(rewrite->data, rewrite->length);
char * buffer; /* this exists to make GCC shut up about const */
@@ -477,12 +476,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string
@@ -477,12 +497,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string
} else
return -1;
return int(retval);
Expand All @@ -23,3 +51,19 @@ index 5a63b93..12bc2ed 100644
}
int
cre2_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite)
diff --git a/src/cre2.h b/src/cre2.h
index 92eaf65..a34051f 100644
--- a/src/cre2.h
+++ b/src/cre2.h
@@ -160,6 +160,11 @@ cre2_decl int cre2_match (const cre2_regexp_t * re,
int startpos, int endpos, cre2_anchor_t anchor,
cre2_string_t * match, int nmatch);

+cre2_decl int cre2_match8 (const cre2_regexp_t * re,
+ const char * text, int textlen,
+ int startpos, int endpos, cre2_anchor_t anchor,
+ cre2_string_t * match, int nmatch);
+
cre2_decl int cre2_easy_match (const char * pattern, int pattern_len,
const char * text, int text_len,
cre2_string_t * match, int nmatch);
15 changes: 4 additions & 11 deletions internal/operators/rx.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,9 @@ func (o *rx) Init(options coraza.RuleOperatorOptions) error {
}

func (o *rx) Evaluate(tx *coraza.Transaction, value string) bool {
matches := o.re.FindStringSubmatch(value, 8)
if len(matches) == 0 {
return false
}

if tx.Capture {
for i, c := range matches {
tx.CaptureField(i, c)
return o.re.FindStringSubmatch8(value, func(i int, match string) {
if tx.Capture {
tx.CaptureField(i, match)
}
}

return true
})
}
20 changes: 12 additions & 8 deletions internal/re2/re2.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ func cre2Delete(rePtr unsafe.Pointer)
func cre2Match(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32,
anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32

//export cre2_match8
func cre2Match8(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32,
anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32

type RegExp struct {
ptr unsafe.Pointer
}
Expand All @@ -33,31 +37,31 @@ func Compile(pattern string) (RegExp, error) {
return RegExp{ptr: rePtr}, nil
}

func (re RegExp) FindStringSubmatch(text string, n int) []string {
func (re RegExp) FindStringSubmatch8(text string, f func(int, string)) bool {
sh := (*reflect.StringHeader)(unsafe.Pointer(&text))
// Array of cre2_string_t, each of which is a const char* and an int; the easiest way to get it is an array of ints.
matchArr := make([]uint32, 2*n)
var matchArr [16]uint32
matchArrPtr := unsafe.Pointer(&matchArr[0])
res := cre2Match(re.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), 0, uint32(sh.Len), 0, matchArrPtr, uint32(n))
res := cre2Match8(re.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), 0, uint32(sh.Len), 0, matchArrPtr, 8)
if res == 0 {
return nil
return false
}

// Pointer math! re2 returns each match as a pointer into the memory backing text.
// GC semantics are clearest if we convert those pointers to indexes within text rather than
// dereferencing them directly.
textPtr := uint32(sh.Data)

var matches []string
for i := 0; i < n; i++ {
for i := 0; i < 8; i++ {
sPtr := matchArr[2*i]
if sPtr == 0 {
break
}
sLen := matchArr[2*i+1]

textIdx := sPtr - textPtr
matches = append(matches, text[textIdx:textIdx+sLen])
f(i, text[textIdx:textIdx+sLen])
}
return matches

return true
}
Binary file modified lib/libcre2.a
Binary file not shown.

0 comments on commit 71aac7a

Please sign in to comment.