From 71aac7a0f625c4704d7d43a32e2d2f0805c77986 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Wed, 21 Sep 2022 13:27:37 +0900 Subject: [PATCH] Don't heap-allocate result buffer in rx operator (#32) --- buildtools/re2/cre2.patch | 50 +++++++++++++++++++++++++++++++++++--- internal/operators/rx.go | 15 +++--------- internal/re2/re2.go | 20 +++++++++------ lib/libcre2.a | Bin 21442 -> 21792 bytes 4 files changed, 63 insertions(+), 22 deletions(-) diff --git a/buildtools/re2/cre2.patch b/buildtools/re2/cre2.patch index 8f609e34fc738..8ef08f3c16ebf 100644 --- a/buildtools/re2/cre2.patch +++ b/buildtools/re2/cre2.patch @@ -1,8 +1,36 @@ diff --git a/src/cre2.cpp b/src/cre2.cpp -index 5a63b93..12bc2ed 100644 +index 5a63b93..54152ff 100644 --- a/src/cre2.cpp +++ b/src/cre2.cpp -@@ -462,7 +462,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN) +@@ -288,6 +288,27 @@ cre2_match (const cre2_regexp_t *re , const char *text, + } + return (retval)? 1 : 0; + } ++ ++int ++cre2_match8 (const cre2_regexp_t *re , const char *text, ++ int textlen, int startpos, int endpos, cre2_anchor_t anchor, ++ cre2_string_t *match, int nmatch) ++{ ++ re2::StringPiece text_re2(text, textlen); ++ re2::StringPiece match_re2[8]; ++ RE2::Anchor anchor_re2 = to_cre2_anchor(anchor); ++ bool retval; // 0 for no match ++ // 1 for successful matching ++ retval = TO_CONST_RE2(re)->Match(text_re2, startpos, endpos, anchor_re2, match_re2, 8); ++ if (retval) { ++ for (int i=0; i<8; i++) { ++ match[i].data = match_re2[i].data(); ++ match[i].length = match_re2[i].length(); ++ } ++ } ++ return (retval)? 1 : 0; ++} ++ + int + cre2_easy_match (const char * pattern, int pattern_len, + const char *text, int text_len, +@@ -462,7 +483,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN) int cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string_t * rewrite) { @@ -10,7 +38,7 @@ index 5a63b93..12bc2ed 100644 std::string S(text_and_target->data, text_and_target->length); re2::StringPiece R(rewrite->data, rewrite->length); char * buffer; /* this exists to make GCC shut up about const */ -@@ -477,12 +476,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string +@@ -477,12 +497,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string } else return -1; return int(retval); @@ -23,3 +51,19 @@ index 5a63b93..12bc2ed 100644 } int cre2_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite) +diff --git a/src/cre2.h b/src/cre2.h +index 92eaf65..a34051f 100644 +--- a/src/cre2.h ++++ b/src/cre2.h +@@ -160,6 +160,11 @@ cre2_decl int cre2_match (const cre2_regexp_t * re, + int startpos, int endpos, cre2_anchor_t anchor, + cre2_string_t * match, int nmatch); + ++cre2_decl int cre2_match8 (const cre2_regexp_t * re, ++ const char * text, int textlen, ++ int startpos, int endpos, cre2_anchor_t anchor, ++ cre2_string_t * match, int nmatch); ++ + cre2_decl int cre2_easy_match (const char * pattern, int pattern_len, + const char * text, int text_len, + cre2_string_t * match, int nmatch); diff --git a/internal/operators/rx.go b/internal/operators/rx.go index 5f5052095a4d6..791f2ce0a1869 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -26,16 +26,9 @@ func (o *rx) Init(options coraza.RuleOperatorOptions) error { } func (o *rx) Evaluate(tx *coraza.Transaction, value string) bool { - matches := o.re.FindStringSubmatch(value, 8) - if len(matches) == 0 { - return false - } - - if tx.Capture { - for i, c := range matches { - tx.CaptureField(i, c) + return o.re.FindStringSubmatch8(value, func(i int, match string) { + if tx.Capture { + tx.CaptureField(i, match) } - } - - return true + }) } diff --git a/internal/re2/re2.go b/internal/re2/re2.go index 44a1c79154d56..b96e4d98bf2be 100644 --- a/internal/re2/re2.go +++ b/internal/re2/re2.go @@ -21,6 +21,10 @@ func cre2Delete(rePtr unsafe.Pointer) func cre2Match(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32, anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32 +//export cre2_match8 +func cre2Match8(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32, + anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32 + type RegExp struct { ptr unsafe.Pointer } @@ -33,14 +37,14 @@ func Compile(pattern string) (RegExp, error) { return RegExp{ptr: rePtr}, nil } -func (re RegExp) FindStringSubmatch(text string, n int) []string { +func (re RegExp) FindStringSubmatch8(text string, f func(int, string)) bool { sh := (*reflect.StringHeader)(unsafe.Pointer(&text)) // Array of cre2_string_t, which is const char* and int, easiest way to get it is an array of ints. - matchArr := make([]uint32, 2*n) + var matchArr [16]uint32 matchArrPtr := unsafe.Pointer(&matchArr[0]) - res := cre2Match(re.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), 0, uint32(sh.Len), 0, matchArrPtr, uint32(n)) + res := cre2Match8(re.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), 0, uint32(sh.Len), 0, matchArrPtr, 8) if res == 0 { - return nil + return false } // Pointer math! re2 will return matches which are memory pointers into memory corresponding to text. @@ -48,8 +52,7 @@ func (re RegExp) FindStringSubmatch(text string, n int) []string { // pointers directly. textPtr := uint32(sh.Data) - var matches []string - for i := 0; i < n; i++ { + for i := 0; i < 8; i++ { sPtr := matchArr[2*i] if sPtr == 0 { break @@ -57,7 +60,8 @@ func (re RegExp) FindStringSubmatch(text string, n int) []string { sLen := matchArr[2*i+1] textIdx := sPtr - textPtr - matches = append(matches, text[textIdx:textIdx+sLen]) + f(i, text[textIdx:textIdx+sLen]) } - return matches + + return true } diff --git a/lib/libcre2.a b/lib/libcre2.a index ebe1cfd0ec614dca27dcc92698f343f0ff16052d..d06e68a31e57e854cd05b1f797f587c2594fd036 100644 GIT binary patch delta 2551 zcmciCdr(x@9S895SuUKt#x@HAfdFO?k_aNiP`R)Pmb*Ug6IVuEe#XmMyweT2qZ+hhvWs%eWvGZ{sV(f)SV$4vk1A7SQm z@9+0JuY2xUZtpI(eGgmrqB}L+?E9-21t147zWcp$f8PuK&wANEtTrlfWsH;(Y2lPxVR$nhLAC3}WV@g{rG=;hvuVbtC$-iGdHgNwO)uYoc5 zII6e6+K<{;W1KHG8_fpAFq3XfF=UyI*7TaGNDBZ(lO9F!k0SG~A zf88f2?nljomlD|hTEyiEVPAXb0eM|@Lv3}#B7^b5I!c?nU!C?|VC)mlMpsI?|2QBO zJn2haj;5dZYmf>(N!gL1Hh;syWp#BA8UpQ};_Pv($n#pZ%I15HW=Dh-kM@mrE@^05 zR_}r-1X4+u=S)y|lxYm41tGrH$5WIyx*(N$VsawF%KXzm80l*$kMP9s$YiO)KWU?D zX`RDKlZL_+22(gBhkwk22H%)UPkT-zb9(mXL{6#lk8{1++~{yN!xRC@^EcNLb{4ywwbc2Q#%k)*)Z9q#5TlF)WWxh3%)yC;8M7P>)|Tyge&Z$r!;nryb+*$Qi!hxu4{5mQLXQE{ckguX;{%y3tsu&A4#WeAqF@hhA zp>4z{xE-Tn{z|Ts)n<)tH)}X!R^a~JtgolU2`YY%BSY zoF@We%vUA~{4CMJ zHzf+bli~}B3WgI^j3g?od!oj!lGY@RZ6SxqDRPP2CF_$l^d>7fldQ50&+BX_x$(S? z?US;wZ&E%!nv{fdFUYt|ZoJ@@`HB>QwiFBBm?C%=#e*pdhEi1ANKx3Yex$MgkQI|P zwuu}hesr?rVIQ#-NJ9C3%=YU zaKxhEghjGi1D>$>`8z-l+-f*DU-un&5pDx9AGmbQSOD z3j0jg*jrf|>m(nOvt)>@%+}au(w(j0LN*;GegArLbk>)nyV?3&9lg2v7??@FY;td= z%-_rtSf6L%JMskIPw`+L?POLOPR`20-)0r!pRff=d48!x#_f{qlyl^O7IWJ$5jG-RjC-Ls^voz->PchtuDdWx~T42iuEWHLZ-LP9ds z?pAHpL|j0F3lzOjQGD=FKzx)TBH$w+SQaE84-G*rgF;-^5FP?WgaxxFGb!ELkNd%? z`t?0?PTxoOnVQYJ*r6S4-DAP&DPuyP(!l^`0)y84u7bb!hX1qQ^#A8+o2uDfscBlo z%LDFvSW9X8Vo(o{XsN5K!%Xv}GR5pwCMd3gAy4;Z1`Ph?LSLat9X5|QeH`C8Df+xg z3GT00@>Io=g#!kiUPoVRo8HgZIxs5ITjNV{`#iOOaaVc*-bIk|W6T*5vC)z&YS-CZb4XTPbm5TDd~el~+GUkKD8nJmi#9tVVltHxkcx(fs=qDa zuJJ)Cjt*t3JvD&}PygDXLP>bX{>4J6`EjO&$4`;m;eBd+)qanc_KAWL4J8IrX?RY5 zAvDKhc4WmDl*NR0S?I4SqfP$1?hUWy`*Wbi;|)NGgH+!C8p5g2D!y=P>9VST&t2&Y zc%Z~XT43fl#H2P@l)JW~vf59t#;^RE_EuF^S5QtF1F3T8ujz1;6amGI%nq2_?c?ra%w=|D zUW#Qb8>^nD)()=m`K!EB(sMJ6f%gWOZ|3}U;8OI;OU=o-GlJbig>-jl1h71kOD&NU zoQ&k?jGTm5lqc}HqT;Ng<2=Qk!(?0@X2C5Qdxyz<^>Bd=!!>?wgy45a2<(kgaX3oH z@hFu&7j3XZXv69;c5E19!7d@=1JWX7-Yx{b7aG4J1iwS^ zp0O%E8LOjptjbo6GuT0LmRuvxK47rTWFI+6{`G)?jpJ2p9j~+Q@m6++9DUG=n-AJh zH^G7W2^PGOBx8S)9K_KinV(4#_%2D~my-m)P4RmZRUDkCqh+GXR!%b50n$#cl2s2G z>{YUtoFEq;GO%H?iY=3Mc4@Me-6lsYR@|`I&}+%Tb5ktXnJi-;Ig%{%wq${G$r|rY z7Q81pD6o5~isq?0j!aeA-ySyD$E1y1CM!}5_A=Q+j*}l!44BhYygE&1KTfl<9`f0A zda~)6SgFd`NVcmo->nLKq-y-AD)^TauT534DOJbosVe(E)nG3^YOvj;B}kX^IRPMDuQ3>I@|Vyl{J#OC#`trNe5ceWqeJ# z(q(=tU7#*QFg2uANyqV(G avP#}g`SX2wZxv2^tI_49`@LpUzW+bT&WCIO