diff --git a/buildtools/re2/Dockerfile b/buildtools/re2/Dockerfile deleted file mode 100644 index 17d7d6e02269..000000000000 --- a/buildtools/re2/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2022 The OWASP Coraza contributors -# SPDX-License-Identifier: Apache-2.0 - -FROM ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-wasi-sdk:main - -RUN apt-get install -y patch - -# Use post-release commit for now as it includes support for no-threads -RUN mkdir -p /re2 && curl -L https://github.com/google/re2/archive/954656f47fe8fb505d4818da1e128417a79ea500.tar.gz | tar -xz --strip-components 1 -C /re2 -WORKDIR /re2 -ENV RE2_CXXFLAGS -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. -DRE2_NO_THREADS -RUN make obj/libre2.a - -RUN mkdir -p /cre2 && curl -L https://bitbucket.org/marcomaggi/cre2/downloads/cre2-0.4.0-devel.2.tar.gz | tar -xz --strip-components 1 -C /cre2 -WORKDIR /cre2 -ADD cre2.patch cre2.patch -RUN patch -p1 < cre2.patch -# host is required by configure but not used so set it arbitrarily -RUN RE2_CFLAGS=-I/re2 RE2_LIBS=/re2/obj ./configure --host=i686-pc-linux-gnu --enable-shared=false && \ - make - -CMD ["bash", "-c", "cp /re2/obj/libre2.a /out/libre2.a && cp /cre2/.libs/libcre2.a /out/libcre2.a"] diff --git a/buildtools/re2/cre2.patch b/buildtools/re2/cre2.patch deleted file mode 100644 index 8ef08f3c16eb..000000000000 --- a/buildtools/re2/cre2.patch +++ /dev/null @@ -1,69 +0,0 @@ -diff --git a/src/cre2.cpp b/src/cre2.cpp -index 5a63b93..54152ff 100644 ---- a/src/cre2.cpp -+++ b/src/cre2.cpp -@@ -288,6 +288,27 @@ cre2_match (const cre2_regexp_t *re , const char *text, - } - return (retval)? 1 : 0; - } -+ -+int -+cre2_match8 (const cre2_regexp_t *re , const char *text, -+ int textlen, int startpos, int endpos, cre2_anchor_t anchor, -+ cre2_string_t *match, int nmatch) -+{ -+ re2::StringPiece text_re2(text, textlen); -+ re2::StringPiece match_re2[8]; -+ RE2::Anchor anchor_re2 = to_cre2_anchor(anchor); -+ bool retval; // 0 for no match -+ // 1 for successful matching -+ retval = TO_CONST_RE2(re)->Match(text_re2, startpos, endpos, anchor_re2, match_re2, 8); -+ if (retval) { -+ for (int i=0; i<8; i++) { -+ match[i].data = match_re2[i].data(); -+ match[i].length = match_re2[i].length(); -+ } -+ } -+ return (retval)? 1 : 0; -+} -+ - int - cre2_easy_match (const char * pattern, int pattern_len, - const char *text, int text_len, -@@ -462,7 +483,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN) - int - cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string_t * rewrite) - { -- try { - std::string S(text_and_target->data, text_and_target->length); - re2::StringPiece R(rewrite->data, rewrite->length); - char * buffer; /* this exists to make GCC shut up about const */ -@@ -477,12 +497,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string - } else - return -1; - return int(retval); -- } catch(const std::exception &e) { -- // e.what(); -- return -1; -- } catch(...) { -- return -1; -- } - } - int - cre2_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite) -diff --git a/src/cre2.h b/src/cre2.h -index 92eaf65..a34051f 100644 ---- a/src/cre2.h -+++ b/src/cre2.h -@@ -160,6 +160,11 @@ cre2_decl int cre2_match (const cre2_regexp_t * re, - int startpos, int endpos, cre2_anchor_t anchor, - cre2_string_t * match, int nmatch); - -+cre2_decl int cre2_match8 (const cre2_regexp_t * re, -+ const char * text, int textlen, -+ int startpos, int endpos, cre2_anchor_t anchor, -+ cre2_string_t * match, int nmatch); -+ - cre2_decl int cre2_easy_match (const char * pattern, int pattern_len, - const char * text, int text_len, - cre2_string_t * match, int nmatch); diff --git a/go.mod b/go.mod index b4c276f280d6..14260ae0069d 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/stretchr/testify v1.8.0 github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5 github.com/tidwall/gjson v1.14.3 + github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0 ) require ( @@ -16,7 +17,7 @@ require ( github.com/magefile/mage v1.14.0 // indirect github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/tetratelabs/wazero v1.0.0-pre.3 // indirect + github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect golang.org/x/net v0.1.0 // indirect diff --git a/go.sum b/go.sum index a67871b6890a..44bbba3fae29 100644 --- a/go.sum +++ b/go.sum @@ -25,8 +25,8 @@ github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PK github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5 h1:gbsZkzeu+H7oX9xJA97eIuNHCuXfppuJh32mX2Cpeqc= github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5/go.mod h1:A0osZ5uU1yRt5ZOdRRzIHxJZf8xzsxvEkeL8Ae698+s= -github.com/tetratelabs/wazero v1.0.0-pre.3 h1:Z5fbogMUGcERzaQb9mQU8+yJSy0bVvv2ce3dfR4wcZg= -github.com/tetratelabs/wazero v1.0.0-pre.3/go.mod h1:M8UDNECGm/HVjOfq0EOe4QfCY9Les1eq54IChMLETbc= +github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1 h1:L+/AG1GzZc8u7tIl7ijAl508T/FHu9esMf+E3hZ1JVA= +github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw= github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -34,6 +34,8 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0 h1:+dy0jRJ7Y0sMNJPUkTeZ8qC9qc9tNWJ/Noha+L6w2ZE= +github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0/go.mod h1:9YbcVrlaRryN9yCvk1fAjJTn5MLKPEd9/LnCJPkGWxY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= diff --git a/init_tinygo.go b/init_tinygo.go index b3a514d46957..5e6382531696 100644 --- a/init_tinygo.go +++ b/init_tinygo.go @@ -7,5 +7,5 @@ package main import _ "github.com/corazawaf/coraza-proxy-wasm/internal/gc" -// #cgo LDFLAGS: lib/libinjection.a lib/libre2.a lib/libcre2.a lib/libc++.a lib/libc++abi.a lib/libaho_corasick.a lib/libmimalloc.a lib/libgc.a +// #cgo LDFLAGS: lib/libinjection.a lib/libaho_corasick.a lib/libmimalloc.a lib/libgc.a import "C" diff --git a/internal/operators/rx.go b/internal/operators/rx.go index f40b72934e78..a6d332757407 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -6,44 +6,41 @@ package operators import ( - "fmt" - "github.com/corazawaf/coraza/v3/rules" - - "github.com/corazawaf/coraza-proxy-wasm/internal/re2" + re2 "github.com/wasilibs/go-re2" ) type rx struct { - re re2.RegExp - debug bool + re *re2.Regexp } var _ rules.Operator = (*rx)(nil) func newRX(options rules.OperatorOptions) (rules.Operator, error) { - o := &rx{} data := options.Arguments - if data == `(?:\$(?:\((?:\(.*\)|.*)\)|\{.*})|\/\w*\[!?.+\]|[<>]\(.*\))` { - o.debug = true - fmt.Println("enabling rx debug!") - } - re, err := re2.Compile(data) if err != nil { return nil, err } - - o.re = re - return o, err + return &rx{re: re}, nil } func (o *rx) Evaluate(tx rules.TransactionState, value string) bool { - res := o.re.FindStringSubmatch8(value, func(i int, match string) { - tx.CaptureField(i, match) - }) - if o.debug { - fmt.Println(res) + + if tx.Capturing() { + match := o.re.FindStringSubmatch(value) + if len(match) == 0 { + return false + } + for i, c := range match { + if i == 9 { + return true + } + tx.CaptureField(i, c) + } + return true + } else { + return o.re.MatchString(value) } - return res } diff --git a/internal/re2/re2.go b/internal/re2/re2.go deleted file mode 100644 index 08414f1a22c9..000000000000 --- a/internal/re2/re2.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright The OWASP Coraza contributors -// SPDX-License-Identifier: Apache-2.0 - -//go:build tinygo - -package re2 - -import ( - "reflect" - "runtime" - "unsafe" -) - -//export cre2_new -func cre2New(patternPtr unsafe.Pointer, patternLen uint32, opts unsafe.Pointer) unsafe.Pointer - -//export cre2_delete -func cre2Delete(rePtr unsafe.Pointer) - -//export cre2_opt_new -func cre2OptNew() unsafe.Pointer - -//export cre2_opt_delete -func cre2OptDelete(ptr unsafe.Pointer) - -//export cre2_opt_set_max_mem -func cre2OptSetMaxMem(ptr unsafe.Pointer, maxMem uint64) - -//export cre2_match -func cre2Match(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32, - anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32 - -//export cre2_match8 -func cre2Match8(rePtr unsafe.Pointer, textPtr unsafe.Pointer, textLen uint32, startPos uint32, endPos uint32, - anchor uint32, matchArrPtr unsafe.Pointer, nmatch uint32) uint32 - -type RegExp struct { - ptr unsafe.Pointer -} - -func Compile(pattern string) (RegExp, error) { - sh := (*reflect.StringHeader)(unsafe.Pointer(&pattern)) - opts := cre2OptNew() - defer cre2OptDelete(opts) - cre2OptSetMaxMem(opts, 8<<20 /* 8MB */) - rePtr := cre2New(unsafe.Pointer(sh.Data), uint32(sh.Len), opts) - runtime.KeepAlive(pattern) - // TODO(anuraaga): Propagate compilation errors from re2. - return RegExp{ptr: rePtr}, nil -} - -func (re RegExp) FindStringSubmatch8(text string, f func(int, string)) bool { - sh := (*reflect.StringHeader)(unsafe.Pointer(&text)) - // Array of cre2_string_t, which is const char* and int, easiest way to get it is an array of ints. - var matchArr [16]uint32 - matchArrPtr := unsafe.Pointer(&matchArr[0]) - res := cre2Match8(re.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), 0, uint32(sh.Len), 0, matchArrPtr, 8) - if res == 0 { - return false - } - - // Pointer math! re2 will return matches which are memory pointers into memory corresponding to text. - // GC semantics are clearest if we convert them to indexes within text rather than dereference the - // pointers directly. - textPtr := uint32(sh.Data) - - for i := 0; i < 8; i++ { - sPtr := matchArr[2*i] - if sPtr == 0 { - break - } - sLen := matchArr[2*i+1] - - textIdx := sPtr - textPtr - f(i, text[textIdx:textIdx+sLen]) - } - - return true -} diff --git a/lib/libc++.a b/lib/libc++.a deleted file mode 100644 index 07f697ee7732..000000000000 Binary files a/lib/libc++.a and /dev/null differ diff --git a/lib/libc++abi.a b/lib/libc++abi.a deleted file mode 100644 index 5fff4b7d8462..000000000000 Binary files a/lib/libc++abi.a and /dev/null differ diff --git a/lib/libcre2.a b/lib/libcre2.a deleted file mode 100644 index dcf839c1d9cf..000000000000 Binary files a/lib/libcre2.a and /dev/null differ diff --git a/magefiles/magefile.go b/magefiles/magefile.go index 852f2f0ce9c3..832e9a9cc0cb 100644 --- a/magefiles/magefile.go +++ b/magefiles/magefile.go @@ -203,7 +203,7 @@ tinygo build -gc=custom -opt=2 -o %s -scheduler=none -target=wasi %s`, filepath. // UpdateLibs updates the C++ filter dependencies. func UpdateLibs() error { - libs := []string{"aho-corasick", "bdwgc", "libinjection", "mimalloc", "re2"} + libs := []string{"aho-corasick", "bdwgc", "libinjection", "mimalloc"} for _, lib := range libs { if err := sh.RunV("docker", "build", "-t", "ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-"+lib, filepath.Join("buildtools", lib)); err != nil { return err