forked from corazawaf/coraza
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rx.go
135 lines (114 loc) · 3.21 KB
/
rx.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0
//go:build !coraza.disabled_operators.rx
package operators
import (
"fmt"
"regexp"
"strconv"
"unicode/utf8"
"rsc.io/binaryregexp"
"github.com/crowdsecurity/coraza/v3/experimental/plugins/plugintypes"
"github.com/crowdsecurity/coraza/v3/internal/memoize"
)
// rx is the operator value returned by newRX. It evaluates input values
// against a regular expression compiled with the standard library regexp
// package.
type rx struct {
// re is the compiled expression consulted by Evaluate.
re *regexp.Regexp
}
// Compile-time assertion that *rx implements plugintypes.Operator.
var _ plugintypes.Operator = (*rx)(nil)
// newRX builds the regular-expression operator from options.Arguments.
//
// The expression is prefixed with (?sm) before use. When the prefixed
// expression contains byte escapes that decode to invalid UTF-8 (as reported
// by matchesArbitraryBytes), construction is delegated to newBinaryRX.
// Otherwise the expression is compiled with regexp.Compile through
// memoize.Do, keyed by the prefixed expression, and any compile error is
// returned unchanged.
func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
	// (?sm) enables multiline and dotall mode, required by some CRS rules and matching ModSec behavior, see
	// - https://stackoverflow.com/a/27680233
	// - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
	pattern := fmt.Sprintf("(?sm)%s", options.Arguments)

	if matchesArbitraryBytes(pattern) {
		// Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does
		// not match unicode, meaning we cannot support expressions with both unicode and non-utf8
		// matches. This should not be commonly needed.
		return newBinaryRX(options)
	}

	compile := func() (interface{}, error) { return regexp.Compile(pattern) }
	cached, err := memoize.Do(pattern, compile)
	if err != nil {
		return nil, err
	}

	op := &rx{re: cached.(*regexp.Regexp)}
	return op, nil
}
// Evaluate reports whether value matches the operator's expression.
//
// When tx.Capturing() is false only a boolean match is performed. When it is
// true, the submatches of the first match are handed to tx.CaptureField by
// index; only indices 0 through 8 are forwarded, any further groups are
// ignored. A value that does not match yields false and captures nothing.
func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool {
	if !tx.Capturing() {
		return o.re.MatchString(value)
	}

	groups := o.re.FindStringSubmatch(value)
	if len(groups) == 0 {
		return false
	}

	// Forward at most the first nine entries (whole match plus eight groups).
	const captureLimit = 9
	if len(groups) > captureLimit {
		groups = groups[:captureLimit]
	}
	for idx, text := range groups {
		tx.CaptureField(idx, text)
	}
	return true
}
// binaryRx is exactly the same as rx, but using the binaryregexp package for matching
// arbitrary bytes.
type binaryRX struct {
re *binaryregexp.Regexp
}
var _ plugintypes.Operator = (*binaryRX)(nil)
// newBinaryRX builds the byte-oriented variant of the regular-expression
// operator. options.Arguments is compiled with binaryregexp.Compile through
// memoize.Do, keyed by the raw argument string, and any compile error is
// returned unchanged.
//
// NOTE(review): the expression is compiled exactly as given, without the
// (?sm) prefix that newRX adds before compiling — confirm this difference is
// intentional.
func newBinaryRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
	pattern := options.Arguments

	compile := func() (interface{}, error) { return binaryregexp.Compile(pattern) }
	cached, err := memoize.Do(pattern, compile)
	if err != nil {
		return nil, err
	}

	op := &binaryRX{re: cached.(*binaryregexp.Regexp)}
	return op, nil
}
// Evaluate reports whether value matches the operator's binary expression.
//
// When tx.Capturing() is false only a boolean match is performed. When it is
// true, the submatches of the first match are handed to tx.CaptureField by
// index; only indices 0 through 8 are forwarded, any further groups are
// ignored. A value that does not match yields false and captures nothing.
func (o *binaryRX) Evaluate(tx plugintypes.TransactionState, value string) bool {
	if !tx.Capturing() {
		return o.re.MatchString(value)
	}

	groups := o.re.FindStringSubmatch(value)
	if len(groups) == 0 {
		return false
	}

	// Forward at most the first nine entries (whole match plus eight groups).
	const captureLimit = 9
	if len(groups) > captureLimit {
		groups = groups[:captureLimit]
	}
	for idx, text := range groups {
		tx.CaptureField(idx, text)
	}
	return true
}
// init passes newRX to Register under the operator name "rx".
func init() {
Register("rx", newRX)
}
// matchesArbitraryBytes reports whether expr contains \xHH byte escapes that,
// once decoded, leave the expression as invalid UTF-8.
//
// The expression is scanned byte by byte and rebuilt with every two-digit
// \xHH escape replaced by the byte it denotes; everything else is copied
// verbatim. The result is true when the rebuilt bytes are not valid UTF-8.
//
// A backslash followed by any character other than 'x' is copied together
// with that character, so the escaped character is never re-examined. This
// keeps an escaped backslash followed by literal text such as `\\x80` from
// being misread as the byte escape `\x80`.
func matchesArbitraryBytes(expr string) bool {
	decoded := make([]byte, 0, len(expr))
	for i := 0; i < len(expr); i++ {
		c := expr[i]
		if c != '\\' {
			decoded = append(decoded, c)
			continue
		}
		if i+3 >= len(expr) {
			// Not enough room left for a full \xHH escape; keep the tail as-is.
			decoded = append(decoded, expr[i:]...)
			break
		}
		if expr[i+1] != 'x' {
			// Some other escape (including an escaped backslash): consume the
			// pair so the escaped character cannot start a new escape.
			decoded = append(decoded, c, expr[i+1])
			i++
			continue
		}
		v, mb, _, err := strconv.UnquoteChar(expr[i:], 0)
		if err != nil || mb {
			// Wasn't a byte escape sequence, shouldn't happen in practice.
			decoded = append(decoded, c)
			continue
		}
		decoded = append(decoded, byte(v))
		// Skip the remaining "xHH"; the loop increment steps past the backslash.
		i += 3
	}
	return !utf8.Valid(decoded)
}