-
Notifications
You must be signed in to change notification settings - Fork 2
/
match.go
66 lines (59 loc) · 1.96 KB
/
match.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// Package sieve provides fast matching algorithms.
// TODO: use Aho-Corasick for substring matching.
package sieve
import (
"github.com/ActiveState/log"
"regexp"
"strings"
)
// MultiRegexpMatcher matches a string against multiple named regular
// expressions. Each regular expression is registered together with a literal
// substring; Match looks for the substrings first so that text containing
// none of them fails early without running any of the regular expressions.
type MultiRegexpMatcher struct {
	substrings       map[string]string         // substring to name
	regexps          map[string]*regexp.Regexp // name to regexp
	substringsRegexp *regexp.Regexp            // substring regex combined; nil until Build is called
}
// NewMultiRegexpMatcher returns an empty matcher whose substring and regexp
// tables are allocated and whose combined substring regexp is still nil.
// Patterns are registered with MustAdd and the combined regexp is compiled
// by Build.
func NewMultiRegexpMatcher() *MultiRegexpMatcher {
	matcher := new(MultiRegexpMatcher)
	matcher.substrings = make(map[string]string)
	matcher.regexps = make(map[string]*regexp.Regexp)
	return matcher
}
// MustAdd registers the regular expression re under name, paired with the
// literal substring that Match searches for before trying re.
//
// A substring or a name that is already registered is reported through
// log.Fatalf / log.Fatal. An invalid re makes regexp.MustCompile panic; the
// expression is compiled before either table is written, so such a panic
// leaves the matcher unchanged rather than recording a substring that has no
// regexp behind it.
//
// The combined substring regexp is only (re)compiled by Build, so Build must
// be called after the last MustAdd for the new substring to be seen by Match.
func (m *MultiRegexpMatcher) MustAdd(name string, substring string, re string) {
	if oldName, ok := m.substrings[substring]; ok {
		log.Fatalf(
			"substring %s already added under %s; being added again by %s",
			substring, oldName, name)
	}
	if _, ok := m.regexps[name]; ok {
		log.Fatal("already in regexps")
	}

	// Compile first: MustCompile panics on a bad pattern, and doing so before
	// the writes keeps both tables consistent if the caller recovers.
	compiled := regexp.MustCompile(re)

	m.substrings[substring] = name
	m.regexps[name] = compiled
}
// Build compiles all registered substrings into one alternation regexp that
// Match uses as its fail-early prefilter. Call it after the last MustAdd and
// before the first Match.
//
// The combined regexp is switched to leftmost-longest matching. Map
// iteration order is not specified, so with the default leftmost-first
// semantics the winner between two substrings starting at the same position
// (for example "foo" and "foobar") would depend on the order in which they
// happened to be joined; preferring the longest makes the result
// deterministic.
//
// Empty substrings are left out of the alternation: an empty alternative
// matches at offset 0 of any text, and Match treats an empty match as "no
// substring found", so including one would mask the other substrings.
func (m *MultiRegexpMatcher) Build() {
	escaped := make([]string, 0, len(m.substrings))
	for substring := range m.substrings {
		if substring == "" {
			continue
		}
		// Substrings are literals, not patterns, so escape metacharacters.
		escaped = append(escaped, regexp.QuoteMeta(substring))
	}

	combined := regexp.MustCompile(strings.Join(escaped, "|"))
	combined.Longest()
	m.substringsRegexp = combined
}
// Match looks for one of the registered substrings in text and, if one is
// found, runs the regular expression registered alongside it.
//
// It returns the name of that regular expression and the result of its
// FindStringSubmatch on text. When no substring occurs in text it returns
// "" and nil without running any regular expression. When a substring occurs
// but its regular expression does not match, the name is still returned and
// the submatch slice is nil.
//
// The combined substring regexp is only set by Build, so Build must have
// been called before Match.
func (m *MultiRegexpMatcher) Match(text string) (string, []string) {
	// TODO: use Aho-Corasick instead of regexp to match the substrings.
	hit := m.substringsRegexp.FindString(text)
	if len(hit) == 0 {
		// Fail early so no time is wasted on a futile regexp match below.
		return "", nil
	}

	name, known := m.substrings[hit]
	if !known {
		panic("not reachable")
	}
	re, compiled := m.regexps[name]
	if !compiled {
		panic("not reachable")
	}

	// TODO: if this regexp fails, should we try the next matching substring?
	return name, re.FindStringSubmatch(text)
}