Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace *regexp.Regexp with match.Matcher #3469

Merged
merged 6 commits into from Jan 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.asciidoc
Expand Up @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Files created by Beats (logs, registry, file output) will have 0600 permissions. {pull}3387[3387].
- RPM/deb packages will now install the config file with 0600 permissions. {pull}3382[3382]
- Add the option to pass custom HTTP headers to the Elasticsearch output. {pull}3400[3400]
- Unify `regexp` and `contains` conditionals, for both to support array of strings and convert numbers to strings if required. {pull}3469[3469]

*Metricbeat*

Expand All @@ -87,6 +88,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Kafka consumer groups metricset. {pull}3240[3240]
- Add dynamic configuration reloading for modules. {pull}3281[3281]
- Add docker health metricset {pull}3357[3357]
- System module uses new matchers for white-listing processes. {pull}3469[3469]

*Packetbeat*

Expand All @@ -96,6 +98,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Add enabled config option to prospectors. {pull}3157[3157]
- Add target option for decoded_json_field. {pull}3169[3169]
- Add the `pipeline` config option at the prospector level, for configuring the Ingest Node pipeline ID. {pull}3433[3433]
- Update regular expressions used for matching file names or lines (multiline, include/exclude functionality) to new matchers improving performance of simple string matches. {pull}3469[3469]

*Winlogbeat*

Expand Down
6 changes: 3 additions & 3 deletions filebeat/harvester/config.go
Expand Up @@ -2,12 +2,12 @@ package harvester

import (
"fmt"
"regexp"
"time"

cfg "github.com/elastic/beats/filebeat/config"
"github.com/elastic/beats/filebeat/harvester/reader"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/common/match"

"github.com/dustin/go-humanize"
"github.com/elastic/beats/libbeat/logp"
Expand Down Expand Up @@ -47,8 +47,8 @@ type harvesterConfig struct {
CloseEOF bool `config:"close_eof"`
CloseTimeout time.Duration `config:"close_timeout" validate:"min=0"`
ForceCloseFiles bool `config:"force_close_files"`
ExcludeLines []*regexp.Regexp `config:"exclude_lines"`
IncludeLines []*regexp.Regexp `config:"include_lines"`
ExcludeLines []match.Matcher `config:"exclude_lines"`
IncludeLines []match.Matcher `config:"include_lines"`
MaxBytes int `config:"max_bytes" validate:"min=0,nonzero"`
Multiline *reader.MultilineConfig `config:"multiline"`
JSON *reader.JSONConfig `config:"json"`
Expand Down
4 changes: 2 additions & 2 deletions filebeat/harvester/log.go
Expand Up @@ -163,14 +163,14 @@ func (h *Harvester) sendEvent(event *input.Event) bool {
// the include_lines and exclude_lines options.
func (h *Harvester) shouldExportLine(line string) bool {
if len(h.config.IncludeLines) > 0 {
if !MatchAnyRegexps(h.config.IncludeLines, line) {
if !MatchAny(h.config.IncludeLines, line) {
// drop line
logp.Debug("harvester", "Drop line as it does not match any of the include patterns %s", line)
return false
}
}
if len(h.config.ExcludeLines) > 0 {
if MatchAnyRegexps(h.config.ExcludeLines, line) {
if MatchAny(h.config.ExcludeLines, line) {
// drop line
logp.Debug("harvester", "Drop line as it does match one of the exclude patterns%s", line)
return false
Expand Down
22 changes: 8 additions & 14 deletions filebeat/harvester/log_test.go
Expand Up @@ -104,29 +104,23 @@ func TestReadLine(t *testing.T) {
}

func TestExcludeLine(t *testing.T) {

regexp, err := InitRegexps([]string{"^DBG"})

regexp, err := InitMatchers("^DBG")
assert.Nil(t, err)

assert.True(t, MatchAnyRegexps(regexp, "DBG: a debug message"))
assert.False(t, MatchAnyRegexps(regexp, "ERR: an error message"))
assert.True(t, MatchAny(regexp, "DBG: a debug message"))
assert.False(t, MatchAny(regexp, "ERR: an error message"))
}

func TestIncludeLine(t *testing.T) {

regexp, err := InitRegexps([]string{"^ERR", "^WARN"})
regexp, err := InitMatchers("^ERR", "^WARN")

assert.Nil(t, err)

assert.False(t, MatchAnyRegexps(regexp, "DBG: a debug message"))
assert.True(t, MatchAnyRegexps(regexp, "ERR: an error message"))
assert.True(t, MatchAnyRegexps(regexp, "WARNING: a simple warning message"))
assert.False(t, MatchAny(regexp, "DBG: a debug message"))
assert.True(t, MatchAny(regexp, "ERR: an error message"))
assert.True(t, MatchAny(regexp, "WARNING: a simple warning message"))
}

func TestInitRegexp(t *testing.T) {

_, err := InitRegexps([]string{"((((("})
_, err := InitMatchers("(((((")
assert.NotNil(t, err)
}

Expand Down
17 changes: 9 additions & 8 deletions filebeat/harvester/reader/multiline.go
Expand Up @@ -3,8 +3,9 @@ package reader
import (
"errors"
"fmt"
"regexp"
"time"

"github.com/elastic/beats/libbeat/common/match"
)

// MultiLine reader combining multiple line events into one multi-line event.
Expand Down Expand Up @@ -55,7 +56,7 @@ func NewMultiline(
maxBytes int,
config *MultilineConfig,
) (*Multiline, error) {
types := map[string]func(*regexp.Regexp) (matcher, error){
types := map[string]func(match.Matcher) (matcher, error){
"before": beforeMatcher,
"after": afterMatcher,
}
Expand Down Expand Up @@ -280,14 +281,14 @@ func (mlr *Multiline) setState(next func(mlr *Multiline) (Message, error)) {

// matchers

func afterMatcher(regex *regexp.Regexp) (matcher, error) {
return genPatternMatcher(regex, func(last, current []byte) []byte {
func afterMatcher(pat match.Matcher) (matcher, error) {
return genPatternMatcher(pat, func(last, current []byte) []byte {
return current
})
}

func beforeMatcher(regex *regexp.Regexp) (matcher, error) {
return genPatternMatcher(regex, func(last, current []byte) []byte {
func beforeMatcher(pat match.Matcher) (matcher, error) {
return genPatternMatcher(pat, func(last, current []byte) []byte {
return last
})
}
Expand All @@ -299,12 +300,12 @@ func negatedMatcher(m matcher) matcher {
}

func genPatternMatcher(
regex *regexp.Regexp,
pat match.Matcher,
sel func(last, current []byte) []byte,
) (matcher, error) {
matcher := func(last, current []byte) bool {
line := sel(last, current)
return regex.Match(line)
return pat.Match(line)
}
return matcher, nil
}
5 changes: 3 additions & 2 deletions filebeat/harvester/reader/multiline_config.go
Expand Up @@ -2,15 +2,16 @@ package reader

import (
"fmt"
"regexp"
"time"

"github.com/elastic/beats/libbeat/common/match"
)

type MultilineConfig struct {
Negate bool `config:"negate"`
Match string `config:"match" validate:"required"`
MaxLines *int `config:"max_lines"`
Pattern *regexp.Regexp `config:"pattern"`
Pattern match.Matcher `config:"pattern"`
Timeout *time.Duration `config:"timeout" validate:"positive"`
}

Expand Down
12 changes: 6 additions & 6 deletions filebeat/harvester/reader/multiline_test.go
Expand Up @@ -6,12 +6,12 @@ import (
"bytes"
"errors"
"os"
"regexp"
"strings"
"testing"
"time"

"github.com/elastic/beats/filebeat/harvester/encoding"
"github.com/elastic/beats/libbeat/common/match"
"github.com/stretchr/testify/assert"
)

Expand All @@ -26,7 +26,7 @@ func (p bufferSource) Continuable() bool { return false }
func TestMultilineAfterOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`^[ \t] +`), // next line is indented by spaces
Pattern: match.MustCompile(`^[ \t] +`), // next line is indented by spaces
Match: "after",
},
2,
Expand All @@ -38,7 +38,7 @@ func TestMultilineAfterOK(t *testing.T) {
func TestMultilineBeforeOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`\\$`), // previous line ends with \
Pattern: match.MustCompile(`\\$`), // previous line ends with \
Match: "before",
},
2,
Expand All @@ -50,7 +50,7 @@ func TestMultilineBeforeOK(t *testing.T) {
func TestMultilineAfterNegateOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`^-`), // first line starts with '-' at beginning of line
Pattern: match.MustCompile(`^-`), // first line starts with '-' at beginning of line
Negate: true,
Match: "after",
},
Expand All @@ -63,7 +63,7 @@ func TestMultilineAfterNegateOK(t *testing.T) {
func TestMultilineBeforeNegateOK(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`;$`), // last line ends with ';'
Pattern: match.MustCompile(`;$`), // last line ends with ';'
Negate: true,
Match: "before",
},
Expand All @@ -76,7 +76,7 @@ func TestMultilineBeforeNegateOK(t *testing.T) {
func TestMultilineBeforeNegateOKWithEmptyLine(t *testing.T) {
testMultilineOK(t,
MultilineConfig{
Pattern: regexp.MustCompile(`;$`), // last line ends with ';'
Pattern: match.MustCompile(`;$`), // last line ends with ';'
Negate: true,
Match: "before",
},
Expand Down
12 changes: 5 additions & 7 deletions filebeat/harvester/util.go
@@ -1,15 +1,13 @@
package harvester

import "regexp"
import "github.com/elastic/beats/libbeat/common/match"

// MatchAnyRegexps checks if the text matches any of the regular expressions
func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool {

for _, rexp := range regexps {
if rexp.MatchString(text) {
// MatchAny checks if the text matches any of the regular expressions
func MatchAny(matchers []match.Matcher, text string) bool {
for _, m := range matchers {
if m.MatchString(text) {
return true
}
}

return false
}
23 changes: 9 additions & 14 deletions filebeat/harvester/util_test.go
Expand Up @@ -3,20 +3,20 @@
package harvester

import (
"regexp"
"testing"

"github.com/elastic/beats/libbeat/logp"
"github.com/stretchr/testify/assert"
)

// InitRegexps initializes a list of compiled regular expressions.
func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
"github.com/elastic/beats/libbeat/common/match"
"github.com/elastic/beats/libbeat/logp"
)

result := []*regexp.Regexp{}
// InitMatchers initializes a list of compiled regular expressions.
func InitMatchers(exprs ...string) ([]match.Matcher, error) {
result := []match.Matcher{}

for _, exp := range exprs {
rexp, err := regexp.Compile(exp)
rexp, err := match.Compile(exp)
if err != nil {
logp.Err("Fail to compile the regexp %s: %s", exp, err)
return nil, err
Expand All @@ -27,13 +27,8 @@ func InitRegexps(exprs []string) ([]*regexp.Regexp, error) {
}

func TestMatchAnyRegexps(t *testing.T) {

patterns := []string{"\\.gz$"}

regexps, err := InitRegexps(patterns)

matchers, err := InitMatchers("\\.gz$")
assert.Nil(t, err)

assert.Equal(t, MatchAnyRegexps(regexps, "/var/log/log.gz"), true)
assert.Equal(t, MatchAny(matchers, "/var/log/log.gz"), true)

}
24 changes: 12 additions & 12 deletions filebeat/prospector/config.go
Expand Up @@ -2,10 +2,10 @@ package prospector

import (
"fmt"
"regexp"
"time"

cfg "github.com/elastic/beats/filebeat/config"
"github.com/elastic/beats/libbeat/common/match"
)

var (
Expand All @@ -23,17 +23,17 @@ var (
)

type prospectorConfig struct {
Enabled bool `config:"enabled"`
ExcludeFiles []*regexp.Regexp `config:"exclude_files"`
IgnoreOlder time.Duration `config:"ignore_older"`
Paths []string `config:"paths"`
ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"`
InputType string `config:"input_type"`
CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"`
CleanRemoved bool `config:"clean_removed"`
HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"`
Symlinks bool `config:"symlinks"`
TailFiles bool `config:"tail_files"`
Enabled bool `config:"enabled"`
ExcludeFiles []match.Matcher `config:"exclude_files"`
IgnoreOlder time.Duration `config:"ignore_older"`
Paths []string `config:"paths"`
ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"`
InputType string `config:"input_type"`
CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"`
CleanRemoved bool `config:"clean_removed"`
HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"`
Symlinks bool `config:"symlinks"`
TailFiles bool `config:"tail_files"`
}

func (config *prospectorConfig) Validate() error {
Expand Down
2 changes: 1 addition & 1 deletion filebeat/prospector/prospector_log.go
Expand Up @@ -342,7 +342,7 @@ func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error
// isFileExcluded checks if the given path should be excluded
func (p *ProspectorLog) isFileExcluded(file string) bool {
patterns := p.config.ExcludeFiles
return len(patterns) > 0 && harvester.MatchAnyRegexps(patterns, file)
return len(patterns) > 0 && harvester.MatchAny(patterns, file)
}

// isIgnoreOlder checks if the given state reached ignore_older
Expand Down
8 changes: 4 additions & 4 deletions filebeat/prospector/prospector_log_other_test.go
Expand Up @@ -3,19 +3,19 @@
package prospector

import (
"regexp"
"testing"

"github.com/elastic/beats/filebeat/input"
"github.com/elastic/beats/filebeat/input/file"
"github.com/elastic/beats/libbeat/common/match"

"github.com/stretchr/testify/assert"
)

var matchTests = []struct {
file string
paths []string
excludeFiles []*regexp.Regexp
excludeFiles []match.Matcher
result bool
}{
{
Expand Down Expand Up @@ -45,13 +45,13 @@ var matchTests = []struct {
{
"test/test.log",
[]string{"test/*"},
[]*regexp.Regexp{regexp.MustCompile("test.log")},
[]match.Matcher{match.MustCompile("test.log")},
false,
},
{
"test/test.log",
[]string{"test/*"},
[]*regexp.Regexp{regexp.MustCompile("test2.log")},
[]match.Matcher{match.MustCompile("test2.log")},
true,
},
}
Expand Down