From ac11f76ceb6879441c3d22e4ca76708a9f014b5a Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Fri, 18 Mar 2022 21:27:33 +0100 Subject: [PATCH 1/6] Cache regexp compilations on field validation --- cmd/testrunner.go | 10 ++++++++++ internal/fields/validate.go | 21 +++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 9247b56b8f..964beded2c 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -6,8 +6,10 @@ package cmd import ( "fmt" + "log" "os" "path/filepath" + "runtime/pprof" "strings" "github.com/pkg/errors" @@ -95,6 +97,14 @@ func setupTestCommand() *cobraext.Command { func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.CommandAction { testType := runner.Type() return func(cmd *cobra.Command, args []string) error { + f, err := os.Create("cpu.profile") + if err != nil { + log.Fatal(err) + } + defer f.Close() + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + cmd.Printf("Run %s tests for the package\n", testType) failOnMissing, err := cmd.Flags().GetBool(cobraext.FailOnMissingFlagName) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 0c99883e90..7368166020 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -302,6 +302,21 @@ func FindElementDefinition(searchedKey string, fieldDefinitions []FieldDefinitio return findElementDefinitionForRoot("", searchedKey, fieldDefinitions) } +var compileCache map[string]*regexp.Regexp + +func init() { + compileCache = make(map[string]*regexp.Regexp) +} + +func mustCompile(str string) *regexp.Regexp { + if r, found := compileCache[str]; found { + return r + } + r := regexp.MustCompile(str) + compileCache[str] = r + return r +} + func compareKeys(key string, def FieldDefinition, searchedKey string) bool { k := strings.ReplaceAll(key, ".", "\\.") k = strings.ReplaceAll(k, "*", "[^.]+") @@ -313,10 +328,8 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool { } k = fmt.Sprintf("^%s$", k) - matched, err := regexp.MatchString(k, searchedKey) - if err != nil { - panic(errors.Wrapf(err, "regexp built using the given field/key (%s) is invalid", k)) - } + r := mustCompile(k) + matched := r.MatchString(searchedKey) return matched } From ae6c1dd4283aa2ea741f1b357d61dec7b3238f73 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 21 Mar 2022 16:51:29 +0100 Subject: [PATCH 2/6] Remove profiling --- cmd/testrunner.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 964beded2c..9247b56b8f 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -6,10 +6,8 @@ package cmd import ( "fmt" - "log" "os" "path/filepath" - "runtime/pprof" "strings" "github.com/pkg/errors" @@ -97,14 +95,6 @@ func setupTestCommand() *cobraext.Command { func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.CommandAction { testType := runner.Type() return func(cmd *cobra.Command, args []string) error { - f, err := os.Create("cpu.profile") - if err != nil { - log.Fatal(err) - } - defer f.Close() - pprof.StartCPUProfile(f) - defer pprof.StopCPUProfile() - cmd.Printf("Run %s tests for the package\n", testType) failOnMissing, err := cmd.Flags().GetBool(cobraext.FailOnMissingFlagName) From 631ffabdbbb67142a281255d196c5dcf5687811d Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 21 Mar 2022 17:28:50 +0100 Subject: [PATCH 3/6] Refactor to don't need regular expressions --- internal/fields/validate.go | 50 +++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 7368166020..e891080b2e 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -302,35 +302,41 @@ func FindElementDefinition(searchedKey string, fieldDefinitions []FieldDefinitio return findElementDefinitionForRoot("", searchedKey, fieldDefinitions) } -var compileCache map[string]*regexp.Regexp - -func init() { - compileCache = make(map[string]*regexp.Regexp) -} - -func mustCompile(str string) *regexp.Regexp { - if r, found := compileCache[str]; found { - return r +func compareKeys(key string, def FieldDefinition, searchedKey string) bool { + if key == searchedKey { + return true } - r := regexp.MustCompile(str) - compileCache[str] = r - return r -} -func compareKeys(key string, def FieldDefinition, searchedKey string) bool { - k := strings.ReplaceAll(key, ".", "\\.") - k = strings.ReplaceAll(k, "*", "[^.]+") + keyParts := strings.Split(key, ".") + searchedParts := strings.Split(searchedKey, ".") + if len(searchedParts) < len(keyParts) { + return false + } + for i, part := range keyParts { + if part == "*" { + continue + } + if part == searchedParts[i] { + continue + } + return false + } + if len(keyParts) == len(searchedParts) { + return true + } // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). - if def.Type == "geo_point" || def.External != "" { - k += "(\\.lon|\\.lat|)" + if len(keyParts)+1 == len(searchedParts) { + if def.Type == "geo_point" || def.External != "" { + extraPart := searchedParts[len(searchedParts)-1] + if extraPart == "lon" || extraPart == "lat" { + return true + } + } } - k = fmt.Sprintf("^%s$", k) - r := mustCompile(k) - matched := r.MatchString(searchedKey) - return matched + return false } func (v *Validator) parseElementValue(key string, definition FieldDefinition, val interface{}) error { From 61db995a3d84f1c95776ef0c5815a3191f27132d Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 21 Mar 2022 17:53:07 +0100 Subject: [PATCH 4/6] Further refactor for char by char comparison --- internal/fields/validate.go | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index e891080b2e..6fb319dde7 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -303,34 +303,36 @@ func FindElementDefinition(searchedKey string, fieldDefinitions []FieldDefinitio } func compareKeys(key string, def FieldDefinition, searchedKey string) bool { - if key == searchedKey { - return true - } - - keyParts := strings.Split(key, ".") - searchedParts := strings.Split(searchedKey, ".") - if len(searchedParts) < len(keyParts) { - return false - } - for i, part := range keyParts { - if part == "*" { + var i int + var j int + for i = 0; i < len(key); i++ { + if j >= len(searchedKey) { + return false + } + if key[i] == searchedKey[j] { + j++ continue } - if part == searchedParts[i] { + if key[i] == '*' { + for ; j < len(searchedKey); j++ { + if searchedKey[j] == '.' { + j-- + break + } + } continue } return false } - if len(keyParts) == len(searchedParts) { + if len(searchedKey) == j { return true } - // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). - if len(keyParts)+1 == len(searchedParts) { + if len(searchedKey) > j { if def.Type == "geo_point" || def.External != "" { - extraPart := searchedParts[len(searchedParts)-1] - if extraPart == "lon" || extraPart == "lat" { + extraPart := searchedKey[j:] + if extraPart == ".lon" || extraPart == ".lat" { return true } } From eac341b76af88b26f5343f3a337447a929c091a9 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 22 Mar 2022 10:28:08 +0100 Subject: [PATCH 5/6] Apply comments from review --- internal/fields/validate.go | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 6fb319dde7..a12b90363e 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -302,31 +302,37 @@ func FindElementDefinition(searchedKey string, fieldDefinitions []FieldDefinitio return findElementDefinitionForRoot("", searchedKey, fieldDefinitions) } +// compareKeys checks if `searchedKey` matches with the given `key`. `key` can contain +// wildcards (`*`), that match any sequence of characters in `searchedKey` different to dots. func compareKeys(key string, def FieldDefinition, searchedKey string) bool { - var i int + // Loop over every byte in `key` to find if there is a matching byte in `searchedKey`. var j int - for i = 0; i < len(key); i++ { + for _, k := range []byte(key) { if j >= len(searchedKey) { return false } - if key[i] == searchedKey[j] { + switch k { + case searchedKey[j]: + // Match, continue. j++ - continue - } - if key[i] == '*' { - for ; j < len(searchedKey); j++ { - if searchedKey[j] == '.' { - j-- - break - } + case '*': + // Wildcard, match everything till next dot. + if idx := strings.Index(searchedKey[j:], "."); idx != -1 { + j += idx - 1 + } else { + // If there is no dot, everything has matched. + j = len(searchedKey) } - continue + default: + // No match. + return false } - return false } + // If everything matched, searched key has been found. if len(searchedKey) == j { return true } + // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). if len(searchedKey) > j { From 6aefd51d93d2f4557078c8e1e8431b14a0d50255 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 22 Mar 2022 12:44:36 +0100 Subject: [PATCH 6/6] Add unit test and fix wildcard cases --- internal/fields/validate.go | 14 +++- internal/fields/validate_test.go | 121 +++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index a12b90363e..7a8ac84f29 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -309,6 +309,7 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool { var j int for _, k := range []byte(key) { if j >= len(searchedKey) { + // End of searched key reached before maching all characters in the key. return false } switch k { @@ -317,11 +318,16 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool { j++ case '*': // Wildcard, match everything till next dot. - if idx := strings.Index(searchedKey[j:], "."); idx != -1 { - j += idx - 1 - } else { - // If there is no dot, everything has matched. + switch idx := strings.IndexByte(searchedKey[j:], '.'); idx { + default: + // Jump till next dot. + j += idx + case -1: + // No dots, wildcard matches with the rest of the searched key. j = len(searchedKey) + case 0: + // Empty name on wildcard, this is not permitted (e.g. `example..foo`). + return false } default: // No match. diff --git a/internal/fields/validate_test.go b/internal/fields/validate_test.go index 9b46936b55..1e142927c5 100644 --- a/internal/fields/validate_test.go +++ b/internal/fields/validate_test.go @@ -9,6 +9,7 @@ import ( "os" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -224,6 +225,126 @@ func Test_parseElementValue(t *testing.T) { } } +func TestCompareKeys(t *testing.T) { + cases := []struct { + key string + def FieldDefinition + searchedKey string + expected bool + }{ + { + key: "example.foo", + searchedKey: "example.foo", + expected: true, + }, + { + key: "example.bar", + searchedKey: "example.foo", + expected: false, + }, + { + key: "example.foo", + searchedKey: "example.foos", + expected: false, + }, + { + key: "example.foo", + searchedKey: "example.fo", + expected: false, + }, + { + key: "example.*", + searchedKey: "example.foo", + expected: true, + }, + { + key: "example.foo", + searchedKey: "example.*", + expected: false, + }, + { + key: "example.*", + searchedKey: "example.", + expected: false, + }, + { + key: "example.*.foo", + searchedKey: "example.group.foo", + expected: true, + }, + { + key: "example.*.*", + searchedKey: "example.group.foo", + expected: true, + }, + { + key: "example.*.*", + searchedKey: "example..foo", + expected: false, + }, + { + key: "example.*", + searchedKey: "example.group.foo", + expected: false, + }, + { + key: "example.geo", + def: FieldDefinition{Type: "geo_point"}, + searchedKey: "example.geo.lat", + expected: true, + }, + { + key: "example.geo", + def: FieldDefinition{Type: "geo_point"}, + searchedKey: "example.geo.lon", + expected: true, + }, + { + key: "example.geo", + def: FieldDefinition{Type: "geo_point"}, + searchedKey: "example.geo.foo", + expected: false, + }, + { + key: "example.ecs.geo", + def: FieldDefinition{External: "ecs"}, + searchedKey: "example.ecs.geo.lat", + expected: true, + }, + { + key: "example.ecs.geo", + def: FieldDefinition{External: "ecs"}, + searchedKey: "example.ecs.geo.lon", + expected: true, + }, + { + key: "example.*", + def: FieldDefinition{Type: "geo_point"}, + searchedKey: "example.geo.lon", + expected: true, + }, + { + key: "example.*", + def: FieldDefinition{External: "ecs"}, + searchedKey: "example.geo.lat", + expected: true, + }, + { + key: "example.*", + def: FieldDefinition{Type: "geo_point"}, + searchedKey: "example.geo.foo", + expected: false, + }, + } + + for _, c := range cases { + t.Run(c.key+" matches "+c.searchedKey, func(t *testing.T) { + found := compareKeys(c.key, c.def, c.searchedKey) + assert.Equal(t, c.expected, found) + }) + } +} + func readTestResults(t *testing.T, path string) (f results) { c, err := os.ReadFile(path) require.NoError(t, err)