From 0554d9eb11d0bde96cb91161845278437b1c7667 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 09:20:45 +0200 Subject: [PATCH 01/12] move package yamlpatch -> csyaml --- .golangci.yml | 10 +++++----- {yamlpatch => csyaml/internal/merge}/merge.go | 2 +- {yamlpatch => csyaml/internal/merge}/merge_test.go | 2 +- {yamlpatch => csyaml}/patcher.go | 6 ++++-- {yamlpatch => csyaml}/patcher_test.go | 8 ++++---- {yamlpatch => csyaml}/testdata/base.yaml | 0 {yamlpatch => csyaml}/testdata/expect.yaml | 0 {yamlpatch => csyaml}/testdata/production.yaml | 0 8 files changed, 15 insertions(+), 13 deletions(-) rename {yamlpatch => csyaml/internal/merge}/merge.go (99%) rename {yamlpatch => csyaml/internal/merge}/merge_test.go (99%) rename {yamlpatch => csyaml}/patcher.go (96%) rename {yamlpatch => csyaml}/patcher_test.go (97%) rename {yamlpatch => csyaml}/testdata/base.yaml (100%) rename {yamlpatch => csyaml}/testdata/expect.yaml (100%) rename {yamlpatch => csyaml}/testdata/production.yaml (100%) diff --git a/.golangci.yml b/.golangci.yml index f20fcd3..ba45b83 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -41,7 +41,7 @@ linters: rules: yaml: files: - - 'yamlpatch/patcher.go' + - '!**/csyaml/patcher.go' deny: - pkg: gopkg.in/yaml.v2 desc: yaml.v2 is deprecated for new code in favor of yaml.v3 @@ -157,8 +157,8 @@ linters: text: the methods of "DurationWithDays" use pointer receiver and non-pointer receiver. paths: - - yamlpatch/merge.go - - yamlpatch/merge_test.go + - csyaml/internal/merge/merge.go + - csyaml/internal/merge/merge_test.go - third_party$ - builtin$ - examples$ @@ -182,8 +182,8 @@ formatters: exclusions: paths: - - yamlpatch/merge.go - - yamlpatch/merge_test.go + - csyaml/internal/merge/merge.go + - csyaml/internal/merge/merge_test.go - third_party$ - builtin$ - examples$ diff --git a/yamlpatch/merge.go b/csyaml/internal/merge/merge.go similarity index 99% rename from yamlpatch/merge.go rename to csyaml/internal/merge/merge.go index 6f16cfa..b01e8c8 100644 --- a/yamlpatch/merge.go +++ b/csyaml/internal/merge/merge.go @@ -21,7 +21,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -package yamlpatch +package merge import ( "bytes" diff --git a/yamlpatch/merge_test.go b/csyaml/internal/merge/merge_test.go similarity index 99% rename from yamlpatch/merge_test.go rename to csyaml/internal/merge/merge_test.go index e86f6fe..93175f6 100644 --- a/yamlpatch/merge_test.go +++ b/csyaml/internal/merge/merge_test.go @@ -18,7 +18,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -package yamlpatch +package merge import ( "bytes" diff --git a/yamlpatch/patcher.go b/csyaml/patcher.go similarity index 96% rename from yamlpatch/patcher.go rename to csyaml/patcher.go index 6f0fd12..d9af10e 100644 --- a/yamlpatch/patcher.go +++ b/csyaml/patcher.go @@ -1,4 +1,4 @@ -package yamlpatch +package csyaml import ( "bytes" @@ -9,6 +9,8 @@ import ( log "github.com/sirupsen/logrus" "gopkg.in/yaml.v2" + + "github.com/crowdsecurity/go-cs-lib/csyaml/internal/merge" ) type Patcher struct { @@ -72,7 +74,7 @@ func (p *Patcher) MergedPatchContent() ([]byte, error) { // strict mode true, will raise errors for duplicate map keys and // overriding with a different type - patched, err := YAML([][]byte{base, over}, true) + patched, err := merge.YAML([][]byte{base, over}, true) if err != nil { return nil, err } diff --git a/yamlpatch/patcher_test.go b/csyaml/patcher_test.go similarity index 97% rename from yamlpatch/patcher_test.go rename to csyaml/patcher_test.go index 0f945bf..b9697cd 100644 --- a/yamlpatch/patcher_test.go +++ b/csyaml/patcher_test.go @@ -1,4 +1,4 @@ -package yamlpatch_test +package csyaml_test import ( "os" @@ -7,7 +7,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/crowdsecurity/go-cs-lib/yamlpatch" + "github.com/crowdsecurity/go-cs-lib/csyaml" ) // similar to the one in cstest, but with test number too. We cannot import @@ -197,7 +197,7 @@ func TestMergedPatchContent(t *testing.T) { err = os.WriteFile(patchPath, []byte(tc.patch), 0o600) require.NoError(t, err) - patcher := yamlpatch.NewPatcher(configPath, ".local") + patcher := csyaml.NewPatcher(configPath, ".local") patchedBytes, err := patcher.MergedPatchContent() requireErrorContains(t, err, tc.expectedErr) require.YAMLEq(t, tc.expected, string(patchedBytes)) @@ -298,7 +298,7 @@ func TestPrependedPatchContent(t *testing.T) { err = os.WriteFile(patchPath, []byte(tc.patch), 0o600) require.NoError(t, err) - patcher := yamlpatch.NewPatcher(configPath, ".local") + patcher := csyaml.NewPatcher(configPath, ".local") patchedBytes, err := patcher.PrependedPatchContent() requireErrorContains(t, err, tc.expectedErr) // YAMLeq does not handle multiple documents diff --git a/yamlpatch/testdata/base.yaml b/csyaml/testdata/base.yaml similarity index 100% rename from yamlpatch/testdata/base.yaml rename to csyaml/testdata/base.yaml diff --git a/yamlpatch/testdata/expect.yaml b/csyaml/testdata/expect.yaml similarity index 100% rename from yamlpatch/testdata/expect.yaml rename to csyaml/testdata/expect.yaml diff --git a/yamlpatch/testdata/production.yaml b/csyaml/testdata/production.yaml similarity index 100% rename from yamlpatch/testdata/production.yaml rename to csyaml/testdata/production.yaml From 53df9553ee2d7193a92c84a0ccac94440c1fa4c1 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 09:59:00 +0200 Subject: [PATCH 02/12] csyaml/GetDocumentKeys() --- csyaml/keys.go | 52 +++++++++++++++++++++++++++++++++++++++ csyaml/keys_test.go | 60 +++++++++++++++++++++++++++++++++++++++++++++ go.mod | 1 + go.sum | 2 ++ 4 files changed, 115 insertions(+) create mode 100644 csyaml/keys.go create mode 100644 csyaml/keys_test.go diff --git a/csyaml/keys.go b/csyaml/keys.go new file mode 100644 index 0000000..e192dec --- /dev/null +++ b/csyaml/keys.go @@ -0,0 +1,52 @@ +package csyaml + +import ( + "errors" + "fmt" + "io" + + "github.com/goccy/go-yaml" +) + +// GetDocumentKeys reads all YAML documents from r and for each one +// returns a slice of its top-level keys, in order. Non-mapping documents yield +// an empty slice. +func GetDocumentKeys(r io.Reader) ([][]string, error) { + // Decode into Go types, but force mappings into MapSlice + dec := yaml.NewDecoder(r, yaml.UseOrderedMap()) + + allKeys := make([][]string, 0) + + idx := -1 + + for { + var raw any + + idx++ + + if err := dec.Decode(&raw); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("position %d: %s", idx, yaml.FormatError(err, false, false)) + } + keys := []string{} + + // Only mapping nodes become MapSlice with UseOrderedMap() + if ms, ok := raw.(yaml.MapSlice); ok { + for _, item := range ms { + // Key is interface{}—here we expect strings + if ks, ok := item.Key.(string); ok { + keys = append(keys, ks) + } else { + // fallback to string form of whatever it is + keys = append(keys, fmt.Sprint(item.Key)) + } + } + } + + allKeys = append(allKeys, keys) + } + + return allKeys, nil +} diff --git a/csyaml/keys_test.go b/csyaml/keys_test.go new file mode 100644 index 0000000..9244508 --- /dev/null +++ b/csyaml/keys_test.go @@ -0,0 +1,60 @@ +package csyaml + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crowdsecurity/go-cs-lib/cstest" +) + +func TestCollectTopLevelKeys(t *testing.T) { + tests := []struct { + name string + input string + want [][]string + wantErr string + }{ + { + name: "single mapping", + input: "a: 1\nb: 2\n", + want: [][]string{{"a", "b"}}, + }, + { + name: "multiple documents", + input: `--- +a: 1 +b: 2 +--- +- 1 +--- +c: 1 +b: 2 +--- +"scalar" +`, + want: [][]string{{"a", "b"}, {}, {"c", "b"}, {}}, + }, + { + name: "empty input", + input: "", + want: [][]string{}, + }, + { + name: "invalid YAML", + input: "list: [1, 2,", + want: nil, + wantErr: "position 0: [1:7] sequence end token ']' not found", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.input) + got, err := GetDocumentKeys(r) + cstest.RequireErrorContains(t, err, tc.wantErr) + assert.Equal(t, tc.want, got) + }) + } +} diff --git a/go.mod b/go.mod index 5446ce1..c2f5c77 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.23 require ( github.com/blackfireio/osinfo v1.0.5 github.com/coreos/go-systemd/v22 v22.5.0 + github.com/goccy/go-yaml v1.17.1 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 diff --git a/go.sum b/go.sum index b419592..b52ed27 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6N github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/goccy/go-yaml v1.17.1 h1:LI34wktB2xEE3ONG/2Ar54+/HJVBriAGJ55PHls4YuY= +github.com/goccy/go-yaml v1.17.1/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= From 897ae753e98f80aea53c1ba462cc344aa50e36dd Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 10:52:01 +0200 Subject: [PATCH 03/12] csyaml.SplitDocuments() --- .../{ => internal/merge}/testdata/base.yaml | 0 .../{ => internal/merge}/testdata/expect.yaml | 0 .../merge}/testdata/production.yaml | 0 csyaml/keys.go | 6 +- csyaml/keys_test.go | 11 ++- csyaml/patcher_test.go | 19 +---- csyaml/split.go | 44 +++++++++++ csyaml/split_test.go | 73 +++++++++++++++++++ 8 files changed, 133 insertions(+), 20 deletions(-) rename csyaml/{ => internal/merge}/testdata/base.yaml (100%) rename csyaml/{ => internal/merge}/testdata/expect.yaml (100%) rename csyaml/{ => internal/merge}/testdata/production.yaml (100%) create mode 100644 csyaml/split.go create mode 100644 csyaml/split_test.go diff --git a/csyaml/testdata/base.yaml b/csyaml/internal/merge/testdata/base.yaml similarity index 100% rename from csyaml/testdata/base.yaml rename to csyaml/internal/merge/testdata/base.yaml diff --git a/csyaml/testdata/expect.yaml b/csyaml/internal/merge/testdata/expect.yaml similarity index 100% rename from csyaml/testdata/expect.yaml rename to csyaml/internal/merge/testdata/expect.yaml diff --git a/csyaml/testdata/production.yaml b/csyaml/internal/merge/testdata/production.yaml similarity index 100% rename from csyaml/testdata/production.yaml rename to csyaml/internal/merge/testdata/production.yaml diff --git a/csyaml/keys.go b/csyaml/keys.go index e192dec..967a3d4 100644 --- a/csyaml/keys.go +++ b/csyaml/keys.go @@ -9,8 +9,10 @@ import ( ) // GetDocumentKeys reads all YAML documents from r and for each one -// returns a slice of its top-level keys, in order. Non-mapping documents yield -// an empty slice. +// returns a slice of its top-level keys, in order. +// +// Non-mapping documents yield an empty slice. Duplicate keys +// are now allowed and return an error. func GetDocumentKeys(r io.Reader) ([][]string, error) { // Decode into Go types, but force mappings into MapSlice dec := yaml.NewDecoder(r, yaml.UseOrderedMap()) diff --git a/csyaml/keys_test.go b/csyaml/keys_test.go index 9244508..50b1427 100644 --- a/csyaml/keys_test.go +++ b/csyaml/keys_test.go @@ -1,4 +1,4 @@ -package csyaml +package csyaml_test import ( "strings" @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" ) func TestCollectTopLevelKeys(t *testing.T) { @@ -21,6 +22,12 @@ func TestCollectTopLevelKeys(t *testing.T) { input: "a: 1\nb: 2\n", want: [][]string{{"a", "b"}}, }, + { + name: "duplicate keys mapping", + input: "a: 1\nb: 2\na: 3\n", + want: nil, + wantErr: `position 0: [3:1] mapping key "a" already defined at [1:1]`, + }, { name: "multiple documents", input: `--- @@ -52,7 +59,7 @@ b: 2 for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { r := strings.NewReader(tc.input) - got, err := GetDocumentKeys(r) + got, err := csyaml.GetDocumentKeys(r) cstest.RequireErrorContains(t, err, tc.wantErr) assert.Equal(t, tc.want, got) }) diff --git a/csyaml/patcher_test.go b/csyaml/patcher_test.go index b9697cd..ed72fd8 100644 --- a/csyaml/patcher_test.go +++ b/csyaml/patcher_test.go @@ -8,22 +8,9 @@ import ( "github.com/stretchr/testify/require" "github.com/crowdsecurity/go-cs-lib/csyaml" + "github.com/crowdsecurity/go-cs-lib/cstest" ) -// similar to the one in cstest, but with test number too. We cannot import -// cstest here because of circular dependency. -func requireErrorContains(t *testing.T, err error, expectedErr string) { - t.Helper() - - if expectedErr != "" { - require.ErrorContains(t, err, expectedErr) - - return - } - - require.NoError(t, err) -} - func TestMergedPatchContent(t *testing.T) { t.Parallel() @@ -199,7 +186,7 @@ func TestMergedPatchContent(t *testing.T) { patcher := csyaml.NewPatcher(configPath, ".local") patchedBytes, err := patcher.MergedPatchContent() - requireErrorContains(t, err, tc.expectedErr) + cstest.RequireErrorContains(t, err, tc.expectedErr) require.YAMLEq(t, tc.expected, string(patchedBytes)) }) } @@ -300,7 +287,7 @@ func TestPrependedPatchContent(t *testing.T) { patcher := csyaml.NewPatcher(configPath, ".local") patchedBytes, err := patcher.PrependedPatchContent() - requireErrorContains(t, err, tc.expectedErr) + cstest.RequireErrorContains(t, err, tc.expectedErr) // YAMLeq does not handle multiple documents require.Equal(t, tc.expected, string(patchedBytes)) }) diff --git a/csyaml/split.go b/csyaml/split.go new file mode 100644 index 0000000..1a92158 --- /dev/null +++ b/csyaml/split.go @@ -0,0 +1,44 @@ +package csyaml + +import ( + "errors" + "fmt" + "io" + + "github.com/goccy/go-yaml" +) + +// SplitDocuments reads every YAML document from r and returns each +// as a separate []byte. +// +// Documents are round-tripped through goccy’s Decoder/Marshal pipeline, +// comments and exact original formatting may be lost. +func SplitDocuments(r io.Reader) ([][]byte, error) { + docs := make([][]byte, 0) + + dec := yaml.NewDecoder(r) + + idx := -1 + + for { + var v any + + idx++ + + if err := dec.Decode(&v); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("decoding document %d: %s", idx, yaml.FormatError(err, false, false)) + } + + out, err := yaml.Marshal(v) + if err != nil { + return nil, err + } + + docs = append(docs, out) + } + + return docs, nil +} diff --git a/csyaml/split_test.go b/csyaml/split_test.go new file mode 100644 index 0000000..0288a25 --- /dev/null +++ b/csyaml/split_test.go @@ -0,0 +1,73 @@ +package csyaml_test + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" +) + +func TestSplitDocuments(t *testing.T) { + tests := []struct { + name string + input string + want [][]byte + wantErr string + }{ + { + name: "single mapping", + input: "a: 1\nb: 2\n", + want: [][]byte{[]byte("a: 1\nb: 2\n")}, + }, + { + name: "sequence doc", + input: "- 1\n- 2\n", + want: [][]byte{[]byte("- 1\n- 2\n")}, + }, + { + name: "scalar doc", + input: "\"scalar\"\n", + want: [][]byte{[]byte("scalar\n")}, + }, + { + name: "multiple documents", + input: `--- +a: 1 +b: 2 +--- +- 1 +- 2 +--- +"scalar" +`, + want: [][]byte{ + []byte("a: 1\nb: 2\n"), + []byte("- 1\n- 2\n"), + []byte("scalar\n"), + }, + }, + { + name: "empty input", + input: "", + want: [][]byte{}, + }, + { + name: "invalid YAML", + input: "list: [1, 2", + want: nil, + wantErr: "decoding document 0: [1:7] sequence end token ']' not found", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.input) + docs, err := csyaml.SplitDocuments(r) + cstest.RequireErrorContains(t, err, tc.wantErr) + assert.Equal(t, tc.want, docs) + }) + } +} From e7712687940574978d0544d51e93dc2e4899e902 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 11:02:12 +0200 Subject: [PATCH 04/12] format --- csyaml/patcher_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csyaml/patcher_test.go b/csyaml/patcher_test.go index ed72fd8..c74eccc 100644 --- a/csyaml/patcher_test.go +++ b/csyaml/patcher_test.go @@ -7,8 +7,8 @@ import ( "github.com/stretchr/testify/require" - "github.com/crowdsecurity/go-cs-lib/csyaml" "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" ) func TestMergedPatchContent(t *testing.T) { From 66629ca9839269e8ab41cf94bb83cd9ce6725974 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 11:17:01 +0200 Subject: [PATCH 05/12] deprecate yamlpatch.NewPatcher() --- yamlpatch/merge.go | 166 ++++++++++++++++ yamlpatch/merge_test.go | 238 ++++++++++++++++++++++ yamlpatch/patcher.go | 161 +++++++++++++++ yamlpatch/patcher_test.go | 308 +++++++++++++++++++++++++++++ yamlpatch/testdata/base.yaml | 13 ++ yamlpatch/testdata/expect.yaml | 13 ++ yamlpatch/testdata/production.yaml | 13 ++ 7 files changed, 912 insertions(+) create mode 100644 yamlpatch/merge.go create mode 100644 yamlpatch/merge_test.go create mode 100644 yamlpatch/patcher.go create mode 100644 yamlpatch/patcher_test.go create mode 100644 yamlpatch/testdata/base.yaml create mode 100644 yamlpatch/testdata/expect.yaml create mode 100644 yamlpatch/testdata/production.yaml diff --git a/yamlpatch/merge.go b/yamlpatch/merge.go new file mode 100644 index 0000000..6f16cfa --- /dev/null +++ b/yamlpatch/merge.go @@ -0,0 +1,166 @@ +// +// from https://github.com/uber-go/config/tree/master/internal/merge +// +// Copyright (c) 2019 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package yamlpatch + +import ( + "bytes" + "fmt" + "io" + + yaml "gopkg.in/yaml.v2" +) + +type ( + // YAML has three fundamental types. When unmarshaled into interface{}, + // they're represented like this. + mapping = map[interface{}]interface{} + sequence = []interface{} +) + +// YAML deep-merges any number of YAML sources, with later sources taking +// priority over earlier ones. +// +// Maps are deep-merged. For example, +// {"one": 1, "two": 2} + {"one": 42, "three": 3} +// == {"one": 42, "two": 2, "three": 3} +// Sequences are replaced. For example, +// {"foo": [1, 2, 3]} + {"foo": [4, 5, 6]} +// == {"foo": [4, 5, 6]} +// +// In non-strict mode, duplicate map keys are allowed within a single source, +// with later values overwriting previous ones. Attempting to merge +// mismatched types (e.g., merging a sequence into a map) replaces the old +// value with the new. +// +// Enabling strict mode returns errors in both of the above cases. +func YAML(sources [][]byte, strict bool) (*bytes.Buffer, error) { + var merged interface{} + var hasContent bool + for _, r := range sources { + d := yaml.NewDecoder(bytes.NewReader(r)) + d.SetStrict(strict) + + var contents interface{} + if err := d.Decode(&contents); err == io.EOF { + // Skip empty and comment-only sources, which we should handle + // differently from explicit nils. + continue + } else if err != nil { + return nil, fmt.Errorf("couldn't decode source: %v", err) + } + + hasContent = true + pair, err := merge(merged, contents, strict) + if err != nil { + return nil, err // error is already descriptive enough + } + merged = pair + } + + buf := &bytes.Buffer{} + if !hasContent { + // No sources had any content. To distinguish this from a source with just + // an explicit top-level null, return an empty buffer. + return buf, nil + } + enc := yaml.NewEncoder(buf) + if err := enc.Encode(merged); err != nil { + return nil, fmt.Errorf("couldn't re-serialize merged YAML: %w", err) + } + return buf, nil +} + +func merge(into, from interface{}, strict bool) (interface{}, error) { + // It's possible to handle this with a mass of reflection, but we only need + // to merge whole YAML files. Since we're always unmarshaling into + // interface{}, we only need to handle a few types. This ends up being + // cleaner if we just handle each case explicitly. + if into == nil { + return from, nil + } + if from == nil { + // Allow higher-priority YAML to explicitly nil out lower-priority entries. + return nil, nil + } + if IsScalar(into) && IsScalar(from) { + return from, nil + } + if IsSequence(into) && IsSequence(from) { + return from, nil + } + if IsMapping(into) && IsMapping(from) { + return mergeMapping(into.(mapping), from.(mapping), strict) + } + // YAML types don't match, so no merge is possible. For backward + // compatibility, ignore mismatches unless we're in strict mode and return + // the higher-priority value. + if !strict { + return from, nil + } + return nil, fmt.Errorf("can't merge a %s into a %s", describe(from), describe(into)) +} + +func mergeMapping(into, from mapping, strict bool) (mapping, error) { + merged := make(mapping, len(into)) + for k, v := range into { + merged[k] = v + } + for k := range from { + m, err := merge(merged[k], from[k], strict) + if err != nil { + return nil, err + } + merged[k] = m + } + return merged, nil +} + +// IsMapping reports whether a type is a mapping in YAML, represented as a +// map[interface{}]interface{}. +func IsMapping(i interface{}) bool { + _, is := i.(mapping) + return is +} + +// IsSequence reports whether a type is a sequence in YAML, represented as an +// []interface{}. +func IsSequence(i interface{}) bool { + _, is := i.(sequence) + return is +} + +// IsScalar reports whether a type is a scalar value in YAML. +func IsScalar(i interface{}) bool { + return !IsMapping(i) && !IsSequence(i) +} + +func describe(i interface{}) string { + if IsMapping(i) { + return "mapping" + } + if IsSequence(i) { + return "sequence" + } + return "scalar" +} diff --git a/yamlpatch/merge_test.go b/yamlpatch/merge_test.go new file mode 100644 index 0000000..e86f6fe --- /dev/null +++ b/yamlpatch/merge_test.go @@ -0,0 +1,238 @@ +// Copyright (c) 2018 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package yamlpatch + +import ( + "bytes" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + yaml "gopkg.in/yaml.v2" +) + +func trimcr(s string) string { + return strings.ReplaceAll(s, "\r\n", "\n") +} + +func mustRead(t testing.TB, fname string) []byte { + contents, err := os.ReadFile(fname) + require.NoError(t, err, "failed to read file: %s", fname) + return contents +} + +func dump(t testing.TB, actual, expected string) { + // It's impossible to debug YAML if the actual and expected values are + // printed on a single line. + t.Logf("Actual:\n\n%s\n\n", actual) + t.Logf("Expected:\n\n%s\n\n", expected) +} + +func strip(s string) string { + // It's difficult to write string constants that are valid YAML. Normalize + // strings for ease of testing. + s = strings.TrimSpace(s) + s = strings.Replace(s, "\t", " ", -1) + return s +} + +func canonicalize(t testing.TB, s string) string { + // round-trip to canonicalize formatting + var i interface{} + require.NoError(t, + yaml.Unmarshal([]byte(strip(s)), &i), + "canonicalize: couldn't unmarshal YAML", + ) + formatted, err := yaml.Marshal(i) + require.NoError(t, err, "canonicalize: couldn't marshal YAML") + return string(bytes.TrimSpace(formatted)) +} + +func unmarshal(t testing.TB, s string) interface{} { + var i interface{} + require.NoError(t, yaml.Unmarshal([]byte(strip(s)), &i), "unmarshaling failed") + return i +} + +func succeeds(t testing.TB, strict bool, left, right, expect string) { + l, r := unmarshal(t, left), unmarshal(t, right) + m, err := merge(l, r, strict) + require.NoError(t, err, "merge failed") + + actualBytes, err := yaml.Marshal(m) + require.NoError(t, err, "couldn't marshal merged structure") + actual := canonicalize(t, string(actualBytes)) + expect = canonicalize(t, expect) + if !assert.Equal(t, expect, actual) { + dump(t, actual, expect) + } +} + +func fails(t testing.TB, strict bool, left, right string) { + _, err := merge(unmarshal(t, left), unmarshal(t, right), strict) + assert.Error(t, err, "merge succeeded") +} + +func TestIntegration(t *testing.T) { + base := mustRead(t, "testdata/base.yaml") + prod := mustRead(t, "testdata/production.yaml") + expect := mustRead(t, "testdata/expect.yaml") + + merged, err := YAML([][]byte{base, prod}, true /* strict */) + require.NoError(t, err, "merge failed") + + if !assert.Equal(t, trimcr(string(expect)), merged.String(), "unexpected contents") { + dump(t, merged.String(), string(expect)) + } +} + +func TestEmpty(t *testing.T) { + full := []byte("foo: bar\n") + null := []byte("~") + + tests := []struct { + desc string + sources [][]byte + expect string + }{ + {"empty base", [][]byte{nil, full}, string(full)}, + {"empty override", [][]byte{full, nil}, string(full)}, + {"both empty", [][]byte{nil, nil}, ""}, + {"null base", [][]byte{null, full}, string(full)}, + {"null override", [][]byte{full, null}, "null\n"}, + {"empty base and null override", [][]byte{nil, null}, "null\n"}, + {"null base and empty override", [][]byte{null, nil}, "null\n"}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.desc, func(t *testing.T) { + merged, err := YAML(tt.sources, true /* strict */) + require.NoError(t, err, "merge failed") + assert.Equal(t, tt.expect, merged.String(), "wrong contents after merge") + }) + } +} + +func TestSuccess(t *testing.T) { + left := ` +fun: [maserati, porsche] +practical: {toyota: camry, honda: accord} +occupants: + honda: {driver: jane, backseat: [nate]} + ` + right := ` +fun: [lamborghini, porsche] +practical: {honda: civic, nissan: altima} +occupants: + honda: {passenger: arthur, backseat: [nora]} + ` + expect := ` +fun: [lamborghini, porsche] +practical: {toyota: camry, honda: civic, nissan: altima} +occupants: + honda: {passenger: arthur, driver: jane, backseat: [nora]} + ` + succeeds(t, true, left, right, expect) + succeeds(t, false, left, right, expect) +} + +func TestErrors(t *testing.T) { + check := func(t testing.TB, strict bool, sources ...[]byte) error { + _, err := YAML(sources, strict) + return err + } + t.Run("tabs in source", func(t *testing.T) { + src := []byte("foo:\n\tbar:baz") + assert.Error(t, check(t, false, src), "expected error in permissive mode") + assert.Error(t, check(t, true, src), "expected error in strict mode") + }) + + t.Run("duplicated keys", func(t *testing.T) { + src := []byte("{foo: bar, foo: baz}") + assert.NoError(t, check(t, false, src), "expected success in permissive mode") + assert.Error(t, check(t, true, src), "expected error in permissive mode") + }) + + t.Run("merge error", func(t *testing.T) { + left := []byte("foo: [1, 2]") + right := []byte("foo: {bar: baz}") + assert.NoError(t, check(t, false, left, right), "expected success in permissive mode") + assert.Error(t, check(t, true, left, right), "expected error in strict mode") + }) +} + +func TestMismatchedTypes(t *testing.T) { + tests := []struct { + desc string + left, right string + }{ + {"sequence and mapping", "[one, two]", "{foo: bar}"}, + {"sequence and scalar", "[one, two]", "foo"}, + {"mapping and scalar", "{foo: bar}", "foo"}, + {"nested", "{foo: [one, two]}", "{foo: bar}"}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.desc+" strict", func(t *testing.T) { + fails(t, true, tt.left, tt.right) + }) + t.Run(tt.desc+" permissive", func(t *testing.T) { + // prefer the higher-priority value + succeeds(t, false, tt.left, tt.right, tt.right) + }) + } +} + +func TestBooleans(t *testing.T) { + // YAML helpfully interprets many strings as Booleans. + tests := []struct { + in, out string + }{ + {"yes", "true"}, + {"YES", "true"}, + {"on", "true"}, + {"ON", "true"}, + {"no", "false"}, + {"NO", "false"}, + {"off", "false"}, + {"OFF", "false"}, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.in, func(t *testing.T) { + succeeds(t, true, "", tt.in, tt.out) + succeeds(t, false, "", tt.in, tt.out) + }) + } +} + +func TestExplicitNil(t *testing.T) { + base := `foo: {one: two}` + override := `foo: ~` + expect := `foo: ~` + succeeds(t, true, base, override, expect) + succeeds(t, false, base, override, expect) +} diff --git a/yamlpatch/patcher.go b/yamlpatch/patcher.go new file mode 100644 index 0000000..42f6346 --- /dev/null +++ b/yamlpatch/patcher.go @@ -0,0 +1,161 @@ +package yamlpatch + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + + log "github.com/sirupsen/logrus" + "gopkg.in/yaml.v2" +) + +type Patcher struct { + BaseFilePath string + PatchFilePath string + quiet bool +} + +// Deprecated: use csyaml.NewPatcher instead +func NewPatcher(filePath string, suffix string) *Patcher { + return &Patcher{ + BaseFilePath: filePath, + PatchFilePath: filePath + suffix, + quiet: false, + } +} + +// SetQuiet sets the quiet flag, which will log as DEBUG_LEVEL instead of INFO. +func (p *Patcher) SetQuiet(quiet bool) { + p.quiet = quiet +} + +// read a single YAML file, check for errors (the merge package doesn't) then return the content as bytes. +func readYAML(filePath string) ([]byte, error) { + content, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("while reading yaml file: %w", err) + } + + var yamlMap map[any]any + + if err = yaml.Unmarshal(content, &yamlMap); err != nil { + return nil, fmt.Errorf("%s: %w", filePath, err) + } + + return content, nil +} + +// MergedPatchContent reads a YAML file and, if it exists, its patch file, +// then merges them and returns it serialized. +func (p *Patcher) MergedPatchContent() ([]byte, error) { + base, err := readYAML(p.BaseFilePath) + if err != nil { + return nil, err + } + + over, err := readYAML(p.PatchFilePath) + if errors.Is(err, os.ErrNotExist) { + return base, nil + } + + if err != nil { + return nil, err + } + + logf := log.Infof + if p.quiet { + logf = log.Debugf + } + + logf("Loading yaml file: '%s' with additional values from '%s'", p.BaseFilePath, p.PatchFilePath) + + // strict mode true, will raise errors for duplicate map keys and + // overriding with a different type + patched, err := YAML([][]byte{base, over}, true) + if err != nil { + return nil, err + } + + return patched.Bytes(), nil +} + +// read multiple YAML documents inside a file, and writes them to a buffer +// separated by the appropriate '---' terminators. +func decodeDocuments(file *os.File, buf *bytes.Buffer, finalDashes bool) error { + dec := yaml.NewDecoder(file) + dec.SetStrict(true) + + dashTerminator := false + + for { + yml := make(map[any]any) + + err := dec.Decode(&yml) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + + return fmt.Errorf("while decoding %s: %w", file.Name(), err) + } + + docBytes, err := yaml.Marshal(&yml) + if err != nil { + return fmt.Errorf("while marshaling %s: %w", file.Name(), err) + } + + if dashTerminator { + buf.WriteString("---\n") + } + + buf.Write(docBytes) + + dashTerminator = true + } + + if dashTerminator && finalDashes { + buf.WriteString("---\n") + } + + return nil +} + +// PrependedPatchContent collates the base .yaml file with the .yaml.patch, by putting +// the content of the patch BEFORE the base document. The result is a multi-document +// YAML in all cases, even if the base and patch files are single documents. +func (p *Patcher) PrependedPatchContent() ([]byte, error) { + patchFile, err := os.Open(p.PatchFilePath) + // optional file, ignore if it does not exist + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("while opening %s: %w", p.PatchFilePath, err) + } + + var result bytes.Buffer + + if patchFile != nil { + if err = decodeDocuments(patchFile, &result, true); err != nil { + return nil, err + } + + logf := log.Infof + + if p.quiet { + logf = log.Debugf + } + + logf("Prepending yaml: '%s' with '%s'", p.BaseFilePath, p.PatchFilePath) + } + + baseFile, err := os.Open(p.BaseFilePath) + if err != nil { + return nil, fmt.Errorf("while opening %s: %w", p.BaseFilePath, err) + } + + if err = decodeDocuments(baseFile, &result, false); err != nil { + return nil, err + } + + return result.Bytes(), nil +} diff --git a/yamlpatch/patcher_test.go b/yamlpatch/patcher_test.go new file mode 100644 index 0000000..0f945bf --- /dev/null +++ b/yamlpatch/patcher_test.go @@ -0,0 +1,308 @@ +package yamlpatch_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/crowdsecurity/go-cs-lib/yamlpatch" +) + +// similar to the one in cstest, but with test number too. We cannot import +// cstest here because of circular dependency. +func requireErrorContains(t *testing.T, err error, expectedErr string) { + t.Helper() + + if expectedErr != "" { + require.ErrorContains(t, err, expectedErr) + + return + } + + require.NoError(t, err) +} + +func TestMergedPatchContent(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + base string + patch string + expected string + expectedErr string + }{ + { + "invalid yaml in base", + "notayaml", + "", + "", + "config.yaml: yaml: unmarshal errors:", + }, + { + "invalid yaml in base (detailed message)", + "notayaml", + "", + "", + "cannot unmarshal !!str `notayaml`", + }, + { + "invalid yaml in patch", + "", + "notayaml", + "", + "config.yaml.local: yaml: unmarshal errors:", + }, + { + "invalid yaml in patch (detailed message)", + "", + "notayaml", + "", + "cannot unmarshal !!str `notayaml`", + }, + { + "basic merge", + "{'first':{'one':1,'two':2},'second':{'three':3}}", + "{'first':{'one':10,'dos':2}}", + "{'first':{'one':10,'dos':2,'two':2},'second':{'three':3}}", + "", + }, + + // bools and zero values; here the "mergo" package had issues + // so we used something simpler. + + { + "bool merge - off if false", + "bool: on", + "bool: off", + "bool: false", + "", + }, + { + "bool merge - on is true", + "bool: off", + "bool: on", + "bool: true", + "", + }, + { + "string is not a bool - on to off", + "{'bool': 'on'}", + "{'bool': 'off'}", + "{'bool': 'off'}", + "", + }, + { + "string is not a bool - off to on", + "{'bool': 'off'}", + "{'bool': 'on'}", + "{'bool': 'on'}", + "", + }, + { + "bool merge - true to false", + "{'bool': true}", + "{'bool': false}", + "{'bool': false}", + "", + }, + { + "bool merge - false to true", + "{'bool': false}", + "{'bool': true}", + "{'bool': true}", + "", + }, + { + "string merge - value to value", + "{'string': 'value'}", + "{'string': ''}", + "{'string': ''}", + "", + }, + { + "sequence merge - value to empty", + "{'sequence': [1, 2]}", + "{'sequence': []}", + "{'sequence': []}", + "", + }, + { + "map merge - value to value", + "{'map': {'one': 1, 'two': 2}}", + "{'map': {}}", + "{'map': {'one': 1, 'two': 2}}", + "", + }, + + // mismatched types + + { + "can't merge a sequence into a mapping", + "map: {'key': 'value'}", + "map: ['value1', 'value2']", + "", + "can't merge a sequence into a mapping", + }, + { + "can't merge a scalar into a mapping", + "map: {'key': 'value'}", + "map: 3", + "", + "can't merge a scalar into a mapping", + }, + { + "can't merge a mapping into a sequence", + "sequence: ['value1', 'value2']", + "sequence: {'key': 'value'}", + "", + "can't merge a mapping into a sequence", + }, + { + "can't merge a scalar into a sequence", + "sequence: ['value1', 'value2']", + "sequence: 3", + "", + "can't merge a scalar into a sequence", + }, + { + "can't merge a sequence into a scalar", + "scalar: true", + "scalar: ['value1', 'value2']", + "", + "can't merge a sequence into a scalar", + }, + { + "can't merge a mapping into a scalar", + "scalar: true", + "scalar: {'key': 'value'}", + "", + "can't merge a mapping into a scalar", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dirPath := t.TempDir() + + configPath := filepath.Join(dirPath, "config.yaml") + patchPath := filepath.Join(dirPath, "config.yaml.local") + err := os.WriteFile(configPath, []byte(tc.base), 0o600) + require.NoError(t, err) + + err = os.WriteFile(patchPath, []byte(tc.patch), 0o600) + require.NoError(t, err) + + patcher := yamlpatch.NewPatcher(configPath, ".local") + patchedBytes, err := patcher.MergedPatchContent() + requireErrorContains(t, err, tc.expectedErr) + require.YAMLEq(t, tc.expected, string(patchedBytes)) + }) + } +} + +func TestPrependedPatchContent(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + base string + patch string + expected string + expectedErr string + }{ + // we test with scalars here, because YAMLeq does not work + // with multi-document files, so we need char-to-char comparison + // which is noisy with sequences and (unordered) mappings + { + "newlines are always appended, if missing, by yaml.Marshal()", + "foo: bar", + "", + "foo: bar\n", + "", + }, + { + "prepend empty document", + "foo: bar\n", + "", + "foo: bar\n", + "", + }, + { + "prepend a document to another", + "foo: bar", + "baz: qux", + "baz: qux\n---\nfoo: bar\n", + "", + }, + { + "prepend document with same key", + "foo: true", + "foo: false", + "foo: false\n---\nfoo: true\n", + "", + }, + { + "prepend multiple documents", + "one: 1\n---\ntwo: 2\n---\none: 3", + "four: 4\n---\none: 1.1", + "four: 4\n---\none: 1.1\n---\none: 1\n---\ntwo: 2\n---\none: 3\n", + "", + }, + { + "invalid yaml in base", + "blablabla", + "", + "", + "config.yaml: yaml: unmarshal errors:", + }, + { + "invalid yaml in base (detailed message)", + "blablabla", + "", + "", + "cannot unmarshal !!str `blablabla`", + }, + { + "invalid yaml in patch", + "", + "blablabla", + "", + "config.yaml.local: yaml: unmarshal errors:", + }, + { + "invalid yaml in patch (detailed message)", + "", + "blablabla", + "", + "cannot unmarshal !!str `blablabla`", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dirPath := t.TempDir() + + configPath := filepath.Join(dirPath, "config.yaml") + patchPath := filepath.Join(dirPath, "config.yaml.local") + + err := os.WriteFile(configPath, []byte(tc.base), 0o600) + require.NoError(t, err) + + err = os.WriteFile(patchPath, []byte(tc.patch), 0o600) + require.NoError(t, err) + + patcher := yamlpatch.NewPatcher(configPath, ".local") + patchedBytes, err := patcher.PrependedPatchContent() + requireErrorContains(t, err, tc.expectedErr) + // YAMLeq does not handle multiple documents + require.Equal(t, tc.expected, string(patchedBytes)) + }) + } +} diff --git a/yamlpatch/testdata/base.yaml b/yamlpatch/testdata/base.yaml new file mode 100644 index 0000000..4ac551a --- /dev/null +++ b/yamlpatch/testdata/base.yaml @@ -0,0 +1,13 @@ +fun: + - maserati + - porsche + +practical: + toyota: camry + honda: accord + +occupants: + honda: + driver: jane + backseat: + - nate diff --git a/yamlpatch/testdata/expect.yaml b/yamlpatch/testdata/expect.yaml new file mode 100644 index 0000000..c190915 --- /dev/null +++ b/yamlpatch/testdata/expect.yaml @@ -0,0 +1,13 @@ +fun: +- lamborghini +- porsche +occupants: + honda: + backseat: + - nora + driver: jane + passenger: arthur +practical: + honda: civic + nissan: altima + toyota: camry diff --git a/yamlpatch/testdata/production.yaml b/yamlpatch/testdata/production.yaml new file mode 100644 index 0000000..7dab2ae --- /dev/null +++ b/yamlpatch/testdata/production.yaml @@ -0,0 +1,13 @@ +fun: + - lamborghini + - porsche + +practical: + honda: civic + nissan: altima + +occupants: + honda: + passenger: arthur + backseat: + - nora From bb03667f8a8b9230a12ffb3210a3faaf1482f04f Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 11:51:13 +0200 Subject: [PATCH 06/12] re-implement Merge with goccy --- csyaml/merge.go | 131 +++++++++++++++++++++++++++++++++++++++++++ csyaml/merge_test.go | 79 ++++++++++++++++++++++++++ csyaml/split_test.go | 2 +- 3 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 csyaml/merge.go create mode 100644 csyaml/merge_test.go diff --git a/csyaml/merge.go b/csyaml/merge.go new file mode 100644 index 0000000..6f75b85 --- /dev/null +++ b/csyaml/merge.go @@ -0,0 +1,131 @@ +// Package merge implements a deep-merge over multiple YAML documents, +// preserving key order and rejecting invalid documents. +// +// Maps are deep-merged; sequences and scalars are replaced by later inputs. +// Type mismatches result in an error. +// +// Adapted from https://github.com/uber-go/config/tree/master/internal/merge +package csyaml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + + "github.com/goccy/go-yaml" +) + +// Merge reads each YAML source in inputs, merges them in order (later +// sources override earlier), and returns the result as a bytes.Buffer. +// Always runs in strict mode: type mismatches or duplicate keys cause an error. +func Merge(inputs [][]byte) (*bytes.Buffer, error) { + var merged any + hasContent := false + for idx, data := range inputs { + dec := yaml.NewDecoder(bytes.NewReader(data), yaml.UseOrderedMap(), yaml.Strict()) + + var value any + if err := dec.Decode(&value); err != nil { + if errors.Is(err, io.EOF) { + continue + } + return nil, fmt.Errorf("decoding document %d: %s", idx, yaml.FormatError(err, false, false)) + } + hasContent = true + + mergedValue, err := mergeValue(merged, value) + if err != nil { + return nil, err + } + + merged = mergedValue + } + + buf := &bytes.Buffer{} + if !hasContent { + return buf, nil + } + + enc := yaml.NewEncoder(buf) + if err := enc.Encode(merged); err != nil { + return nil, fmt.Errorf("encoding merged YAML: %w", err) + } + + return buf, nil +} + +// mergeValue merges from+into in strict mode. +func mergeValue(into, from any) (any, error) { + if into == nil { + return from, nil + } + + if from == nil { + return nil, nil + } + + // Scalars: override + if !isMapping(into) && !isSequence(into) && !isMapping(from) && !isSequence(from) { + return from, nil + } + + // Sequences: replace + if isSequence(into) && isSequence(from) { + return from, nil + } + + // Mappings: deep-merge + if isMapping(into) && isMapping(from) { + return mergeMap(into.(yaml.MapSlice), from.(yaml.MapSlice)) + } + + // Type mismatch: strict + return nil, fmt.Errorf("cannot merge %s into %s", describe(from), describe(into)) +} + +// mergeMap deep-merges two ordered maps (MapSlice) in strict mode. +func mergeMap(into, from yaml.MapSlice) (yaml.MapSlice, error) { + out := make(yaml.MapSlice, len(into)) + copy(out, into) + for _, item := range from { + matched := false + for i, existing := range out { + if !reflect.DeepEqual(existing.Key, item.Key) { + continue + } + + mergedVal, err := mergeValue(existing.Value, item.Value) + if err != nil { + return nil, err + } + out[i].Value = mergedVal + matched = true + } + if !matched { + out = append(out, yaml.MapItem{Key: item.Key, Value: item.Value}) + } + } + return out, nil +} + +func isMapping(i any) bool { + _, ok := i.(yaml.MapSlice) + return ok +} + +func isSequence(i any) bool { + _, ok := i.([]any) + return ok +} + +func describe(i any) string { + if isMapping(i) { + return "mapping" + } + if isSequence(i) { + return "sequence" + } + return "scalar" +} diff --git a/csyaml/merge_test.go b/csyaml/merge_test.go new file mode 100644 index 0000000..0bb7d27 --- /dev/null +++ b/csyaml/merge_test.go @@ -0,0 +1,79 @@ +package csyaml_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" +) + +func TestMergeYAML(t *testing.T) { + tests := []struct { + name string + inputs []string + want string + wantErr string + }{ + { + name: "single doc passes through", + inputs: []string{"a: 1\nb: 2\n"}, + want: "a: 1\nb: 2\n", + }, + { + name: "merge maps deep", + inputs: []string{ + "one: 1\ntwo: 2\n", + "two: 20\nthree: 3\n", + }, + want: "one: 1\ntwo: 20\nthree: 3\n", + }, + { + name: "sequence replaced", + inputs: []string{ + "list: [1,2,3]\n", + "list: [4,5]\n", + }, + want: "list:\n- 4\n- 5\n", + }, + { + name: "scalar override", + inputs: []string{ + "foo: bar\n", + "foo: baz\n", + }, + want: "foo: baz\n", + }, + { + name: "type mismatch error", + inputs: []string{"foo: 1\n", "foo:\n - a\n"}, + wantErr: "cannot merge sequence into scalar", + }, + { + name: "invalid yaml error", + inputs: []string{"list: [1,2,"}, + wantErr: "decoding document 0: [1:7] sequence end token ']' not found", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // build byte slices + var bs [][]byte + for _, s := range tc.inputs { + bs = append(bs, []byte(s)) + } + + buf, err := csyaml.Merge(bs) + + cstest.RequireErrorContains(t, err, tc.wantErr) + if tc.wantErr != "" { + require.Nil(t, buf) + } else { + assert.Equal(t, tc.want, buf.String()) + } + }) + } +} diff --git a/csyaml/split_test.go b/csyaml/split_test.go index 0288a25..da999af 100644 --- a/csyaml/split_test.go +++ b/csyaml/split_test.go @@ -56,7 +56,7 @@ b: 2 }, { name: "invalid YAML", - input: "list: [1, 2", + input: "list: [1, 2,", want: nil, wantErr: "decoding document 0: [1:7] sequence end token ']' not found", }, From a183994f7f03b0c3f74e5b0de507aec2075221c5 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 21 May 2025 12:34:00 +0200 Subject: [PATCH 07/12] tests --- .golangci.yml | 10 +- csyaml/internal/merge/merge.go | 166 ------------ csyaml/internal/merge/merge_test.go | 238 ------------------ csyaml/internal/merge/testdata/base.yaml | 13 - csyaml/internal/merge/testdata/expect.yaml | 13 - .../internal/merge/testdata/production.yaml | 13 - csyaml/merge.go | 8 +- csyaml/merge_test.go | 145 ++++++++++- csyaml/patcher.go | 23 +- csyaml/patcher_test.go | 25 +- yamlpatch/patcher.go | 2 +- 11 files changed, 168 insertions(+), 488 deletions(-) delete mode 100644 csyaml/internal/merge/merge.go delete mode 100644 csyaml/internal/merge/merge_test.go delete mode 100644 csyaml/internal/merge/testdata/base.yaml delete mode 100644 csyaml/internal/merge/testdata/expect.yaml delete mode 100644 csyaml/internal/merge/testdata/production.yaml diff --git a/.golangci.yml b/.golangci.yml index ba45b83..5d9db65 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -41,7 +41,7 @@ linters: rules: yaml: files: - - '!**/csyaml/patcher.go' + - '!**/yamlpatch/patcher.go' deny: - pkg: gopkg.in/yaml.v2 desc: yaml.v2 is deprecated for new code in favor of yaml.v3 @@ -157,8 +157,8 @@ linters: text: the methods of "DurationWithDays" use pointer receiver and non-pointer receiver. paths: - - csyaml/internal/merge/merge.go - - csyaml/internal/merge/merge_test.go + - yamlpatch/merge.go + - yamlpatch/merge_test.go - third_party$ - builtin$ - examples$ @@ -182,8 +182,8 @@ formatters: exclusions: paths: - - csyaml/internal/merge/merge.go - - csyaml/internal/merge/merge_test.go + - yamlpatch/merge.go + - yamlpatch/merge_test.go - third_party$ - builtin$ - examples$ diff --git a/csyaml/internal/merge/merge.go b/csyaml/internal/merge/merge.go deleted file mode 100644 index b01e8c8..0000000 --- a/csyaml/internal/merge/merge.go +++ /dev/null @@ -1,166 +0,0 @@ -// -// from https://github.com/uber-go/config/tree/master/internal/merge -// -// Copyright (c) 2019 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package merge - -import ( - "bytes" - "fmt" - "io" - - yaml "gopkg.in/yaml.v2" -) - -type ( - // YAML has three fundamental types. When unmarshaled into interface{}, - // they're represented like this. - mapping = map[interface{}]interface{} - sequence = []interface{} -) - -// YAML deep-merges any number of YAML sources, with later sources taking -// priority over earlier ones. -// -// Maps are deep-merged. For example, -// {"one": 1, "two": 2} + {"one": 42, "three": 3} -// == {"one": 42, "two": 2, "three": 3} -// Sequences are replaced. For example, -// {"foo": [1, 2, 3]} + {"foo": [4, 5, 6]} -// == {"foo": [4, 5, 6]} -// -// In non-strict mode, duplicate map keys are allowed within a single source, -// with later values overwriting previous ones. Attempting to merge -// mismatched types (e.g., merging a sequence into a map) replaces the old -// value with the new. -// -// Enabling strict mode returns errors in both of the above cases. -func YAML(sources [][]byte, strict bool) (*bytes.Buffer, error) { - var merged interface{} - var hasContent bool - for _, r := range sources { - d := yaml.NewDecoder(bytes.NewReader(r)) - d.SetStrict(strict) - - var contents interface{} - if err := d.Decode(&contents); err == io.EOF { - // Skip empty and comment-only sources, which we should handle - // differently from explicit nils. - continue - } else if err != nil { - return nil, fmt.Errorf("couldn't decode source: %v", err) - } - - hasContent = true - pair, err := merge(merged, contents, strict) - if err != nil { - return nil, err // error is already descriptive enough - } - merged = pair - } - - buf := &bytes.Buffer{} - if !hasContent { - // No sources had any content. To distinguish this from a source with just - // an explicit top-level null, return an empty buffer. - return buf, nil - } - enc := yaml.NewEncoder(buf) - if err := enc.Encode(merged); err != nil { - return nil, fmt.Errorf("couldn't re-serialize merged YAML: %w", err) - } - return buf, nil -} - -func merge(into, from interface{}, strict bool) (interface{}, error) { - // It's possible to handle this with a mass of reflection, but we only need - // to merge whole YAML files. Since we're always unmarshaling into - // interface{}, we only need to handle a few types. This ends up being - // cleaner if we just handle each case explicitly. - if into == nil { - return from, nil - } - if from == nil { - // Allow higher-priority YAML to explicitly nil out lower-priority entries. - return nil, nil - } - if IsScalar(into) && IsScalar(from) { - return from, nil - } - if IsSequence(into) && IsSequence(from) { - return from, nil - } - if IsMapping(into) && IsMapping(from) { - return mergeMapping(into.(mapping), from.(mapping), strict) - } - // YAML types don't match, so no merge is possible. For backward - // compatibility, ignore mismatches unless we're in strict mode and return - // the higher-priority value. - if !strict { - return from, nil - } - return nil, fmt.Errorf("can't merge a %s into a %s", describe(from), describe(into)) -} - -func mergeMapping(into, from mapping, strict bool) (mapping, error) { - merged := make(mapping, len(into)) - for k, v := range into { - merged[k] = v - } - for k := range from { - m, err := merge(merged[k], from[k], strict) - if err != nil { - return nil, err - } - merged[k] = m - } - return merged, nil -} - -// IsMapping reports whether a type is a mapping in YAML, represented as a -// map[interface{}]interface{}. -func IsMapping(i interface{}) bool { - _, is := i.(mapping) - return is -} - -// IsSequence reports whether a type is a sequence in YAML, represented as an -// []interface{}. -func IsSequence(i interface{}) bool { - _, is := i.(sequence) - return is -} - -// IsScalar reports whether a type is a scalar value in YAML. -func IsScalar(i interface{}) bool { - return !IsMapping(i) && !IsSequence(i) -} - -func describe(i interface{}) string { - if IsMapping(i) { - return "mapping" - } - if IsSequence(i) { - return "sequence" - } - return "scalar" -} diff --git a/csyaml/internal/merge/merge_test.go b/csyaml/internal/merge/merge_test.go deleted file mode 100644 index 93175f6..0000000 --- a/csyaml/internal/merge/merge_test.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright (c) 2018 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package merge - -import ( - "bytes" - "os" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - yaml "gopkg.in/yaml.v2" -) - -func trimcr(s string) string { - return strings.ReplaceAll(s, "\r\n", "\n") -} - -func mustRead(t testing.TB, fname string) []byte { - contents, err := os.ReadFile(fname) - require.NoError(t, err, "failed to read file: %s", fname) - return contents -} - -func dump(t testing.TB, actual, expected string) { - // It's impossible to debug YAML if the actual and expected values are - // printed on a single line. - t.Logf("Actual:\n\n%s\n\n", actual) - t.Logf("Expected:\n\n%s\n\n", expected) -} - -func strip(s string) string { - // It's difficult to write string constants that are valid YAML. Normalize - // strings for ease of testing. - s = strings.TrimSpace(s) - s = strings.Replace(s, "\t", " ", -1) - return s -} - -func canonicalize(t testing.TB, s string) string { - // round-trip to canonicalize formatting - var i interface{} - require.NoError(t, - yaml.Unmarshal([]byte(strip(s)), &i), - "canonicalize: couldn't unmarshal YAML", - ) - formatted, err := yaml.Marshal(i) - require.NoError(t, err, "canonicalize: couldn't marshal YAML") - return string(bytes.TrimSpace(formatted)) -} - -func unmarshal(t testing.TB, s string) interface{} { - var i interface{} - require.NoError(t, yaml.Unmarshal([]byte(strip(s)), &i), "unmarshaling failed") - return i -} - -func succeeds(t testing.TB, strict bool, left, right, expect string) { - l, r := unmarshal(t, left), unmarshal(t, right) - m, err := merge(l, r, strict) - require.NoError(t, err, "merge failed") - - actualBytes, err := yaml.Marshal(m) - require.NoError(t, err, "couldn't marshal merged structure") - actual := canonicalize(t, string(actualBytes)) - expect = canonicalize(t, expect) - if !assert.Equal(t, expect, actual) { - dump(t, actual, expect) - } -} - -func fails(t testing.TB, strict bool, left, right string) { - _, err := merge(unmarshal(t, left), unmarshal(t, right), strict) - assert.Error(t, err, "merge succeeded") -} - -func TestIntegration(t *testing.T) { - base := mustRead(t, "testdata/base.yaml") - prod := mustRead(t, "testdata/production.yaml") - expect := mustRead(t, "testdata/expect.yaml") - - merged, err := YAML([][]byte{base, prod}, true /* strict */) - require.NoError(t, err, "merge failed") - - if !assert.Equal(t, trimcr(string(expect)), merged.String(), "unexpected contents") { - dump(t, merged.String(), string(expect)) - } -} - -func TestEmpty(t *testing.T) { - full := []byte("foo: bar\n") - null := []byte("~") - - tests := []struct { - desc string - sources [][]byte - expect string - }{ - {"empty base", [][]byte{nil, full}, string(full)}, - {"empty override", [][]byte{full, nil}, string(full)}, - {"both empty", [][]byte{nil, nil}, ""}, - {"null base", [][]byte{null, full}, string(full)}, - {"null override", [][]byte{full, null}, "null\n"}, - {"empty base and null override", [][]byte{nil, null}, "null\n"}, - {"null base and empty override", [][]byte{null, nil}, "null\n"}, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.desc, func(t *testing.T) { - merged, err := YAML(tt.sources, true /* strict */) - require.NoError(t, err, "merge failed") - assert.Equal(t, tt.expect, merged.String(), "wrong contents after merge") - }) - } -} - -func TestSuccess(t *testing.T) { - left := ` -fun: [maserati, porsche] -practical: {toyota: camry, honda: accord} -occupants: - honda: {driver: jane, backseat: [nate]} - ` - right := ` -fun: [lamborghini, porsche] -practical: {honda: civic, nissan: altima} -occupants: - honda: {passenger: arthur, backseat: [nora]} - ` - expect := ` -fun: [lamborghini, porsche] -practical: {toyota: camry, honda: civic, nissan: altima} -occupants: - honda: {passenger: arthur, driver: jane, backseat: [nora]} - ` - succeeds(t, true, left, right, expect) - succeeds(t, false, left, right, expect) -} - -func TestErrors(t *testing.T) { - check := func(t testing.TB, strict bool, sources ...[]byte) error { - _, err := YAML(sources, strict) - return err - } - t.Run("tabs in source", func(t *testing.T) { - src := []byte("foo:\n\tbar:baz") - assert.Error(t, check(t, false, src), "expected error in permissive mode") - assert.Error(t, check(t, true, src), "expected error in strict mode") - }) - - t.Run("duplicated keys", func(t *testing.T) { - src := []byte("{foo: bar, foo: baz}") - assert.NoError(t, check(t, false, src), "expected success in permissive mode") - assert.Error(t, check(t, true, src), "expected error in permissive mode") - }) - - t.Run("merge error", func(t *testing.T) { - left := []byte("foo: [1, 2]") - right := []byte("foo: {bar: baz}") - assert.NoError(t, check(t, false, left, right), "expected success in permissive mode") - assert.Error(t, check(t, true, left, right), "expected error in strict mode") - }) -} - -func TestMismatchedTypes(t *testing.T) { - tests := []struct { - desc string - left, right string - }{ - {"sequence and mapping", "[one, two]", "{foo: bar}"}, - {"sequence and scalar", "[one, two]", "foo"}, - {"mapping and scalar", "{foo: bar}", "foo"}, - {"nested", "{foo: [one, two]}", "{foo: bar}"}, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.desc+" strict", func(t *testing.T) { - fails(t, true, tt.left, tt.right) - }) - t.Run(tt.desc+" permissive", func(t *testing.T) { - // prefer the higher-priority value - succeeds(t, false, tt.left, tt.right, tt.right) - }) - } -} - -func TestBooleans(t *testing.T) { - // YAML helpfully interprets many strings as Booleans. - tests := []struct { - in, out string - }{ - {"yes", "true"}, - {"YES", "true"}, - {"on", "true"}, - {"ON", "true"}, - {"no", "false"}, - {"NO", "false"}, - {"off", "false"}, - {"OFF", "false"}, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.in, func(t *testing.T) { - succeeds(t, true, "", tt.in, tt.out) - succeeds(t, false, "", tt.in, tt.out) - }) - } -} - -func TestExplicitNil(t *testing.T) { - base := `foo: {one: two}` - override := `foo: ~` - expect := `foo: ~` - succeeds(t, true, base, override, expect) - succeeds(t, false, base, override, expect) -} diff --git a/csyaml/internal/merge/testdata/base.yaml b/csyaml/internal/merge/testdata/base.yaml deleted file mode 100644 index 4ac551a..0000000 --- a/csyaml/internal/merge/testdata/base.yaml +++ /dev/null @@ -1,13 +0,0 @@ -fun: - - maserati - - porsche - -practical: - toyota: camry - honda: accord - -occupants: - honda: - driver: jane - backseat: - - nate diff --git a/csyaml/internal/merge/testdata/expect.yaml b/csyaml/internal/merge/testdata/expect.yaml deleted file mode 100644 index c190915..0000000 --- a/csyaml/internal/merge/testdata/expect.yaml +++ /dev/null @@ -1,13 +0,0 @@ -fun: -- lamborghini -- porsche -occupants: - honda: - backseat: - - nora - driver: jane - passenger: arthur -practical: - honda: civic - nissan: altima - toyota: camry diff --git a/csyaml/internal/merge/testdata/production.yaml b/csyaml/internal/merge/testdata/production.yaml deleted file mode 100644 index 7dab2ae..0000000 --- a/csyaml/internal/merge/testdata/production.yaml +++ /dev/null @@ -1,13 +0,0 @@ -fun: - - lamborghini - - porsche - -practical: - honda: civic - nissan: altima - -occupants: - honda: - passenger: arthur - backseat: - - nora diff --git a/csyaml/merge.go b/csyaml/merge.go index 6f75b85..f9e1441 100644 --- a/csyaml/merge.go +++ b/csyaml/merge.go @@ -77,12 +77,14 @@ func mergeValue(into, from any) (any, error) { } // Mappings: deep-merge - if isMapping(into) && isMapping(from) { - return mergeMap(into.(yaml.MapSlice), from.(yaml.MapSlice)) + if mi, ok := into.(yaml.MapSlice); ok { + if mf, ok2 := from.(yaml.MapSlice); ok2 { + return mergeMap(mi, mf) + } } // Type mismatch: strict - return nil, fmt.Errorf("cannot merge %s into %s", describe(from), describe(into)) + return nil, fmt.Errorf("can't merge a %s into a %s", describe(from), describe(into)) } // mergeMap deep-merges two ordered maps (MapSlice) in strict mode. diff --git a/csyaml/merge_test.go b/csyaml/merge_test.go index 0bb7d27..d9dae6f 100644 --- a/csyaml/merge_test.go +++ b/csyaml/merge_test.go @@ -1,6 +1,7 @@ package csyaml_test import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -49,31 +50,161 @@ func TestMergeYAML(t *testing.T) { { name: "type mismatch error", inputs: []string{"foo: 1\n", "foo:\n - a\n"}, - wantErr: "cannot merge sequence into scalar", + wantErr: "can't merge a sequence into a scalar", }, { name: "invalid yaml error", - inputs: []string{"list: [1,2,"}, - wantErr: "decoding document 0: [1:7] sequence end token ']' not found", + inputs: []string{"ref: *foo\n"}, // undefined alias + wantErr: `decoding document 0: [1:7] could not find alias "foo"`, }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - // build byte slices var bs [][]byte for _, s := range tc.inputs { bs = append(bs, []byte(s)) } buf, err := csyaml.Merge(bs) - cstest.RequireErrorContains(t, err, tc.wantErr) if tc.wantErr != "" { require.Nil(t, buf) - } else { - assert.Equal(t, tc.want, buf.String()) + return } + + require.NotNil(t, buf) + assert.Equal(t, tc.want, buf.String()) }) } } + +func TestEmptyVsNilSources(t *testing.T) { + tests := []struct { + desc string + sources [][]byte + expect string + }{ + {"empty base", [][]byte{nil, []byte("foo: bar\n")}, "foo: bar\n"}, + {"empty override", [][]byte{[]byte("foo: bar\n"), nil}, "foo: bar\n"}, + {"both empty", [][]byte{nil, nil}, ""}, + {"null base", [][]byte{[]byte("~\n"), []byte("foo: bar\n")}, "foo: bar\n"}, + // not interested in overriding a document with "null", but it can be used as a base, why not + {"null wont't override", [][]byte{[]byte("foo: bar\n"), []byte("~\n")}, "foo: bar\n"}, + {"empty base & null override", [][]byte{nil, []byte("~\n")}, ""}, + {"null base & empty override", [][]byte{[]byte("~\n"), nil}, ""}, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + merged, err := csyaml.Merge(tt.sources) + require.NoError(t, err) + assert.Equal(t, tt.expect, merged.String()) + }) + } +} + +func TestDuplicateKeyError(t *testing.T) { + src := []byte("{foo: bar, foo: baz}") + _, err := csyaml.Merge([][]byte{src}) + cstest.RequireErrorContains(t, err, `decoding document 0: [1:12] mapping key "foo" already defined at [1:2]`) +} + +func TestTabsInSource(t *testing.T) { + src := []byte("foo:\n\tbar: baz") + _, err := csyaml.Merge([][]byte{src}) + cstest.RequireErrorContains(t, err, "decoding document 0: [2:1] found character '\t' that cannot start any token") +} + +func TestNestedDeepMergePreservesOrder(t *testing.T) { + left := ` +settings: + ui: + theme: light + toolbar: + - cut + - copy +` + right := ` +settings: + ui: + toolbar: + - paste + security: strict +` + expect := `settings: + ui: + theme: light + toolbar: + - paste + security: strict +` + merged, err := csyaml.Merge([][]byte{[]byte(left), []byte(right)}) + require.NoError(t, err) + assert.Equal(t, expect, merged.String()) +} + +// Don't coerce boolean-like strings to true/false (YAML 1.2 / goccy/go-yaml behavior). +func TestBooleanNoCoercion(t *testing.T) { + tests := []struct { + in, out string + }{ + {"foo: yes", `foo: "yes"`}, + {"foo: YES", `foo: "YES"`}, + {"foo: no", `foo: "no"`}, + {"foo: NO", `foo: "NO"`}, + {"foo: on", `foo: "on"`}, + {"foo: ON", `foo: "ON"`}, + {"foo: off", `foo: "off"`}, + {"foo: OFF", `foo: "OFF"`}, + } + + for _, tt := range tests { + t.Run(tt.in, func(t *testing.T) { + buf, err := csyaml.Merge([][]byte{nil, []byte(tt.in)}) + require.NoError(t, err) + assert.Equal(t, tt.out, strings.TrimSuffix(buf.String(), "\n")) + }) + } +} + +// Do coerce boolean-like values to true/false (YAML 1.1 / yaml.v3 behavior). +// func TestBooleanCoercion(t *testing.T) { +// tests := []struct { +// in, out string +// }{ +// {"yes\n", "true\n"}, +// {"YES\n", "true\n"}, +// {"no\n", "false\n"}, +// {"NO\n", "false\n"}, +// {"on\n", "true\n"}, +// {"ON\n", "true\n"}, +// {"off\n", "false\n"}, +// {"OFF\n", "false\n"}, +// } +// +// for _, tt := range tests { +// t.Run(tt.in, func(t *testing.T) { +// buf, err := csyaml.Merge([][]byte{nil, []byte(tt.in)}) +// require.NoError(t, err) +// assert.Equal(t, tt.out, buf.String()) +// }) +// } +//} + +func TestExplicitNilOverride(t *testing.T) { + base := []byte("foo: {one: two}\n") + override := []byte("foo: ~\n") + merged, err := csyaml.Merge([][]byte{base, override}) + require.NoError(t, err) + assert.Equal(t, "foo: null\n", merged.String()) +} + +func TestOrderPreservation(t *testing.T) { + left := []byte("a: 1\nb: 2\n") + right := []byte("c: 3\nb: 20\n") + expect := "a: 1\nb: 20\nc: 3\n" + merged, err := csyaml.Merge([][]byte{left, right}) + require.NoError(t, err) + assert.Equal(t, expect, merged.String()) +} diff --git a/csyaml/patcher.go b/csyaml/patcher.go index d9af10e..8e9ffe7 100644 --- a/csyaml/patcher.go +++ b/csyaml/patcher.go @@ -7,10 +7,8 @@ import ( "io" "os" - log "github.com/sirupsen/logrus" - "gopkg.in/yaml.v2" - - "github.com/crowdsecurity/go-cs-lib/csyaml/internal/merge" + "github.com/goccy/go-yaml" + "github.com/sirupsen/logrus" ) type Patcher struct { @@ -42,7 +40,7 @@ func readYAML(filePath string) ([]byte, error) { var yamlMap map[any]any if err = yaml.Unmarshal(content, &yamlMap); err != nil { - return nil, fmt.Errorf("%s: %w", filePath, err) + return nil, fmt.Errorf("%s: %s", filePath, yaml.FormatError(err, false, false)) } return content, nil @@ -65,16 +63,16 @@ func (p *Patcher) MergedPatchContent() ([]byte, error) { return nil, err } - logf := log.Infof + logf := logrus.Infof if p.quiet { - logf = log.Debugf + logf = logrus.Debugf } logf("Loading yaml file: '%s' with additional values from '%s'", p.BaseFilePath, p.PatchFilePath) // strict mode true, will raise errors for duplicate map keys and // overriding with a different type - patched, err := merge.YAML([][]byte{base, over}, true) + patched, err := Merge([][]byte{base, over}) if err != nil { return nil, err } @@ -85,8 +83,7 @@ func (p *Patcher) MergedPatchContent() ([]byte, error) { // read multiple YAML documents inside a file, and writes them to a buffer // separated by the appropriate '---' terminators. func decodeDocuments(file *os.File, buf *bytes.Buffer, finalDashes bool) error { - dec := yaml.NewDecoder(file) - dec.SetStrict(true) + dec := yaml.NewDecoder(file, yaml.Strict()) dashTerminator := false @@ -99,7 +96,7 @@ func decodeDocuments(file *os.File, buf *bytes.Buffer, finalDashes bool) error { break } - return fmt.Errorf("while decoding %s: %w", file.Name(), err) + return fmt.Errorf("while decoding %s: %s", file.Name(), yaml.FormatError(err, false, false)) } docBytes, err := yaml.Marshal(&yml) @@ -140,10 +137,10 @@ func (p *Patcher) PrependedPatchContent() ([]byte, error) { return nil, err } - logf := log.Infof + logf := logrus.Infof if p.quiet { - logf = log.Debugf + logf = logrus.Debugf } logf("Prepending yaml: '%s' with '%s'", p.BaseFilePath, p.PatchFilePath) diff --git a/csyaml/patcher_test.go b/csyaml/patcher_test.go index c74eccc..6617da3 100644 --- a/csyaml/patcher_test.go +++ b/csyaml/patcher_test.go @@ -26,28 +26,28 @@ func TestMergedPatchContent(t *testing.T) { "notayaml", "", "", - "config.yaml: yaml: unmarshal errors:", + "config.yaml: [1:1] string was used where mapping is expected", }, { "invalid yaml in base (detailed message)", "notayaml", "", "", - "cannot unmarshal !!str `notayaml`", + "config.yaml: [1:1] string was used where mapping is expected", }, { "invalid yaml in patch", "", "notayaml", "", - "config.yaml.local: yaml: unmarshal errors:", + "config.yaml.local: [1:1] string was used where mapping is expected", }, { "invalid yaml in patch (detailed message)", "", "notayaml", "", - "cannot unmarshal !!str `notayaml`", + "config.yaml.local: [1:1] string was used where mapping is expected", }, { "basic merge", @@ -61,17 +61,10 @@ func TestMergedPatchContent(t *testing.T) { // so we used something simpler. { - "bool merge - off if false", + "don't convert on/off to boolean", "bool: on", "bool: off", - "bool: false", - "", - }, - { - "bool merge - on is true", "bool: off", - "bool: on", - "bool: true", "", }, { @@ -245,28 +238,28 @@ func TestPrependedPatchContent(t *testing.T) { "blablabla", "", "", - "config.yaml: yaml: unmarshal errors:", + "config.yaml: [1:1] string was used where mapping is expected", }, { "invalid yaml in base (detailed message)", "blablabla", "", "", - "cannot unmarshal !!str `blablabla`", + "config.yaml: [1:1] string was used where mapping is expected", }, { "invalid yaml in patch", "", "blablabla", "", - "config.yaml.local: yaml: unmarshal errors:", + "config.yaml.local: [1:1] string was used where mapping is expected", }, { "invalid yaml in patch (detailed message)", "", "blablabla", "", - "cannot unmarshal !!str `blablabla`", + "config.yaml.local: [1:1] string was used where mapping is expected", }, } diff --git a/yamlpatch/patcher.go b/yamlpatch/patcher.go index 42f6346..31bd335 100644 --- a/yamlpatch/patcher.go +++ b/yamlpatch/patcher.go @@ -17,7 +17,7 @@ type Patcher struct { quiet bool } -// Deprecated: use csyaml.NewPatcher instead +// Deprecated: use csyaml.NewPatcher instead. func NewPatcher(filePath string, suffix string) *Patcher { return &Patcher{ BaseFilePath: filePath, From a5e17f0cc8c6cc81a82a7b3c933b915531771f97 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 11 Jun 2025 10:35:38 +0200 Subject: [PATCH 08/12] typo --- csyaml/keys.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csyaml/keys.go b/csyaml/keys.go index 967a3d4..1c3247a 100644 --- a/csyaml/keys.go +++ b/csyaml/keys.go @@ -12,7 +12,7 @@ import ( // returns a slice of its top-level keys, in order. // // Non-mapping documents yield an empty slice. Duplicate keys -// are now allowed and return an error. +// are not allowed and return an error. func GetDocumentKeys(r io.Reader) ([][]string, error) { // Decode into Go types, but force mappings into MapSlice dec := yaml.NewDecoder(r, yaml.UseOrderedMap()) From e1c07273af36219e61bf759f7045156e57b19999 Mon Sep 17 00:00:00 2001 From: marco Date: Wed, 23 Jul 2025 14:14:52 +0200 Subject: [PATCH 09/12] csyaml.IsEmptyYAML() --- csyaml/empty.go | 44 +++++++++++++++++ csyaml/empty_test.go | 114 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 csyaml/empty.go create mode 100644 csyaml/empty_test.go diff --git a/csyaml/empty.go b/csyaml/empty.go new file mode 100644 index 0000000..6d5d91d --- /dev/null +++ b/csyaml/empty.go @@ -0,0 +1,44 @@ +package csyaml + +import ( + "errors" + "io" + + yaml "github.com/goccy/go-yaml" + "github.com/goccy/go-yaml/parser" +) + +// IsEmptyYAML reads one or more YAML documents from r and returns true +// if they are all empty or contain only comments. +// It will reports errors if the input is not valid YAML. +func IsEmptyYAML(r io.Reader) (bool, error) { + src, err := io.ReadAll(r) + if err != nil { + return false, err + } + + if len(src) == 0 { + return true, nil + } + + file, err := parser.ParseBytes(src, 0) + if err != nil { + if errors.Is(err, io.EOF) { + return true, nil + } + + return false, errors.New(yaml.FormatError(err, false, false)) + } + + if file == nil || len(file.Docs) == 0 { + return true, nil + } + + for _, doc := range file.Docs { + if doc.Body != nil { + return false, nil + } + } + + return true, nil +} diff --git a/csyaml/empty_test.go b/csyaml/empty_test.go new file mode 100644 index 0000000..d75347e --- /dev/null +++ b/csyaml/empty_test.go @@ -0,0 +1,114 @@ +package csyaml + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crowdsecurity/go-cs-lib/cstest" // adjust this import to your package +) + +func TestIsEmptyYAML(t *testing.T) { + tests := []struct { + name string + input string + want bool + wantErr string + }{ + { + name: "empty document", + input: ``, + want: true, + }, + { + name: "just a key", + input: "foo:", + want: false, + }, + { + name: "just newline", + input: "\n", + want: true, + }, + { + name: "just comment", + input: "# only a comment", + want: true, + }, + { + name: "comments and empty lines", + input: "# only a comment\n\n# another one\n\n", + want: true, + }, + { + name: "empty doc with separator", + input: "---", + want: true, + }, + { + name: "empty mapping", + input: "{}", + want: false, + }, + { + name: "empty sequence", + input: "[]", + want: false, + }, + { + name: "non-empty mapping", + input: "foo: bar", + want: false, + }, + { + name: "non-empty sequence", + input: "- 1\n- 2", + want: false, + }, + { + name: "non-empty scalar", + input: "hello", + want: false, + }, + { + name: "empty scalar", + input: "''", + want: false, + }, + { + name: "explicit nil", + input: "null", + want: false, + }, + { + name: "malformed YAML", + input: "foo: [1,", + wantErr: "[1:6] sequence end token ']' not found", + }, + { + name: "multiple empty documents", + input: "---\n---\n---\n#comment", + want: true, + }, + { + name: "second document is not empty", + input: "---\nfoo: bar\n---\n#comment", + want: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := IsEmptyYAML(strings.NewReader(tc.input)) + + cstest.RequireErrorContains(t, err, tc.wantErr) + + if tc.wantErr != "" { + return + } + + assert.Equal(t, tc.want, got) + }) + } +} From 2c8f01ff69ea3e767f8f4a9a9a503c74a7b2cdf0 Mon Sep 17 00:00:00 2001 From: marco Date: Mon, 28 Jul 2025 15:50:39 +0200 Subject: [PATCH 10/12] SplitDocuments: preserve comments --- csyaml/split.go | 53 ++++++++---------- csyaml/split_test.go | 73 ------------------------- csyaml/splittext.go | 52 ++++++++++++++++++ csyaml/splittext_test.go | 112 ++++++++++++++++++++++++++++++++++++++ csyaml/splityaml.go | 48 +++++++++++++++++ csyaml/splityaml_test.go | 113 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 347 insertions(+), 104 deletions(-) delete mode 100644 csyaml/split_test.go create mode 100644 csyaml/splittext.go create mode 100644 csyaml/splittext_test.go create mode 100644 csyaml/splityaml.go create mode 100644 csyaml/splityaml_test.go diff --git a/csyaml/split.go b/csyaml/split.go index 1a92158..687e8a9 100644 --- a/csyaml/split.go +++ b/csyaml/split.go @@ -1,44 +1,35 @@ package csyaml import ( - "errors" - "fmt" + "bytes" "io" - - "github.com/goccy/go-yaml" ) -// SplitDocuments reads every YAML document from r and returns each -// as a separate []byte. +// SplitDocuments returns a slice of byte slices, each representing a YAML document. +// +// Since preserving formatting and comments is important but the existing go packages +// all have some issue, this function attempts two strategies: one that decodes and +// re-encodes the YAML content, and another that simply splits the input text. +// If both methods return the same number of documents, we assume the text-based +// function is sufficient. It retains comments and formatting better. +// Otherwise, the round-trip version is used. It retains comments but +// the formatting may be off. The semantics of the document will still be the same +// but if it contains parsing errors, they may refer to a wrong line or column. // -// Documents are round-tripped through goccy’s Decoder/Marshal pipeline, -// comments and exact original formatting may be lost. +// This function returns reading errors but any parsing errors are ignored and +// trigger the text-based splitting method. func SplitDocuments(r io.Reader) ([][]byte, error) { - docs := make([][]byte, 0) - - dec := yaml.NewDecoder(r) - - idx := -1 - - for { - var v any - - idx++ - - if err := dec.Decode(&v); err != nil { - if errors.Is(err, io.EOF) { - break - } - return nil, fmt.Errorf("decoding document %d: %s", idx, yaml.FormatError(err, false, false)) - } + input, err := io.ReadAll(r) + if err != nil { + return nil, err + } - out, err := yaml.Marshal(v) - if err != nil { - return nil, err - } + textDocs, errText := SplitDocumentsText(bytes.NewReader(input)) + decEncDocs, errDecEnc := SplitDocumentsDecEnc(bytes.NewReader(input)) - docs = append(docs, out) + if errDecEnc == nil && len(decEncDocs) != len(textDocs) { + return decEncDocs, nil } - return docs, nil + return textDocs, errText } diff --git a/csyaml/split_test.go b/csyaml/split_test.go deleted file mode 100644 index da999af..0000000 --- a/csyaml/split_test.go +++ /dev/null @@ -1,73 +0,0 @@ -package csyaml_test - -import ( - "strings" - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/crowdsecurity/go-cs-lib/cstest" - "github.com/crowdsecurity/go-cs-lib/csyaml" -) - -func TestSplitDocuments(t *testing.T) { - tests := []struct { - name string - input string - want [][]byte - wantErr string - }{ - { - name: "single mapping", - input: "a: 1\nb: 2\n", - want: [][]byte{[]byte("a: 1\nb: 2\n")}, - }, - { - name: "sequence doc", - input: "- 1\n- 2\n", - want: [][]byte{[]byte("- 1\n- 2\n")}, - }, - { - name: "scalar doc", - input: "\"scalar\"\n", - want: [][]byte{[]byte("scalar\n")}, - }, - { - name: "multiple documents", - input: `--- -a: 1 -b: 2 ---- -- 1 -- 2 ---- -"scalar" -`, - want: [][]byte{ - []byte("a: 1\nb: 2\n"), - []byte("- 1\n- 2\n"), - []byte("scalar\n"), - }, - }, - { - name: "empty input", - input: "", - want: [][]byte{}, - }, - { - name: "invalid YAML", - input: "list: [1, 2,", - want: nil, - wantErr: "decoding document 0: [1:7] sequence end token ']' not found", - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - r := strings.NewReader(tc.input) - docs, err := csyaml.SplitDocuments(r) - cstest.RequireErrorContains(t, err, tc.wantErr) - assert.Equal(t, tc.want, docs) - }) - } -} diff --git a/csyaml/splittext.go b/csyaml/splittext.go new file mode 100644 index 0000000..9d3df58 --- /dev/null +++ b/csyaml/splittext.go @@ -0,0 +1,52 @@ +package csyaml + +import ( + "bufio" + "bytes" + "io" + "strings" +) + +// SplitDocumentsText splits a YAML input stream into separate documents by looking for the `---` separator. +// No encoding or decoding is performed; the input is treated as raw text. +// Comments and whitespace are preserved. Malformed documents are returned as-is. +func SplitDocumentsText(r io.Reader) ([][]byte, error) { + var ( + docs [][]byte + current bytes.Buffer + ) + + scanner := bufio.NewScanner(r) + + for scanner.Scan() { + line := scanner.Text() + trimmed := strings.TrimSpace(line) + + isSeparator := strings.HasPrefix(trimmed, "---") && + (trimmed == "---" || strings.HasPrefix(trimmed, "--- ")) + + // Always write the line first + current.WriteString(line) + current.WriteByte('\n') + + if isSeparator && current.Len() > len(line)+1 { // +1 for newline just added + // Separator starts a new doc → commit previous one + // (everything up to this line is the previous doc) + n := current.Len() + // rewind to just before this separator line + doc := current.Bytes()[:n-len(line)-1] + docs = append(docs, append([]byte(nil), doc...)) + current = *bytes.NewBuffer(current.Bytes()[n-len(line)-1:]) + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + if current.Len() > 0 { + docs = append(docs, current.Bytes()) + } + + return docs, nil +} diff --git a/csyaml/splittext_test.go b/csyaml/splittext_test.go new file mode 100644 index 0000000..6f14870 --- /dev/null +++ b/csyaml/splittext_test.go @@ -0,0 +1,112 @@ +package csyaml_test + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" +) + +func TestSplitDocumentsText(t *testing.T) { + tests := []struct { + name string + input string + want [][]byte + wantErr string + }{ + { + name: "single mapping", + input: "a: 1\nb: 2\n", + want: [][]byte{[]byte("a: 1\nb: 2\n")}, + }, + { + name: "sequence doc", + input: "- 1\n- 2\n", + want: [][]byte{[]byte("- 1\n- 2\n")}, + }, + { + name: "scalar doc", + input: "\"scalar\"\n", + want: [][]byte{[]byte("\"scalar\"\n")}, + }, + { + name: "multiple documents", + input: `--- +a: 1 +b: 2 +--- +- 1 +- 2 +--- +"scalar" +`, + want: [][]byte{ + []byte("---\na: 1\nb: 2\n"), + []byte("---\n- 1\n- 2\n"), + []byte("---\n\"scalar\"\n"), + }, + }, + { + name: "empty input", + input: "", + want: [][]byte(nil), + }, + { + name: "invalid YAML", + input: "list: [1, 2,", + want: [][]byte{[]byte("list: [1, 2,\n")}, + }, + { + name: "preserve comments", + input: `# comment 1 +a: 1 +# comment 2 +b: 2 +--- +# comment 3 +- 1 +# comment 4 +- 2 +# comment 5 +--- +# comment 6 +"scalar" +# comment 7 +`, + want: [][]byte{ + []byte("# comment 1\na: 1\n# comment 2\nb: 2\n"), + []byte("---\n# comment 3\n- 1\n# comment 4\n- 2\n# comment 5\n"), + []byte("---\n# comment 6\n\"scalar\"\n# comment 7\n"), + }, + }, + { + name: "tricky separator", + input: `--- +text: | + This is a multi-line string. + It includes a line that looks like a document separator: + --- + But it's just part of the string. +--- +key: value +`, + want: [][]byte{ + []byte("---\ntext: |\n This is a multi-line string.\n It includes a line that looks like a document separator:\n"), + []byte(" ---\n But it's just part of the string.\n"), + []byte("---\nkey: value\n"), + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.input) + docs, err := csyaml.SplitDocumentsText(r) + cstest.RequireErrorContains(t, err, tc.wantErr) + assert.Equal(t, tc.want, docs) + }) + } +} diff --git a/csyaml/splityaml.go b/csyaml/splityaml.go new file mode 100644 index 0000000..0923331 --- /dev/null +++ b/csyaml/splityaml.go @@ -0,0 +1,48 @@ +package csyaml + +import ( + "bytes" + "errors" + "fmt" + "io" + + "gopkg.in/yaml.v3" +) + +// SplitDocumentsDecEnc splits documents from reader and returns them as +// re-encoded []byte slices, preserving comments but not exact original +// whitespace. It returns an error if any document cannot be decoded or +// re-encoded. +func SplitDocumentsDecEnc(r io.Reader) ([][]byte, error) { + dec := yaml.NewDecoder(r) + + var docs [][]byte + + idx := 0 + + for { + var node yaml.Node + if err := dec.Decode(&node); err != nil { + if errors.Is(err, io.EOF) { + break + } + + return nil, fmt.Errorf("decode doc %d: %w", idx, err) + } + + var buf bytes.Buffer + + enc := yaml.NewEncoder(&buf) + enc.SetIndent(2) + if err := enc.Encode(&node); err != nil { + return nil, fmt.Errorf("encode doc %d: %w", idx, err) + } + + _ = enc.Close() + + docs = append(docs, buf.Bytes()) + idx++ + } + + return docs, nil +} diff --git a/csyaml/splityaml_test.go b/csyaml/splityaml_test.go new file mode 100644 index 0000000..4a5816e --- /dev/null +++ b/csyaml/splityaml_test.go @@ -0,0 +1,113 @@ +package csyaml_test + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/crowdsecurity/go-cs-lib/cstest" + "github.com/crowdsecurity/go-cs-lib/csyaml" +) + +func TestSplitDocumentsDecEnc(t *testing.T) { + tests := []struct { + name string + input string + want [][]byte + wantErr string + }{ + { + name: "single mapping", + input: "a: 1\nb: 2\n", + want: [][]byte{[]byte("a: 1\nb: 2\n")}, + }, + { + name: "sequence doc", + input: "- 1\n- 2\n", + want: [][]byte{[]byte("- 1\n- 2\n")}, + }, + { + name: "scalar doc", + input: "\"scalar\"\n", + want: [][]byte{[]byte("\"scalar\"\n")}, + }, + { + name: "multiple documents", + input: `--- +a: 1 +b: 2 +--- +- 1 +- 2 +--- +"scalar" +`, + want: [][]byte{ + []byte("a: 1\nb: 2\n"), + []byte("- 1\n- 2\n"), + []byte("\"scalar\"\n"), + }, + }, + { + name: "empty input", + input: "", + want: [][]byte(nil), + }, + { + name: "invalid YAML", + input: "list: [1, 2,", + want: nil, + wantErr: "decode doc 0: yaml: line 1: did not find expected node content", + }, + { + name: "preserve comments", + input: `# comment 1 +a: 1 +# comment 2 +b: 2 +--- +# comment 3 +- 1 +# comment 4 +- 2 +# comment 5 +--- +# comment 6 +"scalar" +# comment 7 +`, + want: [][]byte{ + []byte("# comment 1\na: 1\n# comment 2\nb: 2\n"), + // not sure how to get rid of the extra new line here. + []byte("# comment 3\n- 1\n# comment 4\n- 2\n\n# comment 5\n"), + []byte("# comment 6\n\"scalar\"\n# comment 7\n"), + }, + }, + { + name: "tricky separator", + input: `--- +text: | + This is a multi-line string. + It includes a line that looks like a document separator: + --- + But it's just part of the string. +--- +key: value +`, + want: [][]byte{ + []byte("text: |\n This is a multi-line string.\n It includes a line that looks like a document separator:\n ---\n But it's just part of the string.\n"), + []byte("key: value\n"), + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.input) + docs, err := csyaml.SplitDocumentsDecEnc(r) + cstest.RequireErrorContains(t, err, tc.wantErr) + assert.Equal(t, tc.want, docs) + }) + } +} From 9d83e80296d93f72224ab0626c3760716392d990 Mon Sep 17 00:00:00 2001 From: marco Date: Fri, 1 Aug 2025 10:14:49 +0200 Subject: [PATCH 11/12] dependencies --- .github/workflows/tests.yml | 6 +++--- .golangci.yml | 6 +++++- go.mod | 4 ++-- go.sum | 8 ++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70374c0..587bff9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,10 +17,10 @@ jobs: steps: - name: Check out code into the Go module directory - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0 with: go-version-file: go.mod @@ -32,7 +32,7 @@ jobs: RICHGO_FORCE_COLOR: 1 - name: golangci-lint - uses: golangci/golangci-lint-action@v7 + uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0 with: version: v2.1 args: --issues-exit-code=1 --timeout 10m diff --git a/.golangci.yml b/.golangci.yml index 5d9db65..1c3b7c5 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -11,6 +11,7 @@ linters: - inamedparam # reports interfaces with unnamed method parameters - wrapcheck # Checks that errors returned from external packages are wrapped - err113 # Go linter to check the errors handling expressions + - noinlineerr - paralleltest # Detects missing usage of t.Parallel() method in your Go test - testpackage # linter that makes you use a separate _test package - exhaustruct # Checks if all structure fields are initialized @@ -27,13 +28,14 @@ linters: - gocognit # revive - gocyclo # revive - lll # revive + - wsl # wsl_v5 # # Formatting only, useful in IDE but should not be forced on CI? # - nlreturn # nlreturn checks for a new line before return and branch statements to increase code clarity - - wsl # add or remove empty lines + - wsl_v5 # add or remove empty lines settings: @@ -102,6 +104,8 @@ linters: - 43 - name: defer disabled: true + - name: enforce-switch-style + disabled: true - name: flag-parameter disabled: true - name: function-length diff --git a/go.mod b/go.mod index c2f5c77..5bd53b6 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,9 @@ module github.com/crowdsecurity/go-cs-lib go 1.23 require ( - github.com/blackfireio/osinfo v1.0.5 + github.com/blackfireio/osinfo v1.1.0 github.com/coreos/go-systemd/v22 v22.5.0 - github.com/goccy/go-yaml v1.17.1 + github.com/goccy/go-yaml v1.18.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 diff --git a/go.sum b/go.sum index b52ed27..826f733 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,13 @@ -github.com/blackfireio/osinfo v1.0.5 h1:6hlaWzfcpb87gRmznVf7wSdhysGqLRz9V/xuSdCEXrA= -github.com/blackfireio/osinfo v1.0.5/go.mod h1:Pd987poVNmd5Wsx6PRPw4+w7kLlf9iJxoRKPtPAjOrA= +github.com/blackfireio/osinfo v1.1.0 h1:1LMkMiFL42+Brx7r3MKuf7UTlXBRgebFLJQAfoFafj8= +github.com/blackfireio/osinfo v1.1.0/go.mod h1:Pd987poVNmd5Wsx6PRPw4+w7kLlf9iJxoRKPtPAjOrA= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/goccy/go-yaml v1.17.1 h1:LI34wktB2xEE3ONG/2Ar54+/HJVBriAGJ55PHls4YuY= -github.com/goccy/go-yaml v1.17.1/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= From 3090e7d5a0c88c4b68d94cd505dbfdbe8e51d9f0 Mon Sep 17 00:00:00 2001 From: marco Date: Fri, 1 Aug 2025 10:43:58 +0200 Subject: [PATCH 12/12] update null merge behavior --- .golangci.yml | 8 ++++---- csyaml/merge.go | 2 +- csyaml/merge_test.go | 7 +++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 1c3b7c5..9d99f7a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -11,7 +11,7 @@ linters: - inamedparam # reports interfaces with unnamed method parameters - wrapcheck # Checks that errors returned from external packages are wrapped - err113 # Go linter to check the errors handling expressions - - noinlineerr + #- noinlineerr - paralleltest # Detects missing usage of t.Parallel() method in your Go test - testpackage # linter that makes you use a separate _test package - exhaustruct # Checks if all structure fields are initialized @@ -35,7 +35,7 @@ linters: # - nlreturn # nlreturn checks for a new line before return and branch statements to increase code clarity - - wsl_v5 # add or remove empty lines + #- wsl_v5 # add or remove empty lines settings: @@ -104,8 +104,8 @@ linters: - 43 - name: defer disabled: true - - name: enforce-switch-style - disabled: true + #- name: enforce-switch-style + # disabled: true - name: flag-parameter disabled: true - name: function-length diff --git a/csyaml/merge.go b/csyaml/merge.go index f9e1441..7c9a547 100644 --- a/csyaml/merge.go +++ b/csyaml/merge.go @@ -44,7 +44,7 @@ func Merge(inputs [][]byte) (*bytes.Buffer, error) { } buf := &bytes.Buffer{} - if !hasContent { + if merged == nil && !hasContent { return buf, nil } diff --git a/csyaml/merge_test.go b/csyaml/merge_test.go index d9dae6f..ceadb35 100644 --- a/csyaml/merge_test.go +++ b/csyaml/merge_test.go @@ -89,10 +89,9 @@ func TestEmptyVsNilSources(t *testing.T) { {"empty override", [][]byte{[]byte("foo: bar\n"), nil}, "foo: bar\n"}, {"both empty", [][]byte{nil, nil}, ""}, {"null base", [][]byte{[]byte("~\n"), []byte("foo: bar\n")}, "foo: bar\n"}, - // not interested in overriding a document with "null", but it can be used as a base, why not - {"null wont't override", [][]byte{[]byte("foo: bar\n"), []byte("~\n")}, "foo: bar\n"}, - {"empty base & null override", [][]byte{nil, []byte("~\n")}, ""}, - {"null base & empty override", [][]byte{[]byte("~\n"), nil}, ""}, + {"explicit null override", [][]byte{[]byte("foo: bar\n"), []byte("~\n")}, "null\n"}, + {"empty base & null override", [][]byte{nil, []byte("~\n")}, "null\n"}, + {"null base & empty override", [][]byte{[]byte("~\n"), nil}, "null\n"}, } for _, tt := range tests {