From 29f0ff1c9dd42a358061ff3d0285ec0851db4e09 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 19:46:29 +0200 Subject: [PATCH 1/8] Fetch ECS fields from the generated flat fields --- internal/fields/dependency_manager.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 3e5c11f118..b22dd55783 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -25,8 +25,8 @@ const ( ecsSchemaName = "ecs" gitReferencePrefix = "git@" - ecsSchemaFile = "fields.ecs.yml" - ecsSchemaURL = "https://raw.githubusercontent.com/elastic/ecs/%s/generated/beats/%s" + ecsSchemaFile = "ecs_flat.yml" + ecsSchemaURL = "https://raw.githubusercontent.com/elastic/ecs/%s/generated/ecs/%s" ) // DependencyManager is responsible for resolving external field dependencies. @@ -111,12 +111,18 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er } } - var f []FieldDefinition + var f map[string]FieldDefinition err = yaml.Unmarshal(content, &f) if err != nil { return nil, errors.Wrap(err, "unmarshalling field body failed") } - return f[0].Fields, nil + + fields := make([]FieldDefinition, 0, len(f)) + for name, field := range f { + field.Name = name + fields = append(fields, field) + } + return fields, nil } func asGitReference(reference string) (string, error) { From 7dfc1384a138906c5127a7c82430a1ba4fd626dd Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 20:05:21 +0200 Subject: [PATCH 2/8] Fix incorrect field --- .../fields_tests/data_stream/first/fields/geo-fields.yml | 2 -- .../other/fields_tests/data_stream/first/sample_event.json | 6 +++--- test/packages/other/fields_tests/docs/README.md | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml b/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml index 056a08c6fb..37fbc8fa03 100644 --- a/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml +++ b/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml @@ -1,6 +1,4 @@ - name: destination.geo.location external: ecs -- name: geo.location - external: ecs - name: source.geo.location external: ecs diff --git a/test/packages/other/fields_tests/data_stream/first/sample_event.json b/test/packages/other/fields_tests/data_stream/first/sample_event.json index 97d1717c5d..a220e452a0 100644 --- a/test/packages/other/fields_tests/data_stream/first/sample_event.json +++ b/test/packages/other/fields_tests/data_stream/first/sample_event.json @@ -3,6 +3,6 @@ "lat": 1.0, "lon": "2.0" }, - "geo.location.lat": 3.0, - "geo.location.lon": 4.0 -} \ No newline at end of file + "destination.geo.location.lat": 3.0, + "destination.geo.location.lon": 4.0 +} diff --git a/test/packages/other/fields_tests/docs/README.md b/test/packages/other/fields_tests/docs/README.md index e1174d9492..8fbce827f4 100644 --- a/test/packages/other/fields_tests/docs/README.md +++ b/test/packages/other/fields_tests/docs/README.md @@ -8,8 +8,8 @@ An example event for `first` looks as following: "lat": 1.0, "lon": "2.0" }, - "geo.location.lat": 3.0, - "geo.location.lon": 4.0 + "destination.geo.location.lat": 3.0, + "destination.geo.location.lon": 4.0 } ``` @@ -22,5 +22,4 @@ An example event for `first` looks as following: | data_stream.namespace | Data stream namespace. | constant_keyword | | data_stream.type | Data stream type. | constant_keyword | | destination.geo.location | Longitude and latitude. | geo_point | -| geo.location | Longitude and latitude. | geo_point | | source.geo.location | Longitude and latitude. | geo_point | From 44da651684f7ef4fc6c85e448467b4a2672d2d94 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 20:15:00 +0200 Subject: [PATCH 3/8] Remove external groups --- test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml | 4 ---- test/packages/parallel/gcp/docs/compute.md | 4 +--- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml b/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml index 83e3f6f122..c122c1a9d6 100644 --- a/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml +++ b/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml @@ -1,5 +1,3 @@ -- external: ecs - name: cloud - external: ecs name: cloud.account.id - external: ecs @@ -16,8 +14,6 @@ name: cloud.region - external: ecs name: ecs.version -- external: ecs - name: error - external: ecs name: error.message - external: ecs diff --git a/test/packages/parallel/gcp/docs/compute.md b/test/packages/parallel/gcp/docs/compute.md index c3c4b42f54..c0f371fafe 100644 --- a/test/packages/parallel/gcp/docs/compute.md +++ b/test/packages/parallel/gcp/docs/compute.md @@ -98,10 +98,9 @@ An example event for `compute` looks as following: | Field | Description | Type | |---|---|---| | @timestamp | Event timestamp. | date | -| cloud | Fields related to the cloud or infrastructure the events are coming from. | group | | cloud.account.id | The cloud account or organization id used to identify different entities in a multi-tenant environment. Examples: AWS account id, Google Cloud ORG Id, or other unique identifier. | keyword | | cloud.account.name | The cloud account name or alias used to identify different entities in a multi-tenant environment. Examples: AWS account name, Google Cloud ORG display name. | keyword | -| cloud.availability_zone | Availability zone in which this host is running. | keyword | +| cloud.availability_zone | Availability zone in which this host, resource, or service is located. | keyword | | cloud.image.id | Image ID for the cloud instance. | keyword | | cloud.instance.id | Instance ID of the host machine. | keyword | | cloud.instance.name | Instance name of the host machine. | keyword | @@ -117,7 +116,6 @@ An example event for `compute` looks as following: | data_stream.namespace | Data stream namespace. | constant_keyword | | data_stream.type | Data stream type. | constant_keyword | | ecs.version | ECS version this event conforms to. `ecs.version` is a required field and must exist in all events. When querying across multiple indices -- which may conform to slightly different ECS versions -- this field lets integrations adjust to the schema version of the events. | keyword | -| error | These fields can represent errors of any kind. Use them for errors that happen while fetching events or in cases where the event itself contains an error. | group | | error.message | Error message. | match_only_text | | event.dataset | Event dataset | constant_keyword | | event.module | Event module | constant_keyword | From 616bc701f0ff1ff4d296990b3c7a8c788798b92c Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 30 Mar 2022 11:39:09 +0200 Subject: [PATCH 4/8] Refactor ecs schema loader --- internal/fields/dependency_manager.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index b22dd55783..676489efd5 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -61,6 +61,15 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er return nil, nil } + content, err := readECSFieldsSchemaFile(dep) + if err != nil { + return nil, errors.Wrap(err, "error reading ECS fields schema file") + } + + return parseECSFieldsSchema(content) +} + +func readECSFieldsSchemaFile(dep buildmanifest.ECSDependency) ([]byte, error) { gitReference, err := asGitReference(dep.Reference) if err != nil { return nil, errors.Wrap(err, "can't process the value as Git reference") @@ -70,12 +79,8 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er if err != nil { return nil, errors.Wrap(err, "error fetching profile path") } - cachedSchemaPath := filepath.Join(loc.FieldsCacheDir(), ecsSchemaName, gitReference, ecsSchemaFile) content, err := os.ReadFile(cachedSchemaPath) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, errors.Wrapf(err, "can't read cached schema (path: %s)", cachedSchemaPath) - } if errors.Is(err, os.ErrNotExist) { logger.Debugf("Pulling ECS dependency using reference: %s", dep.Reference) @@ -109,10 +114,16 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er if err != nil { return nil, errors.Wrapf(err, "can't write cached schema (path: %s)", cachedSchemaPath) } + } else if err != nil { + return nil, errors.Wrapf(err, "can't read cached schema (path: %s)", cachedSchemaPath) } + return content, nil +} + +func parseECSFieldsSchema(content []byte) ([]FieldDefinition, error) { var f map[string]FieldDefinition - err = yaml.Unmarshal(content, &f) + err := yaml.Unmarshal(content, &f) if err != nil { return nil, errors.Wrap(err, "unmarshalling field body failed") } From 02ea183ce2f8f2597f032bc6d807b85985028ead Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 30 Mar 2022 13:00:18 +0200 Subject: [PATCH 5/8] Support parsing ecs flattened and nested files --- internal/fields/dependency_manager.go | 11 ++-- internal/fields/model.go | 73 ++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 9 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 676489efd5..8ae592c38b 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -25,7 +25,7 @@ const ( ecsSchemaName = "ecs" gitReferencePrefix = "git@" - ecsSchemaFile = "ecs_flat.yml" + ecsSchemaFile = "ecs_nested.yml" ecsSchemaURL = "https://raw.githubusercontent.com/elastic/ecs/%s/generated/ecs/%s" ) @@ -122,17 +122,12 @@ func readECSFieldsSchemaFile(dep buildmanifest.ECSDependency) ([]byte, error) { } func parseECSFieldsSchema(content []byte) ([]FieldDefinition, error) { - var f map[string]FieldDefinition - err := yaml.Unmarshal(content, &f) + var fields FieldDefinitions + err := yaml.Unmarshal(content, &fields) if err != nil { return nil, errors.Wrap(err, "unmarshalling field body failed") } - fields := make([]FieldDefinition, 0, len(f)) - for name, field := range f { - field.Name = name - fields = append(fields, field) - } return fields, nil } diff --git a/internal/fields/model.go b/internal/fields/model.go index 7ac71e7545..308b12e2d4 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -4,6 +4,13 @@ package fields +import ( + "fmt" + "strings" + + "gopkg.in/yaml.v3" +) + // FieldDefinition describes a single field with its properties. type FieldDefinition struct { Name string `yaml:"name"` @@ -16,7 +23,7 @@ type FieldDefinition struct { External string `yaml:"external"` Index *bool `yaml:"index"` DocValues *bool `yaml:"doc_values"` - Fields []FieldDefinition `yaml:"fields,omitempty"` + Fields FieldDefinitions `yaml:"fields,omitempty"` MultiFields []FieldDefinition `yaml:"multi_fields,omitempty"` } @@ -82,3 +89,67 @@ func updateFields(origFields, fields []FieldDefinition) []FieldDefinition { } return updatedFields } + +// FieldDefinitions is an array of FieldDefinition, this can be unmarshalled from +// a yaml list or a yaml map. +type FieldDefinitions []FieldDefinition + +func (fds *FieldDefinitions) UnmarshalYAML(value *yaml.Node) error { + nilNode := yaml.Kind(0) + switch value.Kind { + case yaml.SequenceNode: + // Fields are defined as a list, this happens in Beats fields files. + var fields []FieldDefinition + err := value.Decode(&fields) + if err != nil { + return err + } + *fds = fields + return nil + case yaml.MappingNode: + // Fields are defined as a map, this happens in ecs fields files. + var fields []FieldDefinition + for i := 0; i < len(value.Content); i += 2 { + key := value.Content[i] + value := value.Content[i+1] + + var name string + err := key.Decode(&name) + if err != nil { + return err + } + + var field FieldDefinition + err = value.Decode(&field) + if err != nil { + return err + } + + // "base" group is used by convention in ECS to include + // fields that can appear in the root level of the document. + // Append its child fields directly instead. + if name == "base" { + fields = append(fields, field.Fields...) + } else { + field.Name = name + cleanNestedNames(field.Name, field.Fields) + fields = append(fields, field) + } + } + *fds = fields + return nil + case nilNode: + *fds = nil + return nil + default: + return fmt.Errorf("expected map or sequence") + } +} + +func cleanNestedNames(parent string, fields []FieldDefinition) { + for i := range fields { + if strings.HasPrefix(fields[i].Name, parent+".") { + fields[i].Name = fields[i].Name[len(parent)+1:] + } + } +} From 4cc5185181aec13d54affdd312024229962a5a63 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 30 Mar 2022 14:11:22 +0200 Subject: [PATCH 6/8] Revert "Remove external groups" This reverts commit 44da651684f7ef4fc6c85e448467b4a2672d2d94. --- test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml | 4 ++++ test/packages/parallel/gcp/docs/compute.md | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml b/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml index c122c1a9d6..83e3f6f122 100644 --- a/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml +++ b/test/packages/parallel/gcp/data_stream/compute/fields/ecs.yml @@ -1,3 +1,5 @@ +- external: ecs + name: cloud - external: ecs name: cloud.account.id - external: ecs @@ -14,6 +16,8 @@ name: cloud.region - external: ecs name: ecs.version +- external: ecs + name: error - external: ecs name: error.message - external: ecs diff --git a/test/packages/parallel/gcp/docs/compute.md b/test/packages/parallel/gcp/docs/compute.md index c0f371fafe..c3c4b42f54 100644 --- a/test/packages/parallel/gcp/docs/compute.md +++ b/test/packages/parallel/gcp/docs/compute.md @@ -98,9 +98,10 @@ An example event for `compute` looks as following: | Field | Description | Type | |---|---|---| | @timestamp | Event timestamp. | date | +| cloud | Fields related to the cloud or infrastructure the events are coming from. | group | | cloud.account.id | The cloud account or organization id used to identify different entities in a multi-tenant environment. Examples: AWS account id, Google Cloud ORG Id, or other unique identifier. | keyword | | cloud.account.name | The cloud account name or alias used to identify different entities in a multi-tenant environment. Examples: AWS account name, Google Cloud ORG display name. | keyword | -| cloud.availability_zone | Availability zone in which this host, resource, or service is located. | keyword | +| cloud.availability_zone | Availability zone in which this host is running. | keyword | | cloud.image.id | Image ID for the cloud instance. | keyword | | cloud.instance.id | Instance ID of the host machine. | keyword | | cloud.instance.name | Instance name of the host machine. | keyword | @@ -116,6 +117,7 @@ An example event for `compute` looks as following: | data_stream.namespace | Data stream namespace. | constant_keyword | | data_stream.type | Data stream type. | constant_keyword | | ecs.version | ECS version this event conforms to. `ecs.version` is a required field and must exist in all events. When querying across multiple indices -- which may conform to slightly different ECS versions -- this field lets integrations adjust to the schema version of the events. | keyword | +| error | These fields can represent errors of any kind. Use them for errors that happen while fetching events or in cases where the event itself contains an error. | group | | error.message | Error message. | match_only_text | | event.dataset | Event dataset | constant_keyword | | event.module | Event module | constant_keyword | From a0fdf1ec93848b819c7f622f0a45fc745d2be25c Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 30 Mar 2022 19:33:11 +0200 Subject: [PATCH 7/8] Fix format --- .../other/fields_tests/data_stream/first/sample_event.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/packages/other/fields_tests/data_stream/first/sample_event.json b/test/packages/other/fields_tests/data_stream/first/sample_event.json index a220e452a0..ec22a93517 100644 --- a/test/packages/other/fields_tests/data_stream/first/sample_event.json +++ b/test/packages/other/fields_tests/data_stream/first/sample_event.json @@ -5,4 +5,4 @@ }, "destination.geo.location.lat": 3.0, "destination.geo.location.lon": 4.0 -} +} \ No newline at end of file From c79194b8c42f7239e051d74d8179f16cfc99d19e Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Thu, 31 Mar 2022 10:48:24 +0200 Subject: [PATCH 8/8] Add safer constraints when looping through map elements --- internal/fields/model.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/fields/model.go b/internal/fields/model.go index 308b12e2d4..9b6a3d2cca 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -108,8 +108,11 @@ func (fds *FieldDefinitions) UnmarshalYAML(value *yaml.Node) error { return nil case yaml.MappingNode: // Fields are defined as a map, this happens in ecs fields files. + if len(value.Content)%2 != 0 { + return fmt.Errorf("pairs of key-values expected in map") + } var fields []FieldDefinition - for i := 0; i < len(value.Content); i += 2 { + for i := 0; i+1 < len(value.Content); i += 2 { key := value.Content[i] value := value.Content[i+1]