From c5b345b73ea28a70c3260c435f53ca48257e5b9a Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 23 Mar 2022 12:34:08 +0100 Subject: [PATCH 01/10] Import multi-field definitions on external fields --- internal/fields/dependency_manager.go | 16 ++++++ internal/fields/dependency_manager_test.go | 35 ++++++++++++ internal/fields/model.go | 55 ++++++++++++++++--- .../apache/data_stream/access/fields/ecs.yml | 4 ++ test/packages/parallel/apache/docs/README.md | 2 + 5 files changed, 103 insertions(+), 9 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index a4c76e9351..679427988a 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -221,5 +221,21 @@ func transformImportedField(fd FieldDefinition) common.MapStr { } m.Put("fields", t) } + + if len(fd.MultiFields) > 0 { + var t []common.MapStr + for _, f := range fd.MultiFields { + i := transformImportedMultiField(f) + t = append(t, i) + } + m.Put("multi_fields", t) + } return m } + +func transformImportedMultiField(fd MultiFieldDefinition) common.MapStr { + return common.MapStr{ + "name": fd.Name, + "type": fd.Type, + } +} diff --git a/internal/fields/dependency_manager_test.go b/internal/fields/dependency_manager_test.go index eca32a47bc..803b982b63 100644 --- a/internal/fields/dependency_manager_test.go +++ b/internal/fields/dependency_manager_test.go @@ -116,6 +116,30 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { changed: true, valid: true, }, + { + title: "multi fields", + defs: []common.MapStr{ + { + "name": "process.command_line", + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "process.command_line", + "type": "wildcard", + "description": "Full command line that started the process.", + "multi_fields": []common.MapStr{ + { + "name": "text", + "type": "match_only_text", + }, + }, + }, + }, + changed: true, + valid: true, + }, { title: "unknown field", defs: 
[]common.MapStr{ @@ -144,6 +168,17 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { Description: "Data stream dataset.", Type: "constant_keyword", }, + { + Name: "process.command_line", + Description: "Full command line that started the process.", + Type: "wildcard", + MultiFields: []MultiFieldDefinition{ + { + Name: "text", + Type: "match_only_text", + }, + }, + }, }} dm := &DependencyManager{schema: schema} diff --git a/internal/fields/model.go b/internal/fields/model.go index 19cff7d71a..5df6f99f4d 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -6,15 +6,16 @@ package fields // FieldDefinition describes a single field with its properties. type FieldDefinition struct { - Name string `yaml:"name"` - Description string `yaml:"description"` - Type string `yaml:"type"` - Value string `yaml:"value"` // The value to associate with a constant_keyword field. - Pattern string `yaml:"pattern"` - Unit string `yaml:"unit"` - MetricType string `yaml:"metric_type"` - External string `yaml:"external"` - Fields []FieldDefinition `yaml:"fields"` + Name string `yaml:"name"` + Description string `yaml:"description"` + Type string `yaml:"type"` + Value string `yaml:"value"` // The value to associate with a constant_keyword field. 
+ Pattern string `yaml:"pattern"` + Unit string `yaml:"unit"` + MetricType string `yaml:"metric_type"` + External string `yaml:"external"` + Fields []FieldDefinition `yaml:"fields,omitempty"` + MultiFields []MultiFieldDefinition `yaml:"multi_fields,omitempty"` } func (orig *FieldDefinition) Update(fd FieldDefinition) { @@ -64,4 +65,40 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { } orig.Fields = updatedFields } + + if len(fd.MultiFields) > 0 { + updatedFields := make([]MultiFieldDefinition, len(orig.MultiFields)) + copy(updatedFields, orig.MultiFields) + for _, newField := range fd.MultiFields { + found := false + for i, origField := range orig.MultiFields { + if origField.Name != newField.Name { + continue + } + + found = true + updatedFields[i].Update(newField) + break + } + if !found { + updatedFields = append(updatedFields, newField) + } + } + orig.MultiFields = updatedFields + } +} + +// MultiFieldDefinition describes a multi field with its properties. +type MultiFieldDefinition struct { + Name string `yaml:"name"` + Type string `yaml:"type"` +} + +func (orig *MultiFieldDefinition) Update(fd MultiFieldDefinition) { + if fd.Name != "" { + orig.Name = fd.Name + } + if fd.Type != "" { + orig.Type = fd.Type + } } diff --git a/test/packages/parallel/apache/data_stream/access/fields/ecs.yml b/test/packages/parallel/apache/data_stream/access/fields/ecs.yml index 12993b0268..f05ee491d0 100644 --- a/test/packages/parallel/apache/data_stream/access/fields/ecs.yml +++ b/test/packages/parallel/apache/data_stream/access/fields/ecs.yml @@ -10,6 +10,8 @@ name: event.created - external: ecs name: event.kind +- external: ecs + name: event.original - external: ecs name: event.outcome - external: ecs @@ -30,6 +32,8 @@ name: log.level - external: ecs name: message +- external: ecs + name: process.command_line - external: ecs name: process.pid - external: ecs diff --git a/test/packages/parallel/apache/docs/README.md b/test/packages/parallel/apache/docs/README.md 
index d00e556d05..9763850143 100644 --- a/test/packages/parallel/apache/docs/README.md +++ b/test/packages/parallel/apache/docs/README.md @@ -45,6 +45,7 @@ Access logs collects the Apache access logs. | event.dataset | Event dataset | constant_keyword | | event.kind | This is one of four ECS Categorization Fields, and indicates the highest level in the ECS category hierarchy. `event.kind` gives high-level information about what type of information the event contains, without being specific to the contents of the event. For example, values of this field distinguish alert events from metric events. The value of this field can be used to inform how these kinds of events should be handled. They may warrant different retention, different access control, it may also help understand whether the data coming in at a regular interval or not. | keyword | | event.module | Event module | constant_keyword | +| event.original | Raw text message of entire event. Used to demonstrate log integrity or where the full log message (before splitting it up in multiple parts) may be required, e.g. for reindex. This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`. If users wish to override this and index this field, please see `Field data types` in the `Elasticsearch Reference`. | keyword | | event.outcome | This is one of four ECS Categorization Fields, and indicates the lowest level in the ECS category hierarchy. `event.outcome` simply denotes whether the event represents a success or a failure from the perspective of the entity that produced the event. Note that when a single transaction is described in multiple events, each event may populate different values of `event.outcome`, according to their perspective. 
Also note that in the case of a compound event (a single event that contains multiple logical events), this field should be populated with the value that best captures the overall success or failure from the perspective of the event producer. Further note that not all events will have an associated outcome. For example, this field is generally not populated for metric events, events with `event.type:info`, or any events for which an outcome does not make logical sense. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | | host.architecture | Operating system architecture. | keyword | @@ -73,6 +74,7 @@ Access logs collects the Apache access logs. | log.level | Original log level of the log event. If the source of the event provides a log level or textual severity, this is the one that goes in `log.level`. If your source doesn't specify one, you may put your event transport's severity here (e.g. Syslog severity). Some examples are `warn`, `err`, `i`, `informational`. | keyword | | log.offset | Log offset | long | | message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text | +| process.command_line | Full command line that started the process, including the absolute path to the executable, and all arguments. Some arguments may be filtered to protect sensitive information. | wildcard | | process.pid | Process id. | long | | process.thread.id | Thread ID. | long | | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. 
Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | From b4388ee50d34cfd0205116bf69b172a0d4aedee9 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Wed, 23 Mar 2022 12:59:16 +0100 Subject: [PATCH 02/10] Import index and doc values on external fields --- internal/fields/dependency_manager.go | 8 ++++ internal/fields/dependency_manager_test.go | 49 ++++++++++++++++++++++ internal/fields/model.go | 10 +++++ 3 files changed, 67 insertions(+) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 679427988a..796f8df1bb 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -213,6 +213,14 @@ func transformImportedField(fd FieldDefinition) common.MapStr { "type": fd.Type, } + if fd.Index != nil { + m["index"] = *fd.Index + } + + if fd.DocValues != nil { + m["doc_values"] = *fd.DocValues + } + if len(fd.Fields) > 0 { var t []common.MapStr for _, f := range fd.Fields { diff --git a/internal/fields/dependency_manager_test.go b/internal/fields/dependency_manager_test.go index 803b982b63..8be595b316 100644 --- a/internal/fields/dependency_manager_test.go +++ b/internal/fields/dependency_manager_test.go @@ -140,6 +140,47 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { changed: true, valid: true, }, + { + title: "not indexed external", + defs: []common.MapStr{ + { + "name": "event.original", + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "event.original", + "type": "text", + "description": "Original event.", + "index": false, + "doc_values": false, + }, + }, + changed: true, + valid: true, + }, + { + title: "override not indexed external", + defs: []common.MapStr{ + { + "name": "event.original", + "index": true, + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "event.original", + "type": "text", + "description": "Original event.", + "index": true, + "doc_values": false, + }, + }, 
+ changed: true, + valid: true, + }, { title: "unknown field", defs: []common.MapStr{ @@ -152,6 +193,7 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { }, } + indexFalse := false schema := map[string][]FieldDefinition{"test": []FieldDefinition{ { Name: "container.id", @@ -179,6 +221,13 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { }, }, }, + { + Name: "event.original", + Description: "Original event.", + Type: "text", + Index: &indexFalse, + DocValues: &indexFalse, + }, }} dm := &DependencyManager{schema: schema} diff --git a/internal/fields/model.go b/internal/fields/model.go index 5df6f99f4d..0d9cf1cdf1 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -14,6 +14,8 @@ type FieldDefinition struct { Unit string `yaml:"unit"` MetricType string `yaml:"metric_type"` External string `yaml:"external"` + Index *bool `yaml:"index"` + DocValues *bool `yaml:"doc_values"` Fields []FieldDefinition `yaml:"fields,omitempty"` MultiFields []MultiFieldDefinition `yaml:"multi_fields,omitempty"` } @@ -43,6 +45,14 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { if fd.External != "" { orig.External = fd.External } + if fd.Index != nil { + v := *fd.Index + orig.Index = &v + } + if fd.DocValues != nil { + v := *fd.DocValues + orig.DocValues = &v + } if len(fd.Fields) > 0 { // When a subfield the same name exists, update it. When not, append it. 
From a1fc78f8b211ff24829cb4edc6dfbd0c2d08bdf0 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 28 Mar 2022 17:18:58 +0200 Subject: [PATCH 03/10] Don't copy index and doc values --- internal/fields/model.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/internal/fields/model.go b/internal/fields/model.go index 0d9cf1cdf1..db1ee9b026 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -46,12 +46,10 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { orig.External = fd.External } if fd.Index != nil { - v := *fd.Index - orig.Index = &v + orig.Index = fd.Index } if fd.DocValues != nil { - v := *fd.DocValues - orig.DocValues = &v + orig.DocValues = fd.DocValues } if len(fd.Fields) > 0 { From 347e0b9b41e1b6a3152389f21cf6a1d1b1ca1baa Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 28 Mar 2022 17:24:15 +0200 Subject: [PATCH 04/10] Extract fields updaters to separate methods --- internal/fields/model.go | 75 ++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/internal/fields/model.go b/internal/fields/model.go index db1ee9b026..ad43c4254b 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -53,47 +53,56 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { } if len(fd.Fields) > 0 { - // When a subfield the same name exists, update it. When not, append it. 
- updatedFields := make([]FieldDefinition, len(orig.Fields)) - copy(updatedFields, orig.Fields) - for _, newField := range fd.Fields { - found := false - for i, origField := range orig.Fields { - if origField.Name != newField.Name { - continue - } - - found = true - updatedFields[i].Update(newField) - break - } - if !found { - updatedFields = append(updatedFields, newField) - } - } - orig.Fields = updatedFields + orig.updateFields(fd.Fields) } if len(fd.MultiFields) > 0 { - updatedFields := make([]MultiFieldDefinition, len(orig.MultiFields)) - copy(updatedFields, orig.MultiFields) - for _, newField := range fd.MultiFields { - found := false - for i, origField := range orig.MultiFields { - if origField.Name != newField.Name { - continue - } + orig.updateMultiFields(fd.MultiFields) + } +} - found = true - updatedFields[i].Update(newField) - break +func (orig *FieldDefinition) updateFields(fields []FieldDefinition) { + // When a subfield the same name exists, update it. When not, append it. + updatedFields := make([]FieldDefinition, len(orig.Fields)) + copy(updatedFields, orig.Fields) + for _, newField := range fields { + found := false + for i, origField := range orig.Fields { + if origField.Name != newField.Name { + continue } - if !found { - updatedFields = append(updatedFields, newField) + + found = true + updatedFields[i].Update(newField) + break + } + if !found { + updatedFields = append(updatedFields, newField) + } + } + orig.Fields = updatedFields +} + +func (orig *FieldDefinition) updateMultiFields(fields []MultiFieldDefinition) { + // When a subfield the same name exists, update it. When not, append it. 
+ updatedFields := make([]MultiFieldDefinition, len(orig.MultiFields)) + copy(updatedFields, orig.MultiFields) + for _, newField := range fields { + found := false + for i, origField := range orig.MultiFields { + if origField.Name != newField.Name { + continue } + + found = true + updatedFields[i].Update(newField) + break + } + if !found { + updatedFields = append(updatedFields, newField) } - orig.MultiFields = updatedFields } + orig.MultiFields = updatedFields } // MultiFieldDefinition describes a multi field with its properties. From 633f703e6e16a943203ff059df189ec955aee454 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 28 Mar 2022 17:57:59 +0200 Subject: [PATCH 05/10] Allow multi fields as documented fields on pipeline tests --- internal/fields/validate.go | 22 ++++++++++++++++++---- internal/fields/validate_test.go | 12 ++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 7a8ac84f29..9f6b21d57b 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -339,12 +339,26 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool { return true } - // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. - // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). + // Only a dot can be accepted now. + if searchedKey[j] != '.' { + return false + } + j++ + if len(searchedKey) > j { + extraPart := searchedKey[j:] + + // Check if this is a multi field. + for _, multiField := range def.MultiFields { + if extraPart == multiField.Name { + return true + } + } + + // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. + // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). 
if def.Type == "geo_point" || def.External != "" { - extraPart := searchedKey[j:] - if extraPart == ".lon" || extraPart == ".lat" { + if extraPart == "lon" || extraPart == "lat" { return true } } diff --git a/internal/fields/validate_test.go b/internal/fields/validate_test.go index 1e142927c5..34a66be46b 100644 --- a/internal/fields/validate_test.go +++ b/internal/fields/validate_test.go @@ -335,6 +335,18 @@ func TestCompareKeys(t *testing.T) { searchedKey: "example.geo.foo", expected: false, }, + { + key: "example.command_line", + def: FieldDefinition{ + MultiFields: []MultiFieldDefinition{ + { + Name: "text", + }, + }, + }, + searchedKey: "example.command_line.text", + expected: true, + }, } for _, c := range cases { From a792837e6bbb3ddbb5fd92c4f0c09fc3d99d428f Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 28 Mar 2022 18:11:59 +0200 Subject: [PATCH 06/10] Revert "Allow multi fields as documented fields on pipeline tests" This reverts commit 633f703e6e16a943203ff059df189ec955aee454. Multi-fields don't appear in documents. --- internal/fields/validate.go | 22 ++++------------------ internal/fields/validate_test.go | 12 ------------ 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 9f6b21d57b..7a8ac84f29 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -339,26 +339,12 @@ func compareKeys(key string, def FieldDefinition, searchedKey string) bool { return true } - // Only a dot can be accepted now. - if searchedKey[j] != '.' { - return false - } - j++ - + // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. + // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). if len(searchedKey) > j { - extraPart := searchedKey[j:] - - // Check if this is a multi field. 
- for _, multiField := range def.MultiFields { - if extraPart == multiField.Name { - return true - } - } - - // Workaround for potential geo_point, as "lon" and "lat" fields are not present in field definitions. - // Unfortunately we have to assume that imported field could be a geo_point (nasty workaround). if def.Type == "geo_point" || def.External != "" { - if extraPart == "lon" || extraPart == "lat" { + extraPart := searchedKey[j:] + if extraPart == ".lon" || extraPart == ".lat" { return true } } diff --git a/internal/fields/validate_test.go b/internal/fields/validate_test.go index 34a66be46b..1e142927c5 100644 --- a/internal/fields/validate_test.go +++ b/internal/fields/validate_test.go @@ -335,18 +335,6 @@ func TestCompareKeys(t *testing.T) { searchedKey: "example.geo.foo", expected: false, }, - { - key: "example.command_line", - def: FieldDefinition{ - MultiFields: []MultiFieldDefinition{ - { - Name: "text", - }, - }, - }, - searchedKey: "example.command_line.text", - expected: true, - }, } for _, c := range cases { From c4a796c63ab29061806f7bf1aded57a35d54476b Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 15:50:29 +0200 Subject: [PATCH 07/10] Use common structure for fields and multi fields --- internal/fields/dependency_manager.go | 18 ++---- internal/fields/dependency_manager_test.go | 2 +- internal/fields/model.go | 75 ++++++---------------- 3 files changed, 27 insertions(+), 68 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 796f8df1bb..38afd24f63 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -208,9 +208,12 @@ func buildFieldPath(root string, field common.MapStr) string { func transformImportedField(fd FieldDefinition) common.MapStr { m := common.MapStr{ - "name": fd.Name, - "description": fd.Description, - "type": fd.Type, + "name": fd.Name, + "type": fd.Type, + } + + if fd.Description != "" { + m["description"] 
= fd.Description } if fd.Index != nil { @@ -233,17 +236,10 @@ func transformImportedField(fd FieldDefinition) common.MapStr { if len(fd.MultiFields) > 0 { var t []common.MapStr for _, f := range fd.MultiFields { - i := transformImportedMultiField(f) + i := transformImportedField(f) t = append(t, i) } m.Put("multi_fields", t) } return m } - -func transformImportedMultiField(fd MultiFieldDefinition) common.MapStr { - return common.MapStr{ - "name": fd.Name, - "type": fd.Type, - } -} diff --git a/internal/fields/dependency_manager_test.go b/internal/fields/dependency_manager_test.go index 8be595b316..10b4e63476 100644 --- a/internal/fields/dependency_manager_test.go +++ b/internal/fields/dependency_manager_test.go @@ -214,7 +214,7 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { Name: "process.command_line", Description: "Full command line that started the process.", Type: "wildcard", - MultiFields: []MultiFieldDefinition{ + MultiFields: []FieldDefinition{ { Name: "text", Type: "match_only_text", diff --git a/internal/fields/model.go b/internal/fields/model.go index ad43c4254b..7ac71e7545 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -6,18 +6,18 @@ package fields // FieldDefinition describes a single field with its properties. type FieldDefinition struct { - Name string `yaml:"name"` - Description string `yaml:"description"` - Type string `yaml:"type"` - Value string `yaml:"value"` // The value to associate with a constant_keyword field. 
- Pattern string `yaml:"pattern"` - Unit string `yaml:"unit"` - MetricType string `yaml:"metric_type"` - External string `yaml:"external"` - Index *bool `yaml:"index"` - DocValues *bool `yaml:"doc_values"` - Fields []FieldDefinition `yaml:"fields,omitempty"` - MultiFields []MultiFieldDefinition `yaml:"multi_fields,omitempty"` + Name string `yaml:"name"` + Description string `yaml:"description"` + Type string `yaml:"type"` + Value string `yaml:"value"` // The value to associate with a constant_keyword field. + Pattern string `yaml:"pattern"` + Unit string `yaml:"unit"` + MetricType string `yaml:"metric_type"` + External string `yaml:"external"` + Index *bool `yaml:"index"` + DocValues *bool `yaml:"doc_values"` + Fields []FieldDefinition `yaml:"fields,omitempty"` + MultiFields []FieldDefinition `yaml:"multi_fields,omitempty"` } func (orig *FieldDefinition) Update(fd FieldDefinition) { @@ -53,21 +53,21 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { } if len(fd.Fields) > 0 { - orig.updateFields(fd.Fields) + orig.Fields = updateFields(orig.Fields, fd.Fields) } if len(fd.MultiFields) > 0 { - orig.updateMultiFields(fd.MultiFields) + orig.MultiFields = updateFields(orig.MultiFields, fd.MultiFields) } } -func (orig *FieldDefinition) updateFields(fields []FieldDefinition) { +func updateFields(origFields, fields []FieldDefinition) []FieldDefinition { // When a subfield the same name exists, update it. When not, append it. 
- updatedFields := make([]FieldDefinition, len(orig.Fields)) - copy(updatedFields, orig.Fields) + updatedFields := make([]FieldDefinition, len(origFields)) + copy(updatedFields, origFields) for _, newField := range fields { found := false - for i, origField := range orig.Fields { + for i, origField := range origFields { if origField.Name != newField.Name { continue } @@ -80,42 +80,5 @@ func (orig *FieldDefinition) updateFields(fields []FieldDefinition) { updatedFields = append(updatedFields, newField) } } - orig.Fields = updatedFields -} - -func (orig *FieldDefinition) updateMultiFields(fields []MultiFieldDefinition) { - // When a subfield the same name exists, update it. When not, append it. - updatedFields := make([]MultiFieldDefinition, len(orig.MultiFields)) - copy(updatedFields, orig.MultiFields) - for _, newField := range fields { - found := false - for i, origField := range orig.MultiFields { - if origField.Name != newField.Name { - continue - } - - found = true - updatedFields[i].Update(newField) - break - } - if !found { - updatedFields = append(updatedFields, newField) - } - } - orig.MultiFields = updatedFields -} - -// MultiFieldDefinition describes a multi field with its properties. 
-type MultiFieldDefinition struct { - Name string `yaml:"name"` - Type string `yaml:"type"` -} - -func (orig *MultiFieldDefinition) Update(fd MultiFieldDefinition) { - if fd.Name != "" { - orig.Name = fd.Name - } - if fd.Type != "" { - orig.Type = fd.Type - } + return updatedFields } From fbb0809f1b91310c73bde8c658ea901d895d98d3 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 15:58:58 +0200 Subject: [PATCH 08/10] Add multi-fields to documentation --- internal/docs/exported_fields.go | 8 ++++++++ internal/fields/dependency_manager.go | 1 + test/packages/parallel/apache/docs/README.md | 17 +++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/internal/docs/exported_fields.go b/internal/docs/exported_fields.go index d78998bddb..4264cf744f 100644 --- a/internal/docs/exported_fields.go +++ b/internal/docs/exported_fields.go @@ -147,6 +147,14 @@ func visitFields(namePrefix string, f fields.FieldDefinition, records []fieldsTa unit: f.Unit, metricType: f.MetricType, }) + + for _, multiField := range f.MultiFields { + records = append(records, fieldsTableRecord{ + name: name + "." + multiField.Name, + description: fmt.Sprintf("Multi-field of %#q.", name), + aType: f.Type, + }) + } return records, nil } diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 38afd24f63..3e5c11f118 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -212,6 +212,7 @@ func transformImportedField(fd FieldDefinition) common.MapStr { "type": fd.Type, } + // Multi-fields don't have descriptions. if fd.Description != "" { m["description"] = fd.Description } diff --git a/test/packages/parallel/apache/docs/README.md b/test/packages/parallel/apache/docs/README.md index 9763850143..4c3a431206 100644 --- a/test/packages/parallel/apache/docs/README.md +++ b/test/packages/parallel/apache/docs/README.md @@ -48,6 +48,7 @@ Access logs collects the Apache access logs. 
| event.original | Raw text message of entire event. Used to demonstrate log integrity or where the full log message (before splitting it up in multiple parts) may be required, e.g. for reindex. This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`. If users wish to override this and index this field, please see `Field data types` in the `Elasticsearch Reference`. | keyword | | event.outcome | This is one of four ECS Categorization Fields, and indicates the lowest level in the ECS category hierarchy. `event.outcome` simply denotes whether the event represents a success or a failure from the perspective of the entity that produced the event. Note that when a single transaction is described in multiple events, each event may populate different values of `event.outcome`, according to their perspective. Also note that in the case of a compound event (a single event that contains multiple logical events), this field should be populated with the value that best captures the overall success or failure from the perspective of the event producer. Further note that not all events will have an associated outcome. For example, this field is generally not populated for metric events, events with `event.type:info`, or any events for which an outcome does not make logical sense. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | +| file.path.text | Multi-field of `file.path`. | keyword | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. | boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -61,6 +62,7 @@ Access logs collects the Apache access logs. 
| host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | keyword | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -75,11 +77,13 @@ Access logs collects the Apache access logs. | log.offset | Log offset | long | | message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text | | process.command_line | Full command line that started the process, including the absolute path to the executable, and all arguments. Some arguments may be filtered to protect sensitive information. | wildcard | +| process.command_line.text | Multi-field of `process.command_line`. | wildcard | | process.pid | Process id. | long | | process.thread.id | Thread ID. | long | | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. 
| keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | keyword | | source.domain | Source domain. | keyword | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | @@ -96,14 +100,19 @@ Access logs collects the Apache access logs. | url.domain | Domain of the url, such as "www.elastic.co". In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. | keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | wildcard | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. 
| keyword | +| user.name.text | Multi-field of `user.name`. | keyword | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | keyword | | user_agent.os.full | Operating system name, including the version or code name. | keyword | +| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | keyword | | user_agent.os.name | Operating system name, without the version. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | keyword | | user_agent.os.version | Operating system version as a raw string. | keyword | | user_agent.version | Version of the user agent. | keyword | @@ -143,6 +152,7 @@ Error logs collects the Apache error logs. | event.timezone | This field should be populated when the event's timestamp does not include timezone information already (e.g. default Syslog timestamps). It's optional otherwise. Acceptable timezone formats are: a canonical ID (e.g. "Europe/Amsterdam"), abbreviated (e.g. "EST") or an HH:mm differential (e.g. "-05:00"). | keyword | | event.type | This is one of four ECS Categorization Fields, and indicates the third level in the ECS category hierarchy. `event.type` represents a categorization "sub-bucket" that, when used along with the `event.category` field values, enables filtering events down to a level appropriate for single visualization. This field is an array. This will allow proper categorization of some events that fall in multiple event types. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | +| file.path.text | Multi-field of `file.path`. | keyword | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. 
| boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -156,6 +166,7 @@ Error logs collects the Apache error logs. | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | keyword | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -174,6 +185,7 @@ Error logs collects the Apache error logs. | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | keyword | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | | source.geo.country_iso_code | Country ISO code. | keyword | @@ -187,13 +199,17 @@ Error logs collects the Apache error logs. | url.domain | Domain of the url, such as "www.elastic.co". 
In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. | keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | wildcard | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. | keyword | +| user.name.text | Multi-field of `user.name`. | keyword | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | keyword | | user_agent.os.name | Operating system name, without the version. 
| keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | keyword | ## Metrics @@ -380,6 +396,7 @@ An example event for `status` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | | | host.os.name | Operating system name, without the version. | keyword | | | +| host.os.name.text | Multi-field of `host.os.name`. | keyword | | | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | | | host.os.version | Operating system version as a raw string. | keyword | | | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | | | From f276d82495b14a6c9a45487e4a6403201443e6d3 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 16:03:12 +0200 Subject: [PATCH 09/10] Use proper type in documentation of multi-fields --- internal/docs/exported_fields.go | 2 +- test/packages/parallel/apache/docs/README.md | 34 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/internal/docs/exported_fields.go b/internal/docs/exported_fields.go index 4264cf744f..59babdae9d 100644 --- a/internal/docs/exported_fields.go +++ b/internal/docs/exported_fields.go @@ -152,7 +152,7 @@ func visitFields(namePrefix string, f fields.FieldDefinition, records []fieldsTa records = append(records, fieldsTableRecord{ name: name + "." 
+ multiField.Name, description: fmt.Sprintf("Multi-field of %#q.", name), - aType: f.Type, + aType: multiField.Type, }) } return records, nil diff --git a/test/packages/parallel/apache/docs/README.md b/test/packages/parallel/apache/docs/README.md index 4c3a431206..f94910d4df 100644 --- a/test/packages/parallel/apache/docs/README.md +++ b/test/packages/parallel/apache/docs/README.md @@ -48,7 +48,7 @@ Access logs collects the Apache access logs. | event.original | Raw text message of entire event. Used to demonstrate log integrity or where the full log message (before splitting it up in multiple parts) may be required, e.g. for reindex. This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`. If users wish to override this and index this field, please see `Field data types` in the `Elasticsearch Reference`. | keyword | | event.outcome | This is one of four ECS Categorization Fields, and indicates the lowest level in the ECS category hierarchy. `event.outcome` simply denotes whether the event represents a success or a failure from the perspective of the entity that produced the event. Note that when a single transaction is described in multiple events, each event may populate different values of `event.outcome`, according to their perspective. Also note that in the case of a compound event (a single event that contains multiple logical events), this field should be populated with the value that best captures the overall success or failure from the perspective of the event producer. Further note that not all events will have an associated outcome. For example, this field is generally not populated for metric events, events with `event.type:info`, or any events for which an outcome does not make logical sense. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | -| file.path.text | Multi-field of `file.path`. 
| keyword | +| file.path.text | Multi-field of `file.path`. | match_only_text | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. | boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -62,7 +62,7 @@ Access logs collects the Apache access logs. | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | -| host.os.name.text | Multi-field of `host.os.name`. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -77,13 +77,13 @@ Access logs collects the Apache access logs. | log.offset | Log offset | long | | message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text | | process.command_line | Full command line that started the process, including the absolute path to the executable, and all arguments. Some arguments may be filtered to protect sensitive information. | wildcard | -| process.command_line.text | Multi-field of `process.command_line`. 
| wildcard | +| process.command_line.text | Multi-field of `process.command_line`. | match_only_text | | process.pid | Process id. | long | | process.thread.id | Thread ID. | long | | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | -| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.domain | Source domain. | keyword | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | @@ -100,19 +100,19 @@ Access logs collects the Apache access logs. | url.domain | Domain of the url, such as "www.elastic.co". In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. | keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). 
| keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | -| url.original.text | Multi-field of `url.original`. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. | keyword | -| user.name.text | Multi-field of `user.name`. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | -| user_agent.original.text | Multi-field of `user_agent.original`. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.full | Operating system name, including the version or code name. | keyword | -| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | keyword | +| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | match_only_text | | user_agent.os.name | Operating system name, without the version. | keyword | -| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | match_only_text | | user_agent.os.version | Operating system version as a raw string. 
| keyword | | user_agent.version | Version of the user agent. | keyword | @@ -152,7 +152,7 @@ Error logs collects the Apache error logs. | event.timezone | This field should be populated when the event's timestamp does not include timezone information already (e.g. default Syslog timestamps). It's optional otherwise. Acceptable timezone formats are: a canonical ID (e.g. "Europe/Amsterdam"), abbreviated (e.g. "EST") or an HH:mm differential (e.g. "-05:00"). | keyword | | event.type | This is one of four ECS Categorization Fields, and indicates the third level in the ECS category hierarchy. `event.type` represents a categorization "sub-bucket" that, when used along with the `event.category` field values, enables filtering events down to a level appropriate for single visualization. This field is an array. This will allow proper categorization of some events that fall in multiple event types. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | -| file.path.text | Multi-field of `file.path`. | keyword | +| file.path.text | Multi-field of `file.path`. | match_only_text | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. | boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -166,7 +166,7 @@ Error logs collects the Apache error logs. | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | -| host.os.name.text | Multi-field of `host.os.name`. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. 
| text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -185,7 +185,7 @@ Error logs collects the Apache error logs. | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | -| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | | source.geo.country_iso_code | Country ISO code. | keyword | @@ -199,17 +199,17 @@ Error logs collects the Apache error logs. | url.domain | Domain of the url, such as "www.elastic.co". In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. | keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. 
The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | -| url.original.text | Multi-field of `url.original`. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. | keyword | -| user.name.text | Multi-field of `user.name`. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | -| user_agent.original.text | Multi-field of `user_agent.original`. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.name | Operating system name, without the version. | keyword | -| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. 
| match_only_text | ## Metrics @@ -396,7 +396,7 @@ An example event for `status` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | | | host.os.name | Operating system name, without the version. | keyword | | | -| host.os.name.text | Multi-field of `host.os.name`. | keyword | | | +| host.os.name.text | Multi-field of `host.os.name`. | text | | | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | | | host.os.version | Operating system version as a raw string. | keyword | | | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | | | From 6324985d3314f651f7478990276bea9c163949f3 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 29 Mar 2022 16:58:25 +0200 Subject: [PATCH 10/10] Update more packages readmes --- test/packages/parallel/gcp/docs/compute.md | 1 + test/packages/parallel/nginx/docs/README.md | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/test/packages/parallel/gcp/docs/compute.md b/test/packages/parallel/gcp/docs/compute.md index 9b301167e4..c3c4b42f54 100644 --- a/test/packages/parallel/gcp/docs/compute.md +++ b/test/packages/parallel/gcp/docs/compute.md @@ -159,6 +159,7 @@ An example event for `compute` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. 
| keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | diff --git a/test/packages/parallel/nginx/docs/README.md b/test/packages/parallel/nginx/docs/README.md index 53ebb724c7..b23e8ec4b0 100644 --- a/test/packages/parallel/nginx/docs/README.md +++ b/test/packages/parallel/nginx/docs/README.md @@ -178,6 +178,7 @@ An example event for `access` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -194,6 +195,7 @@ An example event for `access` looks as following: | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.geo.city_name | City name. 
| keyword | | source.geo.continent_name | Name of the continent. | keyword | | source.geo.country_iso_code | Country ISO code. | keyword | @@ -207,14 +209,19 @@ An example event for `access` looks as following: | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.fragment | Portion of the url after the `#`, such as "top". The `#` is not part of the fragment. | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.scheme | Scheme of the request, such as "https". Note: The `:` is not part of the scheme. | keyword | | user.name | Short name or login of the user. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.full | Operating system name, including the version or code name. | keyword | +| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | match_only_text | | user_agent.os.name | Operating system name, without the version. 
| keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | match_only_text | | user_agent.os.version | Operating system version as a raw string. | keyword | | user_agent.version | Version of the user agent. | keyword | @@ -343,6 +350,7 @@ An example event for `error` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -489,6 +497,7 @@ An example event for `stubstatus` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword |