From 760208bdce3fa67d4a3cf59498e35c7beb12fc57 Mon Sep 17 00:00:00 2001 From: Alex K <8418476+fearful-symmetry@users.noreply.github.com> Date: Thu, 9 May 2024 23:33:23 -0700 Subject: [PATCH] Add error check to groupToEvents so we don't blindly add error values (#39404) * add error check to perfmon groupToEvents * linter... * tests * add changelog * fix changelog * make linter happy --- CHANGELOG.next.asciidoc | 1 + metricbeat/module/windows/perfmon/data.go | 15 +++-- .../module/windows/perfmon/data_test.go | 61 +++++++++++++++++++ 3 files changed, 72 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index a92b42f697d..2b571875e34 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -172,6 +172,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] *Winlogbeat* +- Fix error handling in perfmon metrics. {issue}38140[38140] {pull}39404[39404] *Elastic Logging Plugin* diff --git a/metricbeat/module/windows/perfmon/data.go b/metricbeat/module/windows/perfmon/data.go index 9add5c03896..0391266e65a 100644 --- a/metricbeat/module/windows/perfmon/data.go +++ b/metricbeat/module/windows/perfmon/data.go @@ -20,6 +20,7 @@ package perfmon import ( + "errors" "fmt" "regexp" "strconv" @@ -48,7 +49,7 @@ func (re *Reader) groupToEvents(counters map[string][]pdh.CounterValue) []mb.Eve // The counter has a negative value or the counter was successfully found, but the data returned is not valid. // This error can occur if the counter value is less than the previous value. (Because counter values always increment, the counter value rolls over to zero when it reaches its maximum value.) // This is not an error that stops the application from running successfully and a positive counter value should be retrieved in the later calls. - if val.Err.Error == pdh.PDH_CALC_NEGATIVE_VALUE || val.Err.Error == pdh.PDH_INVALID_DATA { + if errors.Is(val.Err.Error, pdh.PDH_CALC_NEGATIVE_VALUE) || errors.Is(val.Err.Error, pdh.PDH_INVALID_DATA) { re.log.Debugw("Counter value retrieval returned", "error", val.Err.Error, "cstatus", pdh.PdhErrno(val.Err.CStatus), logp.Namespace("perfmon"), "query", counterPath) continue @@ -69,7 +70,9 @@ func (re *Reader) groupToEvents(counters map[string][]pdh.CounterValue) []mb.Eve if _, ok := eventMap[eventKey]; !ok { eventMap[eventKey] = &mb.Event{ MetricSetFields: mapstr.M{}, - Error: fmt.Errorf("failed on query=%v: %w", counterPath, val.Err.Error), + } + if val.Err.Error != nil { + eventMap[eventKey].Error = fmt.Errorf("failed on query=%v: %w", counterPath, val.Err.Error) } if val.Instance != "" { // will ignore instance index @@ -93,9 +96,11 @@ func (re *Reader) groupToEvents(counters map[string][]pdh.CounterValue) []mb.Eve } } // Write the values into the map. - var events []mb.Event + events := make([]mb.Event, len(eventMap)) + iter := 0 for _, val := range eventMap { - events = append(events, *val) + events[iter] = *val + iter++ } return events } @@ -111,7 +116,7 @@ func (re *Reader) groupToSingleEvent(counters map[string][]pdh.CounterValue) mb. // Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue. // For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data). if val.Err.Error != nil { - if val.Err.Error == pdh.PDH_CALC_NEGATIVE_VALUE || val.Err.Error == pdh.PDH_INVALID_DATA { + if errors.Is(val.Err.Error, pdh.PDH_CALC_NEGATIVE_VALUE) || errors.Is(val.Err.Error, pdh.PDH_INVALID_DATA) { re.log.Debugw("Counter value retrieval returned", "error", val.Err.Error, "cstatus", pdh.PdhErrno(val.Err.CStatus), logp.Namespace("perfmon"), "query", counterPath) continue diff --git a/metricbeat/module/windows/perfmon/data_test.go b/metricbeat/module/windows/perfmon/data_test.go index 2e9f15e2de7..9c4691216b3 100644 --- a/metricbeat/module/windows/perfmon/data_test.go +++ b/metricbeat/module/windows/perfmon/data_test.go @@ -28,6 +28,67 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +func TestGroupErrors(t *testing.T) { + reader := Reader{ + config: Config{ + GroupMeasurements: true, + }, + query: pdh.Query{}, + log: nil, + counters: []PerfCounter{ + { + QueryField: "datagrams_sent_per_sec", + QueryName: `\UDPv4\Datagrams Sent/sec`, + Format: "float", + ObjectName: "UDPv4", + ObjectField: "object", + ChildQueries: []string{`\UDPv4\Datagrams Sent/sec`}, + }, + { + QueryField: "%_processor_time", + QueryName: `\Processor Information(_Total)\% Processor Time`, + Format: "float", + ObjectName: "Processor Information", + ObjectField: "object", + InstanceName: "_Total", + InstanceField: "instance", + ChildQueries: []string{`\Processor Information(_Total)\% Processor Time`}, + }, + { + QueryField: "current_disk_queue_length", + QueryName: `\PhysicalDisk(_Total)\Current Disk Queue Length`, + Format: "float", + ObjectName: "PhysicalDisk", + ObjectField: "object", + InstanceName: "_Total", + InstanceField: "instance", + ChildQueries: []string{`\PhysicalDisk(_Total)\Current Disk Queue Length`}, + }, + }, + } + + counters := map[string][]pdh.CounterValue{ + `\UDPv4\Datagrams Sent/sec`: { + {Instance: "", Measurement: 23}, + }, + `\Processor Information(_Total)\% Processor Time`: { + {Instance: "_Total", Measurement: 11}, + }, + `\PhysicalDisk(_Total)\Current Disk Queue Length`: { + {Instance: "_Total", Measurement: 20}, + }, + } + + events := reader.groupToEvents(counters) + assert.NotNil(t, events) + assert.Equal(t, 3, len(events)) + + for _, event := range events { + assert.NoError(t, event.Error) + } + +} + func TestGroupToEvents(t *testing.T) { reader := Reader{ config: Config{