Skip to content

Commit

Permalink
added warning/critical values in performance data
Browse files Browse the repository at this point in the history
  • Loading branch information
TheFireMike committed Mar 12, 2021
1 parent 1bad910 commit dd42e15
Show file tree
Hide file tree
Showing 27 changed files with 261 additions and 280 deletions.
23 changes: 17 additions & 6 deletions cmd/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"fmt"
"github.com/inexio/go-monitoringplugin"
"github.com/inexio/thola/core/request"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
Expand Down Expand Up @@ -64,35 +65,45 @@ func getCheckRequest() request.CheckRequest {
}
}

// generateCheckThresholds builds the thresholds of a check from the flags of cmd.
//
// warningMin, warningMax, criticalMin and criticalMax are flag NAMES, not values.
// A bound is only filled in when its name is non-empty and cmd.Flags().Changed
// reports true for that flag; otherwise the bound keeps its zero value.
// If a flag cannot be read as a float64 the program is terminated via log.Fatal.
//
// When setMinToZeroIfEmpty is true, a warning/critical minimum that is still nil is
// set to 0 whenever HasWarning()/HasCritical() reports true (presumably: some bound
// of that severity is set -- confirm against go-monitoringplugin).
func generateCheckThresholds(cmd *cobra.Command, warningMin, warningMax, criticalMin, criticalMax string) request.CheckThresholds {
var thresholds request.CheckThresholds
func generateCheckThresholds(cmd *cobra.Command, warningMin, warningMax, criticalMin, criticalMax string, setMinToZeroIfEmpty bool) monitoringplugin.Thresholds {
var thresholds monitoringplugin.Thresholds
// lower warning bound
if flagName := warningMin; flagName != "" && cmd.Flags().Changed(flagName) {
v, err := cmd.Flags().GetFloat64(flagName)
if err != nil {
log.Fatal().Err(err).Msgf("flag '%s' is not a float64", flagName)
}
thresholds.WarningMin = &v
thresholds.WarningMin = v
}
// upper warning bound
if flagName := warningMax; flagName != "" && cmd.Flags().Changed(flagName) {
v, err := cmd.Flags().GetFloat64(flagName)
if err != nil {
log.Fatal().Err(err).Msgf("flag '%s' is not a float64", flagName)
}
thresholds.WarningMax = &v
thresholds.WarningMax = v
}
// lower critical bound
if flagName := criticalMin; flagName != "" && cmd.Flags().Changed(flagName) {
v, err := cmd.Flags().GetFloat64(flagName)
if err != nil {
log.Fatal().Err(err).Msgf("flag '%s' is not a float64", flagName)
}
thresholds.CriticalMin = &v
thresholds.CriticalMin = v
}
// upper critical bound
if flagName := criticalMax; flagName != "" && cmd.Flags().Changed(flagName) {
v, err := cmd.Flags().GetFloat64(flagName)
if err != nil {
log.Fatal().Err(err).Msgf("flag '%s' is not a float64", flagName)
}
thresholds.CriticalMax = &v
thresholds.CriticalMax = v
}

// optionally default the still-unset lower bounds to 0 (see function comment)
if setMinToZeroIfEmpty {
if thresholds.HasWarning() && thresholds.WarningMin == nil {
thresholds.WarningMin = 0
}
if thresholds.HasCritical() && thresholds.CriticalMin == nil {
thresholds.CriticalMin = 0
}
}

return thresholds
}
2 changes: 1 addition & 1 deletion cmd/check_cpu_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var checkCpuLoad = &cobra.Command{
// Run builds the CPU load check request and passes it to handleRequest.
// The device request is built from the first positional argument; the "warning" and
// "critical" flags are used as the upper bounds only (both minimum flag names are empty).
Run: func(cmd *cobra.Command, args []string) {
r := request.CheckCPULoadRequest{
CheckDeviceRequest: getCheckDeviceRequest(args[0]),
CPULoadThresholds: generateCheckThresholds(cmd, "", "warning", "", "critical"),
// trailing true is setMinToZeroIfEmpty
CPULoadThresholds: generateCheckThresholds(cmd, "", "warning", "", "critical", true),
}
handleRequest(&r)
},
Expand Down
2 changes: 1 addition & 1 deletion cmd/check_disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var checkDiskCMD = &cobra.Command{
// Run builds the disk check request and passes it to handleRequest.
// The device request is built from the first positional argument; the "warning" and
// "critical" flags are used as the lower bounds only (both maximum flag names are empty).
Run: func(cmd *cobra.Command, args []string) {
r := request.CheckDiskRequest{
CheckDeviceRequest: getCheckDeviceRequest(args[0]),
DiskThresholds: generateCheckThresholds(cmd, "warning", "", "critical", ""),
// trailing false is setMinToZeroIfEmpty
DiskThresholds: generateCheckThresholds(cmd, "warning", "", "critical", "", false),
}
handleRequest(&r)
},
Expand Down
2 changes: 1 addition & 1 deletion cmd/check_memory_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var checkMemoryUsage = &cobra.Command{
// Run builds the memory usage check request and passes it to handleRequest.
// The device request is built from the first positional argument; the "warning" and
// "critical" flags are used as the upper bounds only (both minimum flag names are empty).
Run: func(cmd *cobra.Command, args []string) {
r := request.CheckMemoryUsageRequest{
CheckDeviceRequest: getCheckDeviceRequest(args[0]),
MemoryUsageThresholds: generateCheckThresholds(cmd, "", "warning", "", "critical"),
// trailing true is setMinToZeroIfEmpty
MemoryUsageThresholds: generateCheckThresholds(cmd, "", "warning", "", "critical", true),
}
handleRequest(&r)
},
Expand Down
2 changes: 1 addition & 1 deletion cmd/check_sbc.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ var checkSBCCMD = &cobra.Command{
// Run builds the SBC check request and passes it to handleRequest.
// The device request is built from the first positional argument; the
// "system-health-score-warning" and "system-health-score-critical" flags are used as
// the lower bounds only (both maximum flag names are empty).
Run: func(cmd *cobra.Command, args []string) {
r := request.CheckSBCRequest{
CheckDeviceRequest: getCheckDeviceRequest(args[0]),
SystemHealthScoreThresholds: generateCheckThresholds(cmd, "system-health-score-warning", "", "system-health-score-critical", ""),
// trailing false is setMinToZeroIfEmpty
SystemHealthScoreThresholds: generateCheckThresholds(cmd, "system-health-score-warning", "", "system-health-score-critical", "", false),
}
handleRequest(&r)
},
Expand Down
10 changes: 5 additions & 5 deletions cmd/check_ups.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ var checkUPSCMD = &cobra.Command{
// Run builds the UPS check request and passes it to handleRequest.
// The device request is built from the first positional argument. Each threshold set
// is read from four flags, passed in the order warning-min, warning-max,
// critical-min, critical-max.
Run: func(cmd *cobra.Command, args []string) {
r := request.CheckUPSRequest{
CheckDeviceRequest: getCheckDeviceRequest(args[0]),
BatteryCurrentThresholds: generateCheckThresholds(cmd, "batt-current-warning-min", "batt-current-warning-max", "batt-current-critical-min", "batt-current-critical-max"),
BatteryTemperatureThresholds: generateCheckThresholds(cmd, "batt-temperature-warning-min", "batt-temperature-warning-max", "batt-temperature-critical-min", "batt-temperature-critical-max"),
CurrentLoadThresholds: generateCheckThresholds(cmd, "current-load-warning-min", "current-load-warning-max", "current-load-warning-max", "current-load-warning-max"),
RectifierCurrentThresholds: generateCheckThresholds(cmd, "rectifier-current-warning-min", "rectifier-current-warning-max", "rectifier-current-critical-min", "rectifier-current-critical-max"),
SystemVoltageThresholds: generateCheckThresholds(cmd, "system-voltage-warning-min", "system-voltage-warning-max", "system-voltage-critical-min", "system-voltage-critical-max"),
// trailing false is setMinToZeroIfEmpty
BatteryCurrentThresholds: generateCheckThresholds(cmd, "batt-current-warning-min", "batt-current-warning-max", "batt-current-critical-min", "batt-current-critical-max", false),
BatteryTemperatureThresholds: generateCheckThresholds(cmd, "batt-temperature-warning-min", "batt-temperature-warning-max", "batt-temperature-critical-min", "batt-temperature-critical-max", false),
// the critical bounds must be read from the critical flags, like every other threshold set here
CurrentLoadThresholds: generateCheckThresholds(cmd, "current-load-warning-min", "current-load-warning-max", "current-load-critical-min", "current-load-critical-max", false),
RectifierCurrentThresholds: generateCheckThresholds(cmd, "rectifier-current-warning-min", "rectifier-current-warning-max", "rectifier-current-critical-min", "rectifier-current-critical-max", false),
SystemVoltageThresholds: generateCheckThresholds(cmd, "system-voltage-warning-min", "system-voltage-warning-max", "system-voltage-critical-min", "system-voltage-critical-max", false),
}
handleRequest(&r)
},
Expand Down
2 changes: 1 addition & 1 deletion core/communicator/network_device_communicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ func (c *networkDeviceCommunicator) GetDiskComponent(ctx context.Context) (devic

func (c *networkDeviceCommunicator) GetHardwareHealthComponent(ctx context.Context) (device.HardwareHealthComponent, error) {
if !c.deviceClassCommunicator.hasAvailableComponent(hardwareHealthComponent) {
return device.HardwareHealthComponent{}, tholaerr.NewComponentNotFoundError("no sbc component available for this device")
return device.HardwareHealthComponent{}, tholaerr.NewComponentNotFoundError("no hardware health component available for this device")
}

var hardwareHealth device.HardwareHealthComponent
Expand Down
9 changes: 6 additions & 3 deletions core/request/check_cpu_load.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package request

import "context"
import (
"context"
"github.com/inexio/go-monitoringplugin"
)

// CheckCPULoadRequest
//
Expand All @@ -9,11 +12,11 @@ import "context"
// swagger:model
type CheckCPULoadRequest struct {
// CheckDeviceRequest is embedded; its validate method is called by this request's validate.
CheckDeviceRequest
// CPULoadThresholds holds the warning/critical thresholds for the CPU load;
// it is (de)serialized under the key "cpuLoadThresholds" in JSON and XML.
CPULoadThresholds CheckThresholds `json:"cpuLoadThresholds" xml:"cpuLoadThresholds"`
CPULoadThresholds monitoringplugin.Thresholds `json:"cpuLoadThresholds" xml:"cpuLoadThresholds"`
}

func (r *CheckCPULoadRequest) validate(ctx context.Context) error {
if err := r.CPULoadThresholds.validate(); err != nil {
if err := r.CPULoadThresholds.Validate(); err != nil {
return err
}
return r.CheckDeviceRequest.validate(ctx)
Expand Down
24 changes: 10 additions & 14 deletions core/request/check_cpu_load_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"context"
"fmt"
"github.com/inexio/go-monitoringplugin"
"github.com/inexio/thola/core/value"
"strconv"
)

Expand All @@ -23,30 +22,27 @@ func (r *CheckCPULoadRequest) process(ctx context.Context) (Response, error) {

for k, cpuLoad := range response.(*ReadCPULoadResponse).CPULoad {
cpuSum += cpuLoad
val := value.New(cpuLoad)

performanceDataLabel := "cpu_load"
if cpuAmount > 1 {
performanceDataLabel += "_" + strconv.Itoa(k)
}
err = r.mon.AddPerformanceDataPoint(monitoringplugin.NewPerformanceDataPoint(performanceDataLabel, val.String(), "%"))
point := monitoringplugin.NewPerformanceDataPoint(performanceDataLabel, cpuLoad).SetUnit("%")
if cpuAmount == 1 {
point.SetThresholds(r.CPULoadThresholds)
}
err = r.mon.AddPerformanceDataPoint(point)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
return &CheckResponse{r.mon.GetInfo()}, nil
}
}

val := value.New(cpuSum / float64(cpuAmount))
if !r.CPULoadThresholds.isEmpty() {
code := r.CPULoadThresholds.checkValue(val)
r.mon.UpdateStatusIf(code != monitoringplugin.OK, code, fmt.Sprintf("average cpu load is %s%%", val))
}

if cpuAmount > 1 {
fl, err := val.Float64()
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "can't parse value to error", true) {
return &CheckResponse{r.mon.GetInfo()}, nil
}
err = r.mon.AddPerformanceDataPoint(monitoringplugin.NewPerformanceDataPoint("cpu_load_average", fmt.Sprintf("%.3f", fl), "%"))
val := cpuSum / float64(cpuAmount)
err = r.mon.AddPerformanceDataPoint(
monitoringplugin.NewPerformanceDataPoint("cpu_load_average", fmt.Sprintf("%.3f", val)).
SetUnit("%").
SetThresholds(r.CPULoadThresholds))
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
return &CheckResponse{r.mon.GetInfo()}, nil
}
Expand Down
9 changes: 6 additions & 3 deletions core/request/check_disk_request.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package request

import "context"
import (
"context"
"github.com/inexio/go-monitoringplugin"
)

// CheckDiskRequest
//
Expand All @@ -9,11 +12,11 @@ import "context"
// swagger:model
type CheckDiskRequest struct {
// CheckDeviceRequest is embedded; its validate method is called by this request's validate.
CheckDeviceRequest
// DiskThresholds holds the warning/critical thresholds for the disk check;
// it is (de)serialized under the key "diskThresholds" in JSON and XML.
DiskThresholds CheckThresholds `json:"diskThresholds" xml:"diskThresholds"`
DiskThresholds monitoringplugin.Thresholds `json:"diskThresholds" xml:"diskThresholds"`
}

func (r *CheckDiskRequest) validate(ctx context.Context) error {
if err := r.DiskThresholds.validate(); err != nil {
if err := r.DiskThresholds.Validate(); err != nil {
return err
}
return r.CheckDeviceRequest.validate(ctx)
Expand Down
14 changes: 5 additions & 9 deletions core/request/check_disk_request_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"context"
"fmt"
"github.com/inexio/go-monitoringplugin"
"github.com/inexio/thola/core/value"
)

func (r *CheckDiskRequest) process(ctx context.Context) (Response, error) {
Expand All @@ -21,14 +20,14 @@ func (r *CheckDiskRequest) process(ctx context.Context) (Response, error) {

for _, storage := range disk.Storages {
if storage.Type != nil && storage.Description != nil && storage.Available != nil && storage.Used != nil {
p := monitoringplugin.NewPerformanceDataPoint("disk_available", *storage.Available, "KB").SetLabel(*storage.Description)
p := monitoringplugin.NewPerformanceDataPoint("disk_available", *storage.Available).SetUnit("KB").SetLabel(*storage.Description)
err = r.mon.AddPerformanceDataPoint(p)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
return &CheckResponse{r.mon.GetInfo()}, nil
}

p = monitoringplugin.NewPerformanceDataPoint("disk_used", *storage.Used, "KB").SetLabel(*storage.Description)
p = monitoringplugin.NewPerformanceDataPoint("disk_used", *storage.Used).SetUnit("KB").SetLabel(*storage.Description)
err = r.mon.AddPerformanceDataPoint(p)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
Expand All @@ -37,17 +36,14 @@ func (r *CheckDiskRequest) process(ctx context.Context) (Response, error) {

// get percentage of free part on the storage
free := fmt.Sprintf("%.2f", 100-float64(*storage.Used)/float64(*storage.Available)*100)
p = monitoringplugin.NewPerformanceDataPoint("disk_free", free, "%").SetLabel(*storage.Description)
p = monitoringplugin.NewPerformanceDataPoint("disk_free", free).SetUnit("%").
SetLabel(*storage.Description).
SetThresholds(r.DiskThresholds)
err = r.mon.AddPerformanceDataPoint(p)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
return &CheckResponse{r.mon.GetInfo()}, nil
}
val := value.New(free)
if !r.DiskThresholds.isEmpty() {
code := r.DiskThresholds.checkValue(val)
r.mon.UpdateStatusIf(code != monitoringplugin.OK, code, fmt.Sprintf("disk usage at %s is %s%%", *storage.Description, val))
}
}
}

Expand Down
10 changes: 5 additions & 5 deletions core/request/check_hardware_health_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@ func (r *CheckHardwareHealthRequest) process(ctx context.Context) (Response, err

hhRequest := ReadHardwareHealthRequest{ReadRequest{r.BaseRequest}}
response, err := hhRequest.process(ctx)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while processing read sbc request", true) {
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while processing read hardware health request", true) {
return &CheckResponse{r.mon.GetInfo()}, nil
}
res := response.(*ReadHardwareHealthResponse)

if res.EnvironmentMonitorState != nil {
err = r.mon.AddPerformanceDataPoint(monitoringplugin.NewPerformanceDataPoint("environment_monitor_state", *res.EnvironmentMonitorState, ""))
err = r.mon.AddPerformanceDataPoint(monitoringplugin.NewPerformanceDataPoint("environment_monitor_state", *res.EnvironmentMonitorState))
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
return &CheckResponse{r.mon.GetInfo()}, nil
}

// state 2 only works for oracle-acme sbs, this needs to be generalized once check hardware health is made for all device classes
// comparing against state 2 only works for oracle-acme SBCs; this needs to be generalized once check hardware health is implemented for all device classes
r.mon.UpdateStatusIf(*res.EnvironmentMonitorState != 2, monitoringplugin.CRITICAL, "environment monitor state is critical")
}

Expand All @@ -33,7 +33,7 @@ func (r *CheckHardwareHealthRequest) process(ctx context.Context) (Response, err
r.mon.PrintPerformanceData(false)
return &CheckResponse{r.mon.GetInfo()}, nil
}
p := monitoringplugin.NewPerformanceDataPoint("fan_state", *fan.State, "").SetLabel(*fan.Description)
p := monitoringplugin.NewPerformanceDataPoint("fan_state", *fan.State).SetLabel(*fan.Description)
err = r.mon.AddPerformanceDataPoint(p)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
Expand All @@ -46,7 +46,7 @@ func (r *CheckHardwareHealthRequest) process(ctx context.Context) (Response, err
r.mon.PrintPerformanceData(false)
return &CheckResponse{r.mon.GetInfo()}, nil
}
p := monitoringplugin.NewPerformanceDataPoint("power_supply_state", *powerSupply.State, "").SetLabel(*powerSupply.Description)
p := monitoringplugin.NewPerformanceDataPoint("power_supply_state", *powerSupply.State).SetLabel(*powerSupply.Description)
err = r.mon.AddPerformanceDataPoint(p)
if r.mon.UpdateStatusOnError(err, monitoringplugin.UNKNOWN, "error while adding performance data point", true) {
r.mon.PrintPerformanceData(false)
Expand Down

0 comments on commit dd42e15

Please sign in to comment.