Skip to content

Commit

Permalink
go.d storcli add initial support for mpt3sas controllers (netdata#17938)
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyam8 committed Jun 17, 2024
1 parent aa9c105 commit 3aaf6bf
Show file tree
Hide file tree
Showing 8 changed files with 2,411 additions and 40 deletions.
37 changes: 33 additions & 4 deletions src/go/collectors/go.d.plugin/modules/storcli/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
)

const (
prioControllerStatus = module.Priority + iota
prioControllerHealthStatus = module.Priority + iota
prioControllerStatus
prioControllerBBUStatus

prioPhysDriveErrors
Expand All @@ -22,12 +23,30 @@ const (
prioBBUTemperature
)

var controllerChartsTmpl = module.Charts{
var controllerMegaraidChartsTmpl = module.Charts{
controllerHealthStatusChartTmpl.Copy(),
controllerStatusChartTmpl.Copy(),
controllerBBUStatusChartTmpl.Copy(),
}

// controllerMpt3sasChartsTmpl is the chart set used for controllers whose
// driver name is mpt3sas; it contains only the health status chart.
var controllerMpt3sasChartsTmpl = module.Charts{
controllerHealthStatusChartTmpl.Copy(),
}

var (
// controllerHealthStatusChartTmpl is the per-controller health status chart
// template. The %s placeholders in the chart ID and dimension IDs are filled
// in with the controller number when the chart is added.
controllerHealthStatusChartTmpl = module.Chart{
ID: "controller_%s_health_status",
Title: "Controller health status",
Units: "status",
Fam: "cntrl status",
Ctx: "storcli.controller_health_status",
Type: module.Line,
Priority: prioControllerHealthStatus,
Dims: module.Dims{
{ID: "cntrl_%s_health_status_healthy", Name: "healthy"},
{ID: "cntrl_%s_health_status_unhealthy", Name: "unhealthy"},
},
}
controllerStatusChartTmpl = module.Chart{
ID: "controller_%s_status",
Title: "Controller status",
Expand Down Expand Up @@ -139,15 +158,25 @@ var (
)

func (s *StorCli) addControllerCharts(cntrl controllerInfo) {
charts := controllerChartsTmpl.Copy()
var charts *module.Charts

switch cntrl.Version.DriverName {
case driverNameMegaraid:
charts = controllerMegaraidChartsTmpl.Copy()
case driverNameSas:
charts = controllerMpt3sasChartsTmpl.Copy()
default:
return
}

num := strconv.Itoa(cntrl.Basics.Controller)

for _, chart := range *charts {
chart.ID = fmt.Sprintf(chart.ID, num)
chart.Labels = []module.Label{
{Key: "controller_number", Value: num},
{Key: "model", Value: cntrl.Basics.Model},
{Key: "model", Value: strings.TrimSpace(cntrl.Basics.Model)},
{Key: "driver_name", Value: cntrl.Version.DriverName},
}
for _, dim := range chart.Dims {
dim.ID = fmt.Sprintf(dim.ID, num)
Expand Down
35 changes: 24 additions & 11 deletions src/go/collectors/go.d.plugin/modules/storcli/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ package storcli

import "fmt"

// Known values of the "Driver Name" field reported in a controller's version
// data. They select which collection path and which chart set are used.
const (
driverNameMegaraid = "megaraid_sas"
driverNameSas = "mpt3sas"
)

func (s *StorCli) collect() (map[string]int64, error) {
cntrlResp, err := s.queryControllersInfo()
if err != nil {
Expand All @@ -12,20 +17,28 @@ func (s *StorCli) collect() (map[string]int64, error) {

mx := make(map[string]int64)

if err := s.collectControllersInfo(mx, cntrlResp); err != nil {
return nil, fmt.Errorf("error collecting controller info: %s", err)
}

drives := cntrlResp.Controllers[0].ResponseData.PDList
driver := cntrlResp.Controllers[0].ResponseData.Version.DriverName
if driver == "megaraid_sas" && len(drives) > 0 {
drivesResp, err := s.queryDrivesInfo()
if err != nil {
return nil, fmt.Errorf("error collecting drives info: %s", err)

switch driver {
case driverNameMegaraid:
if err := s.collectMegaraidControllersInfo(mx, cntrlResp); err != nil {
return nil, fmt.Errorf("failed to collect megaraid controller info: %s", err)
}
if len(cntrlResp.Controllers[0].ResponseData.PDList) > 0 {
drivesResp, err := s.queryDrivesInfo()
if err != nil {
return nil, fmt.Errorf("failed to collect megaraid drive info: %s", err)
}
if err := s.collectMegaRaidDrives(mx, drivesResp); err != nil {
return nil, err
}
}
if err := s.collectMegaRaidDrives(mx, drivesResp); err != nil {
return nil, err
case driverNameSas:
if err := s.collectMpt3sasControllersInfo(mx, cntrlResp); err != nil {
return nil, fmt.Errorf("failed to collect mpt3sas controller info: %s", err)
}
default:
return nil, fmt.Errorf("unknown driver: %s", driver)
}

return mx, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ type (
DriverName string `json:"Driver Name"`
} `json:"Version"`
Status struct {
ControllerStatus string `json:"Controller Status"`
BBUStatus storNumber `json:"BBU Status"`
ControllerStatus string `json:"Controller Status"`
BBUStatus *storNumber `json:"BBU Status"`
} `json:"Status"`
BBUInfo []struct {
Model string `json:"Model"`
Expand All @@ -43,7 +43,7 @@ type (
}
)

func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersInfoResponse) error {
func (s *StorCli) collectMegaraidControllersInfo(mx map[string]int64, resp *controllersInfoResponse) error {
for _, v := range resp.Controllers {
cntrl := v.ResponseData

Expand All @@ -56,22 +56,33 @@ func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersI

px := fmt.Sprintf("cntrl_%s_", cntrlNum)

for _, st := range []string{"healthy", "unhealthy"} {
mx[px+"health_status_"+st] = 0
}
if strings.ToLower(cntrl.Status.ControllerStatus) == "optimal" {
mx[px+"health_status_healthy"] = 1
} else {
mx[px+"health_status_unhealthy"] = 1
}

for _, st := range []string{"optimal", "degraded", "partially_degraded", "failed"} {
mx[px+"status_"+st] = 0
}
mx[px+"status_"+strings.ToLower(cntrl.Status.ControllerStatus)] = 1

for _, st := range []string{"healthy", "unhealthy", "na"} {
mx[px+"bbu_status_"+st] = 0
}
// https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/issues/27
switch cntrl.Status.BBUStatus {
case "0", "8", "4096": // 0 good, 8 charging
mx[px+"bbu_status_healthy"] = 1
case "NA", "N/A":
mx[px+"bbu_status_na"] = 1
default:
mx[px+"bbu_status_unhealthy"] = 1
if cntrl.Status.BBUStatus != nil {
for _, st := range []string{"healthy", "unhealthy", "na"} {
mx[px+"bbu_status_"+st] = 0
}
// https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/issues/27
switch *cntrl.Status.BBUStatus {
case "0", "8", "4096": // 0 good, 8 charging
mx[px+"bbu_status_healthy"] = 1
case "NA", "N/A":
mx[px+"bbu_status_na"] = 1
default:
mx[px+"bbu_status_unhealthy"] = 1
}
}

for i, bbu := range cntrl.BBUInfo {
Expand All @@ -92,6 +103,32 @@ func (s *StorCli) collectControllersInfo(mx map[string]int64, resp *controllersI
return nil
}

// collectMpt3sasControllersInfo writes the health status of every controller
// found in resp into mx and adds charts for controllers that have not been
// seen before.
//
// For each controller two keys are written, "cntrl_<N>_health_status_healthy"
// and "cntrl_<N>_health_status_unhealthy", where <N> is the controller number.
// Exactly one of them is set to 1: "healthy" when the lowercased controller
// status equals "ok", "unhealthy" otherwise.
//
// It always returns nil.
func (s *StorCli) collectMpt3sasControllersInfo(mx map[string]int64, resp *controllersInfoResponse) error {
	for _, item := range resp.Controllers {
		data := item.ResponseData
		num := strconv.Itoa(data.Basics.Controller)

		// Charts are added only the first time a controller number shows up.
		if seen := s.controllers[num]; !seen {
			s.controllers[num] = true
			s.addControllerCharts(data)
		}

		prefix := fmt.Sprintf("cntrl_%s_", num)
		healthyKey := prefix + "health_status_healthy"
		unhealthyKey := prefix + "health_status_unhealthy"

		// Reset both dimensions, then raise the one matching the current state.
		mx[healthyKey], mx[unhealthyKey] = 0, 0

		switch strings.ToLower(data.Status.ControllerStatus) {
		case "ok":
			mx[healthyKey] = 1
		default:
			mx[unhealthyKey] = 1
		}
	}

	return nil
}

func (s *StorCli) queryControllersInfo() (*controllersInfoResponse, error) {
bs, err := s.exec.controllersInfo()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ type storNumber string // some int values can be 'N/A'
// UnmarshalJSON stores the raw JSON value as text, so both JSON numbers
// (e.g. 0) and JSON strings (e.g. "N/A") are accepted.
//
// encoding/json passes the raw encoded value to this method, which for a JSON
// string includes the surrounding double quotes. Those quotes are removed so
// that the stored value is N/A rather than "N/A"; with the quotes kept,
// comparisons against plain literals such as "NA" or "N/A" can never match.
// Escape sequences inside a JSON string are not decoded.
//
// It always returns nil.
func (n *storNumber) UnmarshalJSON(b []byte) error {
	if len(b) >= 2 && b[0] == '"' && b[len(b)-1] == '"' {
		b = b[1 : len(b)-1]
	}
	*n = storNumber(b)
	return nil
}

func (s *StorCli) collectMegaRaidDrives(mx map[string]int64, resp *drivesInfoResponse) error {
if resp == nil {
return nil
}

for _, cntrl := range resp.Controllers {
var ids []string
for k := range cntrl.ResponseData {
Expand Down
15 changes: 12 additions & 3 deletions src/go/collectors/go.d.plugin/modules/storcli/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ modules:
problems:
list: []
alerts:
- name: storcli_controller_status
metric: storcli.controller_status
info: RAID controller ${label:controller_number} health status is not optimal
- name: storcli_controller_health_status
metric: storcli.controller_health_status
info: RAID controller ${label:controller_number} is unhealthy
link: https://github.com/netdata/netdata/blob/master/src/health/health.d/storcli.conf
- name: storcli_controller_bbu_status
metric: storcli.controller_bbu_status
Expand Down Expand Up @@ -111,7 +111,16 @@ modules:
description: Controller number (index)
- name: model
description: Controller model
- name: driver_name
description: Controller driver (megaraid_sas or mpt3sas)
metrics:
- name: storcli.controller_health_status
description: Controller health status
unit: status
chart_type: line
dimensions:
- name: healthy
- name: unhealthy
- name: storcli.controller_status
description: Controller status
unit: status
Expand Down
27 changes: 23 additions & 4 deletions src/go/collectors/go.d.plugin/modules/storcli/storcli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@ var (

dataMegaControllerInfo, _ = os.ReadFile("testdata/megaraid-controllers-info.json")
dataMegaDrivesInfo, _ = os.ReadFile("testdata/megaraid-drives-info.json")

dataSasControllerInfo, _ = os.ReadFile("testdata/mpt3sas-controllers-info.json")
)

func Test_testDataIsValid(t *testing.T) {
for name, data := range map[string][]byte{
"dataConfigJSON": dataConfigJSON,
"dataConfigYAML": dataConfigYAML,

"dataConfigJSON": dataConfigJSON,
"dataConfigYAML": dataConfigYAML,
"dataMegaControllerInfo": dataMegaControllerInfo,
"dataMegaDrivesInfo": dataMegaDrivesInfo,
"dataSasControllerInfo": dataSasControllerInfo,
} {
require.NotNil(t, data, name)
}
Expand Down Expand Up @@ -147,12 +149,14 @@ func TestStorCli_Collect(t *testing.T) {
}{
"success MegaRAID controller": {
prepareMock: prepareMockMegaRaidOK,
wantCharts: len(controllerChartsTmpl)*1 + len(physDriveChartsTmpl)*6 + len(bbuChartsTmpl)*1,
wantCharts: len(controllerMegaraidChartsTmpl)*1 + len(physDriveChartsTmpl)*6 + len(bbuChartsTmpl)*1,
wantMetrics: map[string]int64{
"bbu_0_cntrl_0_temperature": 34,
"cntrl_0_bbu_status_healthy": 1,
"cntrl_0_bbu_status_na": 0,
"cntrl_0_bbu_status_unhealthy": 0,
"cntrl_0_health_status_healthy": 1,
"cntrl_0_health_status_unhealthy": 0,
"cntrl_0_status_degraded": 0,
"cntrl_0_status_failed": 0,
"cntrl_0_status_optimal": 1,
Expand Down Expand Up @@ -195,6 +199,14 @@ func TestStorCli_Collect(t *testing.T) {
"phys_drive_5000C500E5659BA7_cntrl_0_temperature": 27,
},
},
"success SAS controller": {
prepareMock: prepareMockSasOK,
wantCharts: len(controllerMpt3sasChartsTmpl) * 1,
wantMetrics: map[string]int64{
"cntrl_0_health_status_healthy": 1,
"cntrl_0_health_status_unhealthy": 0,
},
},
"err on exec": {
prepareMock: prepareMockErr,
wantMetrics: nil,
Expand Down Expand Up @@ -231,6 +243,13 @@ func prepareMockMegaRaidOK() *mockStorCliExec {
}
}

// prepareMockSasOK builds an exec mock whose controllers info is the mpt3sas
// fixture and whose drives info data is nil.
func prepareMockSasOK() *mockStorCliExec {
	var mock mockStorCliExec
	mock.controllersInfoData = dataSasControllerInfo
	mock.drivesInfoData = nil
	return &mock
}

func prepareMockErr() *mockStorCliExec {
return &mockStorCliExec{
errOnInfo: true,
Expand Down
Loading

0 comments on commit 3aaf6bf

Please sign in to comment.