/
notify.go
431 lines (379 loc) · 14.6 KB
/
notify.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
package notify
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
"github.com/analogj/go-util/utils"
"github.com/analogj/scrutiny/webapp/backend/pkg"
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
"github.com/analogj/scrutiny/webapp/backend/pkg/database"
"github.com/analogj/scrutiny/webapp/backend/pkg/models"
"github.com/analogj/scrutiny/webapp/backend/pkg/models/measurements"
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
"github.com/containrrr/shoutrrr"
shoutrrrTypes "github.com/containrrr/shoutrrr/pkg/types"
"github.com/gin-gonic/gin"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
const NotifyFailureTypeEmailTest = "EmailTest"
const NotifyFailureTypeBothFailure = "SmartFailure" //SmartFailure always takes precedence when Scrutiny & Smart failed.
const NotifyFailureTypeSmartFailure = "SmartFailure"
const NotifyFailureTypeScrutinyFailure = "ScrutinyFailure"
// ShouldNotify check if the error Message should be filtered (level mismatch or filtered_attributes)
func ShouldNotify(logger logrus.FieldLogger, device models.Device, smartAttrs measurements.Smart, statusThreshold pkg.MetricsStatusThreshold, statusFilterAttributes pkg.MetricsStatusFilterAttributes, repeatNotifications bool, c *gin.Context, deviceRepo database.DeviceRepo) bool {
// 1. check if the device is healthy
if device.DeviceStatus == pkg.DeviceStatusPassed {
return false
}
//TODO: cannot check for warning notifyLevel yet.
// setup constants for comparison
var requiredDeviceStatus pkg.DeviceStatus
var requiredAttrStatus pkg.AttributeStatus
if statusThreshold == pkg.MetricsStatusThresholdBoth {
// either scrutiny or smart failures should trigger an email
requiredDeviceStatus = pkg.DeviceStatusSet(pkg.DeviceStatusFailedSmart, pkg.DeviceStatusFailedScrutiny)
requiredAttrStatus = pkg.AttributeStatusSet(pkg.AttributeStatusFailedSmart, pkg.AttributeStatusFailedScrutiny)
} else if statusThreshold == pkg.MetricsStatusThresholdSmart {
//only smart failures
requiredDeviceStatus = pkg.DeviceStatusFailedSmart
requiredAttrStatus = pkg.AttributeStatusFailedSmart
} else {
requiredDeviceStatus = pkg.DeviceStatusFailedScrutiny
requiredAttrStatus = pkg.AttributeStatusFailedScrutiny
}
// This is the only case where individual attributes need not be considered
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesAll && repeatNotifications {
return pkg.DeviceStatusHas(device.DeviceStatus, requiredDeviceStatus)
}
var failingAttributes []string
// Loop through the attributes to find the failing ones
for attrId, attrData := range smartAttrs.Attributes {
var status pkg.AttributeStatus = attrData.GetStatus()
// Skip over passing attributes
if status == pkg.AttributeStatusPassed {
continue
}
// If the user only wants to consider critical attributes, we have to check
// if the not-passing attribute is critical or not
if statusFilterAttributes == pkg.MetricsStatusFilterAttributesCritical {
critical := false
if device.IsScsi() {
critical = thresholds.ScsiMetadata[attrId].Critical
} else if device.IsNvme() {
critical = thresholds.NmveMetadata[attrId].Critical
} else {
//this is ATA
attrIdInt, err := strconv.Atoi(attrId)
if err != nil {
continue
}
critical = thresholds.AtaMetadata[attrIdInt].Critical
}
// Skip non-critical, non-passing attributes when this setting is on
if !critical {
continue
}
}
// Record any attribute that doesn't get skipped by the above two checks
failingAttributes = append(failingAttributes, attrId)
}
// If the user doesn't want repeated notifications when the failing value doesn't change, we need to get the last value from the db
var lastPoints []measurements.Smart
var err error
if !repeatNotifications {
lastPoints, err = deviceRepo.GetSmartAttributeHistory(c, c.Param("wwn"), database.DURATION_KEY_FOREVER, 1, 1, failingAttributes)
if err == nil || len(lastPoints) < 1 {
logger.Warningln("Could not get the most recent data points from the database. This is expected to happen only if this is the very first submission of data for the device.")
}
}
for _, attrId := range failingAttributes {
attrStatus := smartAttrs.Attributes[attrId].GetStatus()
if pkg.AttributeStatusHas(attrStatus, requiredAttrStatus) {
if repeatNotifications {
return true
}
// This is checked again here to avoid repeating the entire for loop in the check above.
// Probably unnoticeably worse performance, but cleaner code.
if err != nil || len(lastPoints) < 1 || lastPoints[0].Attributes[attrId].GetTransformedValue() != smartAttrs.Attributes[attrId].GetTransformedValue() {
return true
}
}
}
return false
}
// TODO: include user label for device.
type Payload struct {
HostId string `json:"host_id,omitempty"` //host id (optional)
DeviceType string `json:"device_type"` //ATA/SCSI/NVMe
DeviceName string `json:"device_name"` //dev/sda
DeviceSerial string `json:"device_serial"` //WDDJ324KSO
Test bool `json:"test"` // false
//private, populated during init (marked as Public for JSON serialization)
Date string `json:"date"` //populated by Send function.
FailureType string `json:"failure_type"` //EmailTest, BothFail, SmartFail, ScrutinyFail
Subject string `json:"subject"`
Message string `json:"message"`
}
func NewPayload(device models.Device, test bool, currentTime ...time.Time) Payload {
payload := Payload{
HostId: strings.TrimSpace(device.HostId),
DeviceType: device.DeviceType,
DeviceName: device.DeviceName,
DeviceSerial: device.SerialNumber,
Test: test,
}
//validate that the Payload is populated
var sendDate time.Time
if currentTime != nil && len(currentTime) > 0 {
sendDate = currentTime[0]
} else {
sendDate = time.Now()
}
payload.Date = sendDate.Format(time.RFC3339)
payload.FailureType = payload.GenerateFailureType(device.DeviceStatus)
payload.Subject = payload.GenerateSubject()
payload.Message = payload.GenerateMessage()
return payload
}
func (p *Payload) GenerateFailureType(deviceStatus pkg.DeviceStatus) string {
//generate a failure type, given Test and DeviceStatus
if p.Test {
return NotifyFailureTypeEmailTest // must be an email test if "Test" is true
}
if pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedSmart) && pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedScrutiny) {
return NotifyFailureTypeBothFailure //both failed
} else if pkg.DeviceStatusHas(deviceStatus, pkg.DeviceStatusFailedSmart) {
return NotifyFailureTypeSmartFailure //only SMART failed
} else {
return NotifyFailureTypeScrutinyFailure //only Scrutiny failed
}
}
func (p *Payload) GenerateSubject() string {
//generate a detailed failure message
var subject string
if len(p.HostId) > 0 {
subject = fmt.Sprintf("Scrutiny SMART error (%s) detected on [host]device: [%s]%s", p.FailureType, p.HostId, p.DeviceName)
} else {
subject = fmt.Sprintf("Scrutiny SMART error (%s) detected on device: %s", p.FailureType, p.DeviceName)
}
return subject
}
func (p *Payload) GenerateMessage() string {
//generate a detailed failure message
messageParts := []string{}
messageParts = append(messageParts, fmt.Sprintf("Scrutiny SMART error notification for device: %s", p.DeviceName))
if len(p.HostId) > 0 {
messageParts = append(messageParts, fmt.Sprintf("Host Id: %s", p.HostId))
}
messageParts = append(messageParts,
fmt.Sprintf("Failure Type: %s", p.FailureType),
fmt.Sprintf("Device Name: %s", p.DeviceName),
fmt.Sprintf("Device Serial: %s", p.DeviceSerial),
fmt.Sprintf("Device Type: %s", p.DeviceType),
"",
fmt.Sprintf("Date: %s", p.Date),
)
if p.Test {
messageParts = append([]string{"TEST NOTIFICATION:"}, messageParts...)
}
return strings.Join(messageParts, "\n")
}
func New(logger logrus.FieldLogger, appconfig config.Interface, device models.Device, test bool) Notify {
return Notify{
Logger: logger,
Config: appconfig,
Payload: NewPayload(device, test),
}
}
type Notify struct {
Logger logrus.FieldLogger
Config config.Interface
Payload Payload
}
func (n *Notify) Send() error {
//retrieve list of notification endpoints from config file
configUrls := n.Config.GetStringSlice("notify.urls")
n.Logger.Debugf("Configured notification services: %v", configUrls)
if len(configUrls) == 0 {
n.Logger.Infof("No notification endpoints configured. Skipping failure notification.")
return nil
}
//remove http:// https:// and script:// prefixed urls
notifyWebhooks := []string{}
notifyScripts := []string{}
notifyShoutrrr := []string{}
for ndx := range configUrls {
if strings.HasPrefix(configUrls[ndx], "https://") || strings.HasPrefix(configUrls[ndx], "http://") {
notifyWebhooks = append(notifyWebhooks, configUrls[ndx])
} else if strings.HasPrefix(configUrls[ndx], "script://") {
notifyScripts = append(notifyScripts, configUrls[ndx])
} else {
notifyShoutrrr = append(notifyShoutrrr, configUrls[ndx])
}
}
n.Logger.Debugf("Configured scripts: %v", notifyScripts)
n.Logger.Debugf("Configured webhooks: %v", notifyWebhooks)
n.Logger.Debugf("Configured shoutrrr: %v", notifyShoutrrr)
//run all scripts, webhooks and shoutrr commands in parallel
//var wg sync.WaitGroup
var eg errgroup.Group
for _, url := range notifyWebhooks {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendWebhookNotification(_url) })
}
for _, url := range notifyScripts {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendScriptNotification(_url) })
}
for _, url := range notifyShoutrrr {
// execute collection in parallel go-routines
_url := url
eg.Go(func() error { return n.SendShoutrrrNotification(_url) })
}
//and wait for completion, error or timeout.
n.Logger.Debugf("Main: waiting for notifications to complete.")
if err := eg.Wait(); err == nil {
n.Logger.Info("Successfully sent notifications. Check logs for more information.")
return nil
} else {
n.Logger.Error("One or more notifications failed to send successfully. See logs for more information.")
return err
}
////wg.Wait()
//if waitTimeout(&wg, time.Minute) { //wait for 1 minute
// fmt.Println("Timed out while sending notifications")
//} else {
//}
//return nil
}
func (n *Notify) SendWebhookNotification(webhookUrl string) error {
n.Logger.Infof("Sending Webhook to %s", webhookUrl)
requestBody, err := json.Marshal(n.Payload)
if err != nil {
n.Logger.Errorf("An error occurred while sending Webhook to %s: %v", webhookUrl, err)
return err
}
resp, err := http.Post(webhookUrl, "application/json", bytes.NewBuffer(requestBody))
if err != nil {
n.Logger.Errorf("An error occurred while sending Webhook to %s: %v", webhookUrl, err)
return err
}
defer resp.Body.Close()
//we don't care about resp body content, but maybe we should log it?
return nil
}
func (n *Notify) SendScriptNotification(scriptUrl string) error {
//check if the script exists.
scriptPath := strings.TrimPrefix(scriptUrl, "script://")
n.Logger.Infof("Executing Script %s", scriptPath)
if !utils.FileExists(scriptPath) {
n.Logger.Errorf("Script does not exist: %s", scriptPath)
return errors.New(fmt.Sprintf("custom script path does not exist: %s", scriptPath))
}
copyEnv := os.Environ()
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_SUBJECT=%s", n.Payload.Subject))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DATE=%s", n.Payload.Date))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_FAILURE_TYPE=%s", n.Payload.FailureType))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_NAME=%s", n.Payload.DeviceName))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_TYPE=%s", n.Payload.DeviceType))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_DEVICE_SERIAL=%s", n.Payload.DeviceSerial))
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_MESSAGE=%s", n.Payload.Message))
if len(n.Payload.HostId) > 0 {
copyEnv = append(copyEnv, fmt.Sprintf("SCRUTINY_HOST_ID=%s", n.Payload.HostId))
}
err := utils.CmdExec(scriptPath, []string{}, "", copyEnv, "")
if err != nil {
n.Logger.Errorf("An error occurred while executing script %s: %v", scriptPath, err)
return err
}
return nil
}
func (n *Notify) SendShoutrrrNotification(shoutrrrUrl string) error {
fmt.Printf("Sending Notifications to %v", shoutrrrUrl)
n.Logger.Infof("Sending notifications to %v", shoutrrrUrl)
sender, err := shoutrrr.CreateSender(shoutrrrUrl)
if err != nil {
n.Logger.Errorf("An error occurred while sending notifications %v: %v", shoutrrrUrl, err)
return err
}
//sender.SetLogger(n.Logger.)
serviceName, params, err := n.GenShoutrrrNotificationParams(shoutrrrUrl)
n.Logger.Debugf("notification data for %s: (%s)\n%v", serviceName, shoutrrrUrl, params)
if err != nil {
n.Logger.Errorf("An error occurred occurred while generating notification payload for %s:\n %v", serviceName, shoutrrrUrl, err)
return err
}
errs := sender.Send(n.Payload.Message, params)
if len(errs) > 0 {
var errstrings []string
for _, err := range errs {
if err == nil || err.Error() == "" {
continue
}
errstrings = append(errstrings, err.Error())
}
//sometimes there are empty errs, we're going to skip them.
if len(errstrings) == 0 {
return nil
} else {
n.Logger.Errorf("One or more errors occurred while sending notifications for %s:", shoutrrrUrl)
n.Logger.Error(errs)
return errors.New(strings.Join(errstrings, "\n"))
}
}
return nil
}
func (n *Notify) GenShoutrrrNotificationParams(shoutrrrUrl string) (string, *shoutrrrTypes.Params, error) {
serviceURL, err := url.Parse(shoutrrrUrl)
if err != nil {
return "", nil, err
}
serviceName := serviceURL.Scheme
params := &shoutrrrTypes.Params{}
logoUrl := "https://raw.githubusercontent.com/AnalogJ/scrutiny/master/webapp/frontend/src/ms-icon-144x144.png"
subject := n.Payload.Subject
switch serviceName {
// no params supported for these services
case "hangouts", "mattermost", "teams", "rocketchat":
break
case "discord":
(*params)["title"] = subject
case "gotify":
(*params)["title"] = subject
case "ifttt":
(*params)["title"] = subject
case "join":
(*params)["title"] = subject
(*params)["icon"] = logoUrl
case "ntfy":
(*params)["title"] = subject
(*params)["icon"] = logoUrl
case "opsgenie":
(*params)["title"] = subject
case "pushbullet":
(*params)["title"] = subject
case "pushover":
(*params)["title"] = subject
case "slack":
(*params)["title"] = subject
case "smtp":
(*params)["subject"] = subject
case "standard":
(*params)["subject"] = subject
case "telegram":
(*params)["title"] = subject
case "zulip":
(*params)["topic"] = subject
}
return serviceName, params, nil
}