syscomponent.go (from a fork of rancher/rancher)
package watcher

import (
	"context"
	"strings"
	"time"

	"github.com/rancher/rancher/pkg/controllers/user/alert/manager"
	"github.com/rancher/rancher/pkg/ticker"
	"github.com/rancher/types/apis/core/v1"
	"github.com/rancher/types/apis/management.cattle.io/v3"
	"github.com/rancher/types/config"
	"github.com/sirupsen/logrus"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
)
// SysComponentWatcher periodically checks the health of Kubernetes system
// components and sends cluster alerts for any component whose
// ComponentHealthy condition is false.
type SysComponentWatcher struct {
	componentStatuses  v1.ComponentStatusInterface
	clusterAlertLister v3.ClusterAlertLister
	alertManager       *manager.Manager
	clusterName        string
	clusterLister      v3.ClusterLister
}
// StartSysComponentWatcher wires a SysComponentWatcher to the given user
// cluster and starts its polling loop in a background goroutine.
func StartSysComponentWatcher(ctx context.Context, cluster *config.UserContext, manager *manager.Manager) {
	s := &SysComponentWatcher{
		componentStatuses:  cluster.Core.ComponentStatuses(""),
		clusterAlertLister: cluster.Management.Management.ClusterAlerts(cluster.ClusterName).Controller().Lister(),
		alertManager:       manager,
		clusterName:        cluster.ClusterName,
		clusterLister:      cluster.Management.Management.Clusters("").Controller().Lister(),
	}
	go s.watch(ctx, syncInterval)
}
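
// Note: syncInterval is not declared in this file; it is a package-level
// value defined in another file of the watcher package. A hypothetical
// declaration would look like the sketch below (the 30-second value is an
// assumption for illustration, not taken from this file):
//
//	const syncInterval = 30 * time.Second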
// watch runs watchRule on every tick until ctx is cancelled.
func (w *SysComponentWatcher) watch(ctx context.Context, interval time.Duration) {
	for range ticker.Context(ctx, interval) {
		err := w.watchRule()
		if err != nil {
			logrus.Infof("Failed to watch system component, error: %v", err)
		}
	}
}
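
// ticker.Context is assumed here to return a channel that fires on every
// interval and is closed when ctx is cancelled, which is what lets the
// range loop above terminate cleanly. A minimal standard-library sketch of
// the same polling pattern, under that assumption:
//
//	t := time.NewTicker(interval)
//	defer t.Stop()
//	for {
//		select {
//		case <-ctx.Done():
//			return
//		case <-t.C:
//			_ = w.watchRule()
//		}
//	}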
// watchRule lists all active cluster alerts that target a system service
// and checks the matching component statuses.
func (w *SysComponentWatcher) watchRule() error {
	if !w.alertManager.IsDeploy {
		return nil
	}

	clusterAlerts, err := w.clusterAlertLister.List("", labels.NewSelector())
	if err != nil {
		return err
	}

	statuses, err := w.componentStatuses.List(metav1.ListOptions{})
	if err != nil {
		return err
	}

	for _, alert := range clusterAlerts {
		if alert.Status.AlertState == "inactive" {
			continue
		}
		if alert.Spec.TargetSystemService != nil {
			w.checkComponentHealthy(statuses, alert)
		}
	}
	return nil
}
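
// The List call above uses the empty namespace and an empty selector, so it
// returns every ClusterAlert in the lister's cache. If the listing ever
// needed narrowing by label, a selector could be built instead; the label
// key and value below are purely hypothetical:
//
//	sel := labels.SelectorFromSet(labels.Set{"severity": "critical"})
//	clusterAlerts, err := w.clusterAlertLister.List("", sel)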
// checkComponentHealthy scans the component statuses whose names start with
// the alert's target service name and sends an alert for the first one whose
// ComponentHealthy condition is false.
func (w *SysComponentWatcher) checkComponentHealthy(statuses *v1.ComponentStatusList, alert *v3.ClusterAlert) {
	alertID := alert.Namespace + "-" + alert.Name
	for _, cs := range statuses.Items {
		if !strings.HasPrefix(cs.Name, alert.Spec.TargetSystemService.Condition) {
			continue
		}
		for _, cond := range cs.Conditions {
			if cond.Type != corev1.ComponentHealthy || cond.Status != corev1.ConditionFalse {
				continue
			}

			// Prefer the cluster's display name; fall back to its ID.
			clusterDisplayName := w.clusterName
			cluster, err := w.clusterLister.Get("", w.clusterName)
			if err != nil {
				logrus.Warnf("Failed to get cluster for %s: %v", w.clusterName, err)
			} else {
				clusterDisplayName = cluster.Spec.DisplayName
			}

			data := map[string]string{
				"alert_type":     "systemService",
				"alert_id":       alertID,
				"severity":       alert.Spec.Severity,
				"alert_name":     alert.Spec.DisplayName,
				"cluster_name":   clusterDisplayName,
				"component_name": alert.Spec.TargetSystemService.Condition + ":" + cs.Name,
			}
			if cond.Message != "" {
				data["logs"] = cond.Message
			}
			if err := w.alertManager.SendAlert(data); err != nil {
				logrus.Debugf("Failed to send alert: %v", err)
			}
			return
		}
	}
}
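
// Illustration of the prefix matching in checkComponentHealthy: component
// statuses are typically reported per instance, so the alert's target
// service name acts as a name prefix. The names below are hypothetical
// examples, not values from this file:
//
//	for _, name := range []string{"etcd-0", "etcd-1", "scheduler"} {
//		if strings.HasPrefix(name, "etcd") {
//			fmt.Println("would check:", name) // etcd-0, etcd-1
//		}
//	}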