-
Notifications
You must be signed in to change notification settings - Fork 35
/
cluster_health.go
112 lines (94 loc) · 3.65 KB
/
cluster_health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package collector
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"github.com/cprobe/cprobe/types"
"github.com/pkg/errors"
)
// colors lists the cluster health status values for which
// gatherClusterHealth emits a per-status sample, in emission order.
var colors = []string{"green", "yellow", "red"}
// clusterHealthResponse is the decoding target for the JSON document returned
// by the /_cluster/health endpoint. Each field maps to the JSON key named in
// its struct tag; keys that are absent from the document leave the field at
// its zero value.
type clusterHealthResponse struct {
	// Identification and overall state.
	ClusterName string `json:"cluster_name"`
	Status      string `json:"status"` // compared against the entries of colors
	TimedOut    bool   `json:"timed_out"`

	// Node counts.
	NumberOfNodes     int `json:"number_of_nodes"`
	NumberOfDataNodes int `json:"number_of_data_nodes"`

	// Shard counts by state.
	ActivePrimaryShards     int `json:"active_primary_shards"`
	ActiveShards            int `json:"active_shards"`
	RelocatingShards        int `json:"relocating_shards"`
	InitializingShards      int `json:"initializing_shards"`
	UnassignedShards        int `json:"unassigned_shards"`
	DelayedUnassignedShards int `json:"delayed_unassigned_shards"`

	// Pending work.
	NumberOfPendingTasks        int `json:"number_of_pending_tasks"`
	NumberOfInFlightFetch       int `json:"number_of_in_flight_fetch"`
	TaskMaxWaitingInQueueMillis int `json:"task_max_waiting_in_queue_millis"`

	ActiveShardsPercentAsNumber float64 `json:"active_shards_percent_as_number"`
}
// gatherClusterHealth fetches the cluster health document from target and
// records it into ss under the metric prefix namespace + "_cluster_health".
//
// The integer counters of the response are added in a single AddMetric call
// tagged with cluster=clusterName. The health status is then added once per
// entry of colors, tagged with cluster and status, carrying a "status" field
// whose value comes from colorValue for that color.
//
// ctx is accepted but not used by this function. A non-nil error is returned
// only when fetching or decoding the health document fails, in which case
// nothing is added to ss.
func (c *Config) gatherClusterHealth(ctx context.Context, target *url.URL, httpClient *http.Client, ss *types.Samples, clusterName string) error {
	health, err := c.fetchAndDecodeClusterHealth(target, httpClient)
	if err != nil {
		return errors.WithMessage(err, "failed to fetch and decode cluster health")
	}

	metric := namespace + "_cluster_health"

	// Counter fields, keyed by the name of the corresponding JSON key.
	counters := make(map[string]interface{}, 11)
	counters["active_primary_shards"] = health.ActivePrimaryShards
	counters["active_shards"] = health.ActiveShards
	counters["delayed_unassigned_shards"] = health.DelayedUnassignedShards
	counters["initializing_shards"] = health.InitializingShards
	counters["number_of_data_nodes"] = health.NumberOfDataNodes
	counters["number_of_in_flight_fetch"] = health.NumberOfInFlightFetch
	counters["task_max_waiting_in_queue_millis"] = health.TaskMaxWaitingInQueueMillis
	counters["number_of_nodes"] = health.NumberOfNodes
	counters["number_of_pending_tasks"] = health.NumberOfPendingTasks
	counters["relocating_shards"] = health.RelocatingShards
	counters["unassigned_shards"] = health.UnassignedShards
	ss.AddMetric(metric, counters, map[string]string{"cluster": clusterName})

	// One status sample per known color; fresh maps are built for every call.
	for i := range colors {
		statusField := map[string]interface{}{
			"status": colorValue(health, colors[i]),
		}
		statusTags := map[string]string{
			"cluster": clusterName,
			"status":  colors[i],
		}
		ss.AddMetric(metric, statusField, statusTags)
	}

	return nil
}
// fetchAndDecodeClusterHealth issues a GET for the /_cluster/health endpoint
// below target using httpClient and decodes the JSON body into a
// clusterHealthResponse.
//
// target is copied before its path is extended, so the caller's URL is left
// unmodified. On any failure (transport error, nil body, non-200 status, read
// error or decode error) the zero-value response is returned together with a
// non-nil error; transport, read and decode errors wrap their underlying
// cause so callers can inspect it with errors.Is / errors.As.
func (c *Config) fetchAndDecodeClusterHealth(target *url.URL, httpClient *http.Client) (clusterHealthResponse, error) {
	u := *target
	u.Path = path.Join(u.Path, "/_cluster/health")

	res, err := httpClient.Get(u.String())
	if err != nil {
		// The prefix is built from scheme, host and path only. u.Host is
		// used instead of Hostname()+":"+Port() so that a URL without an
		// explicit port does not produce a dangling colon and IPv6
		// literals keep their brackets.
		return clusterHealthResponse{}, fmt.Errorf("failed to get cluster health from %s://%s%s: %w",
			u.Scheme, u.Host, u.Path, err)
	}
	if res.Body == nil {
		return clusterHealthResponse{}, fmt.Errorf("empty response body")
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return clusterHealthResponse{}, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
	}

	bts, err := io.ReadAll(res.Body)
	if err != nil {
		return clusterHealthResponse{}, fmt.Errorf("reading cluster health response: %w", err)
	}

	var chr clusterHealthResponse
	if err := json.Unmarshal(bts, &chr); err != nil {
		// Unmarshal may have filled some fields before failing; do not
		// hand a half-populated value back alongside the error.
		return clusterHealthResponse{}, fmt.Errorf("decoding cluster health response: %w", err)
	}
	return chr, nil
}
// colorValue reports whether the status of clusterHealth equals color,
// encoded as a gauge value: 1 when they are equal, 0 otherwise. The
// comparison is an exact, case-sensitive string match.
func colorValue(clusterHealth clusterHealthResponse, color string) float64 {
	var indicator float64 // stays 0 unless the status matches
	if color == clusterHealth.Status {
		indicator = 1
	}
	return indicator
}