forked from argoproj/argo-cd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.go
200 lines (174 loc) · 7.04 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package metrics
import (
"net/http"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/labels"
argoappv1 "github.com/argoproj/argo-cd/pkg/apis/application/v1alpha1"
applister "github.com/argoproj/argo-cd/pkg/client/listers/application/v1alpha1"
"github.com/argoproj/argo-cd/util/git"
"github.com/argoproj/argo-cd/util/healthz"
)
type MetricsServer struct {
*http.Server
syncCounter *prometheus.CounterVec
k8sRequestCounter *prometheus.CounterVec
reconcileHistogram *prometheus.HistogramVec
}
const (
// MetricsPath is the endpoint to collect application metrics
MetricsPath = "/metrics"
)
// Follow Prometheus naming practices
// https://prometheus.io/docs/practices/naming/
var (
descAppDefaultLabels = []string{"namespace", "name", "project"}
descAppInfo = prometheus.NewDesc(
"argocd_app_info",
"Information about application.",
append(descAppDefaultLabels, "repo", "dest_server", "dest_namespace"),
nil,
)
descAppCreated = prometheus.NewDesc(
"argocd_app_created_time",
"Creation time in unix timestamp for an application.",
descAppDefaultLabels,
nil,
)
descAppSyncStatusCode = prometheus.NewDesc(
"argocd_app_sync_status",
"The application current sync status.",
append(descAppDefaultLabels, "sync_status"),
nil,
)
descAppHealthStatus = prometheus.NewDesc(
"argocd_app_health_status",
"The application current health status.",
append(descAppDefaultLabels, "health_status"),
nil,
)
)
// NewMetricsServer returns a new prometheus server which collects application metrics
func NewMetricsServer(addr string, appLister applister.ApplicationLister, healthCheck func() error) *MetricsServer {
mux := http.NewServeMux()
appRegistry := NewAppRegistry(appLister)
appRegistry.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))
appRegistry.MustRegister(prometheus.NewGoCollector())
mux.Handle(MetricsPath, promhttp.HandlerFor(appRegistry, promhttp.HandlerOpts{}))
healthz.ServeHealthCheck(mux, healthCheck)
syncCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_app_sync_total",
Help: "Number of application syncs.",
},
append(descAppDefaultLabels, "phase"),
)
appRegistry.MustRegister(syncCounter)
k8sRequestCounter := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_app_k8s_request_total",
Help: "Number of kubernetes requests executed during application reconciliation.",
},
append(descAppDefaultLabels, "response_code"),
)
appRegistry.MustRegister(k8sRequestCounter)
reconcileHistogram := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "argocd_app_reconcile",
Help: "Application reconciliation performance.",
// Buckets chosen after observing a ~2100ms mean reconcile time
Buckets: []float64{0.25, .5, 1, 2, 4, 8, 16},
},
append(descAppDefaultLabels),
)
appRegistry.MustRegister(reconcileHistogram)
return &MetricsServer{
Server: &http.Server{
Addr: addr,
Handler: mux,
},
syncCounter: syncCounter,
k8sRequestCounter: k8sRequestCounter,
reconcileHistogram: reconcileHistogram,
}
}
// IncSync increments the sync counter for an application
func (m *MetricsServer) IncSync(app *argoappv1.Application, state *argoappv1.OperationState) {
if !state.Phase.Completed() {
return
}
m.syncCounter.WithLabelValues(app.Namespace, app.Name, app.Spec.GetProject(), string(state.Phase)).Inc()
}
// IncKubernetesRequest increments the kubernetes requests counter for an application
func (m *MetricsServer) IncKubernetesRequest(app *argoappv1.Application, statusCode int) {
m.k8sRequestCounter.WithLabelValues(app.Namespace, app.Name, app.Spec.GetProject(), strconv.Itoa(statusCode)).Inc()
}
// IncReconcile increments the reconcile counter for an application
func (m *MetricsServer) IncReconcile(app *argoappv1.Application, duration time.Duration) {
m.reconcileHistogram.WithLabelValues(app.Namespace, app.Name, app.Spec.GetProject()).Observe(duration.Seconds())
}
type appCollector struct {
store applister.ApplicationLister
}
// NewAppCollector returns a prometheus collector for application metrics
func NewAppCollector(appLister applister.ApplicationLister) prometheus.Collector {
return &appCollector{
store: appLister,
}
}
// NewAppRegistry creates a new prometheus registry that collects applications
func NewAppRegistry(appLister applister.ApplicationLister) *prometheus.Registry {
registry := prometheus.NewRegistry()
registry.MustRegister(NewAppCollector(appLister))
return registry
}
// Describe implements the prometheus.Collector interface
func (c *appCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- descAppInfo
ch <- descAppCreated
ch <- descAppSyncStatusCode
ch <- descAppHealthStatus
}
// Collect implements the prometheus.Collector interface
func (c *appCollector) Collect(ch chan<- prometheus.Metric) {
apps, err := c.store.List(labels.NewSelector())
if err != nil {
log.Warnf("Failed to collect applications: %v", err)
return
}
for _, app := range apps {
collectApps(ch, app)
}
}
func boolFloat64(b bool) float64 {
if b {
return 1
}
return 0
}
func collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) {
addConstMetric := func(desc *prometheus.Desc, t prometheus.ValueType, v float64, lv ...string) {
project := app.Spec.GetProject()
lv = append([]string{app.Namespace, app.Name, project}, lv...)
ch <- prometheus.MustNewConstMetric(desc, t, v, lv...)
}
addGauge := func(desc *prometheus.Desc, v float64, lv ...string) {
addConstMetric(desc, prometheus.GaugeValue, v, lv...)
}
addGauge(descAppInfo, 1, git.NormalizeGitURL(app.Spec.Source.RepoURL), app.Spec.Destination.Server, app.Spec.Destination.Namespace)
addGauge(descAppCreated, float64(app.CreationTimestamp.Unix()))
syncStatus := app.Status.Sync.Status
addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeSynced), string(argoappv1.SyncStatusCodeSynced))
addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeOutOfSync), string(argoappv1.SyncStatusCodeOutOfSync))
addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeUnknown || syncStatus == ""), string(argoappv1.SyncStatusCodeUnknown))
healthStatus := app.Status.Health.Status
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusUnknown || healthStatus == ""), argoappv1.HealthStatusUnknown)
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusProgressing), argoappv1.HealthStatusProgressing)
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusSuspended), argoappv1.HealthStatusSuspended)
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusHealthy), argoappv1.HealthStatusHealthy)
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusDegraded), argoappv1.HealthStatusDegraded)
addGauge(descAppHealthStatus, boolFloat64(healthStatus == argoappv1.HealthStatusMissing), argoappv1.HealthStatusMissing)
}