-
Notifications
You must be signed in to change notification settings - Fork 462
/
component.go
403 lines (347 loc) · 14.6 KB
/
component.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0
package prometheus
import (
"context"
"strings"
"time"
"github.com/Masterminds/semver/v3"
"github.com/go-logr/logr"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
monitoringv1alpha1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
"github.com/gardener/gardener/pkg/client/kubernetes"
"github.com/gardener/gardener/pkg/component"
"github.com/gardener/gardener/pkg/component/observability/monitoring"
monitoringutils "github.com/gardener/gardener/pkg/component/observability/monitoring/utils"
"github.com/gardener/gardener/pkg/utils"
"github.com/gardener/gardener/pkg/utils/managedresources"
secretsmanager "github.com/gardener/gardener/pkg/utils/secrets/manager"
)
// Package-level constants of the Prometheus deployer.
const (
// dataKeyAdditionalScrapeConfigs is presumably the data key of the Secret holding the additional scrape configs
// (cf. secretAdditionalScrapeConfigs) - confirm against its usage outside this section.
dataKeyAdditionalScrapeConfigs = "prometheus.yaml"
// dataKeyAdditionalAlertRelabelConfigs is presumably the data key of the Secret holding the additional alert
// relabel configs (cf. secretAdditionalAlertRelabelConfigs) - confirm against its usage.
dataKeyAdditionalAlertRelabelConfigs = "configs.yaml"
// dataKeyAdditionalAlertmanagerConfigs is presumably the data key of the Secret holding the additional
// alertmanager configs (cf. secretAdditionalAlertmanagerConfigs) - confirm against its usage. Note that it has the
// same value as dataKeyAdditionalAlertRelabelConfigs.
dataKeyAdditionalAlertmanagerConfigs = "configs.yaml"
// port is presumably the port the Prometheus container listens on - confirm against its usage.
port = 9090
// servicePort is presumably the port exposed by the Service - confirm against its usage.
servicePort = 80
// ServicePortName is the name of the port in the Service specification.
ServicePortName = "web"
)
// Interface contains functions for a Prometheus deployer. In addition to the embedded component.DeployWaiter it
// offers setters for values which may be provided after the deployer has been created with New.
type Interface interface {
	component.DeployWaiter
	// SetIngressAuthSecret sets the ingress authentication secret name to the name of the given secret.
	SetIngressAuthSecret(*corev1.Secret)
	// SetIngressWildcardCertSecret sets the ingress wildcard certificate secret name to the name of the given secret.
	SetIngressWildcardCertSecret(*corev1.Secret)
	// SetCentralScrapeConfigs sets the central scrape configs, replacing any previously configured list.
	SetCentralScrapeConfigs([]*monitoringv1alpha1.ScrapeConfig)
	// SetAdditionalScrapeConfigs sets the additional scrape configs, replacing any previously configured list.
	SetAdditionalScrapeConfigs([]string)
	// SetAdditionalResources sets the additional resources, replacing any previously configured list.
	SetAdditionalResources(...client.Object)
	// SetNamespaceUID sets the namespace UID.
	SetNamespaceUID(uid types.UID)
}
// Values contains configuration values for the prometheus resources.
type Values struct {
// Name is the name of the prometheus. It will be used for the resource names of Prometheus and ManagedResource.
Name string
// Image defines the container image of prometheus.
Image string
// Version is the version of prometheus.
Version string
// ClusterType is the type of the cluster.
ClusterType component.ClusterType
// PriorityClassName is the name of the priority class for the deployment.
PriorityClassName string
// StorageCapacity is the storage capacity of Prometheus.
StorageCapacity resource.Quantity
// Replicas is the number of replicas.
Replicas int32
// Retention is the duration for the data retention.
Retention *monitoringv1.Duration
// RetentionSize is the size for the data retention.
RetentionSize monitoringv1.ByteSize
// RuntimeVersion is the Kubernetes version of the runtime cluster.
RuntimeVersion *semver.Version
// ScrapeTimeout is the timeout duration when scraping targets.
ScrapeTimeout monitoringv1.Duration
// VPAMinAllowed defines the resource list for the minAllowed field for the prometheus container resource policy.
VPAMinAllowed *corev1.ResourceList
// VPAMaxAllowed defines the resource list for the maxAllowed field for the prometheus container resource policy.
VPAMaxAllowed *corev1.ResourceList
// ExternalLabels is the set of external labels for the Prometheus configuration.
ExternalLabels map[string]string
// AdditionalPodLabels is a map containing additional labels for the created pods.
AdditionalPodLabels map[string]string
// NamespaceUID is the UID of the namespace. It can be set after construction via SetNamespaceUID.
NamespaceUID *types.UID
// CentralConfigs contains configuration for this Prometheus instance that is created together with it. This should
// only contain configuration that cannot be directly assigned to another component package.
CentralConfigs CentralConfigs
// Ingress contains configuration for exposing this Prometheus instance via an Ingress resource.
Ingress *IngressValues
// Alerting contains alerting configuration for this Prometheus instance.
Alerting *AlertingValues
// RemoteWrite contains remote write configuration for this Prometheus instance.
RemoteWrite *RemoteWriteValues
// AdditionalResources contains any additional resources which get added to the ManagedResource. It can be replaced
// after construction via SetAdditionalResources.
AdditionalResources []client.Object
// Cortex contains configuration for the cortex frontend sidecar container.
Cortex *CortexValues
// TargetCluster contains configuration in case Prometheus scrapes metrics from another kube-apiserver (e.g.,
// virtual garden, or shoot cluster) or other components running in this cluster.
TargetCluster *TargetClusterValues
// DataMigration is a struct for migrating data from existing disks.
// TODO(rfranzke): Remove this after v1.97 has been released.
DataMigration monitoring.DataMigration
// RestrictToNamespace controls whether the Prometheus instance should only scrape its targets in its own namespace.
RestrictToNamespace bool
}
// CentralConfigs contains configuration for this Prometheus instance that is created together with it. This should
// only contain configuration that cannot be directly assigned to another component package.
//
// The PrometheusRules, ScrapeConfigs, ServiceMonitors and PodMonitors listed here are modified in place during
// Deploy: their names get prefixed with the Prometheus name (unless already prefixed), an empty namespace is set to
// the deployer's namespace, and the monitoring labels are merged into their labels.
type CentralConfigs struct {
// AdditionalScrapeConfigs are additional scrape configs which cannot be modelled with the CRDs of the Prometheus
// operator. They can be replaced after construction via SetAdditionalScrapeConfigs.
AdditionalScrapeConfigs []string
// PrometheusRules is a list of central PrometheusRule objects for this prometheus instance.
PrometheusRules []*monitoringv1.PrometheusRule
// ScrapeConfigs is a list of central ScrapeConfig objects for this prometheus instance. It can be replaced after
// construction via SetCentralScrapeConfigs.
ScrapeConfigs []*monitoringv1alpha1.ScrapeConfig
// ServiceMonitors is a list of central ServiceMonitor objects for this prometheus instance.
ServiceMonitors []*monitoringv1.ServiceMonitor
// PodMonitors is a list of central PodMonitor objects for this prometheus instance.
PodMonitors []*monitoringv1.PodMonitor
}
// AlertingValues contains alerting configuration for this Prometheus instance. It is referenced by the optional
// (pointer) field Values.Alerting.
type AlertingValues struct {
// AlertmanagerName is the name of the alertmanager to which alerts should be sent.
AlertmanagerName string
// AdditionalAlertmanager contains the data of the 'alerting' secret (url, credentials, etc.).
AdditionalAlertmanager map[string][]byte
}
// RemoteWriteValues contains remote write configuration for this Prometheus instance. It is referenced by the
// optional (pointer) field Values.RemoteWrite.
type RemoteWriteValues struct {
// URL is the remote url.
URL string
// KeptMetrics is a list of metrics to keep.
KeptMetrics []string
// GlobalShootRemoteWriteSecret is a secret containing basic auth credentials for the remote write endpoint.
GlobalShootRemoteWriteSecret *corev1.Secret
}
// IngressValues contains configuration for exposing this Prometheus instance via an Ingress resource.
type IngressValues struct {
// AuthSecretName is the name of the auth secret. It can be set after construction via SetIngressAuthSecret.
AuthSecretName string
// Host is the hostname under which the Prometheus instance should be exposed.
Host string
// SecretsManager is the secrets manager used for generating the TLS certificate if no wildcard certificate is
// provided.
SecretsManager secretsmanager.Interface
// SigningCA is the name of the CA that should be used to sign a self-signed server certificate. Only needed when
// no wildcard certificate secret is provided.
SigningCA string
// WildcardCertSecretName is the name of a secret containing the wildcard TLS certificate which is issued for the
// ingress domain. If not provided, a self-signed server certificate will be created. It can be set after
// construction via SetIngressWildcardCertSecret.
WildcardCertSecretName *string
// BlockManagementAndTargetAPIAccess controls whether access to the management and target APIs is blocked when
// accessing Prometheus via ingress.
BlockManagementAndTargetAPIAccess bool
}
// TargetClusterValues contains configuration in case Prometheus scrapes metrics from another kube-apiserver (e.g.,
// virtual garden, or shoot cluster) or other components running in this cluster.
//
// When Values.TargetCluster is set, Deploy additionally creates a ManagedResource named "<name>-target"; when it is
// nil, Deploy deletes that ManagedResource.
type TargetClusterValues struct {
// ServiceAccountName is the name of the ServiceAccount.
ServiceAccountName string
// ScrapesMetrics specifies whether this Prometheus has scrape configs for scraping metrics from components running
// in the target cluster.
ScrapesMetrics bool
}
// CortexValues contains configuration for the cortex frontend sidecar container. Deploy only creates the cortex
// ConfigMap when Values.Cortex is set.
type CortexValues struct {
// Image defines the container image of cortex.
Image string
// CacheValidity defines the validity of the FIFO cache.
CacheValidity time.Duration
}
// New returns a Prometheus deployer which uses the given logger and client and operates in the given namespace
// according to the given values.
func New(log logr.Logger, client client.Client, namespace string, values Values) Interface {
	deployer := new(prometheus)
	deployer.log = log
	deployer.client = client
	deployer.namespace = namespace
	deployer.values = values
	return deployer
}
// prometheus is the implementation of Interface returned by New.
type prometheus struct {
// log is the base logger; Deploy derives a named logger from it.
log logr.Logger
// client is used for creating, deleting, and waiting for the ManagedResources.
client client.Client
// namespace is the namespace of the ManagedResources. It is also set on central config objects without a namespace.
namespace string
// values holds the configuration. Some of its fields can be changed later via the Set* methods.
values Values
}
// Deploy serializes all Prometheus resources and creates the ManagedResource named by p.name() for them.
// Depending on Values.TargetCluster it also creates or deletes the "<name>-target" ManagedResource. If the data
// migration reports that an existing PV must be taken over, the take-over is prepared before the ManagedResource is
// created and finalized afterwards, and Deploy then calls itself once more. The first error encountered is returned.
func (p *prometheus) Deploy(ctx context.Context) error {
var (
log = p.log.WithName("prometheus-deployer").WithValues("name", p.values.Name)
registry = managedresources.NewRegistry(kubernetes.SeedScheme, kubernetes.SeedCodec, kubernetes.SeedSerializer)
)
// TODO(rfranzke): Remove this migration code after all Prometheis have been migrated.
takeOverExistingPV, pvs, oldPVCs, err := p.values.DataMigration.ExistingPVTakeOverPrerequisites(ctx, log)
if err != nil {
return err
}
// Central configs and additional resources are added to the registry first; everything else is added and
// serialized in one go below.
if err := p.addCentralConfigsToRegistry(registry); err != nil {
return err
}
if err := registry.Add(p.values.AdditionalResources...); err != nil {
return err
}
ingress, err := p.ingress(ctx)
if err != nil {
return err
}
// The cortex ConfigMap is only built when cortex is configured; otherwise the nil value is passed on as is.
var cortexConfigMap *corev1.ConfigMap
if p.values.Cortex != nil {
cortexConfigMap = p.cortexConfigMap()
}
prometheusObj, err := p.prometheus(takeOverExistingPV, cortexConfigMap)
if err != nil {
return err
}
resources, err := registry.AddAllAndSerialize(
p.serviceAccount(),
p.service(),
p.clusterRoleBinding(),
p.secretAdditionalScrapeConfigs(),
p.secretAdditionalAlertRelabelConfigs(),
p.secretAdditionalAlertmanagerConfigs(),
p.secretRemoteWriteBasicAuth(),
cortexConfigMap,
prometheusObj,
p.vpa(),
p.podDisruptionBudget(),
ingress,
)
if err != nil {
return err
}
// The PV take-over is prepared after serialization succeeded but before the ManagedResource is created.
if takeOverExistingPV {
if err := p.values.DataMigration.PrepareExistingPVTakeOver(ctx, log, pvs, oldPVCs); err != nil {
return err
}
log.Info("Deploy new Prometheus (with init container for renaming the data directory)")
}
if err := managedresources.CreateForSeedWithLabels(ctx, p.client, p.namespace, p.name(), false, map[string]string{v1beta1constants.LabelCareConditionType: v1beta1constants.ObservabilityComponentsHealthy}, resources); err != nil {
return err
}
// The "-target" ManagedResource carries the target cluster RBAC objects. It is created when a target cluster is
// configured and deleted otherwise.
if p.values.TargetCluster != nil {
registryTarget := managedresources.NewRegistry(kubernetes.ShootScheme, kubernetes.ShootCodec, kubernetes.ShootSerializer)
resourcesTarget, err := registryTarget.AddAllAndSerialize(
p.clusterRoleTarget(),
p.clusterRoleBindingTarget(),
)
if err != nil {
return err
}
if err := managedresources.CreateForShootWithLabels(ctx, p.client, p.namespace, p.name()+"-target", managedresources.LabelValueGardener, false, map[string]string{v1beta1constants.LabelCareConditionType: v1beta1constants.ObservabilityComponentsHealthy}, resourcesTarget); err != nil {
return err
}
} else {
if err := managedresources.DeleteForShoot(ctx, p.client, p.namespace, p.name()+"-target"); err != nil {
return err
}
}
// After a take-over, Deploy runs again from the start. NOTE(review): the recursion presumably ends because the
// prerequisites check no longer reports a take-over after finalization - confirm against DataMigration.
if takeOverExistingPV {
if err := p.values.DataMigration.FinalizeExistingPVTakeOver(ctx, log, pvs); err != nil {
return err
}
log.Info("Deploy new Prometheus again (to remove the migration init container)")
return p.Deploy(ctx)
}
return nil
}
// Destroy deletes the "<name>-target" ManagedResource first and the main ManagedResource afterwards. If deleting
// the former fails, its error is returned and the latter is not touched.
func (p *prometheus) Destroy(ctx context.Context) error {
	targetName := p.name() + "-target"

	err := managedresources.DeleteForShoot(ctx, p.client, p.namespace, targetName)
	if err != nil {
		return err
	}

	err = managedresources.DeleteForSeed(ctx, p.client, p.namespace, p.name())
	return err
}
// TimeoutWaitForManagedResource is the timeout used while waiting for the ManagedResources to become healthy or
// deleted. Wait and WaitCleanup both derive their context deadline from it. It is an exported variable rather than
// a constant, presumably so that it can be overridden (e.g., in tests) - confirm against callers.
var TimeoutWaitForManagedResource = 5 * time.Minute
// Wait waits for the main ManagedResource to become healthy, giving up after TimeoutWaitForManagedResource.
func (p *prometheus) Wait(ctx context.Context) error {
	waitCtx, stop := context.WithTimeout(ctx, TimeoutWaitForManagedResource)
	defer stop()

	managedResourceName := p.name()
	return managedresources.WaitUntilHealthy(waitCtx, p.client, p.namespace, managedResourceName)
}
// WaitCleanup waits for the main ManagedResource to be deleted, giving up after TimeoutWaitForManagedResource.
func (p *prometheus) WaitCleanup(ctx context.Context) error {
	waitCtx, stop := context.WithTimeout(ctx, TimeoutWaitForManagedResource)
	defer stop()

	managedResourceName := p.name()
	return managedresources.WaitUntilDeleted(waitCtx, p.client, p.namespace, managedResourceName)
}
// SetIngressAuthSecret stores the name of the given secret as the ingress auth secret name. It does nothing when no
// ingress is configured or when the secret is nil.
func (p *prometheus) SetIngressAuthSecret(secret *corev1.Secret) {
	if p.values.Ingress == nil || secret == nil {
		return
	}

	p.values.Ingress.AuthSecretName = secret.Name
}
// SetIngressWildcardCertSecret stores the name of the given secret as the ingress wildcard certificate secret name.
// It does nothing when no ingress is configured or when the secret is nil. The stored pointer refers to the Name
// field of the passed secret, i.e. it is not a copy.
func (p *prometheus) SetIngressWildcardCertSecret(secret *corev1.Secret) {
	if p.values.Ingress == nil || secret == nil {
		return
	}

	ingressValues := p.values.Ingress
	ingressValues.WildcardCertSecretName = &secret.Name
}
// SetCentralScrapeConfigs replaces the central ScrapeConfig objects with the given list.
func (p *prometheus) SetCentralScrapeConfigs(configs []*monitoringv1alpha1.ScrapeConfig) {
	central := &p.values.CentralConfigs
	central.ScrapeConfigs = configs
}
// SetAdditionalScrapeConfigs replaces the additional scrape configs with the given list.
func (p *prometheus) SetAdditionalScrapeConfigs(configs []string) {
	central := &p.values.CentralConfigs
	central.AdditionalScrapeConfigs = configs
}
// SetAdditionalResources replaces the additional resources, which Deploy adds to the ManagedResource, with the given
// objects.
func (p *prometheus) SetAdditionalResources(resources ...client.Object) {
	vals := &p.values
	vals.AdditionalResources = resources
}
// SetNamespaceUID stores a pointer to a copy of the given namespace UID in the values.
func (p *prometheus) SetNamespaceUID(uid types.UID) {
	namespaceUID := uid
	p.values.NamespaceUID = &namespaceUID
}
// name returns the name used for the main ManagedResource: the configured name prefixed with "prometheus-".
func (p *prometheus) name() string {
	const prefix = "prometheus-"
	return prefix + p.values.Name
}
// addCentralConfigsToRegistry adds all central PrometheusRules, ScrapeConfigs, ServiceMonitors and PodMonitors (in
// this order) to the given registry. Before an object is added, it is modified in place: its name is prefixed with
// "<values.Name>-" unless it already carries that prefix, an empty namespace is set to the deployer's namespace, and
// the monitoring labels are merged into its labels. Failures to add an object do not stop the processing; all of
// them are collected and returned as one aggregate.
func (p *prometheus) addCentralConfigsToRegistry(registry *managedresources.Registry) error {
	central := p.values.CentralConfigs

	// Flatten the typed lists into one list of objects, keeping the processing order.
	objects := make([]client.Object, 0, len(central.PrometheusRules)+len(central.ScrapeConfigs)+len(central.ServiceMonitors)+len(central.PodMonitors))
	for _, rule := range central.PrometheusRules {
		objects = append(objects, rule)
	}
	for _, scrapeConfig := range central.ScrapeConfigs {
		objects = append(objects, scrapeConfig)
	}
	for _, serviceMonitor := range central.ServiceMonitors {
		objects = append(objects, serviceMonitor)
	}
	for _, podMonitor := range central.PodMonitors {
		objects = append(objects, podMonitor)
	}

	var (
		namePrefix = p.values.Name + "-"
		failures   []error
	)

	for _, object := range objects {
		if currentName := object.GetName(); !strings.HasPrefix(currentName, namePrefix) {
			object.SetName(namePrefix + currentName)
		}

		if object.GetNamespace() == "" {
			object.SetNamespace(p.namespace)
		}

		mergedLabels := utils.MergeStringMaps(object.GetLabels(), monitoringutils.Labels(p.values.Name))
		object.SetLabels(mergedLabels)

		if err := registry.Add(object); err != nil {
			failures = append(failures, err)
		}
	}

	return utilerrors.NewAggregate(failures)
}
// getLabels returns a new map with the app, role and name labels of this Prometheus instance.
func (p *prometheus) getLabels() map[string]string {
	labels := make(map[string]string, 3)
	labels[v1beta1constants.LabelApp] = "prometheus"
	labels[v1beta1constants.LabelRole] = v1beta1constants.LabelMonitoring
	labels["name"] = p.values.Name
	return labels
}