diff --git a/bundle.Dockerfile b/bundle.Dockerfile index 4ea891e8..d9c0a88e 100644 --- a/bundle.Dockerfile +++ b/bundle.Dockerfile @@ -6,7 +6,7 @@ LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/ LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/ LABEL operators.operatorframework.io.bundle.package.v1=deployment-validation-operator LABEL operators.operatorframework.io.bundle.channels.v1=alpha -LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.28.1 +LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.31.0+git LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 LABEL operators.operatorframework.io.metrics.project_layout=unknown diff --git a/bundle/manifests/deployment-validation-operator.clusterserviceversion.yaml b/bundle/manifests/deployment-validation-operator.clusterserviceversion.yaml index 24507bb2..f91d9e6a 100644 --- a/bundle/manifests/deployment-validation-operator.clusterserviceversion.yaml +++ b/bundle/manifests/deployment-validation-operator.clusterserviceversion.yaml @@ -4,8 +4,8 @@ metadata: annotations: alm-examples: '[]' capabilities: Basic Install - createdAt: "2023-08-24T07:58:38Z" - operators.operatorframework.io/builder: operator-sdk-v1.28.1 + createdAt: "2023-09-12T14:13:09Z" + operators.operatorframework.io/builder: operator-sdk-v1.31.0+git operators.operatorframework.io/project_layout: unknown name: deployment-validation-operator.v0.0.0 namespace: placeholder @@ -82,6 +82,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace image: quay.io/deployment-validation-operator/dv-operator:latest imagePullPolicy: Always livenessProbe: diff --git a/bundle/metadata/annotations.yaml b/bundle/metadata/annotations.yaml index a6ce8a57..7831d323 100644 --- a/bundle/metadata/annotations.yaml +++ b/bundle/metadata/annotations.yaml @@ -5,6 +5,6 @@ annotations: operators.operatorframework.io.bundle.metadata.v1: metadata/ operators.operatorframework.io.bundle.package.v1: deployment-validation-operator operators.operatorframework.io.bundle.channels.v1: alpha - operators.operatorframework.io.metrics.builder: operator-sdk-v1.28.1 + operators.operatorframework.io.metrics.builder: operator-sdk-v1.31.0+git operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 operators.operatorframework.io.metrics.project_layout: unknown diff --git a/deploy/openshift/operator.yaml b/deploy/openshift/operator.yaml index 8f32e554..d869b7ea 100644 --- a/deploy/openshift/operator.yaml +++ b/deploy/openshift/operator.yaml @@ -86,6 +86,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace volumeMounts: - name: dvo-config mountPath: /config diff --git a/main.go b/main.go index dce83e60..cace9eb2 100644 --- a/main.go +++ b/main.go @@ -15,8 +15,9 @@ import ( apis "github.com/app-sre/deployment-validation-operator/api" dvconfig "github.com/app-sre/deployment-validation-operator/config" "github.com/app-sre/deployment-validation-operator/internal/options" + "github.com/app-sre/deployment-validation-operator/pkg/configmap" "github.com/app-sre/deployment-validation-operator/pkg/controller" - dvo_prom "github.com/app-sre/deployment-validation-operator/pkg/prometheus" + dvoProm "github.com/app-sre/deployment-validation-operator/pkg/prometheus" "github.com/app-sre/deployment-validation-operator/pkg/validations" 
"github.com/app-sre/deployment-validation-operator/version" "github.com/prometheus/client_golang/prometheus" @@ -86,68 +87,66 @@ func setupManager(log logr.Logger, opts options.Options) (manager.Manager, error return nil, fmt.Errorf("getting config: %w", err) } - log.Info("Initialize Scheme") + log.Info("Initialize Manager") - scheme, err := initializeScheme() + mgr, err := initManager(log, opts, cfg) if err != nil { - return nil, fmt.Errorf("initializing scheme: %w", err) + return nil, fmt.Errorf("initializing manager: %w", err) } - log.Info("Initialize Manager") + log.Info("Registering Components") - mgrOpts, err := getManagerOptions(scheme, opts) - if err != nil { - return nil, fmt.Errorf("getting manager options: %w", err) - } + log.Info("Initialize Prometheus Registry") - mgr, err := manager.New(cfg, mgrOpts) + reg := prometheus.NewRegistry() + metrics, err := dvoProm.PreloadMetrics(reg) if err != nil { - return nil, fmt.Errorf("initializing manager: %w", err) + return nil, fmt.Errorf("preloading kube-linter metrics: %w", err) } - if err := mgr.AddHealthzCheck("health", healthz.Ping); err != nil { - return nil, fmt.Errorf("adding healthz check: %w", err) - } + log.Info(fmt.Sprintf("Initialize Prometheus metrics endpoint on %q", opts.MetricsEndpoint())) - if err := mgr.AddReadyzCheck("check", healthz.Ping); err != nil { - return nil, fmt.Errorf("adding readyz check: %w", err) + srv, err := dvoProm.NewServer(reg, opts.MetricsPath, fmt.Sprintf(":%d", opts.MetricsPort)) + if err != nil { + return nil, fmt.Errorf("initializing metrics server: %w", err) } - log.Info("Registering Components") - - discoveryClient, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) - if err != nil { - return nil, fmt.Errorf("initializing discovery client: %w", err) + if err := mgr.Add(srv); err != nil { + return nil, fmt.Errorf("adding metrics server to manager: %w", err) } - gr, err := controller.NewGenericReconciler(mgr.GetClient(), discoveryClient) + log.Info("Initialize ConfigMap watcher") + + cmWatcher, err := configmap.NewWatcher(cfg) if err != nil { - return nil, fmt.Errorf("initializing generic reconciler: %w", err) + return nil, fmt.Errorf("initializing configmap watcher: %w", err) } - if err = gr.AddToManager(mgr); err != nil { - return nil, fmt.Errorf("adding generic reconciler to manager: %w", err) + if err := mgr.Add(cmWatcher); err != nil { + return nil, fmt.Errorf("adding configmap watcher to manager: %w", err) } - log.Info("Initializing Prometheus Registry") + log.Info("Initialize Validation Engine") - reg := prometheus.NewRegistry() + err = validations.InitEngine(opts.ConfigFile, metrics) + if err != nil { + return nil, fmt.Errorf("initializing validation engine: %w", err) + } - log.Info(fmt.Sprintf("Initializing Prometheus metrics endpoint on %q", opts.MetricsEndpoint())) + log.Info("Initialize Reconciler") - srv, err := dvo_prom.NewServer(reg, opts.MetricsPath, fmt.Sprintf(":%d", opts.MetricsPort)) + discoveryClient, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) if err != nil { - return nil, fmt.Errorf("initializing metrics server: %w", err) + return nil, fmt.Errorf("initializing discovery client: %w", err) } - if err := mgr.Add(srv); err != nil { - return nil, fmt.Errorf("adding metrics server to manager: %w", err) + gr, err := controller.NewGenericReconciler(mgr.GetClient(), discoveryClient, cmWatcher) + if err != nil { + return nil, fmt.Errorf("initializing generic reconciler: %w", err) } - log.Info("Initializing Validation Engine") - - if err := 
validations.InitializeValidationEngine(opts.ConfigFile, reg); err != nil { - return nil, fmt.Errorf("initializing validation engine: %w", err) + if err = gr.AddToManager(mgr); err != nil { + return nil, fmt.Errorf("adding generic reconciler to manager: %w", err) } return mgr, nil @@ -235,3 +234,33 @@ func kubeClientQPS() (float32, error) { qps = float32(val) return qps, err } + +func initManager(log logr.Logger, opts options.Options, cfg *rest.Config) (manager.Manager, error) { + log.Info("Initialize Scheme") + scheme, err := initializeScheme() + if err != nil { + return nil, fmt.Errorf("initializing scheme: %w", err) + } + + log.Info("Getting Manager Options") + mgrOpts, err := getManagerOptions(scheme, opts) + if err != nil { + return nil, fmt.Errorf("getting manager options: %w", err) + } + + mgr, err := manager.New(cfg, mgrOpts) + if err != nil { + return nil, fmt.Errorf("getting new manager: %w", err) + } + + log.Info("Adding Healthz and Readyz checks") + if err := mgr.AddHealthzCheck("health", healthz.Ping); err != nil { + return nil, fmt.Errorf("adding healthz check: %w", err) + } + + if err := mgr.AddReadyzCheck("check", healthz.Ping); err != nil { + return nil, fmt.Errorf("adding readyz check: %w", err) + } + + return mgr, nil +} diff --git a/pkg/configmap/configmap_watcher.go b/pkg/configmap/configmap_watcher.go new file mode 100644 index 00000000..263cdb10 --- /dev/null +++ b/pkg/configmap/configmap_watcher.go @@ -0,0 +1,185 @@ +package configmap + +import ( + "context" + "fmt" + "os" + "reflect" + "time" + + "golang.stackrox.io/kube-linter/pkg/config" + "gopkg.in/yaml.v3" + + "github.com/app-sre/deployment-validation-operator/pkg/validations" + "github.com/go-logr/logr" + apicorev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// this structure mirrors Kube-Linter configuration structure +// it is used as a bridge to unmarshall ConfigMap data +// doc: https://pkg.go.dev/golang.stackrox.io/kube-linter/pkg/config#Config +type KubeLinterChecks struct { + Checks struct { + AddAllBuiltIn bool `yaml:"addAllBuiltIn,omitempty"` + DoNotAutoAddDefaults bool `yaml:"doNotAutoAddDefaults,omitempty"` + Exclude []string `yaml:"exclude,omitempty"` + Include []string `yaml:"include,omitempty"` + IgnorePaths []string `yaml:"ignorePaths,omitempty"` + } `yaml:"checks"` +} + +type Watcher struct { + clientset kubernetes.Interface + checks KubeLinterChecks + ch chan config.Config + logger logr.Logger + namespace string +} + +var configMapName = "deployment-validation-operator-config" +var configMapDataAccess = "deployment-validation-operator-config.yaml" + +// NewWatcher creates a new Watcher instance for observing changes to a ConfigMap. +// +// Parameters: +// - cfg: A pointer to a rest.Config representing the Kubernetes client configuration. +// +// Returns: +// - A pointer to a Watcher instance for monitoring changes to DVO ConfigMap resource. +// - An error if there's an issue while initializing the Kubernetes clientset. +func NewWatcher(cfg *rest.Config) (*Watcher, error) { + clientset, err := kubernetes.NewForConfig(cfg) + if err != nil { + return nil, fmt.Errorf("initializing clientset: %w", err) + } + + // the Informer will use this to monitor the namespace for the ConfigMap. 
+ namespace, err := getPodNamespace() + if err != nil { + return nil, fmt.Errorf("getting namespace: %w", err) + } + + return &Watcher{ + clientset: clientset, + logger: log.Log.WithName("ConfigMapWatcher"), + ch: make(chan config.Config), + namespace: namespace, + }, nil +} + +// GetStaticKubelinterConfig returns the ConfigMap's checks configuration +func (cmw *Watcher) GetStaticKubelinterConfig(ctx context.Context) (config.Config, error) { + cm, err := cmw.clientset.CoreV1(). + ConfigMaps(cmw.namespace).Get(ctx, configMapName, v1.GetOptions{}) + if err != nil { + return config.Config{}, fmt.Errorf("getting initial configuration: %w", err) + } + + return cmw.getKubeLinterConfig(cm.Data[configMapDataAccess]) +} + +// Start will update the channel structure with new configuration data from ConfigMap update event +func (cmw Watcher) Start(ctx context.Context) error { + factory := informers.NewSharedInformerFactoryWithOptions( + cmw.clientset, time.Second*30, informers.WithNamespace(cmw.namespace), + ) + informer := factory.Core().V1().ConfigMaps().Informer() + + informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ // nolint:errcheck + AddFunc: func(obj interface{}) { + newCm := obj.(*apicorev1.ConfigMap) + + if configMapName != newCm.GetName() { + return + } + + cmw.logger.Info( + "a ConfigMap has been created under watched namespace", + "name", newCm.GetName(), + "namespace", newCm.GetNamespace(), + ) + + cfg, err := cmw.getKubeLinterConfig(newCm.Data[configMapDataAccess]) + if err != nil { + cmw.logger.Error(err, "ConfigMap data format") + return + } + + cmw.ch <- cfg + }, + UpdateFunc: func(oldObj, newObj interface{}) { + newCm := newObj.(*apicorev1.ConfigMap) + + // This is sometimes triggered even if no change was due to the ConfigMap + if configMapName != newCm.GetName() || reflect.DeepEqual(oldObj, newObj) { + return + } + + cmw.logger.Info( + "a ConfigMap has been updated under watched namespace", + "name", newCm.GetName(), + "namespace", newCm.GetNamespace(), + ) + + cfg, err := cmw.getKubeLinterConfig(newCm.Data[configMapDataAccess]) + if err != nil { + cmw.logger.Error(err, "ConfigMap data format") + return + } + + cmw.ch <- cfg + }, + DeleteFunc: func(oldObj interface{}) { + cm := oldObj.(*apicorev1.ConfigMap) + + cmw.logger.Info( + "a ConfigMap has been deleted under watched namespace", + "name", cm.GetName(), + "namespace", cm.GetNamespace(), + ) + + cmw.ch <- config.Config{ + Checks: validations.GetDefaultChecks(), + } + }, + }) + + factory.Start(ctx.Done()) + + return nil +} + +// ConfigChanged receives push notifications when the configuration is updated +func (cmw *Watcher) ConfigChanged() <-chan config.Config { + return cmw.ch +} + +// getKubeLinterConfig returns a valid Kube-linter Config structure +// based on the checks received by the string +func (cmw *Watcher) getKubeLinterConfig(data string) (config.Config, error) { + var cfg config.Config + + err := yaml.Unmarshal([]byte(data), &cmw.checks) + if err != nil { + return cfg, fmt.Errorf("unmarshalling configmap data: %w", err) + } + + cfg.Checks = config.ChecksConfig(cmw.checks.Checks) + + return cfg, nil +} + +func getPodNamespace() (string, error) { + namespace, exists := os.LookupEnv("POD_NAMESPACE") + if !exists { + return "", fmt.Errorf("could not find DVO pod") + } + + return namespace, nil +} diff --git a/pkg/controller/configmap_watcher_test.go b/pkg/configmap/configmap_watcher_test.go similarity index 92% rename from pkg/controller/configmap_watcher_test.go rename to 
pkg/configmap/configmap_watcher_test.go index 22ded7b2..56daf852 100644 --- a/pkg/controller/configmap_watcher_test.go +++ b/pkg/configmap/configmap_watcher_test.go @@ -1,4 +1,4 @@ -package controller +package configmap import ( "context" @@ -13,6 +13,8 @@ import ( ) func TestStaticConfigMapWatcher(t *testing.T) { + var configMapNamespace = "deployment-validation-operator" + testCases := []struct { name string data string @@ -58,7 +60,7 @@ func TestStaticConfigMapWatcher(t *testing.T) { }, } client := kubefake.NewSimpleClientset([]runtime.Object{cm}...) - mock := ConfigMapWatcher{clientset: client} + mock := Watcher{clientset: client, namespace: configMapNamespace} // When test, err := mock.GetStaticKubelinterConfig(context.Background()) diff --git a/pkg/controller/configmap_watcher.go b/pkg/controller/configmap_watcher.go deleted file mode 100644 index 31b574ab..00000000 --- a/pkg/controller/configmap_watcher.go +++ /dev/null @@ -1,117 +0,0 @@ -package controller - -import ( - "context" - "fmt" - "time" - - "golang.stackrox.io/kube-linter/pkg/config" - "gopkg.in/yaml.v3" - - "github.com/go-logr/logr" - apicorev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/cache" - "sigs.k8s.io/controller-runtime/pkg/log" -) - -// this structure mirrors Kube-Linter configuration structure -// it is used as a bridge to unmarshall ConfigMap data -// doc: https://pkg.go.dev/golang.stackrox.io/kube-linter/pkg/config#Config -type KubeLinterChecks struct { - Checks struct { - AddAllBuiltIn bool `yaml:"addAllBuiltIn,omitempty"` - DoNotAutoAddDefaults bool `yaml:"doNotAutoAddDefaults,omitempty"` - Exclude []string `yaml:"exclude,omitempty"` - Include []string `yaml:"include,omitempty"` - IgnorePaths []string `yaml:"ignorePaths,omitempty"` - } `yaml:"checks"` -} - -type ConfigMapWatcher struct { - clientset kubernetes.Interface - checks KubeLinterChecks - ch chan config.Config - logger logr.Logger -} - -var configMapName = "deployment-validation-operator-config" -var configMapNamespace = "deployment-validation-operator" -var configMapDataAccess = "deployment-validation-operator-config.yaml" - -// NewConfigMapWatcher returns a watcher that can be used both: -// basic: with GetStaticDisabledChecks method, it returns an existent ConfigMap data's disabled check -// dynamic: with StartInformer it sets an Informer that will be triggered on ConfigMap update -func NewConfigMapWatcher(cfg *rest.Config) (ConfigMapWatcher, error) { - clientset, err := kubernetes.NewForConfig(cfg) - if err != nil { - return ConfigMapWatcher{}, fmt.Errorf("initializing clientset: %w", err) - } - - return ConfigMapWatcher{ - clientset: clientset, - logger: log.Log.WithName("ConfigMapWatcher"), - }, nil -} - -// GetStaticKubelinterConfig returns the ConfigMap's checks configuration -func (cmw *ConfigMapWatcher) GetStaticKubelinterConfig(ctx context.Context) (config.Config, error) { - cm, err := cmw.clientset.CoreV1(). 
- ConfigMaps(configMapNamespace).Get(ctx, configMapName, v1.GetOptions{}) - if err != nil { - return config.Config{}, fmt.Errorf("getting initial configuration: %w", err) - } - - return cmw.getKubeLinterConfig(cm.Data[configMapDataAccess]) -} - -// StartInformer will update the channel structure with new configuration data from ConfigMap update event -func (cmw *ConfigMapWatcher) StartInformer(ctx context.Context) error { - factory := informers.NewSharedInformerFactoryWithOptions( - cmw.clientset, time.Second*30, informers.WithNamespace(configMapNamespace), - ) - informer := factory.Core().V1().ConfigMaps().Informer() - - informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ // nolint:errcheck - UpdateFunc: func(oldObj, newObj interface{}) { - newCm := newObj.(*apicorev1.ConfigMap) - - cmw.logger.Info("ConfigMap has been updated") - - cfg, err := cmw.getKubeLinterConfig(newCm.Data[configMapDataAccess]) - if err != nil { - cmw.logger.Error(err, "ConfigMap data format") - return - } - - cmw.ch <- cfg - }, - }) - - factory.Start(ctx.Done()) - - return nil -} - -// ConfigChanged receives push notifications when the configuration is updated -func (cmw *ConfigMapWatcher) ConfigChanged() <-chan config.Config { - return cmw.ch -} - -// getKubeLinterConfig returns a valid Kube-linter Config structure -// based on the checks received by the string -func (cmw *ConfigMapWatcher) getKubeLinterConfig(data string) (config.Config, error) { - var cfg config.Config - - err := yaml.Unmarshal([]byte(data), &cmw.checks) - if err != nil { - return cfg, fmt.Errorf("unmarshalling configmap data: %w", err) - } - - cfg.Checks = config.ChecksConfig(cmw.checks.Checks) - - return cfg, nil -} diff --git a/pkg/controller/generic_reconciler.go b/pkg/controller/generic_reconciler.go index c0ea0748..05f7c1a1 100644 --- a/pkg/controller/generic_reconciler.go +++ b/pkg/controller/generic_reconciler.go @@ -15,6 +15,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/discovery" + "github.com/app-sre/deployment-validation-operator/pkg/configmap" "github.com/app-sre/deployment-validation-operator/pkg/utils" "github.com/app-sre/deployment-validation-operator/pkg/validations" "github.com/go-logr/logr" @@ -37,10 +38,15 @@ type GenericReconciler struct { client client.Client discovery discovery.DiscoveryInterface logger logr.Logger + cmWatcher *configmap.Watcher } // NewGenericReconciler returns a GenericReconciler struct -func NewGenericReconciler(client client.Client, discovery discovery.DiscoveryInterface) (*GenericReconciler, error) { +func NewGenericReconciler( + client client.Client, + discovery discovery.DiscoveryInterface, + cmw *configmap.Watcher, +) (*GenericReconciler, error) { listLimit, err := getListLimit() if err != nil { return nil, err @@ -54,6 +60,7 @@ func NewGenericReconciler(client client.Client, discovery discovery.DiscoveryInt objectValidationCache: newValidationCache(), currentObjects: newValidationCache(), logger: ctrl.Log.WithName("reconcile"), + cmWatcher: cmw, }, nil } @@ -94,6 +101,8 @@ func (gr *GenericReconciler) AddToManager(mgr manager.Manager) error { // Start validating the given object kind every interval. 
func (gr *GenericReconciler) Start(ctx context.Context) error { + go gr.LookForConfigUpdates(ctx) + for { select { case <-ctx.Done(): @@ -112,6 +121,29 @@ func (gr *GenericReconciler) Start(ctx context.Context) error { } } +func (gr *GenericReconciler) LookForConfigUpdates(ctx context.Context) { + for { + select { + case cfg := <-gr.cmWatcher.ConfigChanged(): + validations.UpdateConfig(cfg) + err := validations.InitRegistry() + if err == nil { + gr.objectValidationCache.drain() + validations.ResetMetrics() + + } else { + gr.logger.Error( + err, + fmt.Sprintf("error updating configuration from ConfigMap: %v\n", cfg), + ) + } + + case <-ctx.Done(): + return + } + } +} + func (gr *GenericReconciler) reconcileEverything(ctx context.Context) error { apiResources, err := reconcileResourceList(gr.discovery, gr.client.Scheme()) if err != nil { diff --git a/pkg/controller/generic_reconciler_test.go b/pkg/controller/generic_reconciler_test.go index dfc5fa35..f1b73c20 100644 --- a/pkg/controller/generic_reconciler_test.go +++ b/pkg/controller/generic_reconciler_test.go @@ -6,6 +6,7 @@ import ( "os" "testing" + "github.com/app-sre/deployment-validation-operator/pkg/configmap" "github.com/app-sre/deployment-validation-operator/pkg/validations" "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" @@ -966,5 +967,5 @@ func createTestReconciler(scheme *runtime.Scheme, objects []client.Object) (*Gen } client := cliBuilder.Build() cli := kubefake.NewSimpleClientset() - return NewGenericReconciler(client, cli.Discovery()) + return NewGenericReconciler(client, cli.Discovery(), &configmap.Watcher{}) } diff --git a/pkg/prometheus/prometheus.go b/pkg/prometheus/prometheus.go index 45932251..6a14d81b 100644 --- a/pkg/prometheus/prometheus.go +++ b/pkg/prometheus/prometheus.go @@ -7,10 +7,13 @@ import ( "strings" "time" + "github.com/app-sre/deployment-validation-operator/config" + "github.com/app-sre/deployment-validation-operator/pkg/validations" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" dto "github.com/prometheus/client_model/go" + "golang.stackrox.io/kube-linter/pkg/checkregistry" ) type Registry interface { @@ -80,6 +83,70 @@ func getRouter(registry Registry, path string) (*http.ServeMux, error) { return mux, nil } +// PreloadMetrics preloads metrics related to predefined checks into the provided Prometheus registry. +// It retrieves predefined checks from the linter registry, sets up corresponding GaugeVec metrics, +// and registers them in the Prometheus registry. +// +// Parameters: +// - pr: A pointer to a Prometheus registry where the metrics will be registered. +// +// Returns: +// - A map of check names to corresponding GaugeVec metrics. +// - An error if any error occurs during metric setup or registration. 
+func PreloadMetrics(pr *prometheus.Registry) (map[string]*prometheus.GaugeVec, error) { + preloadedMetrics := make(map[string]*prometheus.GaugeVec) + + klr, err := validations.GetKubeLinterRegistry() + if err != nil { + return nil, err + } + + checks, err := validations.GetAllNamesFromRegistry(klr) + if err != nil { + return nil, err + } + + for _, checkName := range checks { + metric, err := setupMetric(klr, checkName) + if err != nil { + return nil, fmt.Errorf("unable to create metric for check %s", checkName) + } + + if err := pr.Register(metric); err != nil { + return nil, fmt.Errorf("registering metric for check %q: %w", checkName, err) + } + + preloadedMetrics[checkName] = metric + } + + return preloadedMetrics, nil +} + +// setupMetric sets up a Prometheus metric based on the provided checkname and information from a CheckRegistry. +// The metric is created with the formatted name, description, and remediation information from the check specification. +func setupMetric(reg checkregistry.CheckRegistry, name string) (*prometheus.GaugeVec, error) { + check := reg.Load(name) + if check == nil { + return nil, fmt.Errorf("unable to create metric for check %s", name) + } + + return prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: strings.ReplaceAll( + fmt.Sprintf("%s_%s", config.OperatorName, check.Spec.Name), + "-", "_"), + Help: fmt.Sprintf( + "Description: %s ; Remediation: %s", + check.Spec.Description, + check.Spec.Remediation, + ), + ConstLabels: prometheus.Labels{ + "check_description": check.Spec.Description, + "check_remediation": check.Spec.Remediation, + }, + }, []string{"namespace_uid", "namespace", "uid", "name", "kind"}), nil +} + type Server struct { s *http.Server } diff --git a/pkg/validations/base_test.go b/pkg/validations/base_test.go index b79dfdf5..06ef24ae 100644 --- a/pkg/validations/base_test.go +++ b/pkg/validations/base_test.go @@ -32,10 +32,16 @@ func newEngine(c config.Config) (validationEngine, error) { ve := validationEngine{ config: c, } - loadErr := ve.InitRegistry(prometheus.NewRegistry()) + loadErr := ve.InitRegistry() if loadErr != nil { return validationEngine{}, loadErr } + // checks now are preloaded, adding them after Registry init + ve.metrics = make(map[string]*prometheus.GaugeVec) + for _, checkName := range ve.EnabledChecks() { + check := ve.registeredChecks[checkName] + ve.metrics[checkName] = newGaugeVecMetric(check) + } return ve, nil } diff --git a/pkg/validations/utils.go b/pkg/validations/utils.go index 823aeeea..f7bb9b53 100644 --- a/pkg/validations/utils.go +++ b/pkg/validations/utils.go @@ -4,8 +4,12 @@ import ( "fmt" "strings" - "github.com/app-sre/deployment-validation-operator/config" + "golang.stackrox.io/kube-linter/pkg/builtinchecks" + "golang.stackrox.io/kube-linter/pkg/checkregistry" + klConfig "golang.stackrox.io/kube-linter/pkg/config" + "golang.stackrox.io/kube-linter/pkg/configresolver" + "github.com/app-sre/deployment-validation-operator/config" "github.com/prometheus/client_golang/prometheus" ) @@ -13,15 +17,100 @@ func DeleteMetrics(labels prometheus.Labels) { engine.DeleteMetrics(labels) } -func newGaugeVecMetric( - name, help string, labelNames []string, constLabels prometheus.Labels, -) *prometheus.GaugeVec { +// GetKubeLinterRegistry returns a CheckRegistry containing kube-linter built-in validations. +// It initializes a new CheckRegistry, loads the built-in validations into the registry, +// and returns the resulting registry if successful. 
+// +// Returns: +// - A CheckRegistry containing kube-linter built-in validations if successful. +// - An error if the built-in validations fail to load into the registry. +func GetKubeLinterRegistry() (checkregistry.CheckRegistry, error) { + registry := checkregistry.New() + if err := builtinchecks.LoadInto(registry); err != nil { + log.Error(err, "failed to load kube-linter built-in validations") + return nil, err + } + + return registry, nil +} + +// GetAllNamesFromRegistry retrieves the names of all enabled checks from the provided CheckRegistry. +// It fetches the names of checks that are enabled based on a specified configuration, excluding incompatible ones. +// +// Parameters: +// - reg: A CheckRegistry containing predefined checks and their specifications. +// +// Returns: +// - A slice of strings containing the names of all enabled checks if successful. +// - An error if there's an issue while fetching the enabled check names or validating the configuration. +func GetAllNamesFromRegistry(reg checkregistry.CheckRegistry) ([]string, error) { + // Get all checks except for incompatible ones + cfg := klConfig.Config{ + Checks: klConfig.ChecksConfig{ + AddAllBuiltIn: true, + }, + } + disableIncompatibleChecks(&cfg) + + checks, err := configresolver.GetEnabledChecksAndValidate(&cfg, reg) + if err != nil { + log.Error(err, "error getting enabled validations") + return nil, err + } + + return checks, nil +} + +func newGaugeVecMetric(check klConfig.Check) *prometheus.GaugeVec { + metricName := strings.ReplaceAll(fmt.Sprintf("%s_%s", config.OperatorName, check.Name), "-", "_") + return prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: fmt.Sprintf("%s_%s", strings.ReplaceAll(config.OperatorName, "-", "_"), name), - Help: help, - ConstLabels: constLabels, + Name: metricName, + Help: fmt.Sprintf( + "Description: %s ; Remediation: %s", check.Description, check.Remediation, + ), + ConstLabels: prometheus.Labels{ + "check_description": check.Description, + "check_remediation": check.Remediation, + }, + }, []string{"namespace_uid", "namespace", "uid", "name", "kind"}) +} + +// UpdateConfig provides an access to setup new configuration for the generic reconciler +func UpdateConfig(cfg klConfig.Config) { + engine.config = cfg +} + +// InitRegistry forces Validation Engine to initialize a new registry +func InitRegistry() error { + return engine.InitRegistry() +} + +// ResetMetrics resets all the metrics registered in the Validation Engine +func ResetMetrics() { + for _, metric := range engine.metrics { + metric.Reset() + } +} + +// GetDefaultChecks provides a default set of checks usable in case there is no custom ConfigMap +func GetDefaultChecks() klConfig.ChecksConfig { + return klConfig.ChecksConfig{ + DoNotAutoAddDefaults: true, + Include: []string{ + "host-ipc", + "host-network", + "host-pid", + "non-isolated-pod", + "pdb-max-unavailable", + "pdb-min-available", + "privilege-escalation-container", + "privileged-container", + "run-as-non-root", + "unsafe-sysctls", + "unset-cpu-requirements", + "unset-memory-requirements", }, - labelNames, - ) + } } diff --git a/pkg/validations/validation_engine.go b/pkg/validations/validation_engine.go index 14e91729..f5a5cefd 100644 --- a/pkg/validations/validation_engine.go +++ b/pkg/validations/validation_engine.go @@ -2,22 +2,23 @@ package validations import ( // Used to embed yamls by kube-linter - _ "embed" + + _ "embed" // nolint:golint "fmt" "os" - "strings" + "regexp" // Import checks from DVO - _ 
"github.com/app-sre/deployment-validation-operator/pkg/validations/all" - "golang.stackrox.io/kube-linter/pkg/builtinchecks" + _ "github.com/app-sre/deployment-validation-operator/pkg/validations/all" // nolint:golint + "golang.stackrox.io/kube-linter/pkg/checkregistry" "golang.stackrox.io/kube-linter/pkg/config" "golang.stackrox.io/kube-linter/pkg/configresolver" "golang.stackrox.io/kube-linter/pkg/diagnostic" // Import and initialize all check templates from kube-linter - _ "golang.stackrox.io/kube-linter/pkg/templates/all" + _ "golang.stackrox.io/kube-linter/pkg/templates/all" // nolint:golint "github.com/prometheus/client_golang/prometheus" @@ -34,6 +35,36 @@ type validationEngine struct { metrics map[string]*prometheus.GaugeVec } +// InitEngine creates a new ValidationEngine instance with the provided configuration path, a watcher, and metrics. +// It initializes a ValidationEngine with the provided watcher for configmap changes and a set of preloaded metrics. +// The engine's configuration is loaded from the specified configuration path, and its check registry is initialized. +// InitRegistry sets this instance in the package scope in engine variable. +// +// Parameters: +// - configPath: The path to the configuration file for the ValidationEngine. +// - cmw: A configmap.Watcher for monitoring changes to configmaps. +// - metrics: A map of preloaded Prometheus GaugeVec metrics. +// +// Returns: +// - An error if there's an issue loading the configuration or initializing the check registry. +func InitEngine(configPath string, metrics map[string]*prometheus.GaugeVec) error { + ve := &validationEngine{ + metrics: metrics, + } + + err := ve.LoadConfig(configPath) + if err != nil { + return err + } + + err = ve.InitRegistry() + if err != nil { + return err + } + + return nil +} + func (ve *validationEngine) CheckRegistry() checkregistry.CheckRegistry { return ve.registry } @@ -54,22 +85,7 @@ func fileExists(filename string) bool { func (ve *validationEngine) LoadConfig(path string) error { if !fileExists(path) { log.Info(fmt.Sprintf("config file %s does not exist. 
Use default configuration", path)) - // TODO - This hardcode will be removed when a ConfigMap is set by default in regular installation - ve.config.Checks.DoNotAutoAddDefaults = true - ve.config.Checks.Include = []string{ - "host-ipc", - "host-network", - "host-pid", - "non-isolated-pod", - "pdb-max-unavailable", - "pdb-min-available", - "privilege-escalation-container", - "privileged-container", - "run-as-non-root", - "unsafe-sysctls", - "unset-cpu-requirements", - "unset-memory-requirements", - } + ve.config.Checks = GetDefaultChecks() return nil } @@ -82,6 +98,7 @@ func (ve *validationEngine) LoadConfig(path string) error { log.Error(err, "failed to load config") return err } + ve.config = config return nil @@ -91,12 +108,11 @@ type PrometheusRegistry interface { Register(prometheus.Collector) error } -func (ve *validationEngine) InitRegistry(promReg PrometheusRegistry) error { +func (ve *validationEngine) InitRegistry() error { disableIncompatibleChecks(&ve.config) - registry := checkregistry.New() - if err := builtinchecks.LoadInto(registry); err != nil { - log.Error(err, "failed to load built-in validations") + registry, err := GetKubeLinterRegistry() + if err != nil { return err } @@ -105,13 +121,12 @@ func (ve *validationEngine) InitRegistry(promReg PrometheusRegistry) error { return err } - enabledChecks, err := configresolver.GetEnabledChecksAndValidate(&ve.config, registry) + enabledChecks, err := ve.getValidChecks(registry) if err != nil { log.Error(err, "error finding enabled validations") return err } - validationMetrics := map[string]*prometheus.GaugeVec{} registeredChecks := map[string]config.Check{} for _, checkName := range enabledChecks { check := registry.Load(checkName) @@ -119,29 +134,14 @@ func (ve *validationEngine) InitRegistry(promReg PrometheusRegistry) error { return fmt.Errorf("unable to create metric for check %s", checkName) } registeredChecks[check.Spec.Name] = check.Spec - metric := newGaugeVecMetric( - strings.ReplaceAll(check.Spec.Name, "-", "_"), - fmt.Sprintf("Description: %s ; Remediation: %s", - check.Spec.Description, check.Spec.Remediation), - []string{"namespace_uid", "namespace", "uid", "name", "kind"}, - prometheus.Labels{ - "check_description": check.Spec.Description, - "check_remediation": check.Spec.Remediation, - }, - ) - - if err := promReg.Register(metric); err != nil { - return fmt.Errorf("registering metric for check %q: %w", check.Spec.Name, err) - } - - validationMetrics[checkName] = metric } ve.registry = registry ve.enabledChecks = enabledChecks - ve.metrics = validationMetrics ve.registeredChecks = registeredChecks + engine = *ve + return nil } @@ -175,34 +175,58 @@ func (ve *validationEngine) ClearMetrics(reports []diagnostic.WithContext, label } } -// InitializeValidationEngine will initialize the validation engine from scratch. -// If an existing engine exists, it will not be replaced with the new one unless all -// initialization steps succeed. -func InitializeValidationEngine(configPath string, reg PrometheusRegistry) error { - ve := validationEngine{} - - err := ve.LoadConfig(configPath) - if err != nil { - return err +func (ve *validationEngine) GetCheckByName(name string) (config.Check, error) { + check, ok := ve.registeredChecks[name] + if !ok { + return config.Check{}, fmt.Errorf("check '%s' is not registered", name) } + return check, nil +} - err = ve.InitRegistry(reg) +// getValidChecks function fetches and validates the list of enabled checks from the ValidationEngine's +// configuration. 
It uses the provided check registry to validate the enabled checks against available checks. +// If any checks are found to be invalid (not present in the check registry), they are removed from the configuration. +// The function then recursively calls itself to fetch a new list of valid checks without the invalid ones. +func (ve *validationEngine) getValidChecks(registry checkregistry.CheckRegistry) ([]string, error) { + enabledChecks, err := configresolver.GetEnabledChecksAndValidate(&ve.config, registry) if err != nil { - return err + // error format from configresolver: + // "enabled checks validation error: [check \"check name\" not found, ...]"} + re := regexp.MustCompile(`check \"([^,]*)\" not found`) + if matches := re.FindAllStringSubmatch(err.Error(), -1); matches != nil { + for i := range matches { + log.Info("entered ConfigMap check was not validated and is ignored", + "validation name", matches[i][1], + ) + ve.removeCheckFromConfig(matches[i][1]) + } + return ve.getValidChecks(registry) + } + return []string{}, err } - // Only replace the exisiting engine if no errors occurred - engine = ve - - return nil + return enabledChecks, nil } -func (ve *validationEngine) GetCheckByName(name string) (config.Check, error) { - check, ok := ve.registeredChecks[name] - if !ok { - return config.Check{}, fmt.Errorf("check '%s' is not registered", name) +// removeCheckFromConfig function searches for the given check name in both the "Include" and "Exclude" lists +// of checks in the ValidationEngine's configuration. If the check is found in either list, it is removed by updating +// the respective list. +func (ve *validationEngine) removeCheckFromConfig(check string) { + include := ve.config.Checks.Include + for i := 0; i < len(include); i++ { + if include[i] == check { + ve.config.Checks.Include = append(include[:i], include[i+1:]...) + return + } + } + + exclude := ve.config.Checks.Exclude + for i := 0; i < len(exclude); i++ { + if exclude[i] == check { + ve.config.Checks.Exclude = append(exclude[:i], exclude[i+1:]...) 
+ return + } } - return check, nil } // disableIncompatibleChecks will forcibly update a kube-linter config diff --git a/pkg/validations/validation_engine_test.go b/pkg/validations/validation_engine_test.go new file mode 100644 index 00000000..f7983f22 --- /dev/null +++ b/pkg/validations/validation_engine_test.go @@ -0,0 +1,92 @@ +package validations + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "golang.stackrox.io/kube-linter/pkg/config" +) + +func TestGetValidChecks(t *testing.T) { + testCases := []struct { + name string + included []string + expected []string + }{ + { + name: "it returns only included checks", + included: []string{"host-network", "host-pid"}, + expected: []string{"host-network", "host-pid"}, + }, + { + name: "it returns only validated checks, and not the misspelled", + included: []string{"host-network", "host-pid", "misspelled", "wrong_format"}, + expected: []string{"host-network", "host-pid"}, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + // Given + mock := validationEngine{ + config: config.Config{ + Checks: config.ChecksConfig{ + DoNotAutoAddDefaults: true, + Include: testCase.included, + }, + }, + } + registry, _ := GetKubeLinterRegistry() + + // When + test, err := mock.getValidChecks(registry) + + // Assert + assert.Equal(t, testCase.expected, test) + assert.NoError(t, err) + }) + } +} + +func TestRemoveCheckFromConfig(t *testing.T) { + testCases := []struct { + name string + check string + cfg config.ChecksConfig + expected config.ChecksConfig + }{ + { + name: "function removes misspelled check from Include list", + check: "unset-something", + cfg: config.ChecksConfig{ + Include: []string{"host-network", "host-pid", "unset-something"}, + }, + expected: config.ChecksConfig{ + Include: []string{"host-network", "host-pid"}, + }, + }, + { + name: "function removes misspelled check from Exclude list", + check: "unset-something", + cfg: config.ChecksConfig{ + Exclude: []string{"host-network", "host-pid", "unset-something"}, + }, + expected: config.ChecksConfig{ + Exclude: []string{"host-network", "host-pid"}, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + // Given + mock := validationEngine{config: config.Config{Checks: testCase.cfg}} + + // When + mock.removeCheckFromConfig(testCase.check) + + // Assert + assert.Equal(t, mock.config.Checks, testCase.expected) + }) + } +}
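
Notes on the patterns this patch introduces follow. The sketches are illustrative, not part of the diff; any identifier that does not appear in the changed files is a stand-in.

The deploy manifests inject the pod's namespace through the downward API (fieldRef: metadata.namespace), and getPodNamespace turns that into a hard startup requirement. A minimal sketch of the same lookup, assuming only the standard library; the main wrapper is illustrative:

package main

import (
	"fmt"
	"os"
)

// getPodNamespace mirrors the helper in pkg/configmap: POD_NAMESPACE is
// populated by the downward API env var added to the Deployment manifests.
func getPodNamespace() (string, error) {
	ns, ok := os.LookupEnv("POD_NAMESPACE")
	if !ok {
		return "", fmt.Errorf("POD_NAMESPACE is not set; the watcher cannot pick a namespace")
	}
	return ns, nil
}

func main() {
	ns, err := getPodNamespace()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("watching namespace:", ns)
}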
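
The watcher's Start builds a namespace-scoped SharedInformerFactory and pushes parsed configuration over a channel. The sketch below keeps that shape but swaps the patch's reflect.DeepEqual guard for a ResourceVersion comparison, a cheaper way to skip the no-op events that periodic resyncs generate; it assumes client-go >= v0.26, where AddEventHandler returns a registration and an error:

package watcher

import (
	"context"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
)

// watchConfigMap streams the Data of one named ConfigMap on every create or
// update in a single namespace. Sketch only: the typed config decoding and
// delete handling from the real Watcher are omitted.
func watchConfigMap(ctx context.Context, cs kubernetes.Interface, namespace, name string) <-chan map[string]string {
	out := make(chan map[string]string)

	factory := informers.NewSharedInformerFactoryWithOptions(
		cs, 30*time.Second, informers.WithNamespace(namespace),
	)
	informer := factory.Core().V1().ConfigMaps().Informer()

	_, _ = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			if cm, ok := obj.(*corev1.ConfigMap); ok && cm.GetName() == name {
				out <- cm.Data // unbuffered: blocks the handler until a consumer reads
			}
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			oldCm, okOld := oldObj.(*corev1.ConfigMap)
			newCm, okNew := newObj.(*corev1.ConfigMap)
			if !okOld || !okNew || newCm.GetName() != name {
				return
			}
			// Periodic resyncs fire UpdateFunc with identical objects;
			// ResourceVersion is a cheap change detector.
			if oldCm.ResourceVersion == newCm.ResourceVersion {
				return
			}
			out <- newCm.Data
		},
	})

	factory.Start(ctx.Done())
	return out
}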
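
KubeLinterChecks exists only as a YAML bridge: the ConfigMap payload is unmarshalled into it and then copied into kube-linter's ChecksConfig. A self-contained sketch with a subset of the fields:

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// checks mirrors the KubeLinterChecks bridge struct from pkg/configmap
// (subset of fields): unmarshal the ConfigMap payload here first, then copy
// into kube-linter's config.ChecksConfig.
type checks struct {
	Checks struct {
		DoNotAutoAddDefaults bool     `yaml:"doNotAutoAddDefaults,omitempty"`
		Include              []string `yaml:"include,omitempty"`
		Exclude              []string `yaml:"exclude,omitempty"`
	} `yaml:"checks"`
}

func main() {
	data := `
checks:
  doNotAutoAddDefaults: true
  include:
    - host-network
    - run-as-non-root
`
	var c checks
	if err := yaml.Unmarshal([]byte(data), &c); err != nil {
		panic(fmt.Errorf("unmarshalling configmap data: %w", err))
	}
	fmt.Printf("%+v\n", c.Checks)
}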
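
GetStaticKubelinterConfig is a single Get against the ConfigMap, which is why the relocated test can drive it with a fake clientset. A condensed version of that round trip; readChecks is a stand-in for the real method:

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	kubefake "k8s.io/client-go/kubernetes/fake"
)

// readChecks fetches one key from the DVO ConfigMap, the same call
// GetStaticKubelinterConfig makes before unmarshalling into KubeLinterChecks.
func readChecks(ctx context.Context, cs kubernetes.Interface, ns, name, key string) (string, error) {
	cm, err := cs.CoreV1().ConfigMaps(ns).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return "", fmt.Errorf("getting initial configuration: %w", err)
	}
	return cm.Data[key], nil
}

func main() {
	cm := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "deployment-validation-operator-config",
			Namespace: "deployment-validation-operator",
		},
		Data: map[string]string{
			"deployment-validation-operator-config.yaml": "checks:\n  include: [host-network]\n",
		},
	}
	cs := kubefake.NewSimpleClientset(cm)
	out, _ := readChecks(context.Background(), cs,
		"deployment-validation-operator",
		"deployment-validation-operator-config",
		"deployment-validation-operator-config.yaml")
	fmt.Print(out)
}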
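
Both the metrics server and the ConfigMap watcher are handed to the controller-runtime manager with mgr.Add, which accepts anything implementing manager.Runnable, i.e. a blocking Start(ctx) error. A toy Runnable to show the contract; heartbeat is purely illustrative:

package main

import (
	"context"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/manager"
)

// heartbeat satisfies manager.Runnable: the manager starts it with the rest
// of its components and cancels ctx on shutdown, exactly how this patch
// wires in the metrics server and the watcher.
type heartbeat struct{ every time.Duration }

func (h *heartbeat) Start(ctx context.Context) error {
	t := time.NewTicker(h.every)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return nil // manager is shutting down
		case <-t.C:
			ctrl.Log.WithName("heartbeat").Info("tick")
		}
	}
}

func main() {
	mgr, err := manager.New(ctrl.GetConfigOrDie(), manager.Options{})
	if err != nil {
		panic(err)
	}
	if err := mgr.Add(&heartbeat{every: 30 * time.Second}); err != nil {
		panic(err)
	}
	_ = mgr.Start(ctrl.SetupSignalHandler())
}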
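
PreloadMetrics registers one GaugeVec per kube-linter check up front, so every check exposes a metric before its first report. A sketch with a hard-coded check list standing in for validations.GetAllNamesFromRegistry:

package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
)

// preload builds and registers one GaugeVec per check name, as
// PreloadMetrics does; the Help text is simplified here.
func preload(reg *prometheus.Registry, operator string, names []string) (map[string]*prometheus.GaugeVec, error) {
	out := make(map[string]*prometheus.GaugeVec, len(names))
	for _, name := range names {
		m := prometheus.NewGaugeVec(prometheus.GaugeOpts{
			// Prometheus metric names may not contain '-', hence the substitution.
			Name: strings.ReplaceAll(fmt.Sprintf("%s_%s", operator, name), "-", "_"),
			Help: fmt.Sprintf("kube-linter check %q", name),
		}, []string{"namespace_uid", "namespace", "uid", "name", "kind"})
		if err := reg.Register(m); err != nil {
			return nil, fmt.Errorf("registering metric for check %q: %w", name, err)
		}
		out[name] = m
	}
	return out, nil
}

func main() {
	reg := prometheus.NewRegistry()
	metrics, err := preload(reg, "deployment-validation-operator",
		[]string{"host-network", "run-as-non-root"})
	if err != nil {
		panic(err)
	}
	metrics["host-network"].WithLabelValues("ns-uid", "default", "uid", "web", "Deployment").Set(1)
	fmt.Println("preloaded", len(metrics), "metrics")
}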
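
LookForConfigUpdates is a plain select loop: block on either a new configuration or context cancellation, and only drain the cache and reset metrics when re-initialization succeeds. The same shape with stand-in types; checksConfig, apply, and reset are illustrative placeholders for kube-linter's config.Config and the validations package calls:

package main

import (
	"context"
	"fmt"
	"time"
)

type checksConfig struct{ Include []string } // stand-in for kube-linter's config.Config

// watchUpdates mirrors GenericReconciler.LookForConfigUpdates: apply plays
// the role of validations.UpdateConfig + InitRegistry, and reset the cache
// drain + metric reset that happen only when apply succeeds.
func watchUpdates(ctx context.Context, ch <-chan checksConfig, apply func(checksConfig) error, reset func()) {
	for {
		select {
		case cfg := <-ch:
			if err := apply(cfg); err != nil {
				fmt.Printf("error updating configuration: %v\n", err)
				continue
			}
			reset()
		case <-ctx.Done():
			return
		}
	}
}

func main() {
	ch := make(chan checksConfig)
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	go watchUpdates(ctx, ch,
		func(c checksConfig) error { fmt.Println("now enforcing:", c.Include); return nil },
		func() { fmt.Println("cache drained, metrics reset") },
	)

	ch <- checksConfig{Include: []string{"host-network"}}
	<-ctx.Done()
}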
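
getValidChecks recovers from configresolver's validation failure by parsing the offending names out of the error string with the regexp quoted in the patch, pruning them from the config, and retrying. The extraction step in isolation; the sample error string follows the format the patch documents:

package main

import (
	"fmt"
	"regexp"
)

// notFound is the same pattern getValidChecks compiles to pull misspelled
// check names out of configresolver's error message.
var notFound = regexp.MustCompile(`check \"([^,]*)\" not found`)

func invalidChecks(errMsg string) []string {
	var names []string
	for _, m := range notFound.FindAllStringSubmatch(errMsg, -1) {
		names = append(names, m[1])
	}
	return names
}

func main() {
	err := `enabled checks validation error: [check "misspelled" not found, check "wrong_format" not found]`
	fmt.Println(invalidChecks(err)) // [misspelled wrong_format]
}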
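
Finally, removeCheckFromConfig relies on the append(s[:i], s[i+1:]...) idiom for in-place deletion. The idiom on its own, with its one caveat spelled out:

package main

import "fmt"

// remove drops the first occurrence of v from s using the same
// append(s[:i], s[i+1:]...) idiom as removeCheckFromConfig. It mutates the
// backing array, so callers should use only the returned slice afterwards.
func remove(s []string, v string) []string {
	for i := range s {
		if s[i] == v {
			return append(s[:i], s[i+1:]...)
		}
	}
	return s
}

func main() {
	include := []string{"host-network", "host-pid", "unset-something"}
	fmt.Println(remove(include, "unset-something")) // [host-network host-pid]
}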