diff --git a/main.go b/main.go index cace9eb2..cd98c33b 100644 --- a/main.go +++ b/main.go @@ -128,7 +128,7 @@ func setupManager(log logr.Logger, opts options.Options) (manager.Manager, error log.Info("Initialize Validation Engine") - err = validations.InitEngine(opts.ConfigFile, metrics) + validationEngine, err := validations.NewValidationEngine(opts.ConfigFile, metrics) if err != nil { return nil, fmt.Errorf("initializing validation engine: %w", err) } @@ -140,7 +140,7 @@ func setupManager(log logr.Logger, opts options.Options) (manager.Manager, error return nil, fmt.Errorf("initializing discovery client: %w", err) } - gr, err := controller.NewGenericReconciler(mgr.GetClient(), discoveryClient, cmWatcher) + gr, err := controller.NewGenericReconciler(mgr.GetClient(), discoveryClient, cmWatcher, validationEngine) if err != nil { return nil, fmt.Errorf("initializing generic reconciler: %w", err) } diff --git a/pkg/controller/generic_reconciler.go b/pkg/controller/generic_reconciler.go index a76bc03d..1d2d9dc1 100644 --- a/pkg/controller/generic_reconciler.go +++ b/pkg/controller/generic_reconciler.go @@ -39,6 +39,7 @@ type GenericReconciler struct { discovery discovery.DiscoveryInterface logger logr.Logger cmWatcher *configmap.Watcher + validationEngine validations.Interface } // NewGenericReconciler returns a GenericReconciler struct @@ -46,6 +47,7 @@ func NewGenericReconciler( client client.Client, discovery discovery.DiscoveryInterface, cmw *configmap.Watcher, + validationEngine validations.Interface, ) (*GenericReconciler, error) { listLimit, err := getListLimit() if err != nil { @@ -61,6 +63,7 @@ func NewGenericReconciler( currentObjects: newValidationCache(), logger: ctrl.Log.WithName("reconcile"), cmWatcher: cmw, + validationEngine: validationEngine, }, nil } @@ -125,18 +128,17 @@ func (gr *GenericReconciler) LookForConfigUpdates(ctx context.Context) { for { select { case cfg := <-gr.cmWatcher.ConfigChanged(): - validations.UpdateConfig(cfg) - err := validations.InitRegistry() - if err == nil { - gr.objectValidationCache.drain() - validations.ResetMetrics() - - } else { + gr.validationEngine.SetConfig(cfg) + err := gr.validationEngine.InitRegistry() + if err != nil { gr.logger.Error( err, fmt.Sprintf("error updating configuration from ConfigMap: %v\n", cfg), ) + continue } + gr.objectValidationCache.drain() + gr.validationEngine.ResetMetrics() case <-ctx.Done(): return @@ -300,7 +302,7 @@ func (gr *GenericReconciler) reconcileGroupOfObjects(ctx context.Context, cliObjects = append(cliObjects, typedClientObject) } - outcome, err := validations.RunValidationsForObjects(cliObjects, namespaceUID) + outcome, err := gr.validationEngine.RunValidationsForObjects(cliObjects, namespaceUID) if err != nil { return fmt.Errorf("running validations: %w", err) } @@ -363,7 +365,7 @@ func (gr *GenericReconciler) handleResourceDeletions() { UID: v.uid, } - validations.DeleteMetrics(req.ToPromLabels()) + gr.validationEngine.DeleteMetrics(req.ToPromLabels()) gr.objectValidationCache.removeKey(k) diff --git a/pkg/controller/generic_reconciler_test.go b/pkg/controller/generic_reconciler_test.go index 637031b4..dd4a57b7 100644 --- a/pkg/controller/generic_reconciler_test.go +++ b/pkg/controller/generic_reconciler_test.go @@ -8,6 +8,7 @@ import ( "github.com/app-sre/deployment-validation-operator/pkg/configmap" "github.com/app-sre/deployment-validation-operator/pkg/validations" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -1029,5 +1030,10 @@ func createTestReconciler(scheme *runtime.Scheme, objects []client.Object) (*Gen } client := cliBuilder.Build() cli := kubefake.NewSimpleClientset() - return NewGenericReconciler(client, cli.Discovery(), &configmap.Watcher{}) + + ve, err := validations.NewValidationEngine("", make(map[string]*prometheus.GaugeVec)) + if err != nil { + return nil, err + } + return NewGenericReconciler(client, cli.Discovery(), &configmap.Watcher{}, ve) } diff --git a/pkg/validations/base.go b/pkg/validations/base.go index 9b017536..6574e3fa 100644 --- a/pkg/validations/base.go +++ b/pkg/validations/base.go @@ -1,152 +1,11 @@ package validations import ( - "fmt" - "reflect" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - - "github.com/app-sre/deployment-validation-operator/pkg/utils" "github.com/prometheus/client_golang/prometheus" - "golang.stackrox.io/kube-linter/pkg/lintcontext" - "golang.stackrox.io/kube-linter/pkg/run" -) - -var log = logf.Log.WithName("validations") - -type ValidationOutcome string - -var ( - ObjectNeedsImprovement ValidationOutcome = "object needs improvement" - ObjectValid ValidationOutcome = "object valid" - ObjectsValid ValidationOutcome = "objects are valid" - ObjectValidationIgnored ValidationOutcome = "object validation ignored" ) -// RunValidationsForObjects runs validation for the group of related objects -func RunValidationsForObjects(objects []client.Object, namespaceUID string) (ValidationOutcome, error) { - lintCtx := &lintContextImpl{} - for _, obj := range objects { - // Only run checks against an object with no owners. This should be - // the object that controls the configuration - if !utils.IsOwner(obj) { - continue - } - // If controller has no replicas clear do not run any validations - if isControllersWithNoReplicas(obj) { - continue - } - lintCtx.addObjects(lintcontext.Object{K8sObject: obj}) - } - lintCtxs := []lintcontext.LintContext{lintCtx} - if len(lintCtxs) == 0 { - return ObjectValidationIgnored, nil - } - result, err := run.Run(lintCtxs, engine.CheckRegistry(), engine.EnabledChecks()) - if err != nil { - log.Error(err, "error running validations") - return "", fmt.Errorf("error running validations: %v", err) - } - - // Clear labels from past run to ensure only results from this run - // are reflected in the metrics - for _, o := range objects { - req := NewRequestFromObject(o) - req.NamespaceUID = namespaceUID - engine.ClearMetrics(result.Reports, req.ToPromLabels()) - } - return processResult(result, namespaceUID) -} - -// isControllersWithNoReplicas checks if the provided object has no replicas -func isControllersWithNoReplicas(obj client.Object) bool { - objValue := reflect.Indirect(reflect.ValueOf(obj)) - spec := objValue.FieldByName("Spec") - if spec.IsValid() { - replicas := spec.FieldByName("Replicas") - if replicas.IsValid() { - numReplicas, ok := replicas.Interface().(*int32) - - // clear labels if we fail to get a value for numReplicas, or if value is <= 0 - if !ok || numReplicas == nil || *numReplicas <= 0 { - req := NewRequestFromObject(obj) - engine.DeleteMetrics(req.ToPromLabels()) - return true - } - } - } - return false -} - -func processResult(result run.Result, namespaceUID string) (ValidationOutcome, error) { - outcome := ObjectValid - for _, report := range result.Reports { - check, err := engine.GetCheckByName(report.Check) - if err != nil { - log.Error(err, fmt.Sprintf("Failed to get check '%s' by name", report.Check)) - return "", fmt.Errorf("error running validations: %v", err) - } - obj := report.Object.K8sObject - logger := log.WithValues( - "request.namespace", obj.GetNamespace(), - "request.name", obj.GetName(), - "kind", obj.GetObjectKind().GroupVersionKind().Kind, - "validation", report.Check, - "check_description", check.Description, - "check_remediation", report.Remediation, - "check_failure_reason", report.Diagnostic.Message, - ) - metric := engine.GetMetric(report.Check) - if metric == nil { - log.Error(nil, "no metric found for validation", report.Check) - } else { - req := NewRequestFromObject(obj) - req.NamespaceUID = namespaceUID - metric.With(req.ToPromLabels()).Set(1) - logger.Info(report.Remediation) - outcome = ObjectNeedsImprovement - } - } - return outcome, nil -} - -// RunValidations will run all the registered validations -func RunValidations(request Request, obj client.Object) (ValidationOutcome, error) { - log.V(2).Info("validation", "kind", request.Kind) - - promLabels := request.ToPromLabels() - - // Only run checks against an object with no owners. This should be - // the object that controls the configuration - if !utils.IsOwner(obj) { - return ObjectValidationIgnored, nil - } - - // If controller has no replicas clear existing metrics and - // do not run any validations - if isControllersWithNoReplicas(obj) { - return ObjectValidationIgnored, nil - } - - lintCtxs := []lintcontext.LintContext{} - lintCtx := &lintContextImpl{} - lintCtx.addObjects(lintcontext.Object{K8sObject: obj}) - lintCtxs = append(lintCtxs, lintCtx) - result, err := run.Run(lintCtxs, engine.CheckRegistry(), engine.EnabledChecks()) - if err != nil { - log.Error(err, "error running validations") - return "", fmt.Errorf("error running validations: %v", err) - } - - // Clear labels from past run to ensure only results from this run - // are reflected in the metrics - engine.ClearMetrics(result.Reports, promLabels) - - return processResult(result, request.NamespaceUID) -} - // NewRequestFromObject converts a client.Object into // a validation request. Note that the NamespaceUID of the // request cannot be derived from the object and should diff --git a/pkg/validations/base_test.go b/pkg/validations/base_test.go index 06ef24ae..460982f4 100644 --- a/pkg/validations/base_test.go +++ b/pkg/validations/base_test.go @@ -1,360 +1,18 @@ package validations import ( - "fmt" - "strings" "testing" - "github.com/app-sre/deployment-validation-operator/pkg/testutils" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/prometheus/client_golang/prometheus" - prom_tu "github.com/prometheus/client_golang/prometheus/testutil" appsv1 "k8s.io/api/apps/v1" "github.com/stretchr/testify/assert" - "golang.stackrox.io/kube-linter/pkg/builtinchecks" - "golang.stackrox.io/kube-linter/pkg/checkregistry" - "golang.stackrox.io/kube-linter/pkg/config" - "golang.stackrox.io/kube-linter/pkg/configresolver" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const ( - customCheckName = "test-minimum-replicas" - customCheckDescription = "some description" - customCheckRemediation = "some remediation" - customCheckTemplate = "minimum-replicas" -) - -func newEngine(c config.Config) (validationEngine, error) { - ve := validationEngine{ - config: c, - } - loadErr := ve.InitRegistry() - if loadErr != nil { - return validationEngine{}, loadErr - } - // checks now are preloaded, adding them after Registry init - ve.metrics = make(map[string]*prometheus.GaugeVec) - for _, checkName := range ve.EnabledChecks() { - check := ve.registeredChecks[checkName] - ve.metrics[checkName] = newGaugeVecMetric(check) - } - return ve, nil -} - -func newCustomCheck() config.Check { - return config.Check{ - Name: customCheckName, - Description: customCheckDescription, - Remediation: customCheckRemediation, - Template: customCheckTemplate, - Scope: &config.ObjectKindsDesc{ - ObjectKinds: []string{"DeploymentLike"}, - }, - Params: map[string]interface{}{"minReplicas": 3}, - } -} - -func newEngineConfigWithCustomCheck(customCheck []config.Check) config.Config { - - // Create custom config with custom check array - return config.Config{ - CustomChecks: customCheck, - Checks: config.ChecksConfig{ - AddAllBuiltIn: false, - DoNotAutoAddDefaults: true, - }, - } -} - -func newEngineConfigWithAllChecks() config.Config { - return config.Config{ - CustomChecks: []config.Check{}, - Checks: config.ChecksConfig{ - AddAllBuiltIn: true, - DoNotAutoAddDefaults: false, - }, - } -} - -func createTestDeployment(replicas int32) (*appsv1.Deployment, error) { - d, err := testutils.CreateDeploymentFromTemplate( - testutils.NewTemplateArgs()) - if err != nil { - return nil, err - } - d.Spec.Replicas = &replicas - return &d, nil -} - -func initializeEngine(customCheck ...config.Check) error { - // Check if custom check has been set - if len(customCheck) > 0 { - // Initialize engine with custom check - e, err := newEngine(newEngineConfigWithCustomCheck(customCheck)) - if err != nil { - return err - } - engine = e - } else { - // Initialize engine for all checks - e, err := newEngine(newEngineConfigWithAllChecks()) - if err != nil { - return err - } - engine = e - } - return nil -} - -func TestRunValidationsIssueCorrection(t *testing.T) { - - customCheck := newCustomCheck() - - // Initialize engine - err := initializeEngine(customCheck) - if err != nil { - t.Errorf("Error initializing engine %v", err) - } - - replicaCnt := int32(1) - deployment, err := createTestDeployment(replicaCnt) - request := NewRequestFromObject(deployment) - if err != nil { - t.Errorf("Error creating deployment from template %v", err) - } - - _, err = RunValidations(request, deployment) - if err != nil { - t.Errorf("Error running validations: %v", err) - } - - labels := request.ToPromLabels() - metric, err := engine.GetMetric(customCheck.Name).GetMetricWith(labels) - if err != nil { - t.Errorf("Error getting prometheus metric: %v", err) - } - - expectedConstLabelSubString := fmt.Sprintf(""+ - "constLabels: {check_description=\"%s\",check_remediation=\"%s\"}", - customCheck.Description, - customCheck.Remediation, - ) - if !strings.Contains(metric.Desc().String(), expectedConstLabelSubString) { - t.Errorf("Metric is missing expected constant labels! Expected:\n%s\nGot:\n%s", - expectedConstLabelSubString, - metric.Desc().String()) - } - - if metricValue := int(prom_tu.ToFloat64(metric)); metricValue != 1 { - t.Errorf("Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 1) - } - - // Problem resolved - replicaCnt = int32(3) - deployment.Spec.Replicas = &replicaCnt - _, err = RunValidations(request, deployment) - if err != nil { - t.Errorf("Error running validations: %v", err) - } - // Metric with label combination should be successfully cleared because problem was resolved. - // The 'GetMetricWith()' function will create a new metric with provided labels if it - // does not exist. The default value of a metric is 0. Therefore, a value of 0 implies we - // successfully cleared the metric label combination. - metric, err = engine.GetMetric(customCheck.Name).GetMetricWith(labels) - if err != nil { - t.Errorf("Error getting prometheus metric: %v", err) - } - - if metricValue := int(prom_tu.ToFloat64(metric)); metricValue != 0 { - t.Errorf("Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 0) - } -} - -func TestRunValidationsForObjectsIssueCorrection(t *testing.T) { - customCheck := newCustomCheck() - // Initialize engine - err := initializeEngine(customCheck) - assert.NoError(t, err, "Error initializing engine") - - replicaCnt := int32(1) - deployment, err := createTestDeployment(replicaCnt) - assert.NoError(t, err, "Error creating deployment from template") - request := NewRequestFromObject(deployment) - request.NamespaceUID = "1234-6789-1011-testUID" - - // run validations with "broken" (replica=1) deployment object - _, err = RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) - assert.NoError(t, err, "Error running validations") - - labels := request.ToPromLabels() - metric, err := engine.GetMetric(customCheck.Name).GetMetricWith(labels) - assert.NoError(t, err, "Error getting prometheus metric") - - expectedConstLabelSubString := fmt.Sprintf(""+ - "constLabels: {check_description=\"%s\",check_remediation=\"%s\"}", - customCheck.Description, - customCheck.Remediation, - ) - assert.Contains(t, metric.Desc().String(), expectedConstLabelSubString, - "Metric is missing expected constant labels! Expected:\n%s\nGot:\n%s", - expectedConstLabelSubString, - metric.Desc().String()) - metricValue := int(prom_tu.ToFloat64(metric)) - assert.Equal(t, 1, metricValue, "Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 1) - - // Problem resolved - replicaCnt = int32(3) - deployment.Spec.Replicas = &replicaCnt - _, err = RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) - assert.NoError(t, err, "Error running validations") - // Metric with label combination should be successfully cleared because problem was resolved. - // The 'GetMetricWith()' function will create a new metric with provided labels if it - // does not exist. The default value of a metric is 0. Therefore, a value of 0 implies we - // successfully cleared the metric label combination. - metric, err = engine.GetMetric(customCheck.Name).GetMetricWith(labels) - assert.NoError(t, err, "Error getting prometheus metric") - - metricValue = int(prom_tu.ToFloat64(metric)) - assert.Equal(t, 0, metricValue, "Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 0) -} - -func TestIncompatibleChecksAreDisabled(t *testing.T) { - - // Initialize engine - err := initializeEngine() - if err != nil { - t.Errorf("Error initializing engine: %v", err) - } - - badChecks := getIncompatibleChecks() - allKubeLinterChecks, err := getAllBuiltInKubeLinterChecks() - if err != nil { - t.Fatalf("Got unexpected error while getting all checks built-into kube-linter: %v", err) - } - expectedNumChecks := (len(allKubeLinterChecks) - len(badChecks)) - - enabledChecks := engine.EnabledChecks() - if len(enabledChecks) != expectedNumChecks { - t.Errorf("Expected exactly %v checks to be enabled, but got '%v' checks from list '%v'", - expectedNumChecks, len(enabledChecks), enabledChecks) - } - - for _, badCheck := range badChecks { - if stringInSlice(badCheck, enabledChecks) { - t.Errorf("Expected incompatible kube-linter check '%v' to not be enabled, "+ - "but it was in the enabled list '%v'", - badCheck, enabledChecks) - } - } -} - -// Test to check if a resource has 0 replicas it clears metrics -func TestValidateZeroReplicas(t *testing.T) { - - customCheck := newCustomCheck() - - // Initialize Engine - err := initializeEngine(customCheck) - if err != nil { - t.Errorf("Error initializing engine %v", err) - } - - // Setup test deployment file with 0 replicas - replicaCnt := int32(0) - deployment, err := createTestDeployment(replicaCnt) - request := NewRequestFromObject(deployment) - if err != nil { - t.Errorf("Error creating deployment from template %v", err) - } - - // Run validations against test environment - _, err = RunValidations(request, deployment) - if err != nil { - t.Errorf("Error running validations: %v", err) - } - // Acquire labels generated from validations - labels := request.ToPromLabels() - - // The 'GetMetricWith()' function will create a new metric with provided labels if it - // does not exist. The default value of a metric is 0. Therefore, a value of 0 implies we - // successfully cleared the metric label combination. - metric, err := engine.GetMetric(customCheck.Name).GetMetricWith(labels) - if err != nil { - t.Errorf("Error getting prometheus metric: %v", err) - } - - // If metrics are cleared then the value will be == 0 - if metricValue := int(prom_tu.ToFloat64(metric)); metricValue != 0 { - t.Errorf("Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 0) - } - - // Check using a replica count above 0 - replicaCnt = int32(1) - deployment.Spec.Replicas = &replicaCnt - _, err = RunValidations(request, deployment) - if err != nil { - t.Errorf("Error running validations: %v", err) - } - metric, err = engine.GetMetric(customCheck.Name).GetMetricWith(labels) - if err != nil { - t.Errorf("Error getting prometheus metric: %v", err) - } - - // If metrics exist then value will be non 0 - if metricValue := int(prom_tu.ToFloat64(metric)); metricValue == 0 { - t.Errorf("Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 1) - } - - // Check to see metrics clear by setting replicas to 0 - replicaCnt = int32(0) - deployment.Spec.Replicas = &replicaCnt - _, err = RunValidations(request, deployment) - if err != nil { - t.Errorf("Error running validations: %v", err) - } - metric, err = engine.GetMetric(customCheck.Name).GetMetricWith(labels) - if err != nil { - t.Errorf("Error getting prometheus metric: %v", err) - } - - // If metrics are cleared then the value will be == 0 - if metricValue := int(prom_tu.ToFloat64(metric)); metricValue != 0 { - t.Errorf("Deployment test failed %#v: got %d want %d", customCheck.Name, metricValue, 0) - } - -} - -func stringInSlice(a string, list []string) bool { - for _, b := range list { - if b == a { - return true - } - } - return false -} - -// getAllBuiltInKubeLinterChecks returns every check built-into kube-linter (including checks that DVO disables) -func getAllBuiltInKubeLinterChecks() ([]string, error) { - ve := validationEngine{ - config: newEngineConfigWithAllChecks(), - } - registry := checkregistry.New() - if err := builtinchecks.LoadInto(registry); err != nil { - return nil, fmt.Errorf("failed to load built-in validations: %s", err.Error()) - } - - enabledChecks, err := configresolver.GetEnabledChecksAndValidate(&ve.config, registry) - if err != nil { - return nil, fmt.Errorf("error finding enabled validations: %s", err.Error()) - } - - return enabledChecks, nil -} - func TestRequest(t *testing.T) { t.Parallel() diff --git a/pkg/validations/test-resources/config-with-custom-check.yaml b/pkg/validations/test-resources/config-with-custom-check.yaml new file mode 100644 index 00000000..1abf0bde --- /dev/null +++ b/pkg/validations/test-resources/config-with-custom-check.yaml @@ -0,0 +1,26 @@ +customChecks: + - name: test-minimum-replicas + description: "some description" + remediation: "some remediation" + template: minimum-replicas + params: + minReplicas: 3 + scope: + objectKinds: + - DeploymentLike +checks: + doNotAutoAddDefaults: false + addAllBuiltIn: true + include: + - "host-ipc" + - "host-network" + - "host-pid" + - "non-isolated-pod" + - "pdb-max-unavailable" + - "pdb-min-available" + - "privilege-escalation-container" + - "privileged-container" + - "run-as-non-root" + - "unsafe-sysctls" + - "unset-cpu-requirements" + - "unset-memory-requirements" \ No newline at end of file diff --git a/pkg/validations/test-resources/default-config.yaml b/pkg/validations/test-resources/default-config.yaml new file mode 100644 index 00000000..8844b5fd --- /dev/null +++ b/pkg/validations/test-resources/default-config.yaml @@ -0,0 +1,16 @@ +checks: + doNotAutoAddDefaults: false + addAllBuiltIn: true + include: + - "host-ipc" + - "host-network" + - "host-pid" + - "non-isolated-pod" + - "pdb-max-unavailable" + - "pdb-min-available" + - "privilege-escalation-container" + - "privileged-container" + - "run-as-non-root" + - "unsafe-sysctls" + - "unset-cpu-requirements" + - "unset-memory-requirements" \ No newline at end of file diff --git a/pkg/validations/utils.go b/pkg/validations/utils.go index f7bb9b53..3872bddc 100644 --- a/pkg/validations/utils.go +++ b/pkg/validations/utils.go @@ -13,10 +13,6 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func DeleteMetrics(labels prometheus.Labels) { - engine.DeleteMetrics(labels) -} - // GetKubeLinterRegistry returns a CheckRegistry containing kube-linter built-in validations. // It initializes a new CheckRegistry, loads the built-in validations into the registry, // and returns the resulting registry if successful. @@ -77,23 +73,6 @@ func newGaugeVecMetric(check klConfig.Check) *prometheus.GaugeVec { }, []string{"namespace_uid", "namespace", "uid", "name", "kind"}) } -// UpdateConfig provides an access to setup new configuration for the generic reconciler -func UpdateConfig(cfg klConfig.Config) { - engine.config = cfg -} - -// InitRegistry forces Validation Engine to initialize a new registry -func InitRegistry() error { - return engine.InitRegistry() -} - -// ResetMetrics resets all the metrics registered in the Validation Engine -func ResetMetrics() { - for _, metric := range engine.metrics { - metric.Reset() - } -} - // GetDefaultChecks provides a default set of checks usable in case there is no custom ConfigMap func GetDefaultChecks() klConfig.ChecksConfig { return klConfig.ChecksConfig{ diff --git a/pkg/validations/validation_engine.go b/pkg/validations/validation_engine.go index f5a5cefd..d1e01301 100644 --- a/pkg/validations/validation_engine.go +++ b/pkg/validations/validation_engine.go @@ -6,16 +6,22 @@ import ( _ "embed" // nolint:golint "fmt" "os" + "reflect" "regexp" // Import checks from DVO + "github.com/app-sre/deployment-validation-operator/pkg/utils" _ "github.com/app-sre/deployment-validation-operator/pkg/validations/all" // nolint:golint + "sigs.k8s.io/controller-runtime/pkg/client" "golang.stackrox.io/kube-linter/pkg/checkregistry" "golang.stackrox.io/kube-linter/pkg/config" "golang.stackrox.io/kube-linter/pkg/configresolver" "golang.stackrox.io/kube-linter/pkg/diagnostic" + "golang.stackrox.io/kube-linter/pkg/lintcontext" + "golang.stackrox.io/kube-linter/pkg/run" + logf "sigs.k8s.io/controller-runtime/pkg/log" // Import and initialize all check templates from kube-linter _ "golang.stackrox.io/kube-linter/pkg/templates/all" // nolint:golint @@ -25,7 +31,30 @@ import ( "github.com/spf13/viper" ) -var engine validationEngine +var log = logf.Log.WithName("validations") + +type ValidationOutcome string + +var ( + ObjectNeedsImprovement ValidationOutcome = "object needs improvement" + ObjectValid ValidationOutcome = "object valid" + ObjectsValid ValidationOutcome = "objects are valid" + ObjectValidationIgnored ValidationOutcome = "object validation ignored" +) + +type Interface interface { + // InitRegistry creates new kubelinter check registry and loads all the enabled + // and custom checks. + InitRegistry() error + // DeleteMetrics deletes the Prometheus Gauge vector with the corresponding labels + DeleteMetrics(labels prometheus.Labels) + // ResetMetrics resets all the Prometheus Gauge vectors + ResetMetrics() + // SetConfig sets the kubelinter configuration + SetConfig(cfg config.Config) + // RunValidationsForObjects runs kubelinter validations for provided slice (group) of objects. + RunValidationsForObjects(objects []client.Object, namespaceUID string) (ValidationOutcome, error) +} type validationEngine struct { config config.Config @@ -35,42 +64,33 @@ type validationEngine struct { metrics map[string]*prometheus.GaugeVec } -// InitEngine creates a new ValidationEngine instance with the provided configuration path, a watcher, and metrics. +// NewValidationEngine creates a new ValidationEngine instance +// with the provided configuration path and metrics. // It initializes a ValidationEngine with the provided watcher for configmap changes and a set of preloaded metrics. -// The engine's configuration is loaded from the specified configuration path, and its check registry is initialized. -// InitRegistry sets this instance in the package scope in engine variable. // // Parameters: // - configPath: The path to the configuration file for the ValidationEngine. -// - cmw: A configmap.Watcher for monitoring changes to configmaps. // - metrics: A map of preloaded Prometheus GaugeVec metrics. // // Returns: // - An error if there's an issue loading the configuration or initializing the check registry. -func InitEngine(configPath string, metrics map[string]*prometheus.GaugeVec) error { - ve := &validationEngine{ - metrics: metrics, +func NewValidationEngine(configPath string, metrics map[string]*prometheus.GaugeVec) (Interface, error) { + cfg, err := loadConfig(configPath) + if err != nil { + return nil, err } - err := ve.LoadConfig(configPath) - if err != nil { - return err + ve := &validationEngine{ + metrics: metrics, + config: cfg, } err = ve.InitRegistry() if err != nil { - return err + return nil, err } - return nil -} - -func (ve *validationEngine) CheckRegistry() checkregistry.CheckRegistry { - return ve.registry -} - -func (ve *validationEngine) EnabledChecks() []string { - return ve.enabledChecks + return ve, nil } // Get info on config file if it exists @@ -82,30 +102,105 @@ func fileExists(filename string) bool { return !info.IsDir() } -func (ve *validationEngine) LoadConfig(path string) error { +func loadConfig(path string) (config.Config, error) { if !fileExists(path) { log.Info(fmt.Sprintf("config file %s does not exist. Use default configuration", path)) - ve.config.Checks = GetDefaultChecks() - - return nil + return config.Config{ + Checks: GetDefaultChecks(), + }, nil } v := viper.New() - // Load Configuration - config, err := config.Load(v, path) + return config.Load(v, path) +} + +// RunValidationsForObjects runs validation for the group of related objects +func (ve *validationEngine) RunValidationsForObjects(objects []client.Object, + namespaceUID string) (ValidationOutcome, error) { + lintCtx := &lintContextImpl{} + for _, obj := range objects { + // Only run checks against an object with no owners. This should be + // the object that controls the configuration + if !utils.IsOwner(obj) { + continue + } + // If controller has no replicas clear do not run any validations + if ve.isControllersWithNoReplicas(obj) { + continue + } + lintCtx.addObjects(lintcontext.Object{K8sObject: obj}) + } + lintCtxs := []lintcontext.LintContext{lintCtx} + if len(lintCtxs) == 0 { + return ObjectValidationIgnored, nil + } + result, err := run.Run(lintCtxs, ve.registry, ve.enabledChecks) if err != nil { - log.Error(err, "failed to load config") - return err + log.Error(err, "error running validations") + return "", fmt.Errorf("error running validations: %v", err) } - ve.config = config + // Clear labels from past run to ensure only results from this run + // are reflected in the metrics + for _, o := range objects { + req := NewRequestFromObject(o) + req.NamespaceUID = namespaceUID + ve.clearMetrics(result.Reports, req.ToPromLabels()) + } + return ve.processResult(result, namespaceUID) +} - return nil +// isControllersWithNoReplicas checks if the provided object has no replicas +func (ve *validationEngine) isControllersWithNoReplicas(obj client.Object) bool { + objValue := reflect.Indirect(reflect.ValueOf(obj)) + spec := objValue.FieldByName("Spec") + if spec.IsValid() { + replicas := spec.FieldByName("Replicas") + if replicas.IsValid() { + numReplicas, ok := replicas.Interface().(*int32) + + // clear labels if we fail to get a value for numReplicas, or if value is <= 0 + if !ok || numReplicas == nil || *numReplicas <= 0 { + req := NewRequestFromObject(obj) + ve.DeleteMetrics(req.ToPromLabels()) + return true + } + } + } + return false } -type PrometheusRegistry interface { - Register(prometheus.Collector) error +func (ve *validationEngine) processResult(result run.Result, namespaceUID string) (ValidationOutcome, error) { + outcome := ObjectValid + for _, report := range result.Reports { + check, err := ve.getCheckByName(report.Check) + if err != nil { + log.Error(err, fmt.Sprintf("Failed to get check '%s' by name", report.Check)) + return "", fmt.Errorf("error running validations: %v", err) + } + obj := report.Object.K8sObject + logger := log.WithValues( + "request.namespace", obj.GetNamespace(), + "request.name", obj.GetName(), + "kind", obj.GetObjectKind().GroupVersionKind().Kind, + "validation", report.Check, + "check_description", check.Description, + "check_remediation", report.Remediation, + "check_failure_reason", report.Diagnostic.Message, + ) + metric := ve.getMetric(report.Check) + if metric == nil { + log.Error(nil, "no metric found for validation", report.Check) + } else { + req := NewRequestFromObject(obj) + req.NamespaceUID = namespaceUID + metric.With(req.ToPromLabels()).Set(1) + logger.Info(report.Remediation) + outcome = ObjectNeedsImprovement + } + } + return outcome, nil } func (ve *validationEngine) InitRegistry() error { @@ -140,12 +235,10 @@ func (ve *validationEngine) InitRegistry() error { ve.enabledChecks = enabledChecks ve.registeredChecks = registeredChecks - engine = *ve - return nil } -func (ve *validationEngine) GetMetric(name string) *prometheus.GaugeVec { +func (ve *validationEngine) getMetric(name string) *prometheus.GaugeVec { m, ok := ve.metrics[name] if !ok { return nil @@ -159,7 +252,7 @@ func (ve *validationEngine) DeleteMetrics(labels prometheus.Labels) { } } -func (ve *validationEngine) ClearMetrics(reports []diagnostic.WithContext, labels prometheus.Labels) { +func (ve *validationEngine) clearMetrics(reports []diagnostic.WithContext, labels prometheus.Labels) { // Create a list of validation names for use to delete the labels from any // metric which isn't in the report but for which there is a metric reportValidationNames := map[string]struct{}{} @@ -175,7 +268,13 @@ func (ve *validationEngine) ClearMetrics(reports []diagnostic.WithContext, label } } -func (ve *validationEngine) GetCheckByName(name string) (config.Check, error) { +func (ve *validationEngine) ResetMetrics() { + for _, metric := range ve.metrics { + metric.Reset() + } +} + +func (ve *validationEngine) getCheckByName(name string) (config.Check, error) { check, ok := ve.registeredChecks[name] if !ok { return config.Check{}, fmt.Errorf("check '%s' is not registered", name) @@ -208,6 +307,10 @@ func (ve *validationEngine) getValidChecks(registry checkregistry.CheckRegistry) return enabledChecks, nil } +func (ve *validationEngine) SetConfig(cfg config.Config) { + ve.config = cfg +} + // removeCheckFromConfig function searches for the given check name in both the "Include" and "Exclude" lists // of checks in the ValidationEngine's configuration. If the check is found in either list, it is removed by updating // the respective list. diff --git a/pkg/validations/validation_engine_test.go b/pkg/validations/validation_engine_test.go index f7983f22..ca6b6cfa 100644 --- a/pkg/validations/validation_engine_test.go +++ b/pkg/validations/validation_engine_test.go @@ -1,10 +1,19 @@ package validations import ( + "fmt" "testing" + "github.com/app-sre/deployment-validation-operator/pkg/testutils" + "github.com/prometheus/client_golang/prometheus" + promUtils "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" + "golang.stackrox.io/kube-linter/pkg/builtinchecks" + "golang.stackrox.io/kube-linter/pkg/checkregistry" "golang.stackrox.io/kube-linter/pkg/config" + "golang.stackrox.io/kube-linter/pkg/configresolver" + appsv1 "k8s.io/api/apps/v1" + "sigs.k8s.io/controller-runtime/pkg/client" ) func TestGetValidChecks(t *testing.T) { @@ -90,3 +99,295 @@ func TestRemoveCheckFromConfig(t *testing.T) { }) } } + +const ( + customCheckName = "test-minimum-replicas" + customCheckDescription = "some description" + customCheckRemediation = "some remediation" + customCheckTemplate = "minimum-replicas" +) + +func newValidationEngine(configPath string, metrics map[string]*prometheus.GaugeVec) (*validationEngine, error) { + config, err := loadConfig(configPath) + if err != nil { + return nil, err + } + + ve := &validationEngine{ + config: config, + metrics: metrics, + } + loadErr := ve.InitRegistry() + if loadErr != nil { + return nil, loadErr + } + + // checks now are preloaded, adding them after Registry init + //ve.metrics = make(map[string]*prometheus.GaugeVec) + for _, checkName := range ve.enabledChecks { + check := ve.registeredChecks[checkName] + ve.metrics[checkName] = newGaugeVecMetric(check) + } + return ve, nil +} + +func newCustomCheck() config.Check { + return config.Check{ + Name: customCheckName, + Description: customCheckDescription, + Remediation: customCheckRemediation, + Template: customCheckTemplate, + Scope: &config.ObjectKindsDesc{ + ObjectKinds: []string{"DeploymentLike"}, + }, + Params: map[string]interface{}{"minReplicas": 3}, + } +} + +func newEngineConfigWithAllChecks() config.Config { + return config.Config{ + CustomChecks: []config.Check{}, + Checks: config.ChecksConfig{ + AddAllBuiltIn: true, + DoNotAutoAddDefaults: false, + }, + } +} + +func createTestDeployment(args testutils.TemplateArgs) (*appsv1.Deployment, error) { + d, err := testutils.CreateDeploymentFromTemplate( + &args) + if err != nil { + return nil, err + } + return &d, nil +} + +func TestUpdateConfig(t *testing.T) { + tests := []struct { + name string + initialConfig config.Config + updatedConfig config.Config + }{ + { + name: "basic update test", + initialConfig: config.Config{ + Checks: GetDefaultChecks(), + }, + + updatedConfig: config.Config{ + CustomChecks: []config.Check{ + newCustomCheck(), + }, + Checks: config.ChecksConfig{ + Include: []string{"foo-a", "foo-b"}, + Exclude: []string{"exclude-1", "exclude-2"}, + }, + }, + }, + { + name: "setting empty config", + initialConfig: config.Config{ + Checks: GetDefaultChecks(), + }, + + updatedConfig: config.Config{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ve, err := newValidationEngine("", make(map[string]*prometheus.GaugeVec)) + assert.NoError(t, err, "failed to create a new validation engine") + disableIncompatibleChecks(&tt.initialConfig) + assert.Equal(t, tt.initialConfig, ve.config) + ve.SetConfig(tt.updatedConfig) + assert.Equal(t, tt.updatedConfig, ve.config) + + }) + } + +} + +func TestRunValidationsForObjects(t *testing.T) { + tests := []struct { + name string + initialReplicaCount int32 + initialExpectedMetricValue int + updatedReplicaCount int32 + updatedExpectedMetricValue int + runAdditionalValidation bool + }{ + { + name: "Custom check (min 3 replicas) is active(failing) and then it's fixed", // nolint: lll + initialReplicaCount: 1, + initialExpectedMetricValue: 1, + updatedReplicaCount: 3, + updatedExpectedMetricValue: 0, + runAdditionalValidation: false, + }, + { + name: "Custom check (min 3 replicas) is not active, then it's failing and it's fixed again", // nolint: lll + initialReplicaCount: 0, + initialExpectedMetricValue: 0, + updatedReplicaCount: 1, + updatedExpectedMetricValue: 1, + runAdditionalValidation: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metrics := make(map[string]*prometheus.GaugeVec) + ve, err := newValidationEngine("test-resources/config-with-custom-check.yaml", metrics) + assert.NoError(t, err, "Error creating a new validation engine") + + deployment, err := createTestDeployment( + testutils.TemplateArgs{Replicas: int(tt.initialReplicaCount)}) + assert.NoError(t, err, "Error creating deployment from template") + request := NewRequestFromObject(deployment) + request.NamespaceUID = "1234-6789-1011-testUID" + + // run validations with "broken" (replica=1) deployment object + _, err = ve.RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) + assert.NoError(t, err, "Error running validations") + + labels := request.ToPromLabels() + metric, err := ve.getMetric(customCheckName).GetMetricWith(labels) + assert.NoError(t, err, "Error getting prometheus metric") + + expectedConstLabelSubString := fmt.Sprintf(""+ + "constLabels: {check_description=\"%s\",check_remediation=\"%s\"}", + customCheckDescription, + customCheckRemediation, + ) + assert.Contains(t, metric.Desc().String(), expectedConstLabelSubString, + "Metric is missing expected constant labels! Expected:\n%s\nGot:\n%s", + expectedConstLabelSubString, + metric.Desc().String()) + metricValue := int(promUtils.ToFloat64(metric)) + assert.Equal(t, tt.initialExpectedMetricValue, metricValue, + "Deployment test failed %#v: got %d want %d", + customCheckName, metricValue, tt.initialExpectedMetricValue) + + // Problem resolved + deployment.Spec.Replicas = &tt.updatedReplicaCount + _, err = ve.RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) + assert.NoError(t, err, "Error running validations") + // Metric with label combination should be successfully cleared because problem was resolved. + // The 'GetMetricWith()' function will create a new metric with provided labels if it + // does not exist. The default value of a metric is 0. Therefore, a value of 0 implies we + // successfully cleared the metric label combination. + customCheckMetricVal, err := getMetricValue(ve, customCheckName, labels) + assert.NoError(t, err, "Error getting prometheus metric") + assert.Equal(t, tt.updatedExpectedMetricValue, customCheckMetricVal, + "Deployment test failed %#v: got %d want %d", "test-minimum-replicas", + customCheckMetricVal, tt.updatedExpectedMetricValue) + + if tt.runAdditionalValidation { + deployment.Spec.Replicas = &tt.initialReplicaCount + _, err = ve.RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) + assert.NoError(t, err, "Error running validations") + + customCheckMetricVal, err := getMetricValue(ve, customCheckName, labels) + assert.NoError(t, err, "Error getting prometheus metric") + assert.Equal(t, tt.initialExpectedMetricValue, customCheckMetricVal, + "Deployment test failed %#v: got %d want %d", "test-minimum-replicas", + customCheckMetricVal, tt.initialExpectedMetricValue) + } + }) + } +} + +func TestRunValidationsForObjectsAndResetMetrics(t *testing.T) { + metrics := make(map[string]*prometheus.GaugeVec) + ve, err := newValidationEngine("test-resources/config-with-custom-check.yaml", metrics) + assert.NoError(t, err, "Error creating a new validation engine") + + deployment, err := createTestDeployment( + testutils.TemplateArgs{Replicas: 1, ResourceLimits: false, ResourceRequests: false}) + assert.NoError(t, err, "Error creating deployment from template") + request := NewRequestFromObject(deployment) + request.NamespaceUID = "1234-6789-1011-testUID" + + // run validations with "broken" (replica=1) deployment object + _, err = ve.RunValidationsForObjects([]client.Object{deployment}, request.NamespaceUID) + assert.NoError(t, err, "Error running validations") + + labels := request.ToPromLabels() + unsetCPUReqMetricVal, err := getMetricValue(ve, "unset-cpu-requirements", labels) + assert.NoError(t, err, "Error getting prometheus metric") + assert.Equal(t, 1, unsetCPUReqMetricVal, + "Deployment test failed unset-cpu-requirements: got %d want %d", + unsetCPUReqMetricVal, 1) + + customCheckMetricVal, err := getMetricValue(ve, customCheckName, labels) + assert.NoError(t, err, "Error getting prometheus metric") + assert.Equal(t, 1, customCheckMetricVal, + "Deployment test failed %#v: got %d want %d", + customCheckName, customCheckMetricVal, 1) + + ve.ResetMetrics() + // metrics have value 0 when reset + unsetCPUReqMetricVal, err = getMetricValue(ve, "unset-cpu-requirements", labels) + assert.NoError(t, err) + assert.Equal(t, 0, unsetCPUReqMetricVal, + "Deployment test failed unset-cpu-requirements: got %d want %d", + unsetCPUReqMetricVal, 0) + + customCheckMetricVal, err = getMetricValue(ve, customCheckName, labels) + assert.NoError(t, err, "Error getting prometheus metric") + assert.Equal(t, 0, customCheckMetricVal, + "Deployment test failed %#v: got %d want %d", + customCheckName, customCheckMetricVal, 0) +} + +func getMetricValue(v *validationEngine, checkName string, labels prometheus.Labels) (int, error) { + gauge := v.getMetric(checkName) + if gauge == nil { + return 0, fmt.Errorf("gauge vector %s not found ", checkName) + } + metric, err := gauge.GetMetricWith(labels) + if err != nil { + return 0, err + } + return int(promUtils.ToFloat64(metric)), nil +} + +func TestIncompatibleChecksAreDisabled(t *testing.T) { + // Initialize engine + ve, err := newValidationEngine("test-resources/default-config.yaml", make(map[string]*prometheus.GaugeVec)) + assert.NoError(t, err, "Error initializing engine") + + badChecks := getIncompatibleChecks() + allKubeLinterChecks, err := getAllBuiltInKubeLinterChecks() + assert.NoError(t, err, "Got unexpected error while getting all checks built-into kube-linter") + expectedNumChecks := (len(allKubeLinterChecks) - len(badChecks)) + + enabledChecks := ve.enabledChecks + assert.Equal(t, expectedNumChecks, len(enabledChecks), + "Expected exactly %v checks to be enabled, but got '%v' checks from list '%v'", + expectedNumChecks, len(enabledChecks), enabledChecks) + + for _, badCheck := range badChecks { + assert.NotContains(t, enabledChecks, badCheck) + } +} + +// getAllBuiltInKubeLinterChecks returns every check built-into kube-linter (including checks that DVO disables) +func getAllBuiltInKubeLinterChecks() ([]string, error) { + ve := validationEngine{ + config: newEngineConfigWithAllChecks(), + } + registry := checkregistry.New() + if err := builtinchecks.LoadInto(registry); err != nil { + return nil, fmt.Errorf("failed to load built-in validations: %s", err.Error()) + } + + enabledChecks, err := configresolver.GetEnabledChecksAndValidate(&ve.config, registry) + if err != nil { + return nil, fmt.Errorf("error finding enabled validations: %s", err.Error()) + } + + return enabledChecks, nil +}