diff --git a/CHANGELOG.md b/CHANGELOG.md index 98399f286..b32a9f095 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## main / unreleased +* [ENHANCEMENT] Check zone-aware PodDisruptionBudget compliance before deleting Pods during rolling updates. #324 + ## v0.31.1 * [ENHANCEMENT] Updated dependencies, including: #314, #315 diff --git a/cmd/rollout-operator/main.go b/cmd/rollout-operator/main.go index 6001207eb..ea26cca67 100644 --- a/cmd/rollout-operator/main.go +++ b/cmd/rollout-operator/main.go @@ -215,13 +215,17 @@ func main() { // watches for validating webhooks being added - this is only started if the TLS server is started webhookObserver := tlscert.NewWebhookObserver(kubeClient, cfg.kubeNamespace, logger) - // controller for pod eviction - this is only started if the TLS server is started + // Controller for pod eviction. + // If the TLS server is started below (webhooks registered), then this controller will handle the validating webhook requests + // for pod evictions and zpdb configuration changes. If the webhooks are not enabled, this controller is still started + // and will be used by the main controller to assist in validating pod deletion requests. evictionController := zpdb.NewEvictionController(kubeClient, dynamicClient, cfg.kubeNamespace, logger) + check(evictionController.Start()) maybeStartTLSServer(cfg, httpRT, logger, kubeClient, restart, metrics, evictionController, webhookObserver) // Init the controller - c := controller.NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, cfg.kubeClusterDomain, cfg.kubeNamespace, httpClient, cfg.reconcileInterval, reg, logger) + c := controller.NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, cfg.kubeClusterDomain, cfg.kubeNamespace, httpClient, cfg.reconcileInterval, reg, logger, evictionController) check(errors.Wrap(c.Init(), "failed to init controller")) // Listen to sigterm, as well as for restart (like for certificate renewal). 
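The wiring above means the rollout controller always has an eviction controller to consult before deleting a pod. A minimal, self-contained sketch of that gate pattern follows; the `ZPDBEvictionController` interface matches the one added to `pkg/controller/controller.go` later in this diff, while `fakeZPDB`, the pod names, and the hard-coded denial are purely illustrative.

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

// ZPDBEvictionController mirrors the interface the rollout controller now depends on
// (see pkg/controller/controller.go in this diff).
type ZPDBEvictionController interface {
	MarkPodAsDeleted(ctx context.Context, namespace string, podName string, source string) error
}

// fakeZPDB is a stand-in used only for this illustration: it denies one specific pod
// to show how the update loop reacts to a denial.
type fakeZPDB struct{}

func (fakeZPDB) MarkPodAsDeleted(_ context.Context, _ string, podName string, _ string) error {
	if podName == "ingester-zone-b-1" {
		return errors.New("1 pod not ready in ingester-zone-b")
	}
	return nil
}

func main() {
	var zpdb ZPDBEvictionController = fakeZPDB{}
	ctx := context.Background()

	// Same control flow as updateStatefulSetPods after this change: a denied pod is
	// skipped (in the real controller it is retried on a later reconcile) and the loop
	// continues, so other pods that do not violate the zpdb can still be updated.
	for _, pod := range []string{"ingester-zone-b-0", "ingester-zone-b-1", "ingester-zone-b-2"} {
		if err := zpdb.MarkPodAsDeleted(ctx, "default", pod, "rollout-controller"); err != nil {
			fmt.Printf("skipping %s: %v\n", pod, err)
			continue
		}
		fmt.Printf("deleting %s\n", pod)
	}
}
```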
@@ -294,9 +298,6 @@ func maybeStartTLSServer(cfg config, rt http.RoundTripper, logger log.Logger, ku check(webhookObserver.Init(webHookListener)) } - // Start monitoring for zpdb configurations and pods - check(evictionController.Start()) - prepDownscaleAdmitFunc := func(ctx context.Context, logger log.Logger, ar v1.AdmissionReview, api *kubernetes.Clientset) *v1.AdmissionResponse { return admission.PrepareDownscale(ctx, rt, logger, ar, api, cfg.useZoneTracker, cfg.zoneTrackerConfigMapName, cfg.kubeClusterDomain) } diff --git a/integration/e2e_test.go b/integration/e2e_test.go index 1a861257b..327ba186b 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -73,6 +73,74 @@ func TestRolloutHappyCase(t *testing.T) { requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectReady(), expectVersion("2")) } +// TestRolloutHappyCaseWithZpdb performs pod updates via the rolling update controller and uses the zpdb to determine whether each pod deletion is safe +func TestRolloutHappyCaseWithZpdb(t *testing.T) { + ctx := context.Background() + + cluster := k3t.NewCluster(ctx, t, k3t.WithImages("rollout-operator:latest", "mock-service:latest")) + api := cluster.API() + + path := initManifestFiles(t, "max-unavailable-1") + + { + t.Log("Create rollout operator and check it's running and ready.") + createRolloutOperator(t, ctx, api, cluster.ExtAPI(), path, true) + + _ = createValidatingWebhookConfiguration(t, api, ctx, path+yamlWebhookZpdbValidation) + _ = createValidatingWebhookConfiguration(t, api, ctx, path+yamlWebhookPodEviction) + + rolloutOperatorPod := eventuallyGetFirstPod(ctx, t, api, "name=rollout-operator") + requireEventuallyPod(t, api, ctx, rolloutOperatorPod, expectPodPhase(corev1.PodRunning), expectReady()) + + t.Log("Await CABundle assignment") + require.Eventually(t, awaitCABundleAssignment(2, ctx, api), time.Second*30, time.Millisecond*10, "New webhooks have CABundle added") + } + + { + t.Log("Create a valid zpdb configuration.") + awaitZoneAwarePodDisruptionBudgetCreation(t, ctx, cluster, path+yamlZpdbConfig) + } + + // Create mock service, and check that it is in the desired state. + createMockServiceZone(t, ctx, api, corev1.NamespaceDefault, "mock-zone-a", 1) + createMockServiceZone(t, ctx, api, corev1.NamespaceDefault, "mock-zone-b", 1) + createMockServiceZone(t, ctx, api, corev1.NamespaceDefault, "mock-zone-c", 1) + requireEventuallyPod(t, api, ctx, "mock-zone-a-0", expectPodPhase(corev1.PodRunning), expectReady(), expectVersion("1")) + requireEventuallyPod(t, api, ctx, "mock-zone-b-0", expectPodPhase(corev1.PodRunning), expectReady(), expectVersion("1")) + requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectPodPhase(corev1.PodRunning), expectReady(), expectVersion("1")) + + // Update all mock service statefulsets. + _, err := api.AppsV1().StatefulSets(corev1.NamespaceDefault).Update(ctx, mockServiceStatefulSet("mock-zone-a", "2", false, 1), metav1.UpdateOptions{}) + require.NoError(t, err, "Can't update StatefulSet") + _, err = api.AppsV1().StatefulSets(corev1.NamespaceDefault).Update(ctx, mockServiceStatefulSet("mock-zone-b", "2", false, 1), metav1.UpdateOptions{}) + require.NoError(t, err, "Can't update StatefulSet") + _, err = api.AppsV1().StatefulSets(corev1.NamespaceDefault).Update(ctx, mockServiceStatefulSet("mock-zone-c", "2", false, 1), metav1.UpdateOptions{}) + require.NoError(t, err, "Can't update StatefulSet") + + // The first pod should now be on version 2 and not ready; the rest should still be ready on version 1.
+ requireEventuallyPod(t, api, ctx, "mock-zone-a-0", expectNotReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-b-0", expectReady(), expectVersion("1")) + requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectReady(), expectVersion("1")) + + // zone-a becomes ready, zone-b should become not ready and be version 2. + makeMockReady(t, cluster, "mock-zone-a") + requireEventuallyPod(t, api, ctx, "mock-zone-a-0", expectReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-b-0", expectNotReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectReady(), expectVersion("1")) + + // zone-b becomes ready, zone-c should become not ready and be version 2. + makeMockReady(t, cluster, "mock-zone-b") + requireEventuallyPod(t, api, ctx, "mock-zone-a-0", expectReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-b-0", expectReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectNotReady(), expectVersion("2")) + + // zone-c becomes ready, final state. + makeMockReady(t, cluster, "mock-zone-c") + requireEventuallyPod(t, api, ctx, "mock-zone-a-0", expectReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-b-0", expectReady(), expectVersion("2")) + requireEventuallyPod(t, api, ctx, "mock-zone-c-0", expectReady(), expectVersion("2")) +} + func awaitCABundleAssignment(webhookCnt int, ctx context.Context, api *kubernetes.Clientset) func() bool { return func() bool { diff --git a/integration/manifests_rollout_operator_test.go b/integration/manifests_rollout_operator_test.go index 7ac682112..cd3531f3b 100644 --- a/integration/manifests_rollout_operator_test.go +++ b/integration/manifests_rollout_operator_test.go @@ -85,8 +85,9 @@ func createRolloutOperatorDependencies(t *testing.T, ctx context.Context, api *k _, err = api.RbacV1().RoleBindings(corev1.NamespaceDefault).Create(ctx, roleBinding, metav1.CreateOptions{}) require.NoError(t, err) + _ = createZoneAwarePodDistruptionBudgetCustomResourceDefinition(t, extApi) + if webhook { - _ = createZoneAwarePodDistruptionBudgetCustomResourceDefinition(t, extApi) _ = createReplicaTemplateCustomResourceDefinition(t, extApi) operatorRole := loadFromDisk[rbacv1.Role](t, directory+yamlRoleSecret, &rbacv1.Role{}) diff --git a/operations/rollout-operator-tests/test-rollout-operator-enabled-no-webhooks-generated.yaml b/operations/rollout-operator-tests/test-rollout-operator-enabled-no-webhooks-generated.yaml index dae4eb28f..10a35b945 100644 --- a/operations/rollout-operator-tests/test-rollout-operator-enabled-no-webhooks-generated.yaml +++ b/operations/rollout-operator-tests/test-rollout-operator-enabled-no-webhooks-generated.yaml @@ -42,6 +42,14 @@ rules: - get - update - create +- apiGroups: + - rollout-operator.grafana.com + resources: + - zoneawarepoddisruptionbudgets + verbs: + - get + - list + - watch --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/operations/rollout-operator/rollout-operator.libsonnet b/operations/rollout-operator/rollout-operator.libsonnet index 0f8b75d86..c88b3dfb3 100644 --- a/operations/rollout-operator/rollout-operator.libsonnet +++ b/operations/rollout-operator/rollout-operator.libsonnet @@ -61,7 +61,6 @@ assert !$._config.ignore_rollout_operator_prepare_downscale_webhook_failures || $._config.rollout_operator_webhooks_enabled : 'ignore_rollout_operator_prepare_downscale_webhook_failures requires rollout_operator_webhooks_enabled=true', assert 
!$._config.ignore_rollout_operator_zpdb_eviction_webhook_failures || $._config.rollout_operator_webhooks_enabled : 'ignore_rollout_operator_zpdb_eviction_webhook_failures requires rollout_operator_webhooks_enabled=true', assert !$._config.ignore_rollout_operator_zpdb_validation_webhook_failures || $._config.rollout_operator_webhooks_enabled : 'ignore_rollout_operator_zpdb_validation_webhook_failures requires rollout_operator_webhooks_enabled=true', - assert !$._config.zpdb_custom_resource_definition_enabled || $._config.rollout_operator_webhooks_enabled : 'zpdb_custom_resource_definition_enabled requires rollout_operator_webhooks_enabled=true', assert !$._config.replica_template_custom_resource_definition_enabled || $._config.rollout_operator_webhooks_enabled : 'replica_template_custom_resource_definition_enabled requires rollout_operator_webhooks_enabled=true', local enableWebhooks = $._config.rollout_operator_replica_template_access_enabled || $._config.rollout_operator_webhooks_enabled, @@ -141,13 +140,13 @@ policyRule.withResources(['%s/scale' % $.replica_template.spec.names.plural, '%s/status' % $.replica_template.spec.names.plural]) + policyRule.withVerbs(['get', 'patch']), ] else [] - ) + ( - if enableWebhooks then [ - policyRule.withApiGroups($.zpdb_template.spec.group) + - policyRule.withResources([$.zpdb_template.spec.names.plural]) + - policyRule.withVerbs(['get', 'list', 'watch']), - ] else [] - ) + ) + + [ + policyRule.withApiGroups($.zpdb_template.spec.group) + + policyRule.withResources([$.zpdb_template.spec.names.plural]) + + policyRule.withVerbs(['get', 'list', 'watch']), + ] + ), rollout_operator_rolebinding: if !$._config.rollout_operator_enabled then null else diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 737f7d6af..d15b65786 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -46,6 +46,10 @@ type httpClient interface { Do(req *http.Request) (*http.Response, error) } +type ZPDBEvictionController interface { + MarkPodAsDeleted(ctx context.Context, namespace string, podName string, source string) error +} + type RolloutController struct { kubeClient kubernetes.Interface clusterDomain string @@ -63,6 +67,8 @@ type RolloutController struct { httpClient httpClient logger log.Logger + zpdbController ZPDBEvictionController + // This bool is true if we should trigger a reconcile. 
shouldReconcile atomic.Bool @@ -80,7 +86,7 @@ type RolloutController struct { discoveredGroups map[string]struct{} } -func NewRolloutController(kubeClient kubernetes.Interface, restMapper meta.RESTMapper, scaleClient scale.ScalesGetter, dynamic dynamic.Interface, clusterDomain string, namespace string, client httpClient, reconcileInterval time.Duration, reg prometheus.Registerer, logger log.Logger) *RolloutController { +func NewRolloutController(kubeClient kubernetes.Interface, restMapper meta.RESTMapper, scaleClient scale.ScalesGetter, dynamic dynamic.Interface, clusterDomain string, namespace string, client httpClient, reconcileInterval time.Duration, reg prometheus.Registerer, logger log.Logger, zpdbController ZPDBEvictionController) *RolloutController { namespaceOpt := informers.WithNamespace(namespace) // Initialise the StatefulSet informer to restrict the returned StatefulSets to only the ones @@ -110,6 +116,7 @@ func NewRolloutController(kubeClient kubernetes.Interface, restMapper meta.RESTM scaleClient: scaleClient, dynamicClient: dynamic, httpClient: client, + zpdbController: zpdbController, logger: logger, stopCh: make(chan struct{}), discoveredGroups: map[string]struct{}{}, @@ -575,7 +582,24 @@ func (c *RolloutController) updateStatefulSetPods(ctx context.Context, sts *v1.S continue } - level.Info(c.logger).Log("msg", fmt.Sprintf("terminating pod %s", pod.Name)) + // Use the ZPDB to determine whether this pod deletion is allowed. + // The ZPDB serializes requests from this controller and from any incoming voluntary evictions. + // For each request, a full set of tests is performed to confirm the state of all pods in the ZPDB scope. + // This ensures that, if any voluntary evictions have been allowed since this reconcile loop commenced, + // the latest information is used to determine the zone/partition disruption level. + // If this request returns without error, the pod will be placed into the ZPDB pod eviction cache and will + // be considered as not ready until it either restarts or is expired from the cache. + err := c.zpdbController.MarkPodAsDeleted(ctx, pod.Namespace, pod.Name, "rollout-controller") + if err != nil { + // Skip this pod. The reconcile loop runs regularly, so this pod will be retried on a later pass. + // Rather than returning false here and abandoning the rest of the updates until the next reconcile, + // we allow this update loop to continue. For configurations with a partition-aware ZPDB it is valid + // to have multiple disruptions as long as there is at least one healthy pod per partition.
+ level.Debug(c.logger).Log("msg", "zpdb denied pod deletion", "pod", pod.Name, "reason", err) + continue + } + + level.Info(c.logger).Log("msg", "terminating pod (does not violate any relevant ZPDBs)", "pod", pod.Name) if err := c.kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil { return false, errors.Wrapf(err, "failed to delete pod %s", pod.Name) } diff --git a/pkg/controller/controller_test.go b/pkg/controller/controller_test.go index b285c97c6..55d4bc344 100644 --- a/pkg/controller/controller_test.go +++ b/pkg/controller/controller_test.go @@ -60,6 +60,7 @@ func TestRolloutController_Reconcile(t *testing.T) { expectedPatchedSets map[string][]string expectedPatchedResources map[string][]string expectedErr string + zpdbErrors []error }{ "should return error if some StatefulSet don't have OnDelete update strategy": { statefulSets: []runtime.Object{ @@ -147,6 +148,38 @@ func TestRolloutController_Reconcile(t *testing.T) { }, expectedDeletedPods: []string{"ingester-zone-b-0", "ingester-zone-b-1"}, }, + "should delete pods that need to be updated - zpdb allows first delete but denies the second": { + statefulSets: []runtime.Object{ + mockStatefulSet("ingester-zone-a"), + mockStatefulSet("ingester-zone-b", withPrevRevision()), + }, + pods: []runtime.Object{ + mockStatefulSetPod("ingester-zone-a-0", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-a-1", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-a-2", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-b-0", testPrevRevisionHash), + mockStatefulSetPod("ingester-zone-b-1", testPrevRevisionHash), + mockStatefulSetPod("ingester-zone-b-2", testPrevRevisionHash), + }, + expectedDeletedPods: []string{"ingester-zone-b-0"}, + zpdbErrors: []error{nil, errors.New("zpdb denies eviction request")}, + }, + "should delete pods that need to be updated - zpdb denies first delete but allows the second": { + statefulSets: []runtime.Object{ + mockStatefulSet("ingester-zone-a"), + mockStatefulSet("ingester-zone-b", withPrevRevision()), + }, + pods: []runtime.Object{ + mockStatefulSetPod("ingester-zone-a-0", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-a-1", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-a-2", testLastRevisionHash), + mockStatefulSetPod("ingester-zone-b-0", testPrevRevisionHash), + mockStatefulSetPod("ingester-zone-b-1", testPrevRevisionHash), + mockStatefulSetPod("ingester-zone-b-2", testPrevRevisionHash), + }, + expectedDeletedPods: []string{"ingester-zone-b-1"}, + zpdbErrors: []error{errors.New("zpdb denies eviction request")}, + }, "should default max unavailable to 1 if set to an invalid value": { statefulSets: []runtime.Object{ mockStatefulSet("ingester-zone-a", withPrevRevision(), func(sts *v1.StatefulSet) { @@ -617,7 +650,11 @@ func TestRolloutController_Reconcile(t *testing.T) { // Create the controller and start informers. reg := prometheus.NewPedanticRegistry() - c := NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, testClusterDomain, testNamespace, nil, 5*time.Second, reg, log.NewNopLogger()) + + // Pass in a slice of errors. Each eviction request pops the error at the head of the slice and uses it as the eviction response. Once the slice is exhausted, no eviction returns an error.
+ zpdb := &mockEvictionController{nextErrorsIfAny: testData.zpdbErrors} + + c := NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, testClusterDomain, testNamespace, nil, 5*time.Second, reg, log.NewNopLogger(), zpdb) require.NoError(t, c.Init()) defer c.Stop() @@ -926,7 +963,7 @@ func TestRolloutController_ReconcileStatefulsetWithDownscaleDelay(t *testing.T) // Create the controller and start informers. reg := prometheus.NewPedanticRegistry() - c := NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, testClusterDomain, testNamespace, httpClient, 5*time.Second, reg, log.NewNopLogger()) + c := NewRolloutController(kubeClient, restMapper, scaleClient, dynamicClient, testClusterDomain, testNamespace, httpClient, 5*time.Second, reg, log.NewNopLogger(), &mockEvictionController{}) require.NoError(t, c.Init()) defer c.Stop() @@ -942,7 +979,6 @@ func TestRolloutController_ReconcileStatefulsetWithDownscaleDelay(t *testing.T) func createFakeDynamicClient() (*fakedynamic.FakeDynamicClient, map[string][]string) { dynamicClient := &fakedynamic.FakeDynamicClient{} - patchedStatuses := map[string][]string{} dynamicClient.AddReactor("patch", "*", func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { patchAction := action.(ktesting.PatchAction) @@ -1029,7 +1065,7 @@ func TestRolloutController_ReconcileShouldDeleteMetricsForDecommissionedRolloutG // Create the controller and start informers. reg := prometheus.NewPedanticRegistry() - c := NewRolloutController(kubeClient, nil, nil, nil, testClusterDomain, testNamespace, nil, 5*time.Second, reg, log.NewNopLogger()) + c := NewRolloutController(kubeClient, nil, nil, nil, testClusterDomain, testNamespace, nil, 5*time.Second, reg, log.NewNopLogger(), &mockEvictionController{}) require.NoError(t, c.Init()) defer c.Stop() @@ -1254,3 +1290,16 @@ func (f *fakeHttpClient) requests() []string { return f.recordedRequests } + +type mockEvictionController struct { + nextErrorsIfAny []error +} + +func (m *mockEvictionController) MarkPodAsDeleted(ctx context.Context, namespace string, podName string, source string) error { + var response error + if len(m.nextErrorsIfAny) > 0 { + response = m.nextErrorsIfAny[0] + m.nextErrorsIfAny = m.nextErrorsIfAny[1:] + } + return response +} diff --git a/pkg/zpdb/eviction_controller.go b/pkg/zpdb/eviction_controller.go index 27afde25d..740b416d2 100644 --- a/pkg/zpdb/eviction_controller.go +++ b/pkg/zpdb/eviction_controller.go @@ -2,6 +2,7 @@ package zpdb import ( "context" + "fmt" "net/http" "sync" @@ -11,7 +12,11 @@ import ( "github.com/pkg/errors" v1 "k8s.io/api/admission/v1" appsv1 "k8s.io/api/apps/v1" + authenticationv1 "k8s.io/api/authentication/v1" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" @@ -93,6 +98,51 @@ func (c *EvictionController) findLock(name string) *sync.Mutex { return c.locks[name] } +// MarkPodAsDeleted allows for a programmatic pod eviction request. It returns an error if the pod eviction is denied. +// Note that if this func returns without an error, the pod will be marked as pending eviction within the zpdb eviction cache. +// Note also that this func does not actually evict or delete the pod from kubernetes. 
+func (c *EvictionController) MarkPodAsDeleted(ctx context.Context, namespace string, podName string, source string) error { + request := v1.AdmissionReview{ + Request: &v1.AdmissionRequest{ + // not a real uid. The eviction_controller only uses this for logging purposes + UID: types.UID(fmt.Sprintf("internal-request-pod-eviction-%s-for-rolling-update", podName)), + Kind: metav1.GroupVersionKind{ + Group: "policy", + Version: "v1", + Kind: "Eviction", + }, + Resource: metav1.GroupVersionResource{ + Group: "policy", + Version: "v1", + Resource: "evictions", + }, + Name: podName, + Namespace: namespace, + Operation: v1.Create, + UserInfo: authenticationv1.UserInfo{ + Username: source, + }, + SubResource: "eviction", + Object: runtime.RawExtension{ + Raw: []byte(fmt.Sprintf(`{ + "apiVersion": "policy/v1", + "kind": "Eviction", + "metadata": { + "name": "%s", + "namespace": "%s" + } + }`, podName, namespace)), + }, + }, + } + + response := c.HandlePodEvictionRequest(ctx, request) + if !response.Allowed { + return errors.New(response.Result.Message) + } + return nil +} + func (c *EvictionController) HandlePodEvictionRequest(ctx context.Context, ar v1.AdmissionReview) *v1.AdmissionResponse { logger, _ := spanlogger.New(ctx, c.logger, "admission.PodEviction()", c.resolver) defer logger.Finish() diff --git a/pkg/zpdb/eviction_controller_test.go b/pkg/zpdb/eviction_controller_test.go index 57a458a0a..9161eaf48 100644 --- a/pkg/zpdb/eviction_controller_test.go +++ b/pkg/zpdb/eviction_controller_test.go @@ -107,8 +107,19 @@ func newTestContext(t *testing.T, request admissionv1.AdmissionReview, pdbRawCon return testCtx } -func (c *testContext) stop() { - c.controller.Stop() +func newTestContextWithoutAdmissionReview(t *testing.T, pdbRawConfig *unstructured.Unstructured, objects ...runtime.Object) *testContext { + testCtx := &testContext{ + ctx: context.Background(), + logs: newDummyLogger(), + } + + testCtx.controller = NewEvictionController(fake.NewClientset(objects...), newFakeDynamicClient(), testNamespace, testCtx.logs) + require.NoError(t, testCtx.controller.Start()) + + if pdbRawConfig != nil { + _, _, _ = testCtx.controller.cfgObserver.pdbCache.addOrUpdateRaw(pdbRawConfig) + } + return testCtx } func (c *testContext) assertDenyResponse(t *testing.T, reason string, statusCode int) { @@ -117,14 +128,22 @@ func (c *testContext) assertDenyResponse(t *testing.T, reason string, statusCode require.False(t, response.Allowed) require.Equal(t, reason, response.Result.Message) require.Equal(t, int32(statusCode), response.Result.Code) - c.stop() +} + +func (c *testContext) assertDenyResponseViaMarkPodAsDeleted(t *testing.T, pod string, reason string) { + response := c.controller.MarkPodAsDeleted(c.ctx, testNamespace, pod, "eviction-controller-test") + require.ErrorContains(t, response, reason) +} + +func (c *testContext) assertAllowResponseViaMarkPodAsDeleted(t *testing.T, pod string) { + response := c.controller.MarkPodAsDeleted(t.Context(), testNamespace, pod, "eviction-controller-test") + require.NoError(t, response) } func (c *testContext) assertAllowResponse(t *testing.T) { response := c.controller.HandlePodEvictionRequest(c.ctx, c.request) require.NotNil(t, response.UID) require.True(t, response.Allowed) - c.stop() } func (c *testContext) assertAllowResponseWithWarning(t *testing.T, warning string) { @@ -132,13 +151,13 @@ func (c *testContext) assertAllowResponseWithWarning(t *testing.T, warning strin require.NotNil(t, response.UID) require.True(t, response.Allowed) require.Equal(t, warning, 
response.Warnings[0]) - c.stop() } func TestPodEviction_NotCreateEvent(t *testing.T) { ar := createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace) ar.Request.Operation = admissionv1.Delete testCtx := newTestContext(t, ar, nil) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "request operation is not create, got: DELETE", 400) expectedLogEntry := []string{ @@ -151,22 +170,26 @@ func TestPodEviction_NotCreateEvent(t *testing.T) { `reason="not a valid create pod eviction request"`, } testCtx.logs.assertHasLog(t, expectedLogEntry) + } func TestPodEviction_NotEvictionSubResource(t *testing.T) { ar := createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace) ar.Request.SubResource = "foo" testCtx := newTestContext(t, ar, nil) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "request SubResource is not eviction, got: foo", 400) } func TestPodEviction_EmptyName(t *testing.T) { testCtx := newTestContext(t, createBasicEvictionAdmissionReview("", testNamespace), nil) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "request did not include both a namespace and a name", 400) } func TestPodEviction_PodNotFound(t *testing.T) { testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), nil) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, `pods "ingester-zone-a-0" not found`, 400) testCtx.logs.assertHasLog(t, []string{`reason="unable to find pod by name"`}) } @@ -175,6 +198,7 @@ func TestPodEviction_PodNotReady(t *testing.T) { pod := newPodNoOwner(testPodZoneA0) pod.Status.Phase = corev1.PodFailed testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), nil, pod) + defer testCtx.controller.Stop() testCtx.assertAllowResponseWithWarning(t, "pod is not ready") testCtx.logs.assertHasLog(t, []string{`reason="pod is not ready"`}) } @@ -182,6 +206,7 @@ func TestPodEviction_PodNotReady(t *testing.T) { func TestPodEviction_PodWithNoOwner(t *testing.T) { pod := newPodNoOwner(testPodZoneA0) testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(1, rolloutGroupValue), pod) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "unable to find a StatefulSet pod owner", 500) testCtx.logs.assertHasLog(t, []string{`reason="unable to find pod owner"`}) } @@ -189,25 +214,62 @@ func TestPodEviction_PodWithNoOwner(t *testing.T) { func TestPodEviction_UnableToRetrievePdbConfig(t *testing.T) { sts := newEvictionControllerSts(statefulSetZoneA) pod := newPod(testPodZoneA0, sts) - testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), nil, pod, sts) + defer testCtx.controller.Stop() testCtx.assertAllowResponse(t) } func TestPodEviction_MaxUnavailableEq0(t *testing.T) { sts := newEvictionControllerSts(statefulSetZoneA) pod := newPod(testPodZoneA0, sts) - testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(0, rolloutGroupValue), pod, sts) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "max unavailable = 0", 403) testCtx.logs.assertHasLog(t, []string{`reason="max unavailable = 0"`}) } -func TestPodEviction_MaxUnavailablePercentageEq0(t *testing.T) { +func TestPodEviction_MaxUnavailableEq0_ViaAllowPodEviction(t *testing.T) { sts := newEvictionControllerSts(statefulSetZoneA) pod := newPod(testPodZoneA0, sts) + testCtx := newTestContextWithoutAdmissionReview(t, newPDBMaxUnavailable(0, 
rolloutGroupValue), pod, sts) + defer testCtx.controller.Stop() + testCtx.assertDenyResponseViaMarkPodAsDeleted(t, testPodZoneA0, "max unavailable = 0") + testCtx.logs.assertHasLog(t, []string{`reason="max unavailable = 0"`}) +} +func TestPodEviction_Allowed_ViaAllowPodEviction(t *testing.T) { + objs := make([]runtime.Object, 0, 12) + objs = append(objs, newEvictionControllerSts(statefulSetZoneA)) + objs = append(objs, newEvictionControllerSts(statefulSetZoneB)) + objs = append(objs, newEvictionControllerSts(statefulSetZoneC)) + + for _, p := range []string{testPodZoneA0, testPodZoneA1, testPodZoneA2} { + objs = append(objs, newPod(p, objs[0].(*appsv1.StatefulSet))) + } + for _, p := range []string{testPodZoneB0, testPodZoneB1, testPodZoneB2} { + objs = append(objs, newPod(p, objs[1].(*appsv1.StatefulSet))) + } + for _, p := range []string{testPodZoneC0, testPodZoneC1, testPodZoneC2} { + objs = append(objs, newPod(p, objs[2].(*appsv1.StatefulSet))) + } + + zoneAPod0 := objs[3].(*corev1.Pod) + zoneAPod2 := objs[5].(*corev1.Pod) + + testCtx := newTestContextWithoutAdmissionReview(t, newPDBMaxUnavailable(1, rolloutGroupValue), objs...) + defer testCtx.controller.Stop() + require.False(t, testCtx.controller.podObserver.podEvictCache.hasPendingEviction(zoneAPod0)) + // note: the controller is not stopped between these assertions, so the pending eviction recorded by the first request is still visible to the second + testCtx.assertAllowResponseViaMarkPodAsDeleted(t, zoneAPod0.Name) + require.True(t, testCtx.controller.podObserver.podEvictCache.hasPendingEviction(zoneAPod0)) + testCtx.assertDenyResponseViaMarkPodAsDeleted(t, zoneAPod2.Name, "1 pod not ready in ingester-zone-a") +} + +func TestPodEviction_MaxUnavailablePercentageEq0(t *testing.T) { + sts := newEvictionControllerSts(statefulSetZoneA) + pod := newPod(testPodZoneA0, sts) testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailablePercent(0, rolloutGroupValue), pod, sts) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "max unavailable = 0", 403) testCtx.logs.assertHasLog(t, []string{`reason="max unavailable = 0"`}) } @@ -219,6 +281,7 @@ func TestPodEviction_SingleZoneMultiplePodsUpscale(t *testing.T) { pod1 := newPod(testPodZoneA1, sts) testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(1, rolloutGroupValue), pod0, pod1, sts) + defer testCtx.controller.Stop() testCtx.assertDenyResponse(t, "minimum number of StatefulSets not found", 400) } @@ -252,24 +315,29 @@ func TestPodEviction_MultiZoneClassic(t *testing.T) { require.False(t, testCtx.controller.podObserver.podEvictCache.hasPendingEviction(zoneAPod0)) testCtx.assertAllowResponse(t) require.True(t, testCtx.controller.podObserver.podEvictCache.hasPendingEviction(zoneAPod0)) + testCtx.controller.Stop() // mark a pod in the same zone as failed - with maxUnavailable=1 this will be denied zoneAPod2.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(1, rolloutGroupValue), objs...) testCtx.assertDenyResponse(t, "1 pod not ready in ingester-zone-a", 429) + testCtx.controller.Stop() // mark a pod in the same zone as failed - with maxUnavailable=2 this will be allowed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(2, rolloutGroupValue), objs...)
testCtx.assertAllowResponse(t) + testCtx.controller.Stop() // mark a pod in the another zone as failed - we will deny this eviction zoneCPod2.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(1, rolloutGroupValue), objs...) testCtx.assertDenyResponse(t, "1 pod not ready in ingester-zone-c", 429) + testCtx.controller.Stop() // mark a pod in the another zone as failed - we will deny this eviction even if max unavailable = 2 testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(2, rolloutGroupValue), objs...) testCtx.assertDenyResponse(t, "1 pod not ready in ingester-zone-c", 429) + testCtx.controller.Stop() // reset so all the pods are reporting running zoneAPod2.Status.Phase = corev1.PodRunning @@ -279,6 +347,7 @@ func TestPodEviction_MultiZoneClassic(t *testing.T) { stsZoneB.Status.Replicas = 4 testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailable(1, rolloutGroupValue), objs...) testCtx.assertDenyResponse(t, "1 pod unknown in ingester-zone-b", 429) + testCtx.controller.Stop() } func TestPodEviction_PartitionZones(t *testing.T) { @@ -308,21 +377,25 @@ func TestPodEviction_PartitionZones(t *testing.T) { testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertAllowResponse(t) + testCtx.controller.Stop() // mark a pod in the same zone as failed - we will allow this eviction as it's in a different partition zoneAPod1.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertAllowResponse(t) + testCtx.controller.Stop() // mark a pod in the another zone + partition as failed - we will allow this eviction as it's in a different partition zoneCPod2.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertAllowResponse(t) + testCtx.controller.Stop() // mark a pod in the another zone + same partition as failed - we will deny this eviction zoneBPod0.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertDenyResponse(t, "1 pod not ready in partition 0", 429) + testCtx.controller.Stop() // reset so all the pods are reporting running zoneAPod1.Status.Phase = corev1.PodRunning @@ -333,9 +406,11 @@ func TestPodEviction_PartitionZones(t *testing.T) { stsZoneB.Status.Replicas = 4 testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertDenyResponse(t, "1 pod unknown in partition 0", 429) + testCtx.controller.Stop() testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, "ingester(-foo)?-zone-[a-z]-([0-9]+)", int64(2)), objs...) 
testCtx.assertDenyResponse(t, "1 pod unknown in partition 0", 429) + testCtx.controller.Stop() } func TestPodEviction_PartitionZonesMaxUnavailable2(t *testing.T) { @@ -365,14 +440,17 @@ func TestPodEviction_PartitionZonesMaxUnavailable2(t *testing.T) { zoneBPod0.Status.Phase = corev1.PodFailed testCtx := newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(1, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertDenyResponse(t, "1 pod not ready in partition 0", 429) + testCtx.controller.Stop() testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(2, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertAllowResponse(t) + testCtx.controller.Stop() // mark another pod in the another zone + same partition as failed zoneCPod0.Status.Phase = corev1.PodFailed testCtx = newTestContext(t, createBasicEvictionAdmissionReview(testPodZoneA0, testNamespace), newPDBMaxUnavailableWithRegex(2, rolloutGroupValue, podPartitionZoneRegex, int64(1)), objs...) testCtx.assertDenyResponse(t, "2 pods not ready in partition 0", 429) + testCtx.controller.Stop() } // createBasicEvictionAdmissionReview returns a pod eviction request for the given pod name
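For tests that need to exercise the new deletion gate without running a real eviction controller, a scripted test double in the style of the `mockEvictionController` added to `pkg/controller/controller_test.go` works well. A small, self-contained sketch follows; the FIFO-of-errors behaviour mirrors the diff, while the type name, test function, and pod names are illustrative only.

```go
package controller_test

import (
	"context"
	"errors"
	"testing"
)

// scriptedZPDB pops one canned error per MarkPodAsDeleted call; once the queue is
// exhausted every further call is allowed (nil error), matching the behaviour of
// mockEvictionController in this diff.
type scriptedZPDB struct {
	nextErrorsIfAny []error
}

func (m *scriptedZPDB) MarkPodAsDeleted(_ context.Context, _ string, _ string, _ string) error {
	var response error
	if len(m.nextErrorsIfAny) > 0 {
		response = m.nextErrorsIfAny[0]
		m.nextErrorsIfAny = m.nextErrorsIfAny[1:]
	}
	return response
}

func TestScriptedZPDBResponses(t *testing.T) {
	zpdb := &scriptedZPDB{nextErrorsIfAny: []error{nil, errors.New("zpdb denies eviction request")}}
	ctx := context.Background()

	// First deletion is allowed, the second is denied, and once the queue is empty
	// every subsequent deletion is allowed again.
	if err := zpdb.MarkPodAsDeleted(ctx, "default", "ingester-zone-b-0", "test"); err != nil {
		t.Fatalf("expected first deletion to be allowed, got %v", err)
	}
	if err := zpdb.MarkPodAsDeleted(ctx, "default", "ingester-zone-b-1", "test"); err == nil {
		t.Fatal("expected second deletion to be denied")
	}
	if err := zpdb.MarkPodAsDeleted(ctx, "default", "ingester-zone-b-2", "test"); err != nil {
		t.Fatalf("expected third deletion to be allowed, got %v", err)
	}
}
```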