From 041b5b840ddebf73b5325def921611f97d0716fa Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Thu, 25 Apr 2024 11:48:12 +0300
Subject: [PATCH 01/10] Add VPAAndHPAForAPIServer feature gate

---
 docs/deployment/feature_gates.md   | 2 ++
 pkg/features/features.go           | 8 ++++++++
 pkg/gardenlet/features/features.go | 1 +
 3 files changed, 11 insertions(+)

diff --git a/docs/deployment/feature_gates.md b/docs/deployment/feature_gates.md
index 9f3ca1d98a3..7c265889e3b 100644
--- a/docs/deployment/feature_gates.md
+++ b/docs/deployment/feature_gates.md
@@ -31,6 +31,7 @@ The following tables are a summary of the feature gates that you can set on diff
 | UseNamespacedCloudProfile          | `false` | `Alpha` | `1.92` |        |
 | ShootManagedIssuer                 | `false` | `Alpha` | `1.93` |        |
 | VPAForETCD                         | `false` | `Alpha` | `1.94` |        |
+| VPAAndHPAForAPIServer              | `false` | `Alpha` | `1.95` |        |
 
 ## Feature Gates for Graduated or Deprecated Features
 
@@ -202,3 +203,4 @@ A *General Availability* (GA) feature is also referred to as a *stable* feature.
 | UseNamespacedCloudProfile          | `gardener-apiserver`              | Enables usage of `NamespacedCloudProfile`s in `Shoot`s.                                                                                                                                                                                                                                                                                                                            |
 | ShootManagedIssuer                 | `gardenlet`                       | Enables the shoot managed issuer functionality described in GEP 24.                                                                                                                                                                                                                                                                                                                |
 | VPAForETCD                         | `gardenlet`, `gardener-operator`  | Enables VPA for `etcd-main` and `etcd-events`, regardless of HVPA enablement.                                                                                                                                                                                                                                                                                                      |
+| VPAAndHPAForAPIServer              | `gardenlet`                       | Enables an autoscaling mechanism for shoot kube-apiserver where it is scaled simultaneously by VPA on CPU and memory utilization and by HPA on CPU and memory usage. The feature gate takes precedence over the `HVPA` feature gate when they are both enabled.                                                                                                                    |
diff --git a/pkg/features/features.go b/pkg/features/features.go
index dbeca8279a3..6cd160ef54c 100644
--- a/pkg/features/features.go
+++ b/pkg/features/features.go
@@ -71,6 +71,13 @@ const (
 	// owner: @dimityrmirchev
 	// alpha: v1.93.0
 	ShootManagedIssuer featuregate.Feature = "ShootManagedIssuer"
+
+	// VPAAndHPAForAPIServer enables an autoscaling mechanism for shoot kube-apiserver
+	// where it is scaled simultaneously by VPA on CPU and memory utilization and by HPA on CPU and memory usage.
+	// The feature gate takes precedence over the `HVPA` feature gate when they are both enabled.
+	// owner: @ialidzhikov
+	// alpha: v1.95.0
+	VPAAndHPAForAPIServer featuregate.Feature = "VPAAndHPAForAPIServer"
 )
 
 // DefaultFeatureGate is the central feature gate map used by all gardener components.
@@ -108,6 +115,7 @@ var AllFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
 	ShootManagedIssuer:              {Default: false, PreRelease: featuregate.Alpha},
 	ShootForceDeletion:              {Default: true, PreRelease: featuregate.Beta},
 	UseNamespacedCloudProfile:       {Default: false, PreRelease: featuregate.Alpha},
+	VPAAndHPAForAPIServer:           {Default: false, PreRelease: featuregate.Alpha},
 }
 
 // GetFeatures returns a feature gate map with the respective specifications. Non-existing feature gates are ignored.
diff --git a/pkg/gardenlet/features/features.go b/pkg/gardenlet/features/features.go
index e852d8988a5..fe2b96d8aad 100644
--- a/pkg/gardenlet/features/features.go
+++ b/pkg/gardenlet/features/features.go
@@ -26,5 +26,6 @@ func GetFeatures() []featuregate.Feature {
 		features.CoreDNSQueryRewriting,
 		features.IPv6SingleStack,
 		features.ShootManagedIssuer,
+		features.VPAAndHPAForAPIServer,
 	}
 }

From f4d6c5942a291a1d9aab8febad4a7671e3d7eb1a Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Thu, 25 Apr 2024 14:37:42 +0300
Subject: [PATCH 02/10] Introduce consts for API server autoscaling modes

---
 pkg/component/apiserver/types.go              | 16 ++++++++--
 .../gardener/apiserver/apiserver_test.go      |  8 ++---
 pkg/component/gardener/apiserver/hvpa.go      |  3 +-
 pkg/component/gardener/apiserver/vpa.go       |  4 ++-
 .../kubernetes/apiserver/apiserver_test.go    | 30 +++++++++----------
 .../apiserver/horizontalpodautoscaler.go      |  3 +-
 pkg/component/kubernetes/apiserver/hvpa.go    |  3 +-
 .../apiserver/verticalpodautoscaler.go        |  3 +-
 pkg/component/shared/kubeapiserver.go         |  2 +-
 pkg/component/shared/kubeapiserver_test.go    |  8 ++---
 .../operation/botanist/kubeapiserver.go       | 28 +++++++++++------
 .../operation/botanist/kubeapiserver_test.go  | 18 +++++------
 .../controller/garden/garden/components.go    |  9 +++++-
 13 files changed, 85 insertions(+), 50 deletions(-)

diff --git a/pkg/component/apiserver/types.go b/pkg/component/apiserver/types.go
index fbdea3d6132..b1d353b7075 100644
--- a/pkg/component/apiserver/types.go
+++ b/pkg/component/apiserver/types.go
@@ -76,12 +76,24 @@ type AuditWebhook struct {
 	Version *string
 }
 
+// AutoscalingMode represents the different autoscaling modes for an API Server.
+type AutoscalingMode int8
+
+const (
+	// AutoscalingModeBaseline differs substantially between kube-apiserver and gardener-apiserver.
+	// For kube-apiserver, it is active HPA, plus VPA in passive, recommend-only mode.
+	// For gardener-apiserver, it is VPA only.
+	AutoscalingModeBaseline = 0 + iota
+	// AutoscalingModeHVPA uses Gardener's custom HVPA autoscaler.
+	AutoscalingModeHVPA
+)
+
 // AutoscalingConfig contains information for configuring autoscaling settings for the API server.
 type AutoscalingConfig struct {
+	// Mode is the strategy for scaling the API server.
+	Mode AutoscalingMode
 	// APIServerResources are the resource requirements for the API server container.
 	APIServerResources corev1.ResourceRequirements
-	// HVPAEnabled states whether an HVPA object shall be deployed. If false, HPA and VPA will be used.
-	HVPAEnabled bool
 	// Replicas is the number of pod replicas for the API server.
 	Replicas *int32
 	// MinReplicas are the minimum Replicas for horizontal autoscaling.
diff --git a/pkg/component/gardener/apiserver/apiserver_test.go b/pkg/component/gardener/apiserver/apiserver_test.go
index d01417cd9ef..47e0f1ee0d4 100644
--- a/pkg/component/gardener/apiserver/apiserver_test.go
+++ b/pkg/component/gardener/apiserver/apiserver_test.go
@@ -1466,9 +1466,9 @@ kubeConfigFile: /etc/kubernetes/admission-kubeconfigs/validatingadmissionwebhook
 					Expect(managedResourceSecretVirtual.Labels["resources.gardener.cloud/garbage-collectable-reference"]).To(Equal("true"))
 				})
 
-				Context("when HVPA is disabled", func() {
+				Context("when autoscaling mode is baseline", func() {
 					BeforeEach(func() {
-						values.Values.Autoscaling.HVPAEnabled = false
+						values.Values.Autoscaling.Mode = apiserver.AutoscalingModeBaseline
 						deployer = New(fakeClient, namespace, fakeSecretManager, values)
 					})
 
@@ -1484,9 +1484,9 @@ kubeConfigFile: /etc/kubernetes/admission-kubeconfigs/validatingadmissionwebhook
 					})
 				})
 
-				Context("when HVPA is enabled", func() {
+				Context("when autoscaling mode is HVPA", func() {
 					BeforeEach(func() {
-						values.Values.Autoscaling.HVPAEnabled = true
+						values.Values.Autoscaling.Mode = apiserver.AutoscalingModeHVPA
 						deployer = New(fakeClient, namespace, fakeSecretManager, values)
 					})
 
diff --git a/pkg/component/gardener/apiserver/hvpa.go b/pkg/component/gardener/apiserver/hvpa.go
index e8a03052d19..9ce3c100ca6 100644
--- a/pkg/component/gardener/apiserver/hvpa.go
+++ b/pkg/component/gardener/apiserver/hvpa.go
@@ -15,11 +15,12 @@ import (
 	"k8s.io/utils/ptr"
 
 	resourcesv1alpha1 "github.com/gardener/gardener/pkg/apis/resources/v1alpha1"
+	"github.com/gardener/gardener/pkg/component/apiserver"
 	"github.com/gardener/gardener/pkg/utils"
 )
 
 func (g *gardenerAPIServer) hvpa() *hvpav1alpha1.Hvpa {
-	if !g.values.Autoscaling.HVPAEnabled {
+	if g.values.Autoscaling.Mode != apiserver.AutoscalingModeHVPA {
 		return nil
 	}
 
diff --git a/pkg/component/gardener/apiserver/vpa.go b/pkg/component/gardener/apiserver/vpa.go
index 8c2c4bb6f57..a959effb223 100644
--- a/pkg/component/gardener/apiserver/vpa.go
+++ b/pkg/component/gardener/apiserver/vpa.go
@@ -11,10 +11,12 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	vpaautoscalingv1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+
+	"github.com/gardener/gardener/pkg/component/apiserver"
 )
 
 func (g *gardenerAPIServer) verticalPodAutoscaler() *vpaautoscalingv1.VerticalPodAutoscaler {
-	if g.values.Autoscaling.HVPAEnabled {
+	if g.values.Autoscaling.Mode != apiserver.AutoscalingModeBaseline {
 		return nil
 	}
 
diff --git a/pkg/component/kubernetes/apiserver/apiserver_test.go b/pkg/component/kubernetes/apiserver/apiserver_test.go
index 4580c04617c..27e9a67b395 100644
--- a/pkg/component/kubernetes/apiserver/apiserver_test.go
+++ b/pkg/component/kubernetes/apiserver/apiserver_test.go
@@ -222,14 +222,14 @@ var _ = Describe("KubeAPIServer", func() {
 					Expect(c.Get(ctx, client.ObjectKeyFromObject(horizontalPodAutoscaler), horizontalPodAutoscaler)).To(BeNotFoundError())
 				},
 
-				Entry("HVPA is enabled", apiserver.AutoscalingConfig{HVPAEnabled: true}),
-				Entry("replicas is nil", apiserver.AutoscalingConfig{HVPAEnabled: false, Replicas: nil}),
-				Entry("replicas is 0", apiserver.AutoscalingConfig{HVPAEnabled: false, Replicas: ptr.To[int32](0)}),
+				Entry("autoscaling mode is HVPA", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA}),
+				Entry("replicas is nil", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline, Replicas: nil}),
+				Entry("replicas is 0", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline, Replicas: ptr.To[int32](0)}),
 			)
 
 			BeforeEach(func() {
 				autoscalingConfig = apiserver.AutoscalingConfig{
-					HVPAEnabled: false,
+					Mode:        apiserver.AutoscalingModeBaseline,
 					Replicas:    ptr.To[int32](2),
 					MinReplicas: 4,
 					MaxReplicas: 6,
@@ -284,9 +284,9 @@ var _ = Describe("KubeAPIServer", func() {
 		})
 
 		Describe("VerticalPodAutoscaler", func() {
-			Context("HVPAEnabled = true", func() {
+			Context("autoscaling mode is HVPA", func() {
 				BeforeEach(func() {
-					autoscalingConfig = apiserver.AutoscalingConfig{HVPAEnabled: true}
+					autoscalingConfig = apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA}
 				})
 
 				It("should delete the VPA resource", func() {
@@ -297,9 +297,9 @@ var _ = Describe("KubeAPIServer", func() {
 				})
 			})
 
-			Context("HVPAEnabled = false", func() {
+			Context("autoscaling mode is baseline", func() {
 				BeforeEach(func() {
-					autoscalingConfig = apiserver.AutoscalingConfig{HVPAEnabled: false}
+					autoscalingConfig = apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline}
 				})
 
 				It("should successfully deploy the VPA resource", func() {
@@ -350,9 +350,9 @@ var _ = Describe("KubeAPIServer", func() {
 					Expect(c.Get(ctx, client.ObjectKeyFromObject(hvpa), hvpa)).To(BeNotFoundError())
 				},
 
-				Entry("HVPA disabled", apiserver.AutoscalingConfig{HVPAEnabled: false}),
-				Entry("HVPA enabled but replicas nil", apiserver.AutoscalingConfig{HVPAEnabled: true}),
-				Entry("HVPA enabled but replicas zero", apiserver.AutoscalingConfig{HVPAEnabled: true, Replicas: ptr.To[int32](0)}),
+				Entry("autoscaling mode is baseline", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline}),
+				Entry("autoscaling mode is HVPA but replicas nil", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA}),
+				Entry("autoscaling mode is HVPA but replicas zero", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA, Replicas: ptr.To[int32](0)}),
 			)
 
 			var (
@@ -516,7 +516,7 @@ var _ = Describe("KubeAPIServer", func() {
 
 				Entry("default behaviour",
 					apiserver.AutoscalingConfig{
-						HVPAEnabled: true,
+						Mode:        apiserver.AutoscalingModeHVPA,
 						Replicas:    ptr.To[int32](2),
 						MinReplicas: 5,
 						MaxReplicas: 5,
@@ -529,7 +529,7 @@ var _ = Describe("KubeAPIServer", func() {
 				),
 				Entry("UseMemoryMetricForHvpaHPA is true",
 					apiserver.AutoscalingConfig{
-						HVPAEnabled:               true,
+						Mode:                      apiserver.AutoscalingModeHVPA,
 						Replicas:                  ptr.To[int32](2),
 						UseMemoryMetricForHvpaHPA: true,
 						MinReplicas:               5,
@@ -558,7 +558,7 @@ var _ = Describe("KubeAPIServer", func() {
 				),
 				Entry("scale down is disabled",
 					apiserver.AutoscalingConfig{
-						HVPAEnabled:              true,
+						Mode:                     apiserver.AutoscalingModeHVPA,
 						Replicas:                 ptr.To[int32](2),
 						MinReplicas:              5,
 						MaxReplicas:              5,
@@ -572,7 +572,7 @@ var _ = Describe("KubeAPIServer", func() {
 				),
 				Entry("max replicas > min replicas",
 					apiserver.AutoscalingConfig{
-						HVPAEnabled: true,
+						Mode:        apiserver.AutoscalingModeHVPA,
 						Replicas:    ptr.To[int32](2),
 						MinReplicas: 3,
 						MaxReplicas: 5,
diff --git a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
index 963f7577744..2dadc86c161 100644
--- a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
@@ -14,6 +14,7 @@ import (
 	"k8s.io/utils/ptr"
 
 	v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
+	"github.com/gardener/gardener/pkg/component/apiserver"
 	"github.com/gardener/gardener/pkg/controllerutils"
 	kubernetesutils "github.com/gardener/gardener/pkg/utils/kubernetes"
 )
@@ -33,7 +34,7 @@ func (k *kubeAPIServer) emptyHorizontalPodAutoscaler() *autoscalingv2.Horizontal
 }
 
 func (k *kubeAPIServer) reconcileHorizontalPodAutoscaler(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, deployment *appsv1.Deployment) error {
-	if k.values.Autoscaling.HVPAEnabled ||
+	if k.values.Autoscaling.Mode != apiserver.AutoscalingModeBaseline ||
 		k.values.Autoscaling.Replicas == nil ||
 		*k.values.Autoscaling.Replicas == 0 {
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), hpa)
diff --git a/pkg/component/kubernetes/apiserver/hvpa.go b/pkg/component/kubernetes/apiserver/hvpa.go
index 81c255f7a49..0da080222b3 100644
--- a/pkg/component/kubernetes/apiserver/hvpa.go
+++ b/pkg/component/kubernetes/apiserver/hvpa.go
@@ -19,6 +19,7 @@ import (
 
 	v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
 	resourcesv1alpha1 "github.com/gardener/gardener/pkg/apis/resources/v1alpha1"
+	"github.com/gardener/gardener/pkg/component/apiserver"
 	"github.com/gardener/gardener/pkg/controllerutils"
 	kubernetesutils "github.com/gardener/gardener/pkg/utils/kubernetes"
 )
@@ -28,7 +29,7 @@ func (k *kubeAPIServer) emptyHVPA() *hvpav1alpha1.Hvpa {
 }
 
 func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hvpa, deployment *appsv1.Deployment) error {
-	if !k.values.Autoscaling.HVPAEnabled ||
+	if k.values.Autoscaling.Mode != apiserver.AutoscalingModeHVPA ||
 		k.values.Autoscaling.Replicas == nil ||
 		*k.values.Autoscaling.Replicas == 0 {
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), hvpa)
diff --git a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
index bf5bd9936f6..d35a75c56f3 100644
--- a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
@@ -13,6 +13,7 @@ import (
 	vpaautoscalingv1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 
 	v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
+	"github.com/gardener/gardener/pkg/component/apiserver"
 	"github.com/gardener/gardener/pkg/controllerutils"
 	kubernetesutils "github.com/gardener/gardener/pkg/utils/kubernetes"
 )
@@ -22,7 +23,7 @@ func (k *kubeAPIServer) emptyVerticalPodAutoscaler() *vpaautoscalingv1.VerticalP
 }
 
 func (k *kubeAPIServer) reconcileVerticalPodAutoscaler(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
-	if k.values.Autoscaling.HVPAEnabled {
+	if k.values.Autoscaling.Mode != apiserver.AutoscalingModeBaseline {
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), verticalPodAutoscaler)
 	}
 
diff --git a/pkg/component/shared/kubeapiserver.go b/pkg/component/shared/kubeapiserver.go
index 04495a1e1ab..e1812d29e32 100644
--- a/pkg/component/shared/kubeapiserver.go
+++ b/pkg/component/shared/kubeapiserver.go
@@ -220,7 +220,7 @@ func DeployKubeAPIServer(
 
 	kubeAPIServer.SetAutoscalingReplicas(computeKubeAPIServerReplicas(values.Autoscaling, deployment, wantScaleDown))
 
-	if deployment != nil && values.Autoscaling.HVPAEnabled {
+	if deployment != nil && values.Autoscaling.Mode == apiserver.AutoscalingModeHVPA {
 		for _, container := range deployment.Spec.Template.Spec.Containers {
 			if container.Name == kubeapiserver.ContainerNameKubeAPIServer {
 				// Only set requests to allow limits to be removed
diff --git a/pkg/component/shared/kubeapiserver_test.go b/pkg/component/shared/kubeapiserver_test.go
index f87384724af..a6fa494a7e2 100644
--- a/pkg/component/shared/kubeapiserver_test.go
+++ b/pkg/component/shared/kubeapiserver_test.go
@@ -980,7 +980,7 @@ exemptions:
 				apiserver.AutoscalingConfig{},
 				nil,
 			),
-			Entry("nothing is set because HVPA is disabled",
+			Entry("nothing is set because autoscaling mode is baseline",
 				func() {
 					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
 						ObjectMeta: metav1.ObjectMeta{
@@ -999,10 +999,10 @@ exemptions:
 						},
 					})).To(Succeed())
 				},
-				apiserver.AutoscalingConfig{HVPAEnabled: false},
+				apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline},
 				nil,
 			),
-			Entry("set the existing requirements because deployment found and HVPA enabled",
+			Entry("set the existing requirements because deployment found and autoscaling mode is HVPA",
 				func() {
 					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
 						ObjectMeta: metav1.ObjectMeta{
@@ -1021,7 +1021,7 @@ exemptions:
 						},
 					})).To(Succeed())
 				},
-				apiserver.AutoscalingConfig{HVPAEnabled: true},
+				apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA},
 				&apiServerResources,
 			),
 		)
diff --git a/pkg/gardenlet/operation/botanist/kubeapiserver.go b/pkg/gardenlet/operation/botanist/kubeapiserver.go
index 9e167ebe2cf..b842eee1dd2 100644
--- a/pkg/gardenlet/operation/botanist/kubeapiserver.go
+++ b/pkg/gardenlet/operation/botanist/kubeapiserver.go
@@ -82,7 +82,6 @@ func (b *Botanist) DefaultKubeAPIServer(ctx context.Context) (kubeapiserver.Inte
 
 func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.AutoscalingConfig {
 	var (
-		hvpaEnabled               = features.DefaultFeatureGate.Enabled(features.HVPA)
 		useMemoryMetricForHvpaHPA = false
 		scaleDownDisabledForHvpa  = false
 		defaultReplicas           *int32
@@ -91,10 +90,6 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 		apiServerResources        corev1.ResourceRequirements
 	)
 
-	if b.ManagedSeed != nil {
-		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPAForShootedSeed)
-	}
-
 	if b.Shoot.Purpose == gardencorev1beta1.ShootPurposeProduction {
 		minReplicas = 2
 	}
@@ -109,12 +104,13 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 		scaleDownDisabledForHvpa = true
 	}
 
+	autoscalingMode := b.autoscalingMode()
 	nodeCount := b.Shoot.GetMinNodeCount()
-	if hvpaEnabled {
+	if autoscalingMode == apiserver.AutoscalingModeHVPA {
 		nodeCount = b.Shoot.GetMaxNodeCount()
 	}
 
-	if hvpaEnabled {
+	if autoscalingMode != apiserver.AutoscalingModeBaseline {
 		apiServerResources = corev1.ResourceRequirements{
 			Requests: corev1.ResourceList{
 				corev1.ResourceCPU:    resource.MustParse("500m"),
@@ -132,7 +128,7 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 			minReplicas = *b.ManagedSeedAPIServer.Autoscaler.MinReplicas
 			maxReplicas = b.ManagedSeedAPIServer.Autoscaler.MaxReplicas
 
-			if !hvpaEnabled {
+			if autoscalingMode == apiserver.AutoscalingModeBaseline {
 				defaultReplicas = b.ManagedSeedAPIServer.Replicas
 				apiServerResources = corev1.ResourceRequirements{
 					Requests: corev1.ResourceList{
@@ -145,8 +141,8 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 	}
 
 	return apiserver.AutoscalingConfig{
+		Mode:                      autoscalingMode,
 		APIServerResources:        apiServerResources,
-		HVPAEnabled:               hvpaEnabled,
 		Replicas:                  defaultReplicas,
 		MinReplicas:               minReplicas,
 		MaxReplicas:               maxReplicas,
@@ -155,6 +151,20 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 	}
 }
 
+func (b *Botanist) autoscalingMode() apiserver.AutoscalingMode {
+	var hvpaEnabled bool
+	if b.ManagedSeed != nil {
+		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPAForShootedSeed)
+	} else {
+		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPA)
+	}
+
+	if hvpaEnabled {
+		return apiserver.AutoscalingModeHVPA
+	}
+	return apiserver.AutoscalingModeBaseline
+}
+
 func resourcesRequirementsForKubeAPIServer(nodeCount int32) corev1.ResourceRequirements {
 	var cpuRequest, memoryRequest string
 
diff --git a/pkg/gardenlet/operation/botanist/kubeapiserver_test.go b/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
index a10e013bdf0..0cd05d6d608 100644
--- a/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
+++ b/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
@@ -186,8 +186,8 @@ var _ = Describe("KubeAPIServer", func() {
 					nil,
 					map[featuregate.Feature]bool{features.HVPA: false},
 					apiserver.AutoscalingConfig{
+						Mode:                      apiserver.AutoscalingModeBaseline,
 						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						HVPAEnabled:               false,
 						MinReplicas:               1,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
@@ -200,13 +200,13 @@ var _ = Describe("KubeAPIServer", func() {
 						features.HVPA: true,
 					},
 					apiserver.AutoscalingConfig{
+						Mode: apiserver.AutoscalingModeHVPA,
 						APIServerResources: corev1.ResourceRequirements{
 							Requests: corev1.ResourceList{
 								corev1.ResourceCPU:    resource.MustParse("500m"),
 								corev1.ResourceMemory: resource.MustParse("1Gi"),
 							},
 						},
-						HVPAEnabled:               true,
 						MinReplicas:               1,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
@@ -219,8 +219,8 @@ var _ = Describe("KubeAPIServer", func() {
 					},
 					nil,
 					apiserver.AutoscalingConfig{
+						Mode:                      apiserver.AutoscalingModeBaseline,
 						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						HVPAEnabled:               false,
 						MinReplicas:               2,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
@@ -233,8 +233,8 @@ var _ = Describe("KubeAPIServer", func() {
 					},
 					nil,
 					apiserver.AutoscalingConfig{
+						Mode:                      apiserver.AutoscalingModeBaseline,
 						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						HVPAEnabled:               false,
 						MinReplicas:               4,
 						MaxReplicas:               4,
 						UseMemoryMetricForHvpaHPA: false,
@@ -247,8 +247,8 @@ var _ = Describe("KubeAPIServer", func() {
 					},
 					map[featuregate.Feature]bool{features.HVPAForShootedSeed: false},
 					apiserver.AutoscalingConfig{
+						Mode:                      apiserver.AutoscalingModeBaseline,
 						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						HVPAEnabled:               false,
 						MinReplicas:               1,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: true,
@@ -270,13 +270,13 @@ var _ = Describe("KubeAPIServer", func() {
 						features.HVPAForShootedSeed: true,
 					},
 					apiserver.AutoscalingConfig{
+						Mode: apiserver.AutoscalingModeHVPA,
 						APIServerResources: corev1.ResourceRequirements{
 							Requests: corev1.ResourceList{
 								corev1.ResourceCPU:    resource.MustParse("500m"),
 								corev1.ResourceMemory: resource.MustParse("1Gi"),
 							},
 						},
-						HVPAEnabled:               true,
 						MinReplicas:               16,
 						MaxReplicas:               32,
 						UseMemoryMetricForHvpaHPA: true,
@@ -296,13 +296,13 @@ var _ = Describe("KubeAPIServer", func() {
 					},
 					map[featuregate.Feature]bool{features.HVPAForShootedSeed: false},
 					apiserver.AutoscalingConfig{
+						Mode: apiserver.AutoscalingModeBaseline,
 						APIServerResources: corev1.ResourceRequirements{
 							Requests: corev1.ResourceList{
 								corev1.ResourceCPU:    resource.MustParse("1750m"),
 								corev1.ResourceMemory: resource.MustParse("2Gi"),
 							},
 						},
-						HVPAEnabled:               false,
 						MinReplicas:               16,
 						MaxReplicas:               32,
 						Replicas:                  ptr.To[int32](24),
@@ -325,13 +325,13 @@ var _ = Describe("KubeAPIServer", func() {
 						features.HVPAForShootedSeed: false,
 					},
 					apiserver.AutoscalingConfig{
+						Mode: apiserver.AutoscalingModeBaseline,
 						APIServerResources: corev1.ResourceRequirements{
 							Requests: corev1.ResourceList{
 								corev1.ResourceCPU:    resource.MustParse("1750m"),
 								corev1.ResourceMemory: resource.MustParse("2Gi"),
 							},
 						},
-						HVPAEnabled:               false,
 						MinReplicas:               16,
 						MaxReplicas:               32,
 						Replicas:                  ptr.To[int32](24),
@@ -349,8 +349,8 @@ var _ = Describe("KubeAPIServer", func() {
 					},
 					nil,
 					apiserver.AutoscalingConfig{
+						Mode:                      apiserver.AutoscalingModeBaseline,
 						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						HVPAEnabled:               false,
 						MinReplicas:               3,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
diff --git a/pkg/operator/controller/garden/garden/components.go b/pkg/operator/controller/garden/garden/components.go
index a75e3dd9932..78474be002f 100644
--- a/pkg/operator/controller/garden/garden/components.go
+++ b/pkg/operator/controller/garden/garden/components.go
@@ -628,14 +628,21 @@ func defaultAPIServerAutoscalingConfig(garden *operatorv1alpha1.Garden) apiserve
 		minReplicas = 3
 	}
 
+	var autoscalingMode apiserver.AutoscalingMode
+	if hvpaEnabled() {
+		autoscalingMode = apiserver.AutoscalingModeHVPA
+	} else {
+		autoscalingMode = apiserver.AutoscalingModeBaseline
+	}
+
 	return apiserver.AutoscalingConfig{
+		Mode: autoscalingMode,
 		APIServerResources: corev1.ResourceRequirements{
 			Requests: corev1.ResourceList{
 				corev1.ResourceCPU:    resource.MustParse("600m"),
 				corev1.ResourceMemory: resource.MustParse("512Mi"),
 			},
 		},
-		HVPAEnabled:               hvpaEnabled(),
 		MinReplicas:               minReplicas,
 		MaxReplicas:               6,
 		UseMemoryMetricForHvpaHPA: true,

From 946789d3ad2327cbe96ff3c3cb612e7549d6e8e3 Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Thu, 25 Apr 2024 15:35:02 +0300
Subject: [PATCH 03/10] Add VPAAndHPA autoscaling mode

---
 pkg/component/apiserver/types.go              |  11 +-
 .../kubernetes/apiserver/apiserver_test.go    | 306 +++++++++++++++---
 .../apiserver/horizontalpodautoscaler.go      |  90 +++++-
 pkg/component/kubernetes/apiserver/hvpa.go    |  77 +++--
 .../apiserver/verticalpodautoscaler.go        |  52 ++-
 pkg/component/shared/kubeapiserver.go         |  10 +-
 pkg/component/shared/kubeapiserver_test.go    |  26 +-
 .../operation/botanist/kubeapiserver.go       |  44 ++-
 .../operation/botanist/kubeapiserver_test.go  |  70 ++--
 .../controller/garden/garden/components.go    |   2 +-
 10 files changed, 532 insertions(+), 156 deletions(-)

diff --git a/pkg/component/apiserver/types.go b/pkg/component/apiserver/types.go
index b1d353b7075..6e324a71615 100644
--- a/pkg/component/apiserver/types.go
+++ b/pkg/component/apiserver/types.go
@@ -81,16 +81,19 @@ type AutoscalingMode int8
 
 const (
 	// AutoscalingModeBaseline differs substantially between kube-apiserver and gardener-apiserver.
-	// For kube-apiserver, it is active HPA, plus VPA in passive, recommend-only mode.
+	// For kube-apiserver, it is active HPA and VPA in passive, recommend-only mode.
 	// For gardener-apiserver, it is VPA only.
 	AutoscalingModeBaseline = 0 + iota
 	// AutoscalingModeHVPA uses Gardener's custom HVPA autoscaler.
 	AutoscalingModeHVPA
+	// AutoscalingModeVPAAndHPA uses VPA on CPU utilization and HPA on CPU usage.
+	AutoscalingModeVPAAndHPA
 )
 
 // AutoscalingConfig contains information for configuring autoscaling settings for the API server.
 type AutoscalingConfig struct {
 	// Mode is the strategy for scaling the API server.
+	// Defaults to AutoscalingModeBaseline.
 	Mode AutoscalingMode
 	// APIServerResources are the resource requirements for the API server container.
 	APIServerResources corev1.ResourceRequirements
@@ -103,9 +106,9 @@ type AutoscalingConfig struct {
 	// UseMemoryMetricForHvpaHPA states whether the memory metric shall be used when the HPA is configured in an HVPA
 	// resource.
 	UseMemoryMetricForHvpaHPA bool
-	// ScaleDownDisabledForHvpa states whether scale-down shall be disabled when HPA or VPA are configured in an HVPA
-	// resource.
-	ScaleDownDisabledForHvpa bool
+	// ScaleDownDisabled states whether scale-down shall be disabled.
+	// Only HVPA and VPAAndHPA autoscaling modes support disabling scale-down.
+	ScaleDownDisabled bool
 }
 
 // ETCDEncryptionConfig contains configuration for the encryption of resources in etcd.
diff --git a/pkg/component/kubernetes/apiserver/apiserver_test.go b/pkg/component/kubernetes/apiserver/apiserver_test.go
index 27e9a67b395..de06cc101b1 100644
--- a/pkg/component/kubernetes/apiserver/apiserver_test.go
+++ b/pkg/component/kubernetes/apiserver/apiserver_test.go
@@ -227,60 +227,127 @@ var _ = Describe("KubeAPIServer", func() {
 				Entry("replicas is 0", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline, Replicas: ptr.To[int32](0)}),
 			)
 
-			BeforeEach(func() {
-				autoscalingConfig = apiserver.AutoscalingConfig{
-					Mode:        apiserver.AutoscalingModeBaseline,
-					Replicas:    ptr.To[int32](2),
-					MinReplicas: 4,
-					MaxReplicas: 6,
-				}
+			DescribeTable("should successfully deploy the HPA resource",
+				func(autoscalingConfig apiserver.AutoscalingConfig, metrics []autoscalingv2.MetricSpec, behavior *autoscalingv2.HorizontalPodAutoscalerBehavior) {
+					kapi = New(kubernetesInterface, namespace, sm, Values{
+						Values: apiserver.Values{
+							Autoscaling:    autoscalingConfig,
+							RuntimeVersion: runtimeVersion,
+						},
+						Version: version},
+					)
 
-				runtimeVersion = semver.MustParse("1.25.0")
-			})
+					Expect(c.Get(ctx, client.ObjectKeyFromObject(horizontalPodAutoscaler), horizontalPodAutoscaler)).To(BeNotFoundError())
+					Expect(kapi.Deploy(ctx)).To(Succeed())
+					Expect(c.Get(ctx, client.ObjectKeyFromObject(horizontalPodAutoscaler), horizontalPodAutoscaler)).To(Succeed())
+					Expect(horizontalPodAutoscaler).To(DeepEqual(&autoscalingv2.HorizontalPodAutoscaler{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      horizontalPodAutoscaler.Name,
+							Namespace: horizontalPodAutoscaler.Namespace,
+							Labels: map[string]string{
+								"high-availability-config.resources.gardener.cloud/type": "server",
+							},
+							ResourceVersion: "1",
+						},
+						Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
+							MinReplicas: &autoscalingConfig.MinReplicas,
+							MaxReplicas: autoscalingConfig.MaxReplicas,
+							ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
+								APIVersion: "apps/v1",
+								Kind:       "Deployment",
+								Name:       "kube-apiserver",
+							},
+							Metrics:  metrics,
+							Behavior: behavior,
+						},
+					}))
+				},
 
-			It("should successfully deploy the HPA resource", func() {
-				Expect(c.Get(ctx, client.ObjectKeyFromObject(horizontalPodAutoscaler), horizontalPodAutoscaler)).To(BeNotFoundError())
-				Expect(kapi.Deploy(ctx)).To(Succeed())
-				Expect(c.Get(ctx, client.ObjectKeyFromObject(horizontalPodAutoscaler), horizontalPodAutoscaler)).To(Succeed())
-				Expect(horizontalPodAutoscaler).To(DeepEqual(&autoscalingv2.HorizontalPodAutoscaler{
-					ObjectMeta: metav1.ObjectMeta{
-						Name:            horizontalPodAutoscaler.Name,
-						Namespace:       horizontalPodAutoscaler.Namespace,
-						ResourceVersion: "1",
+				Entry("autoscaling mode is baseline",
+					apiserver.AutoscalingConfig{
+						Mode:        apiserver.AutoscalingModeBaseline,
+						Replicas:    ptr.To[int32](2),
+						MinReplicas: 4,
+						MaxReplicas: 6,
 					},
-					Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
-						MinReplicas: &autoscalingConfig.MinReplicas,
-						MaxReplicas: autoscalingConfig.MaxReplicas,
-						ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
-							APIVersion: "apps/v1",
-							Kind:       "Deployment",
-							Name:       "kube-apiserver",
-						},
-						Metrics: []autoscalingv2.MetricSpec{
-							{
-								Type: "Resource",
-								Resource: &autoscalingv2.ResourceMetricSource{
-									Name: "cpu",
-									Target: autoscalingv2.MetricTarget{
-										Type:               autoscalingv2.UtilizationMetricType,
-										AverageUtilization: ptr.To[int32](80),
-									},
+					[]autoscalingv2.MetricSpec{
+						{
+							Type: "Resource",
+							Resource: &autoscalingv2.ResourceMetricSource{
+								Name: "cpu",
+								Target: autoscalingv2.MetricTarget{
+									Type:               autoscalingv2.UtilizationMetricType,
+									AverageUtilization: ptr.To[int32](80),
 								},
 							},
-							{
-								Type: "Resource",
-								Resource: &autoscalingv2.ResourceMetricSource{
-									Name: "memory",
-									Target: autoscalingv2.MetricTarget{
-										Type:               autoscalingv2.UtilizationMetricType,
-										AverageUtilization: ptr.To[int32](80),
-									},
+						},
+						{
+							Type: "Resource",
+							Resource: &autoscalingv2.ResourceMetricSource{
+								Name: "memory",
+								Target: autoscalingv2.MetricTarget{
+									Type:               autoscalingv2.UtilizationMetricType,
+									AverageUtilization: ptr.To[int32](80),
 								},
 							},
 						},
 					},
-				}))
-			})
+					nil,
+				),
+
+				Entry("autoscaling mode is VPAAndHPA",
+					apiserver.AutoscalingConfig{
+						Mode:        apiserver.AutoscalingModeVPAAndHPA,
+						Replicas:    ptr.To[int32](2),
+						MinReplicas: 4,
+						MaxReplicas: 6,
+					},
+					[]autoscalingv2.MetricSpec{
+						{
+							Type: "Resource",
+							Resource: &autoscalingv2.ResourceMetricSource{
+								Name: "cpu",
+								Target: autoscalingv2.MetricTarget{
+									Type:         autoscalingv2.AverageValueMetricType,
+									AverageValue: ptr.To(resource.MustParse("6")),
+								},
+							},
+						},
+						{
+							Type: "Resource",
+							Resource: &autoscalingv2.ResourceMetricSource{
+								Name: "memory",
+								Target: autoscalingv2.MetricTarget{
+									Type:         autoscalingv2.AverageValueMetricType,
+									AverageValue: ptr.To(resource.MustParse("24G")),
+								},
+							},
+						},
+					},
+					&autoscalingv2.HorizontalPodAutoscalerBehavior{
+						ScaleUp: &autoscalingv2.HPAScalingRules{
+							StabilizationWindowSeconds: ptr.To[int32](60),
+							Policies: []autoscalingv2.HPAScalingPolicy{
+								{
+									Type:          autoscalingv2.PercentScalingPolicy,
+									Value:         100,
+									PeriodSeconds: 60,
+								},
+							},
+						},
+						ScaleDown: &autoscalingv2.HPAScalingRules{
+							StabilizationWindowSeconds: ptr.To[int32](1800),
+							Policies: []autoscalingv2.HPAScalingPolicy{
+								{
+									Type:          autoscalingv2.PodsScalingPolicy,
+									Value:         1,
+									PeriodSeconds: 300,
+								},
+							},
+						},
+					},
+				),
+			)
 		})
 
 		Describe("VerticalPodAutoscaler", func() {
@@ -297,6 +364,144 @@ var _ = Describe("KubeAPIServer", func() {
 				})
 			})
 
+			DescribeTable("should successfully deploy the VPA resource",
+				func(autoscalingConfig apiserver.AutoscalingConfig, haVPN bool, vpaUpdateMode *vpaautoscalingv1.UpdateMode, containerPolicies []vpaautoscalingv1.ContainerResourcePolicy, evictionRequirements []*vpaautoscalingv1.EvictionRequirement) {
+					kapi = New(kubernetesInterface, namespace, sm, Values{
+						Values: apiserver.Values{
+							Autoscaling:    autoscalingConfig,
+							RuntimeVersion: runtimeVersion,
+						},
+						Version: version,
+						VPN: VPNConfig{
+							HighAvailabilityEnabled:             haVPN,
+							HighAvailabilityNumberOfSeedServers: 2,
+						},
+					})
+
+					Expect(c.Get(ctx, client.ObjectKeyFromObject(verticalPodAutoscaler), verticalPodAutoscaler)).To(BeNotFoundError())
+					Expect(kapi.Deploy(ctx)).To(Succeed())
+					Expect(c.Get(ctx, client.ObjectKeyFromObject(verticalPodAutoscaler), verticalPodAutoscaler)).To(Succeed())
+					Expect(verticalPodAutoscaler).To(DeepEqual(&vpaautoscalingv1.VerticalPodAutoscaler{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:            verticalPodAutoscaler.Name,
+							Namespace:       verticalPodAutoscaler.Namespace,
+							ResourceVersion: "1",
+						},
+						Spec: vpaautoscalingv1.VerticalPodAutoscalerSpec{
+							TargetRef: &autoscalingv1.CrossVersionObjectReference{
+								APIVersion: "apps/v1",
+								Kind:       "Deployment",
+								Name:       "kube-apiserver",
+							},
+							UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
+								UpdateMode:           vpaUpdateMode,
+								EvictionRequirements: evictionRequirements,
+							},
+							ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
+								ContainerPolicies: containerPolicies,
+							},
+						},
+					}))
+				},
+
+				Entry("autoscaling mode is baseline",
+					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline},
+					false,
+					ptr.To(vpaautoscalingv1.UpdateModeOff),
+					[]vpaautoscalingv1.ContainerResourcePolicy{
+						{
+							ContainerName:    vpaautoscalingv1.DefaultContainerResourcePolicy,
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+						},
+					},
+					nil,
+				),
+				Entry("autoscaling mode is VPAAndHPA",
+					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA},
+					false,
+					ptr.To(vpaautoscalingv1.UpdateModeAuto),
+					[]vpaautoscalingv1.ContainerResourcePolicy{
+						{
+							ContainerName:    "kube-apiserver",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("20m"),
+								corev1.ResourceMemory: resource.MustParse("200M"),
+							},
+							MaxAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("7"),
+								corev1.ResourceMemory: resource.MustParse("28G"),
+							},
+						},
+					},
+					nil,
+				),
+				Entry("autoscaling mode is VPAAndHPA and HA VPN is enabled",
+					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA},
+					true,
+					ptr.To(vpaautoscalingv1.UpdateModeAuto),
+					[]vpaautoscalingv1.ContainerResourcePolicy{
+						{
+							ContainerName:    "kube-apiserver",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("20m"),
+								corev1.ResourceMemory: resource.MustParse("200M"),
+							},
+							MaxAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("7"),
+								corev1.ResourceMemory: resource.MustParse("28G"),
+							},
+						},
+						{
+							ContainerName:    "vpn-client-0",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceMemory: resource.MustParse("20Mi"),
+							},
+						},
+						{
+							ContainerName:    "vpn-client-1",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceMemory: resource.MustParse("20Mi"),
+							},
+						},
+						{
+							ContainerName:    "vpn-path-controller",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceMemory: resource.MustParse("20Mi"),
+							},
+						},
+					},
+					nil,
+				),
+				Entry("autoscaling mode is VPAAndHPA and scale-down is disabled",
+					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA, ScaleDownDisabled: true},
+					false,
+					ptr.To(vpaautoscalingv1.UpdateModeAuto),
+					[]vpaautoscalingv1.ContainerResourcePolicy{
+						{
+							ContainerName:    "kube-apiserver",
+							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
+							MinAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("20m"),
+								corev1.ResourceMemory: resource.MustParse("200M"),
+							},
+							MaxAllowed: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("7"),
+								corev1.ResourceMemory: resource.MustParse("28G"),
+							},
+						},
+					},
+					[]*vpaautoscalingv1.EvictionRequirement{{
+						Resources:         []corev1.ResourceName{corev1.ResourceMemory, corev1.ResourceCPU},
+						ChangeRequirement: vpaautoscalingv1.TargetHigherThanRequests,
+					}},
+				),
+			)
+
 			Context("autoscaling mode is baseline", func() {
 				BeforeEach(func() {
 					autoscalingConfig = apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline}
@@ -351,6 +556,7 @@ var _ = Describe("KubeAPIServer", func() {
 				},
 
 				Entry("autoscaling mode is baseline", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline}),
+				Entry("autoscaling mode is VPAAndHPA", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA}),
 				Entry("autoscaling mode is HVPA but replicas nil", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA}),
 				Entry("autoscaling mode is HVPA but replicas zero", apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA, Replicas: ptr.To[int32](0)}),
 			)
@@ -558,11 +764,11 @@ var _ = Describe("KubeAPIServer", func() {
 				),
 				Entry("scale down is disabled",
 					apiserver.AutoscalingConfig{
-						Mode:                     apiserver.AutoscalingModeHVPA,
-						Replicas:                 ptr.To[int32](2),
-						MinReplicas:              5,
-						MaxReplicas:              5,
-						ScaleDownDisabledForHvpa: true,
+						Mode:              apiserver.AutoscalingModeHVPA,
+						Replicas:          ptr.To[int32](2),
+						MinReplicas:       5,
+						MaxReplicas:       5,
+						ScaleDownDisabled: true,
 					},
 					SNIConfig{},
 					"Off",
diff --git a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
index 2dadc86c161..0469c35c439 100644
--- a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
@@ -10,10 +10,12 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/utils/ptr"
 
 	v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
+	resourcesv1alpha1 "github.com/gardener/gardener/pkg/apis/resources/v1alpha1"
 	"github.com/gardener/gardener/pkg/component/apiserver"
 	"github.com/gardener/gardener/pkg/controllerutils"
 	kubernetesutils "github.com/gardener/gardener/pkg/utils/kubernetes"
@@ -34,13 +36,22 @@ func (k *kubeAPIServer) emptyHorizontalPodAutoscaler() *autoscalingv2.Horizontal
 }
 
 func (k *kubeAPIServer) reconcileHorizontalPodAutoscaler(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, deployment *appsv1.Deployment) error {
-	if k.values.Autoscaling.Mode != apiserver.AutoscalingModeBaseline ||
+	if k.values.Autoscaling.Mode == apiserver.AutoscalingModeHVPA ||
 		k.values.Autoscaling.Replicas == nil ||
 		*k.values.Autoscaling.Replicas == 0 {
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), hpa)
 	}
 
+	if k.values.Autoscaling.Mode == apiserver.AutoscalingModeVPAAndHPA {
+		return k.reconcileHorizontalPodAutoscalerInVPAAndHPAMode(ctx, hpa, deployment)
+	}
+
+	return k.reconcileHorizontalPodAutoscalerInBaselineMode(ctx, hpa, deployment)
+}
+
+func (k *kubeAPIServer) reconcileHorizontalPodAutoscalerInBaselineMode(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, deployment *appsv1.Deployment) error {
 	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), hpa, func() error {
+		metav1.SetMetaDataLabel(&hpa.ObjectMeta, resourcesv1alpha1.HighAvailabilityConfigType, resourcesv1alpha1.HighAvailabilityConfigTypeServer)
 		hpa.Spec = autoscalingv2.HorizontalPodAutoscalerSpec{
 			MinReplicas: &k.values.Autoscaling.MinReplicas,
 			MaxReplicas: k.values.Autoscaling.MaxReplicas,
@@ -75,6 +86,83 @@ func (k *kubeAPIServer) reconcileHorizontalPodAutoscaler(ctx context.Context, hp
 
 		return nil
 	})
+	return err
+}
+
+func (k *kubeAPIServer) reconcileHorizontalPodAutoscalerInVPAAndHPAMode(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, deployment *appsv1.Deployment) error {
+	// The chosen value is 6 CPU: 1 CPU less than the VPA's maxAllowed of 7 CPU in VPAAndHPA mode, which leaves headroom for horizontal scaling.
+	hpaTargetAverageValueCPU := resource.MustParse("6")
+	// The chosen value is 24G: 4G less than the VPA's maxAllowed of 28G in VPAAndHPA mode, which leaves headroom for horizontal scaling.
+	hpaTargetAverageValueMemory := resource.MustParse("24G")
+
+	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), hpa, func() error {
+		minReplicas := k.values.Autoscaling.MinReplicas
+		if k.values.Autoscaling.ScaleDownDisabled && hpa.Spec.MinReplicas != nil {
+			// If scale-down is disabled and the HPA resource exists and HPA's spec.minReplicas is not nil,
+			// then minReplicas is max(spec.minReplicas, status.desiredReplicas).
+			// When scale-down is disabled, this allows operators to specify a custom value for HPA spec.minReplicas
+			// and this value not to be reverted by gardenlet.
+			minReplicas = max(*hpa.Spec.MinReplicas, hpa.Status.DesiredReplicas)
+		}
+
+		metav1.SetMetaDataLabel(&hpa.ObjectMeta, resourcesv1alpha1.HighAvailabilityConfigType, resourcesv1alpha1.HighAvailabilityConfigTypeServer)
+		hpa.Spec = autoscalingv2.HorizontalPodAutoscalerSpec{
+			MinReplicas: &minReplicas,
+			MaxReplicas: k.values.Autoscaling.MaxReplicas,
+			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
+				APIVersion: appsv1.SchemeGroupVersion.String(),
+				Kind:       "Deployment",
+				Name:       deployment.Name,
+			},
+			Metrics: []autoscalingv2.MetricSpec{
+				{
+					Type: autoscalingv2.ResourceMetricSourceType,
+					Resource: &autoscalingv2.ResourceMetricSource{
+						Name: corev1.ResourceCPU,
+						Target: autoscalingv2.MetricTarget{
+							Type:         autoscalingv2.AverageValueMetricType,
+							AverageValue: &hpaTargetAverageValueCPU,
+						},
+					},
+				},
+				{
+					Type: autoscalingv2.ResourceMetricSourceType,
+					Resource: &autoscalingv2.ResourceMetricSource{
+						Name: corev1.ResourceMemory,
+						Target: autoscalingv2.MetricTarget{
+							Type:         autoscalingv2.AverageValueMetricType,
+							AverageValue: &hpaTargetAverageValueMemory,
+						},
+					},
+				},
+			},
+			Behavior: &autoscalingv2.HorizontalPodAutoscalerBehavior{
+				ScaleUp: &autoscalingv2.HPAScalingRules{
+					StabilizationWindowSeconds: ptr.To[int32](60),
+					Policies: []autoscalingv2.HPAScalingPolicy{
+						// Allow to upscale 100% of the current number of pods every 1 minute to see whether any upscale recommendation will still hold true after the cluster has settled
+						{
+							Type:          autoscalingv2.PercentScalingPolicy,
+							Value:         100,
+							PeriodSeconds: 60,
+						},
+					},
+				},
+				ScaleDown: &autoscalingv2.HPAScalingRules{
+					StabilizationWindowSeconds: ptr.To[int32](1800),
+					Policies: []autoscalingv2.HPAScalingPolicy{
+						// Allow to downscale one pod every 5 minutes to see whether any downscale recommendation will still hold true after the cluster has settled (conservatively)
+						{
+							Type:          autoscalingv2.PodsScalingPolicy,
+							Value:         1,
+							PeriodSeconds: 300,
+						},
+					},
+				},
+			},
+		}
 
+		return nil
+	})
 	return err
 }
diff --git a/pkg/component/kubernetes/apiserver/hvpa.go b/pkg/component/kubernetes/apiserver/hvpa.go
index 0da080222b3..acb10e13be0 100644
--- a/pkg/component/kubernetes/apiserver/hvpa.go
+++ b/pkg/component/kubernetes/apiserver/hvpa.go
@@ -40,7 +40,6 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 		vpaLabels           = map[string]string{v1beta1constants.LabelRole: v1beta1constants.LabelAPIServer + "-vpa"}
 		updateModeAuto      = hvpav1alpha1.UpdateModeAuto
 		scaleDownUpdateMode = updateModeAuto
-		controlledValues    = vpaautoscalingv1.ContainerControlledValuesRequestsOnly
 		hpaMetrics          = []autoscalingv2beta1.MetricSpec{
 			{
 				Type: autoscalingv2beta1.ResourceMetricSourceType,
@@ -50,18 +49,12 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 				},
 			},
 		}
-		vpaContainerResourcePolicies = []vpaautoscalingv1.ContainerResourcePolicy{
-			{
-				ContainerName: ContainerNameKubeAPIServer,
-				MinAllowed: corev1.ResourceList{
-					corev1.ResourceMemory: resource.MustParse("200M"),
-				},
-				MaxAllowed: corev1.ResourceList{
-					corev1.ResourceCPU:    resource.MustParse("8"),
-					corev1.ResourceMemory: resource.MustParse("25G"),
-				},
-				ControlledValues: &controlledValues,
-			},
+		kubeAPIServerMinAllowed = corev1.ResourceList{
+			corev1.ResourceMemory: resource.MustParse("200M"),
+		}
+		kubeAPIServerMaxAllowed = corev1.ResourceList{
+			corev1.ResourceCPU:    resource.MustParse("8"),
+			corev1.ResourceMemory: resource.MustParse("25G"),
 		}
 		weightBasedScalingIntervals = []hvpav1alpha1.WeightBasedScalingInterval{
 			{
@@ -82,7 +75,7 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 		})
 	}
 
-	if k.values.Autoscaling.ScaleDownDisabledForHvpa {
+	if k.values.Autoscaling.ScaleDownDisabled {
 		scaleDownUpdateMode = hvpav1alpha1.UpdateModeOff
 	}
 
@@ -94,25 +87,6 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 		})
 	}
 
-	if k.values.VPN.HighAvailabilityEnabled {
-		for i := 0; i < k.values.VPN.HighAvailabilityNumberOfSeedServers; i++ {
-			vpaContainerResourcePolicies = append(vpaContainerResourcePolicies, vpaautoscalingv1.ContainerResourcePolicy{
-				ContainerName: fmt.Sprintf("%s-%d", containerNameVPNSeedClient, i),
-				MinAllowed: corev1.ResourceList{
-					corev1.ResourceMemory: resource.MustParse("20Mi"),
-				},
-				ControlledValues: &controlledValues,
-			})
-		}
-		vpaContainerResourcePolicies = append(vpaContainerResourcePolicies, vpaautoscalingv1.ContainerResourcePolicy{
-			ContainerName: containerNameVPNPathController,
-			MinAllowed: corev1.ResourceList{
-				corev1.ResourceMemory: resource.MustParse("20Mi"),
-			},
-			ControlledValues: &controlledValues,
-		})
-	}
-
 	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), hvpa, func() error {
 		metav1.SetMetaDataLabel(&hvpa.ObjectMeta, resourcesv1alpha1.HighAvailabilityConfigType, resourcesv1alpha1.HighAvailabilityConfigTypeServer)
 		hvpa.Spec.Replicas = ptr.To[int32](1)
@@ -191,7 +165,7 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 				},
 				Spec: hvpav1alpha1.VpaTemplateSpec{
 					ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
-						ContainerPolicies: vpaContainerResourcePolicies,
+						ContainerPolicies: k.computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed),
 					},
 				},
 			},
@@ -206,3 +180,38 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 	})
 	return err
 }
+
+func (k *kubeAPIServer) computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed corev1.ResourceList) []vpaautoscalingv1.ContainerResourcePolicy {
+	var (
+		controlledValues             = vpaautoscalingv1.ContainerControlledValuesRequestsOnly
+		vpaContainerResourcePolicies = []vpaautoscalingv1.ContainerResourcePolicy{
+			{
+				ContainerName:    ContainerNameKubeAPIServer,
+				MinAllowed:       kubeAPIServerMinAllowed,
+				MaxAllowed:       kubeAPIServerMaxAllowed,
+				ControlledValues: &controlledValues,
+			},
+		}
+	)
+
+	if k.values.VPN.HighAvailabilityEnabled {
+		for i := 0; i < k.values.VPN.HighAvailabilityNumberOfSeedServers; i++ {
+			vpaContainerResourcePolicies = append(vpaContainerResourcePolicies, vpaautoscalingv1.ContainerResourcePolicy{
+				ContainerName: fmt.Sprintf("%s-%d", containerNameVPNSeedClient, i),
+				MinAllowed: corev1.ResourceList{
+					corev1.ResourceMemory: resource.MustParse("20Mi"),
+				},
+				ControlledValues: &controlledValues,
+			})
+		}
+		vpaContainerResourcePolicies = append(vpaContainerResourcePolicies, vpaautoscalingv1.ContainerResourcePolicy{
+			ContainerName: containerNameVPNPathController,
+			MinAllowed: corev1.ResourceList{
+				corev1.ResourceMemory: resource.MustParse("20Mi"),
+			},
+			ControlledValues: &controlledValues,
+		})
+	}
+
+	return vpaContainerResourcePolicies
+}
diff --git a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
index d35a75c56f3..89fbdd68e5a 100644
--- a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
@@ -9,6 +9,8 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	autoscalingv1 "k8s.io/api/autoscaling/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	vpaautoscalingv1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 
@@ -23,10 +25,17 @@ func (k *kubeAPIServer) emptyVerticalPodAutoscaler() *vpaautoscalingv1.VerticalP
 }
 
 func (k *kubeAPIServer) reconcileVerticalPodAutoscaler(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
-	if k.values.Autoscaling.Mode != apiserver.AutoscalingModeBaseline {
+	switch mode := k.values.Autoscaling.Mode; mode {
+	case apiserver.AutoscalingModeHVPA:
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), verticalPodAutoscaler)
+	case apiserver.AutoscalingModeVPAAndHPA:
+		return k.reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx, verticalPodAutoscaler, deployment)
+	default:
+		return k.reconcileVerticalPodAutoscalerInBaselineMode(ctx, verticalPodAutoscaler, deployment)
 	}
+}
 
+func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInBaselineMode(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
 	vpaUpdateMode := vpaautoscalingv1.UpdateModeOff
 	controlledValues := vpaautoscalingv1.ContainerControlledValuesRequestsOnly
 
@@ -51,3 +60,44 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscaler(ctx context.Context, vert
 	})
 	return err
 }
+
+func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
+	updateMode := vpaautoscalingv1.UpdateModeAuto
+	kubeAPIServerMinAllowed := corev1.ResourceList{
+		corev1.ResourceCPU:    resource.MustParse("20m"),
+		corev1.ResourceMemory: resource.MustParse("200M"),
+	}
+	kubeAPIServerMaxAllowed := corev1.ResourceList{
+		// The CPU and memory are aligned to the machine ratio of 1:4.
+		corev1.ResourceCPU:    resource.MustParse("7"),
+		corev1.ResourceMemory: resource.MustParse("28G"),
+	}
+
+	var evictionRequirements []*vpaautoscalingv1.EvictionRequirement
+	if k.values.Autoscaling.ScaleDownDisabled {
+		evictionRequirements = []*vpaautoscalingv1.EvictionRequirement{{
+			Resources:         []corev1.ResourceName{corev1.ResourceMemory, corev1.ResourceCPU},
+			ChangeRequirement: vpaautoscalingv1.TargetHigherThanRequests,
+		}}
+	}
+
+	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), verticalPodAutoscaler, func() error {
+		verticalPodAutoscaler.Spec = vpaautoscalingv1.VerticalPodAutoscalerSpec{
+			TargetRef: &autoscalingv1.CrossVersionObjectReference{
+				APIVersion: appsv1.SchemeGroupVersion.String(),
+				Kind:       "Deployment",
+				Name:       deployment.Name,
+			},
+			UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
+				UpdateMode:           &updateMode,
+				EvictionRequirements: evictionRequirements,
+			},
+			ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
+				ContainerPolicies: k.computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed),
+			},
+		}
+		return nil
+	})
+
+	return err
+}
diff --git a/pkg/component/shared/kubeapiserver.go b/pkg/component/shared/kubeapiserver.go
index e1812d29e32..54b7b3f97dc 100644
--- a/pkg/component/shared/kubeapiserver.go
+++ b/pkg/component/shared/kubeapiserver.go
@@ -220,7 +220,15 @@ func DeployKubeAPIServer(
 
 	kubeAPIServer.SetAutoscalingReplicas(computeKubeAPIServerReplicas(values.Autoscaling, deployment, wantScaleDown))
 
-	if deployment != nil && values.Autoscaling.Mode == apiserver.AutoscalingModeHVPA {
+	// For safety reasons, when the Deployment exists we don't overwrite the kube-apiserver container resources,
+	// although this is not required in all cases. A few cases that require it:
+	// - When autoscaling mode is HVPA, hvpa-controller updates the resources in the Deployment spec directly.
+	//   Overwriting the resources in the Deployment spec would revert hvpa-controller's recommendations.
+	// - When scale-down is disabled, operators might want to overwrite the kube-apiserver container resource requests.
+	// - When transitioning from HVPA to VPAAndHPA autoscaling mode, we need to preserve the kube-apiserver container resources
+	//   to avoid an unwanted rollout that might be breaking. Otherwise, we would scale down from the potentially
+	//   high resource requests (set by HVPA) to the initial resource requests in VPAAndHPA mode.
+	if deployment != nil {
 		for _, container := range deployment.Spec.Template.Spec.Containers {
 			if container.Name == kubeapiserver.ContainerNameKubeAPIServer {
 				// Only set requests to allow limits to be removed
diff --git a/pkg/component/shared/kubeapiserver_test.go b/pkg/component/shared/kubeapiserver_test.go
index a6fa494a7e2..b99443f17c2 100644
--- a/pkg/component/shared/kubeapiserver_test.go
+++ b/pkg/component/shared/kubeapiserver_test.go
@@ -975,34 +975,12 @@ exemptions:
 				Expect(DeployKubeAPIServer(ctx, runtimeClient, namespace, kubeAPIServer, serviceAccountConfig, serverCertificateConfig, sniConfig, externalHostname, externalServer, &nodeNetworkCIDR, nil, nil, etcdEncryptionKeyRotationPhase, wantScaleDown)).To(Succeed())
 			},
 
-			Entry("nothing is set because deployment is not found",
+			Entry("nothing is set when deployment is not found",
 				nil,
 				apiserver.AutoscalingConfig{},
 				nil,
 			),
-			Entry("nothing is set because autoscaling mode is baseline",
-				func() {
-					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
-						ObjectMeta: metav1.ObjectMeta{
-							Name:      "kube-apiserver",
-							Namespace: namespace,
-						},
-						Spec: appsv1.DeploymentSpec{
-							Template: corev1.PodTemplateSpec{
-								Spec: corev1.PodSpec{
-									Containers: []corev1.Container{{
-										Name:      "kube-apiserver",
-										Resources: apiServerResources,
-									}},
-								},
-							},
-						},
-					})).To(Succeed())
-				},
-				apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline},
-				nil,
-			),
-			Entry("set the existing requirements because deployment found and autoscaling mode is HVPA",
+			Entry("set the existing requirements when the deployment is found",
 				func() {
 					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
 						ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/gardenlet/operation/botanist/kubeapiserver.go b/pkg/gardenlet/operation/botanist/kubeapiserver.go
index b842eee1dd2..da7beb6605a 100644
--- a/pkg/gardenlet/operation/botanist/kubeapiserver.go
+++ b/pkg/gardenlet/operation/botanist/kubeapiserver.go
@@ -83,17 +83,17 @@ func (b *Botanist) DefaultKubeAPIServer(ctx context.Context) (kubeapiserver.Inte
 func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.AutoscalingConfig {
 	var (
 		useMemoryMetricForHvpaHPA = false
-		scaleDownDisabledForHvpa  = false
+		scaleDownDisabled         = false
 		defaultReplicas           *int32
-		minReplicas               int32 = 1
-		maxReplicas               int32 = 3
-		apiServerResources        corev1.ResourceRequirements
+		// kube-apiserver is a control plane component of type "server".
+		// The HA webhook sets at least 2 replicas to components of type "server" (with or without HA).
+		// Ref https://github.com/gardener/gardener/blob/master/docs/development/high-availability.md#control-plane-components.
+		// That's why minReplicas is set to 2.
+		minReplicas        int32 = 2
+		maxReplicas        int32 = 3
+		apiServerResources corev1.ResourceRequirements
 	)
 
-	if b.Shoot.Purpose == gardencorev1beta1.ShootPurposeProduction {
-		minReplicas = 2
-	}
-
 	if v1beta1helper.IsHAControlPlaneConfigured(b.Shoot.GetInfo()) {
 		minReplicas = 3
 	}
@@ -101,24 +101,29 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 	if metav1.HasAnnotation(b.Shoot.GetInfo().ObjectMeta, v1beta1constants.ShootAlphaControlPlaneScaleDownDisabled) {
 		minReplicas = 4
 		maxReplicas = 4
-		scaleDownDisabledForHvpa = true
+		scaleDownDisabled = true
 	}
 
 	autoscalingMode := b.autoscalingMode()
 	nodeCount := b.Shoot.GetMinNodeCount()
-	if autoscalingMode == apiserver.AutoscalingModeHVPA {
-		nodeCount = b.Shoot.GetMaxNodeCount()
-	}
 
-	if autoscalingMode != apiserver.AutoscalingModeBaseline {
+	switch autoscalingMode {
+	case apiserver.AutoscalingModeHVPA:
 		apiServerResources = corev1.ResourceRequirements{
 			Requests: corev1.ResourceList{
 				corev1.ResourceCPU:    resource.MustParse("500m"),
 				corev1.ResourceMemory: resource.MustParse("1Gi"),
 			},
 		}
-	} else {
-		apiServerResources = resourcesRequirementsForKubeAPIServer(nodeCount)
+	case apiserver.AutoscalingModeVPAAndHPA:
+		apiServerResources = corev1.ResourceRequirements{
+			Requests: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("250m"),
+				corev1.ResourceMemory: resource.MustParse("500Mi"),
+			},
+		}
+	default:
+		apiServerResources = resourcesRequirementsForKubeAPIServerInBaselineMode(nodeCount)
 	}
 
 	if b.ManagedSeed != nil {
@@ -147,11 +152,16 @@ func (b *Botanist) computeKubeAPIServerAutoscalingConfig() apiserver.Autoscaling
 		MinReplicas:               minReplicas,
 		MaxReplicas:               maxReplicas,
 		UseMemoryMetricForHvpaHPA: useMemoryMetricForHvpaHPA,
-		ScaleDownDisabledForHvpa:  scaleDownDisabledForHvpa,
+		ScaleDownDisabled:         scaleDownDisabled,
 	}
 }
 
 func (b *Botanist) autoscalingMode() apiserver.AutoscalingMode {
+	// The VPAAndHPAForAPIServer feature gate takes precedence over the HVPA feature gate.
+	if features.DefaultFeatureGate.Enabled(features.VPAAndHPAForAPIServer) {
+		return apiserver.AutoscalingModeVPAAndHPA
+	}
+
 	var hvpaEnabled bool
 	if b.ManagedSeed != nil {
 		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPAForShootedSeed)
@@ -165,7 +175,7 @@ func (b *Botanist) autoscalingMode() apiserver.AutoscalingMode {
 	return apiserver.AutoscalingModeBaseline
 }
 
-func resourcesRequirementsForKubeAPIServer(nodeCount int32) corev1.ResourceRequirements {
+func resourcesRequirementsForKubeAPIServerInBaselineMode(nodeCount int32) corev1.ResourceRequirements {
 	var cpuRequest, memoryRequest string
 
 	switch {
diff --git a/pkg/gardenlet/operation/botanist/kubeapiserver_test.go b/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
index 0cd05d6d608..a5e1af4e997 100644
--- a/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
+++ b/pkg/gardenlet/operation/botanist/kubeapiserver_test.go
@@ -182,22 +182,26 @@ var _ = Describe("KubeAPIServer", func() {
 					Expect(kubeAPIServer.GetValues().Autoscaling).To(Equal(expectedConfig))
 				},
 
-				Entry("default behaviour, HVPA is disabled",
+				Entry("default behaviour, HVPA is disabled, VPAAndHPAForAPIServer is disabled",
 					nil,
-					map[featuregate.Feature]bool{features.HVPA: false},
+					map[featuregate.Feature]bool{
+						features.HVPA:                  false,
+						features.VPAAndHPAForAPIServer: false,
+					},
 					apiserver.AutoscalingConfig{
 						Mode:                      apiserver.AutoscalingModeBaseline,
-						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						MinReplicas:               1,
+						APIServerResources:        resourcesRequirementsForKubeAPIServerInBaselineMode(4),
+						MinReplicas:               2,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
-				Entry("default behaviour, HVPA is enabled",
+				Entry("default behaviour, HVPA is enabled, VPAAndHPAForAPIServer is disabled",
 					nil,
 					map[featuregate.Feature]bool{
-						features.HVPA: true,
+						features.HVPA:                  true,
+						features.VPAAndHPAForAPIServer: false,
 					},
 					apiserver.AutoscalingConfig{
 						Mode: apiserver.AutoscalingModeHVPA,
@@ -207,10 +211,30 @@ var _ = Describe("KubeAPIServer", func() {
 								corev1.ResourceMemory: resource.MustParse("1Gi"),
 							},
 						},
-						MinReplicas:               1,
+						MinReplicas:               2,
+						MaxReplicas:               3,
+						UseMemoryMetricForHvpaHPA: false,
+						ScaleDownDisabled:         false,
+					},
+				),
+				Entry("default behaviour, HVPA is enabled, VPAAndHPAForAPIServer is enabled",
+					nil,
+					map[featuregate.Feature]bool{
+						features.HVPA:                  true,
+						features.VPAAndHPAForAPIServer: true,
+					},
+					apiserver.AutoscalingConfig{
+						Mode: apiserver.AutoscalingModeVPAAndHPA,
+						APIServerResources: corev1.ResourceRequirements{
+							Requests: corev1.ResourceList{
+								corev1.ResourceCPU:    resource.MustParse("250m"),
+								corev1.ResourceMemory: resource.MustParse("500Mi"),
+							},
+						},
+						MinReplicas:               2,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot purpose production",
@@ -220,11 +244,11 @@ var _ = Describe("KubeAPIServer", func() {
 					nil,
 					apiserver.AutoscalingConfig{
 						Mode:                      apiserver.AutoscalingModeBaseline,
-						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
+						APIServerResources:        resourcesRequirementsForKubeAPIServerInBaselineMode(4),
 						MinReplicas:               2,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot disables scale down",
@@ -234,11 +258,11 @@ var _ = Describe("KubeAPIServer", func() {
 					nil,
 					apiserver.AutoscalingConfig{
 						Mode:                      apiserver.AutoscalingModeBaseline,
-						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
+						APIServerResources:        resourcesRequirementsForKubeAPIServerInBaselineMode(4),
 						MinReplicas:               4,
 						MaxReplicas:               4,
 						UseMemoryMetricForHvpaHPA: false,
-						ScaleDownDisabledForHvpa:  true,
+						ScaleDownDisabled:         true,
 					},
 				),
 				Entry("shoot is a managed seed and HVPAForShootedSeed is disabled",
@@ -248,11 +272,11 @@ var _ = Describe("KubeAPIServer", func() {
 					map[featuregate.Feature]bool{features.HVPAForShootedSeed: false},
 					apiserver.AutoscalingConfig{
 						Mode:                      apiserver.AutoscalingModeBaseline,
-						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
-						MinReplicas:               1,
+						APIServerResources:        resourcesRequirementsForKubeAPIServerInBaselineMode(4),
+						MinReplicas:               2,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: true,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot is a managed seed w/ APIServer settings and HVPAForShootedSeed is enabled",
@@ -280,7 +304,7 @@ var _ = Describe("KubeAPIServer", func() {
 						MinReplicas:               16,
 						MaxReplicas:               32,
 						UseMemoryMetricForHvpaHPA: true,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot is a managed seed w/ APIServer settings and HVPAForShootedSeed is disabled",
@@ -307,7 +331,7 @@ var _ = Describe("KubeAPIServer", func() {
 						MaxReplicas:               32,
 						Replicas:                  ptr.To[int32](24),
 						UseMemoryMetricForHvpaHPA: true,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot is a managed seed w/ APIServer settings and HVPAForShootedSeed is disabled",
@@ -336,7 +360,7 @@ var _ = Describe("KubeAPIServer", func() {
 						MaxReplicas:               32,
 						Replicas:                  ptr.To[int32](24),
 						UseMemoryMetricForHvpaHPA: true,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 				Entry("shoot enables HA control planes",
@@ -350,20 +374,20 @@ var _ = Describe("KubeAPIServer", func() {
 					nil,
 					apiserver.AutoscalingConfig{
 						Mode:                      apiserver.AutoscalingModeBaseline,
-						APIServerResources:        resourcesRequirementsForKubeAPIServer(4),
+						APIServerResources:        resourcesRequirementsForKubeAPIServerInBaselineMode(4),
 						MinReplicas:               3,
 						MaxReplicas:               3,
 						UseMemoryMetricForHvpaHPA: false,
-						ScaleDownDisabledForHvpa:  false,
+						ScaleDownDisabled:         false,
 					},
 				),
 			)
 		})
 	})
 
-	DescribeTable("#resourcesRequirementsForKubeAPIServer",
+	DescribeTable("#resourcesRequirementsForKubeAPIServerInBaselineMode",
 		func(nodes int, expectedCPURequest, expectedMemoryRequest string) {
-			Expect(resourcesRequirementsForKubeAPIServer(int32(nodes))).To(Equal(
+			Expect(resourcesRequirementsForKubeAPIServerInBaselineMode(int32(nodes))).To(Equal(
 				corev1.ResourceRequirements{
 					Requests: corev1.ResourceList{
 						corev1.ResourceCPU:    resource.MustParse(expectedCPURequest),
diff --git a/pkg/operator/controller/garden/garden/components.go b/pkg/operator/controller/garden/garden/components.go
index 78474be002f..21d6b49fdcf 100644
--- a/pkg/operator/controller/garden/garden/components.go
+++ b/pkg/operator/controller/garden/garden/components.go
@@ -646,7 +646,7 @@ func defaultAPIServerAutoscalingConfig(garden *operatorv1alpha1.Garden) apiserve
 		MinReplicas:               minReplicas,
 		MaxReplicas:               6,
 		UseMemoryMetricForHvpaHPA: true,
-		ScaleDownDisabledForHvpa:  false,
+		ScaleDownDisabled:         false,
 	}
 }
 

From e3e96bbb5ab21dfa0fd0a35973d1b7051c692560 Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Fri, 26 Apr 2024 17:36:51 +0300
Subject: [PATCH 04/10] Add docs for kube-apiserver autoscaling

---
 docs/README.md                                |  5 +-
 ...> gardener-apiserver-admission-plugins.md} |  0
 .../{apiserver.md => gardener-apiserver.md}   |  0
 docs/concepts/kubernetes-apiserver.md         | 48 +++++++++++++++++++
 4 files changed, 51 insertions(+), 2 deletions(-)
 rename docs/concepts/{apiserver_admission_plugins.md => gardener-apiserver-admission-plugins.md} (100%)
 rename docs/concepts/{apiserver.md => gardener-apiserver.md} (100%)
 create mode 100644 docs/concepts/kubernetes-apiserver.md

diff --git a/docs/README.md b/docs/README.md
index f337d15bd9f..9aa496d760d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -10,8 +10,8 @@
 ## Concepts
 
 * Components
-  * [Gardener API server](concepts/apiserver.md)
-    * [In-Tree admission plugins](concepts/apiserver_admission_plugins.md)
+  * [Gardener API server](concepts/gardener-apiserver.md)
+    * [In-Tree admission plugins](concepts/gardener-apiserver-admission-plugins.md)
   * [Gardener Controller Manager](concepts/controller-manager.md)
   * [Gardener Scheduler](concepts/scheduler.md)
   * [Gardener Admission Controller](concepts/admission-controller.md)
@@ -21,6 +21,7 @@
   * [Gardenlet](concepts/gardenlet.md)
 * [Backup Restore](concepts/backup-restore.md)
 * [etcd](concepts/etcd.md)
+* [Kubernetes API Server](concepts/kubernetes-apiserver.md)
 * [Relation between Gardener API and Cluster API](concepts/cluster-api.md)
 
 ## Usage
diff --git a/docs/concepts/apiserver_admission_plugins.md b/docs/concepts/gardener-apiserver-admission-plugins.md
similarity index 100%
rename from docs/concepts/apiserver_admission_plugins.md
rename to docs/concepts/gardener-apiserver-admission-plugins.md
diff --git a/docs/concepts/apiserver.md b/docs/concepts/gardener-apiserver.md
similarity index 100%
rename from docs/concepts/apiserver.md
rename to docs/concepts/gardener-apiserver.md
diff --git a/docs/concepts/kubernetes-apiserver.md b/docs/concepts/kubernetes-apiserver.md
new file mode 100644
index 00000000000..79bd957b893
--- /dev/null
+++ b/docs/concepts/kubernetes-apiserver.md
@@ -0,0 +1,48 @@
+---
+title: Kubernetes API Server
+---
+
+## Overview
+
+The Kubernetes API Server is a component of the Kubernetes control plane that exposes the Kubernetes API. It is the gateway to the Kubernetes cluster - the central touch point that is accessed by all users, automation, and components in the Kubernetes cluster.
+
+## Shoot Kubernetes API Server Autoscaling
+
+There are three supported autoscaling modes for the Shoot Kubernetes API Server.
+
+- `Baseline`
+
+   In `Baseline` mode, the Shoot Kubernetes API server is scaled by an active HPA, while the VPA runs in passive, recommend-only mode.
+
+   The API server resource requests are computed based on the Shoot's minimum Nodes count:
+   | Range       | Resource Requests |
+   |-------------|-------------------|
+   | [0, 2]      | `800m`, `800Mi`   |
+   | (2, 10]     | `1000m`, `1100Mi` |
+   | (10, 50]    | `1200m`, `1600Mi` |
+   | (50, 100]   | `2500m`, `5200Mi` |
+   | (100, inf.) | `3000m`, `5200Mi` |
+
+   The `Baseline` mode is the used autoscaling mode when the `HVPA` and `VPAAndHPAForAPIServer` feature gates are not enabled.
+
+- `HVPA`
+
+   In `HVPA` mode, the Shoot Kubernetes API server is scaled by the [hvpa-controller](https://github.com/gardener/hvpa-controller). The gardenlet is creating an `HVPA` resource for the API server. The `HVPA` resource is backed by HPA and VPA both in recommend-only mode. The hvpa-controller is responsible for enabling simultaneous horizontal and vertical scaling by incorporating the recommendations from the HPA and VPA.
+
+   The initial API server resource requests are `500m` and `1Gi`.
+
+   The `HVPA` mode is the used autoscaling mode when the `HVPA` feature gate is enabled (and the `VPAAndHPAForAPIServer` feature gate is disabled).
+
+- `VPAAndHPA`
+
+   In `VPAAndHPA` mode, the Shoot Kubernetes API server is scaled simultaneously by VPA on CPU and memory utilization and by HPA on CPU and memory usage. The gardenlet configures the VPA and HPA resources in such a way that the VPA's `maxAllowed` CPU and memory values are a little smaller than the HPA's average usage target. This allows VPA to scale vertically on the Pod's CPU and memory requests. Once all Pods on average exceed the `maxAllowed` CPU/memory, HPA scales horizontally (by adding a new replica).
+
+   The `VPAAndHPA` mode is introduced to address disadvantages with HVPA: additional component; modifies the deployment triggering unnecessary rollouts; vertical scaling only at max replicas; stuck vertical resource requests when scaling in again; etc.
+
+   The initial API server resource requests are `250m` and `500Mi`.
+
+   The `VPAAndHPA` mode is the used autoscaling mode when the `VPAAndHPAForAPIServer` feature gate is enabled (takes precedence over the `HVPA` feature gate).
+
+The API server's replica count in all scaling modes varies between 2 and 3. The min replicas count of 2 is imposed by the [High Availability of Shoot Control Plane Components](../development/high-availability.md#control-plane-components).
+
+The gardenlet sets the initial API server resource requests only when the Deployment is not found. When the Deployment exists, it is not overwriting the kube-apiserver container resources.

From 2caae510555507424f17a2f6e6fb63cbe6d421f4 Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Wed, 8 May 2024 14:42:59 +0300
Subject: [PATCH 05/10] Address review comments from voelzmo

---
 .../kubernetes/apiserver/apiserver_test.go    | 22 ++++++++++---------
 .../apiserver/verticalpodautoscaler.go        | 20 ++++++++---------
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/pkg/component/kubernetes/apiserver/apiserver_test.go b/pkg/component/kubernetes/apiserver/apiserver_test.go
index de06cc101b1..40ec9db8eb9 100644
--- a/pkg/component/kubernetes/apiserver/apiserver_test.go
+++ b/pkg/component/kubernetes/apiserver/apiserver_test.go
@@ -365,7 +365,7 @@ var _ = Describe("KubeAPIServer", func() {
 			})
 
 			DescribeTable("should successfully deploy the VPA resource",
-				func(autoscalingConfig apiserver.AutoscalingConfig, haVPN bool, vpaUpdateMode *vpaautoscalingv1.UpdateMode, containerPolicies []vpaautoscalingv1.ContainerResourcePolicy, evictionRequirements []*vpaautoscalingv1.EvictionRequirement) {
+				func(autoscalingConfig apiserver.AutoscalingConfig, haVPN bool, annotations, labels map[string]string, vpaUpdateMode *vpaautoscalingv1.UpdateMode, containerPolicies []vpaautoscalingv1.ContainerResourcePolicy) {
 					kapi = New(kubernetesInterface, namespace, sm, Values{
 						Values: apiserver.Values{
 							Autoscaling:    autoscalingConfig,
@@ -385,6 +385,8 @@ var _ = Describe("KubeAPIServer", func() {
 						ObjectMeta: metav1.ObjectMeta{
 							Name:            verticalPodAutoscaler.Name,
 							Namespace:       verticalPodAutoscaler.Namespace,
+							Annotations:     annotations,
+							Labels:          labels,
 							ResourceVersion: "1",
 						},
 						Spec: vpaautoscalingv1.VerticalPodAutoscalerSpec{
@@ -394,8 +396,7 @@ var _ = Describe("KubeAPIServer", func() {
 								Name:       "kube-apiserver",
 							},
 							UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
-								UpdateMode:           vpaUpdateMode,
-								EvictionRequirements: evictionRequirements,
+								UpdateMode: vpaUpdateMode,
 							},
 							ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
 								ContainerPolicies: containerPolicies,
@@ -407,6 +408,8 @@ var _ = Describe("KubeAPIServer", func() {
 				Entry("autoscaling mode is baseline",
 					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeBaseline},
 					false,
+					nil,
+					nil,
 					ptr.To(vpaautoscalingv1.UpdateModeOff),
 					[]vpaautoscalingv1.ContainerResourcePolicy{
 						{
@@ -414,11 +417,12 @@ var _ = Describe("KubeAPIServer", func() {
 							ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
 						},
 					},
-					nil,
 				),
 				Entry("autoscaling mode is VPAAndHPA",
 					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA},
 					false,
+					nil,
+					nil,
 					ptr.To(vpaautoscalingv1.UpdateModeAuto),
 					[]vpaautoscalingv1.ContainerResourcePolicy{
 						{
@@ -434,11 +438,12 @@ var _ = Describe("KubeAPIServer", func() {
 							},
 						},
 					},
-					nil,
 				),
 				Entry("autoscaling mode is VPAAndHPA and HA VPN is enabled",
 					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA},
 					true,
+					nil,
+					nil,
 					ptr.To(vpaautoscalingv1.UpdateModeAuto),
 					[]vpaautoscalingv1.ContainerResourcePolicy{
 						{
@@ -475,11 +480,12 @@ var _ = Describe("KubeAPIServer", func() {
 							},
 						},
 					},
-					nil,
 				),
 				Entry("autoscaling mode is VPAAndHPA and scale-down is disabled",
 					apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA, ScaleDownDisabled: true},
 					false,
+					map[string]string{"eviction-requirements.autoscaling.gardener.cloud/downscale-restriction": "never"},
+					map[string]string{"autoscaling.gardener.cloud/eviction-requirements": "managed-by-controller"},
 					ptr.To(vpaautoscalingv1.UpdateModeAuto),
 					[]vpaautoscalingv1.ContainerResourcePolicy{
 						{
@@ -495,10 +501,6 @@ var _ = Describe("KubeAPIServer", func() {
 							},
 						},
 					},
-					[]*vpaautoscalingv1.EvictionRequirement{{
-						Resources:         []corev1.ResourceName{corev1.ResourceMemory, corev1.ResourceCPU},
-						ChangeRequirement: vpaautoscalingv1.TargetHigherThanRequests,
-					}},
 				),
 			)
 
diff --git a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
index 89fbdd68e5a..e2046878ab3 100644
--- a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
@@ -73,14 +73,6 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx contex
 		corev1.ResourceMemory: resource.MustParse("28G"),
 	}
 
-	var evictionRequirements []*vpaautoscalingv1.EvictionRequirement
-	if k.values.Autoscaling.ScaleDownDisabled {
-		evictionRequirements = []*vpaautoscalingv1.EvictionRequirement{{
-			Resources:         []corev1.ResourceName{corev1.ResourceMemory, corev1.ResourceCPU},
-			ChangeRequirement: vpaautoscalingv1.TargetHigherThanRequests,
-		}}
-	}
-
 	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), verticalPodAutoscaler, func() error {
 		verticalPodAutoscaler.Spec = vpaautoscalingv1.VerticalPodAutoscalerSpec{
 			TargetRef: &autoscalingv1.CrossVersionObjectReference{
@@ -89,13 +81,21 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx contex
 				Name:       deployment.Name,
 			},
 			UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
-				UpdateMode:           &updateMode,
-				EvictionRequirements: evictionRequirements,
+				UpdateMode: &updateMode,
 			},
 			ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
 				ContainerPolicies: k.computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed),
 			},
 		}
+
+		if k.values.Autoscaling.ScaleDownDisabled {
+			metav1.SetMetaDataLabel(&verticalPodAutoscaler.ObjectMeta, v1beta1constants.LabelVPAEvictionRequirementsController, v1beta1constants.EvictionRequirementManagedByController)
+			metav1.SetMetaDataAnnotation(&verticalPodAutoscaler.ObjectMeta, v1beta1constants.AnnotationVPAEvictionRequirementDownscaleRestriction, v1beta1constants.EvictionRequirementNever)
+		} else {
+			delete(verticalPodAutoscaler.GetLabels(), v1beta1constants.LabelVPAEvictionRequirementsController)
+			delete(verticalPodAutoscaler.GetAnnotations(), v1beta1constants.AnnotationVPAEvictionRequirementDownscaleRestriction)
+		}
+
 		return nil
 	})
 

From f205c8f099134d138521d1d76671d42d48c07bdd Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Wed, 8 May 2024 15:33:16 +0300
Subject: [PATCH 06/10] Address review comments from rfranzke

---
 docs/README.md                                |  5 ++--
 ...gins.md => apiserver-admission-plugins.md} |  0
 .../{gardener-apiserver.md => apiserver.md}   |  0
 docs/concepts/etcd.md                         |  8 ------
 .../autoscaling-specifics-for-components.md}  | 25 +++++++++++++++++--
 .../operation/botanist/kubeapiserver.go       |  4 +--
 6 files changed, 26 insertions(+), 16 deletions(-)
 rename docs/concepts/{gardener-apiserver-admission-plugins.md => apiserver-admission-plugins.md} (100%)
 rename docs/concepts/{gardener-apiserver.md => apiserver.md} (100%)
 rename docs/{concepts/kubernetes-apiserver.md => development/autoscaling-specifics-for-components.md} (67%)

diff --git a/docs/README.md b/docs/README.md
index 9aa496d760d..c0c2414a755 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -10,8 +10,8 @@
 ## Concepts
 
 * Components
-  * [Gardener API server](concepts/gardener-apiserver.md)
-    * [In-Tree admission plugins](concepts/gardener-apiserver-admission-plugins.md)
+  * [Gardener API server](concepts/apiserver.md)
+    * [In-Tree admission plugins](concepts/apiserver-admission-plugins.md)
   * [Gardener Controller Manager](concepts/controller-manager.md)
   * [Gardener Scheduler](concepts/scheduler.md)
   * [Gardener Admission Controller](concepts/admission-controller.md)
@@ -21,7 +21,6 @@
   * [Gardenlet](concepts/gardenlet.md)
 * [Backup Restore](concepts/backup-restore.md)
 * [etcd](concepts/etcd.md)
-* [Kubernetes API Server](concepts/kubernetes-apiserver.md)
 * [Relation between Gardener API and Cluster API](concepts/cluster-api.md)
 
 ## Usage
diff --git a/docs/concepts/gardener-apiserver-admission-plugins.md b/docs/concepts/apiserver-admission-plugins.md
similarity index 100%
rename from docs/concepts/gardener-apiserver-admission-plugins.md
rename to docs/concepts/apiserver-admission-plugins.md
diff --git a/docs/concepts/gardener-apiserver.md b/docs/concepts/apiserver.md
similarity index 100%
rename from docs/concepts/gardener-apiserver.md
rename to docs/concepts/apiserver.md
diff --git a/docs/concepts/etcd.md b/docs/concepts/etcd.md
index 446b2fc15df..e51c71620de 100644
--- a/docs/concepts/etcd.md
+++ b/docs/concepts/etcd.md
@@ -35,14 +35,6 @@ When a [`gardenlet`](gardenlet.md) reconciles a `Shoot` resource or a [`gardener
 `etcd-druid` needs to manage the lifecycle of the desired etcd instance (today `main` or `events`).
 Likewise, when the `Shoot` or `Garden` is deleted, `gardenlet` or `gardener-operator` deletes the `Etcd` resources and [etcd Druid](https://github.com/gardener/etcd-druid/) takes care of cleaning up all related objects, e.g. the backing `StatefulSet`s.
 
-## Autoscaling
-
-Gardenlet maintains [`HVPA`](https://github.com/gardener/hvpa-controller/blob/master/config/samples/autoscaling_v1alpha1_hvpa.yaml) objects for etcd `StatefulSet`s if the corresponding [feature gate](../deployment/feature_gates.md) is enabled.
-This enables a vertical scaling for etcd.
-Downscaling is handled more pessimistically to prevent many subsequent etcd restarts.
-Thus, for `production` and `infrastructure` shoot clusters (or all garden clusters), downscaling is deactivated for the main etcd.
-For all other shoot clusters, lower advertised requests/limits are only applied during a shoot's maintenance time window.
-
 ## Backup
 
 If `Seed`s specify backups for etcd ([example](../../example/50-seed.yaml)), then Gardener and the respective [provider extensions](../extensions/overview.md) are responsible for creating a bucket on the cloud provider's side (modelled through a [BackupBucket resource](../extensions/backupbucket.md)).
diff --git a/docs/concepts/kubernetes-apiserver.md b/docs/development/autoscaling-specifics-for-components.md
similarity index 67%
rename from docs/concepts/kubernetes-apiserver.md
rename to docs/development/autoscaling-specifics-for-components.md
index 79bd957b893..0c8008e8415 100644
--- a/docs/concepts/kubernetes-apiserver.md
+++ b/docs/development/autoscaling-specifics-for-components.md
@@ -1,10 +1,31 @@
 ---
-title: Kubernetes API Server
+title: Autoscaling Specifics for Components
 ---
 
 ## Overview
 
-The Kubernetes API Server is a component of the Kubernetes control plane that exposes the Kubernetes API. It is the gateway to the Kubernetes cluster - the central touch point that is accessed by all users, automation, and components in the Kubernetes cluster.
+This document describes the autoscaling mechanisms used for several components.
+
+## Garden or Shoot Cluster etcd
+
+By default, if none of the autoscaling modes is requested, etcd is deployed with static resources, without autoscaling.
+
+However, there are two supported autoscaling modes for the Garden or Shoot cluster etcd.
+
+- `HVPA`
+
+   In `HVPA` mode, the etcd is scaled by the [hvpa-controller](https://github.com/gardener/hvpa-controller). The gardenlet/gardener-operator is creating an `HVPA` resource for the etcd (`main` or `events`).
+   The `HVPA` enables a vertical scaling for etcd.
+
+   The `HVPA` mode is the used autoscaling mode when the `HVPA` feature gate is enabled (and the `VPAForETCD` feature gate is disabled).
+
+- `VPA`
+
+   In `VPA` mode, the etcd is scaled by a native `VPA` resource.
+
+   The `VPA` mode is the used autoscaling mode when the `VPAForETCD` feature gate is enabled (takes precedence over the `HVPA` feature gate).
+
+For both autoscaling modes, downscaling is handled more pessimistically to prevent many subsequent etcd restarts. Thus, for `production` and `infrastructure` Shoot clusters (or all Garden clusters), downscaling is deactivated for the main etcd. For all other Shoot clusters, lower advertised requests/limits are only applied during the Shoot's maintenance time window.
 
 ## Shoot Kubernetes API Server Autoscaling
 
diff --git a/pkg/gardenlet/operation/botanist/kubeapiserver.go b/pkg/gardenlet/operation/botanist/kubeapiserver.go
index da7beb6605a..bec1a6f5177 100644
--- a/pkg/gardenlet/operation/botanist/kubeapiserver.go
+++ b/pkg/gardenlet/operation/botanist/kubeapiserver.go
@@ -162,11 +162,9 @@ func (b *Botanist) autoscalingMode() apiserver.AutoscalingMode {
 		return apiserver.AutoscalingModeVPAAndHPA
 	}
 
-	var hvpaEnabled bool
+	hvpaEnabled := features.DefaultFeatureGate.Enabled(features.HVPA)
 	if b.ManagedSeed != nil {
 		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPAForShootedSeed)
-	} else {
-		hvpaEnabled = features.DefaultFeatureGate.Enabled(features.HVPA)
 	}
 
 	if hvpaEnabled {

From d6c7852e431db55e23c75f2b157f1e322ecd6dfd Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Thu, 9 May 2024 14:17:15 +0300
Subject: [PATCH 07/10] Address review comments from voelzmo (2)

---
 docs/deployment/feature_gates.md              | 26 +++++++++----------
 .../autoscaling-specifics-for-components.md   |  2 +-
 pkg/component/shared/kubeapiserver.go         |  6 ++---
 pkg/features/features.go                      |  6 +++--
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/docs/deployment/feature_gates.md b/docs/deployment/feature_gates.md
index 7c265889e3b..f32bfcf6995 100644
--- a/docs/deployment/feature_gates.md
+++ b/docs/deployment/feature_gates.md
@@ -191,16 +191,16 @@ A *General Availability* (GA) feature is also referred to as a *stable* feature.
 
 ## List of Feature Gates
 
-| Feature                            | Relevant Components               | Description                                                                                                                                                                                                                                                                                                                                                                        |
-|------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| HVPA                               | `gardenlet`, `gardener-operator`  | Enables simultaneous horizontal and vertical scaling in garden or seed clusters.                                                                                                                                                                                                                                                                                                   |
-| HVPAForShootedSeed                 | `gardenlet`                       | Enables simultaneous horizontal and vertical scaling in managed seed (aka "shooted seed") clusters.                                                                                                                                                                                                                                                                                |
-| DefaultSeccompProfile              | `gardenlet`, `gardener-operator`  | Enables the defaulting of the seccomp profile for Gardener managed workload in the garden or seed to `RuntimeDefault`.                                                                                                                                                                                                                                                             |
-| CoreDNSQueryRewriting              | `gardenlet`                       | Enables automatic DNS query rewriting in shoot cluster's CoreDNS to shortcut name resolution of fully qualified in-cluster and out-of-cluster names, which follow a user-defined pattern. Details can be found in [DNS Search Path Optimization](../usage/dns-search-path-optimization.md).                                                                                        |
-| IPv6SingleStack                    | `gardener-apiserver`, `gardenlet` | Allows creating seed and shoot clusters with [IPv6 single-stack networking](../usage/ipv6.md) enabled in their spec ([GEP-21](../proposals/21-ipv6-singlestack-local.md)). If enabled in gardenlet, the default behavior is unchanged, but setting `ipFamilies=[IPv6]` in the `seedConfig` is allowed. Only if the `ipFamilies` setting is changed, gardenlet behaves differently. |
-| MutableShootSpecNetworkingNodes    | `gardener-apiserver`              | Allows updating the field `spec.networking.nodes`. The validity of the values has to be checked in the provider extensions. Only enable this feature gate when your system runs provider extensions which have implemented the validation.                                                                                                                                         |
-| ShootForceDeletion                 | `gardener-apiserver`              | Allows forceful deletion of Shoots by annotating them with the `confirmation.gardener.cloud/force-deletion` annotation.                                                                                                                                                                                                                                                            |
-| UseNamespacedCloudProfile          | `gardener-apiserver`              | Enables usage of `NamespacedCloudProfile`s in `Shoot`s.                                                                                                                                                                                                                                                                                                                            |
-| ShootManagedIssuer                 | `gardenlet`                       | Enables the shoot managed issuer functionality described in GEP 24.                                                                                                                                                                                                                                                                                                                |
-| VPAForETCD                         | `gardenlet`, `gardener-operator`  | Enables VPA for `etcd-main` and `etcd-events`, regardless of HVPA enablement.                                                                                                                                                                                                                                                                                                      |
-| VPAAndHPAForAPIServer              | `gardenlet`                       | Enables an autoscaling mechanism for shoot kube-apiserver where it is scaled simultaneously by VPA on CPU and memory utilization and by HPA - on CPU and memory usage. The feature gate takes precedence over the `HVPA` feature gate when they are both enabled.                                                                                                                  |
+| Feature                         | Relevant Components               | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+|---------------------------------|-----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| HVPA                            | `gardenlet`, `gardener-operator`  | Enables simultaneous horizontal and vertical scaling in garden or seed clusters.                                                                                                                                                                                                                                                                                                                                                                                                                          |
+| HVPAForShootedSeed              | `gardenlet`                       | Enables simultaneous horizontal and vertical scaling in managed seed (aka "shooted seed") clusters.                                                                                                                                                                                                                                                                                                                                                                                                       |
+| DefaultSeccompProfile           | `gardenlet`, `gardener-operator`  | Enables the defaulting of the seccomp profile for Gardener managed workload in the garden or seed to `RuntimeDefault`.                                                                                                                                                                                                                                                                                                                                                                                    |
+| CoreDNSQueryRewriting           | `gardenlet`                       | Enables automatic DNS query rewriting in shoot cluster's CoreDNS to shortcut name resolution of fully qualified in-cluster and out-of-cluster names, which follow a user-defined pattern. Details can be found in [DNS Search Path Optimization](../usage/dns-search-path-optimization.md).                                                                                                                                                                                                               |
+| IPv6SingleStack                 | `gardener-apiserver`, `gardenlet` | Allows creating seed and shoot clusters with [IPv6 single-stack networking](../usage/ipv6.md) enabled in their spec ([GEP-21](../proposals/21-ipv6-singlestack-local.md)). If enabled in gardenlet, the default behavior is unchanged, but setting `ipFamilies=[IPv6]` in the `seedConfig` is allowed. Only if the `ipFamilies` setting is changed, gardenlet behaves differently.                                                                                                                        |
+| MutableShootSpecNetworkingNodes | `gardener-apiserver`              | Allows updating the field `spec.networking.nodes`. The validity of the values has to be checked in the provider extensions. Only enable this feature gate when your system runs provider extensions which have implemented the validation.                                                                                                                                                                                                                                                                |
+| ShootForceDeletion              | `gardener-apiserver`              | Allows forceful deletion of Shoots by annotating them with the `confirmation.gardener.cloud/force-deletion` annotation.                                                                                                                                                                                                                                                                                                                                                                                   |
+| UseNamespacedCloudProfile       | `gardener-apiserver`              | Enables usage of `NamespacedCloudProfile`s in `Shoot`s.                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| ShootManagedIssuer              | `gardenlet`                       | Enables the shoot managed issuer functionality described in GEP 24.                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| VPAForETCD                      | `gardenlet`, `gardener-operator`  | Enables VPA for `etcd-main` and `etcd-events`, regardless of HVPA enablement.                                                                                                                                                                                                                                                                                                                                                                                                                             |
+| VPAAndHPAForAPIServer           | `gardenlet`                       | Enables an autoscaling mechanism for shoot kube-apiserver where it is scaled simultaneously by VPA and HPA on the same metric (CPU and memory usage). The pod-trashing cycle between VPA and HPA scaling on the same metric is avoided by configuring the HPA to scale on average usage (not on average utilization) and by picking the target average utilization values in sync with VPA's allowed maximums. The feature gate takes precedence over the `HVPA` feature gate when they are both enabled. |
diff --git a/docs/development/autoscaling-specifics-for-components.md b/docs/development/autoscaling-specifics-for-components.md
index 0c8008e8415..39c2ebed2cf 100644
--- a/docs/development/autoscaling-specifics-for-components.md
+++ b/docs/development/autoscaling-specifics-for-components.md
@@ -56,7 +56,7 @@ There are three supported autoscaling modes for the Shoot Kubernetes API Server.
 
 - `VPAAndHPA`
 
-   In `VPAAndHPA` mode, the Shoot Kubernetes API server is scaled simultaneously by VPA on CPU and memory utilization and by HPA - on CPU and memory usage. The gardenlet configures VPA and HPA resources in a such a way that the VPA's `maxAllowed` CPU and memory values are a little smaller than the HPA's average usage target. This allows VPA to scale vertically on the Pod's CPU and memory requests. Once all Pods on average exceed the maxAllowed CPU/memory, HPA is scaling horizontally (by adding a new replica).
+   In `VPAAndHPA` mode, the Shoot Kubernetes API server is scaled simultaneously by VPA and HPA on the same metric (CPU and memory usage). The pod-trashing cycle between VPA and HPA scaling on the same metric is avoided by configuring the HPA to scale on average usage (not on average utilization) and by picking the target average utilization values in sync with VPA's allowed maximums. This makes possible VPA to first scale vertically on CPU/memory usage. Once all Pods' average CPU/memory usage is close to exceed the VPA's allowed maximum CPU/memory (the HPA's target average utilization, 10-20% less than VPA's allowed maximums), HPA is scaling horizontally (by adding a new replica).
 
    The `VPAAndHPA` mode is introduced to address disadvantages with HVPA: additional component; modifies the deployment triggering unnecessary rollouts; vertical scaling only at max replicas; stuck vertical resource requests when scaling in again; etc.
 
diff --git a/pkg/component/shared/kubeapiserver.go b/pkg/component/shared/kubeapiserver.go
index 54b7b3f97dc..881e55ff963 100644
--- a/pkg/component/shared/kubeapiserver.go
+++ b/pkg/component/shared/kubeapiserver.go
@@ -225,9 +225,9 @@ func DeployKubeAPIServer(
 	// - When autoscaling mode is HVPA. hvpa-controller updates the resources in Deployment spec directly.
 	//   Overwriting the resources in the Deployment spec would revert hvpa-controller's recommendations.
 	// - When scale-down is disabled, operators might want to overwrite the kube-apiserver container resource requests.
-	// - When transitioning from HVPA to HPAAndVPA autoscaling mode, we need to preserve the kube-apiserver container resources
-	//   to do not cause an unwanted rollout that might be breaking. Otherwise, we would scale down from the potentially
-	//   high resource requests (set by HVPA) to the initial resource requests in HPAAndVPA mode.
+	// - When transitioning from HVPA to VPAAndHPA autoscaling mode, we need to preserve the kube-apiserver container resources
+	//   to avoid causing an unwanted Deployment rollout by overwriting the container resources set by the HVPA with the initial
+	//   resources in VPAAndHPA mode.
 	if deployment != nil {
 		for _, container := range deployment.Spec.Template.Spec.Containers {
 			if container.Name == kubeapiserver.ContainerNameKubeAPIServer {
diff --git a/pkg/features/features.go b/pkg/features/features.go
index 6cd160ef54c..e95015ed028 100644
--- a/pkg/features/features.go
+++ b/pkg/features/features.go
@@ -72,8 +72,10 @@ const (
 	// alpha: v1.93.0
 	ShootManagedIssuer featuregate.Feature = "ShootManagedIssuer"
 
-	// VPAAndHPAForAPIServer enables an autoscaling mechanism for shoot kube-apiserver
-	// where it is scaled simultaneously by VPA on CPU and memory utilization and by HPA - on CPU and memory usage.
+	// VPAAndHPAForAPIServer enables an autoscaling mechanism for shoot kube-apiserver where it is scaled simultaneously
+	// by VPA and HPA on the same metric (CPU and memory usage). The pod-thrashing cycle between VPA and HPA scaling on
+	// the same metric is avoided by configuring the HPA to scale on average usage (not on average utilization) and
+	// by picking the target average utilization values in sync with VPA's allowed maximums.
 	// The feature gate takes precedence over the `HVPA` feature gate when they are both enabled.
 	// owner: @ialidzhikov
 	// alpha: v1.95.0

From 756e37eff3de6daa8f2a1201ea7ce6d2dfe361d7 Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Fri, 10 May 2024 15:59:11 +0300
Subject: [PATCH 08/10] Minor nits

---
 .../autoscaling-specifics-for-components.md         |  8 +++++---
 .../kubernetes/apiserver/horizontalpodautoscaler.go |  4 ++--
 .../kubernetes/apiserver/verticalpodautoscaler.go   | 13 +++++--------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/docs/development/autoscaling-specifics-for-components.md b/docs/development/autoscaling-specifics-for-components.md
index 39c2ebed2cf..9c8b5e20a90 100644
--- a/docs/development/autoscaling-specifics-for-components.md
+++ b/docs/development/autoscaling-specifics-for-components.md
@@ -27,9 +27,9 @@ However, there are two supported autoscaling modes for the Garden or Shoot clust
 
 For both of the autoscaling modes downscaling is handled more pessimistically to prevent many subsequent etcd restarts. Thus, for `production` and `infrastructure` Shoot clusters (or all Garden clusters), downscaling is deactivated for the main etcd. For all other Shoot clusters, lower advertised requests/limits are only applied during the Shoot's maintenance time window.
 
-## Shoot Kubernetes API Server Autoscaling
+## Shoot Kubernetes API Server
 
-There are three supported autoscaling modes for the Shoot Kubernetes API Server.
+There are three supported autoscaling modes for the Shoot Kubernetes API server.
 
 - `Baseline`
 
@@ -51,16 +51,18 @@ There are three supported autoscaling modes for the Shoot Kubernetes API Server.
    In `HVPA` mode, the Shoot Kubernetes API server is scaled by the [hvpa-controller](https://github.com/gardener/hvpa-controller). The gardenlet is creating an `HVPA` resource for the API server. The `HVPA` resource is backed by HPA and VPA both in recommend-only mode. The hvpa-controller is responsible for enabling simultaneous horizontal and vertical scaling by incorporating the recommendations from the HPA and VPA.
 
    The initial API server resource requests are `500m` and `1Gi`.
+   HVPA's HPA is scaling only on CPU (average utilization 80%). HVPA's VPA max allowed values are `8` CPU and `25G`.
 
    The `HVPA` mode is the used autoscaling mode when the `HVPA` feature gate is enabled (and the `VPAAndHPAForAPIServer` feature gate is disabled).
 
 - `VPAAndHPA`
 
-   In `VPAAndHPA` mode, the Shoot Kubernetes API server is scaled simultaneously by VPA and HPA on the same metric (CPU and memory usage). The pod-trashing cycle between VPA and HPA scaling on the same metric is avoided by configuring the HPA to scale on average usage (not on average utilization) and by picking the target average utilization values in sync with VPA's allowed maximums. This makes possible VPA to first scale vertically on CPU/memory usage. Once all Pods' average CPU/memory usage is close to exceed the VPA's allowed maximum CPU/memory (the HPA's target average utilization, 10-20% less than VPA's allowed maximums), HPA is scaling horizontally (by adding a new replica).
+   In `VPAAndHPA` mode, the Shoot Kubernetes API server is scaled simultaneously by VPA and HPA on the same metric (CPU and memory usage). The pod-thrashing cycle between VPA and HPA scaling on the same metric is avoided by configuring the HPA to scale on average usage (not on average utilization) and by picking the target average usage values in sync with VPA's allowed maximums. This makes it possible for VPA to first scale vertically on CPU/memory usage. Once all Pods' average CPU/memory usage is close to exceeding the VPA's allowed maximum CPU/memory (i.e., it reaches the HPA's target average usage, which is 1/7 less than VPA's allowed maximums), HPA scales horizontally (by adding a new replica).
 
    The `VPAAndHPA` mode is introduced to address disadvantages with HVPA: additional component; modifies the deployment triggering unnecessary rollouts; vertical scaling only at max replicas; stuck vertical resource requests when scaling in again; etc.
 
    The initial API server resource requests are `250m` and `500Mi`.
+   VPA's max allowed values are `7` CPU and `28G`. HPA's average target usage values are `6` CPU and `24G`.
 
    The `VPAAndHPA` mode is the used autoscaling mode when the `VPAAndHPAForAPIServer` feature gate is enabled (takes precedence over the `HVPA` feature gate).
 
diff --git a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
index 0469c35c439..fd3285de400 100644
--- a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
@@ -90,9 +90,9 @@ func (k *kubeAPIServer) reconcileHorizontalPodAutoscalerInBaselineMode(ctx conte
 }
 
 func (k *kubeAPIServer) reconcileHorizontalPodAutoscalerInVPAAndHPAMode(ctx context.Context, hpa *autoscalingv2.HorizontalPodAutoscaler, deployment *appsv1.Deployment) error {
-	// The chosen value is 6 CPU: 1 CPU less than the VPA's maxAllowed 7 CPU in VPAAndHPA mode to have a headroom for the horizontal scaling.
+	// The chosen value is 6 CPU: 1 CPU less (ratio 1/7) than the VPA's maxAllowed 7 CPU in VPAAndHPA mode to have a headroom for the horizontal scaling.
 	hpaTargetAverageValueCPU := resource.MustParse("6")
-	// The chosen value is 24G: 4G less than the VPA's maxAllowed 28G in VPAAndHPA mode to have a headroom for the horizontal scaling.
+	// The chosen value is 24G: 4G less (ratio 1/7) than the VPA's maxAllowed 28G in VPAAndHPA mode to have a headroom for the horizontal scaling.
 	hpaTargetAverageValueMemory := resource.MustParse("24G")
 
 	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), hpa, func() error {
diff --git a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
index e2046878ab3..5532846f505 100644
--- a/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/verticalpodautoscaler.go
@@ -13,6 +13,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	vpaautoscalingv1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/utils/ptr"
 
 	v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
 	"github.com/gardener/gardener/pkg/component/apiserver"
@@ -25,7 +26,7 @@ func (k *kubeAPIServer) emptyVerticalPodAutoscaler() *vpaautoscalingv1.VerticalP
 }
 
 func (k *kubeAPIServer) reconcileVerticalPodAutoscaler(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
-	switch mode := k.values.Autoscaling.Mode; mode {
+	switch k.values.Autoscaling.Mode {
 	case apiserver.AutoscalingModeHVPA:
 		return kubernetesutils.DeleteObject(ctx, k.client.Client(), verticalPodAutoscaler)
 	case apiserver.AutoscalingModeVPAAndHPA:
@@ -36,9 +37,6 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscaler(ctx context.Context, vert
 }
 
 func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInBaselineMode(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
-	vpaUpdateMode := vpaautoscalingv1.UpdateModeOff
-	controlledValues := vpaautoscalingv1.ContainerControlledValuesRequestsOnly
-
 	_, err := controllerutils.GetAndCreateOrMergePatch(ctx, k.client.Client(), verticalPodAutoscaler, func() error {
 		verticalPodAutoscaler.Spec = vpaautoscalingv1.VerticalPodAutoscalerSpec{
 			TargetRef: &autoscalingv1.CrossVersionObjectReference{
@@ -47,12 +45,12 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInBaselineMode(ctx context
 				Name:       deployment.Name,
 			},
 			UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
-				UpdateMode: &vpaUpdateMode,
+				UpdateMode: ptr.To(vpaautoscalingv1.UpdateModeOff),
 			},
 			ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
 				ContainerPolicies: []vpaautoscalingv1.ContainerResourcePolicy{{
 					ContainerName:    vpaautoscalingv1.DefaultContainerResourcePolicy,
-					ControlledValues: &controlledValues,
+					ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
 				}},
 			},
 		}
@@ -62,7 +60,6 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInBaselineMode(ctx context
 }
 
 func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx context.Context, verticalPodAutoscaler *vpaautoscalingv1.VerticalPodAutoscaler, deployment *appsv1.Deployment) error {
-	updateMode := vpaautoscalingv1.UpdateModeAuto
 	kubeAPIServerMinAllowed := corev1.ResourceList{
 		corev1.ResourceCPU:    resource.MustParse("20m"),
 		corev1.ResourceMemory: resource.MustParse("200M"),
@@ -81,7 +78,7 @@ func (k *kubeAPIServer) reconcileVerticalPodAutoscalerInVPAAndHPAMode(ctx contex
 				Name:       deployment.Name,
 			},
 			UpdatePolicy: &vpaautoscalingv1.PodUpdatePolicy{
-				UpdateMode: &updateMode,
+				UpdateMode: ptr.To(vpaautoscalingv1.UpdateModeAuto),
 			},
 			ResourcePolicy: &vpaautoscalingv1.PodResourcePolicy{
 				ContainerPolicies: k.computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed),

From 1433c5f02a925019dfb441eee33fea43d592ab3e Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Tue, 14 May 2024 15:22:34 +0300
Subject: [PATCH 09/10] Address review comments from voelzmo (3)

---
 .../autoscaling-specifics-for-components.md   | 12 +++++
 .../apiserver/horizontalpodautoscaler.go      |  2 +-
 pkg/component/shared/kubeapiserver_test.go    | 46 ++++++++++++++++++-
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/docs/development/autoscaling-specifics-for-components.md b/docs/development/autoscaling-specifics-for-components.md
index 9c8b5e20a90..e76fafd5836 100644
--- a/docs/development/autoscaling-specifics-for-components.md
+++ b/docs/development/autoscaling-specifics-for-components.md
@@ -69,3 +69,15 @@ There are three supported autoscaling modes for the Shoot Kubernetes API server.
 The API server's replica count in all scaling modes varies between 2 and 3. The min replicas count of 2 is imposed by the [High Availability of Shoot Control Plane Components](../development/high-availability.md#control-plane-components).
 
 The gardenlet sets the initial API server resource requests only when the Deployment is not found. When the Deployment exists, it is not overwriting the kube-apiserver container resources.
+
+## Disabling Scale Down for Components in the Shoot Control Plane
+
+Some Shoot clusters' control plane components can be overloaded and can have very high resource usage. The existing autoscaling solution could be imperfect to cover these cases. Scale down actions for such overloaded components could be disruptive.
+
+To prevent such disruptive scale-down actions it is possible to disable scale down of the etcd, Kubernetes API server and Kubernetes controller manager in the Shoot control plane by annotating the Shoot with `alpha.control-plane.scaling.shoot.gardener.cloud/scale-down-disabled=true`.
+
+The following specifics apply when disabling scale-down for the Kubernetes API server component:
+- In `Baseline` and `HVPA` modes the HPA's min and max replicas count are set to 4.
+- In `VPAAndHPA` mode, if the HPA resource exists and HPA's `spec.minReplicas` is not nil, then the min replicas count is `max(spec.minReplicas, status.desiredReplicas)`. When scale-down is disabled, this allows operators to specify a custom value for HPA `spec.minReplicas` without this value being reverted by gardenlet. I.e., HPA _does_ scale down to min replicas but not below min replicas. HPA's max replicas count is 4.
+
+> Note: The `alpha.control-plane.scaling.shoot.gardener.cloud/scale-down-disabled` annotation is alpha and can be removed anytime without further notice. Only use it if you know what you are doing.
diff --git a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
index fd3285de400..e318bd8e566 100644
--- a/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
+++ b/pkg/component/kubernetes/apiserver/horizontalpodautoscaler.go
@@ -99,7 +99,7 @@ func (k *kubeAPIServer) reconcileHorizontalPodAutoscalerInVPAAndHPAMode(ctx cont
 		minReplicas := k.values.Autoscaling.MinReplicas
 		if k.values.Autoscaling.ScaleDownDisabled && hpa.Spec.MinReplicas != nil {
 			// If scale-down is disabled and the HPA resource exists and HPA's spec.minReplicas is not nil,
-			// then minReplicas is max(spec.minReplicas, status.desiredReplcias).
+			// then minReplicas is max(spec.minReplicas, status.desiredReplicas).
 			// When scale-down is disabled, this allows operators to specify a custom value for HPA spec.minReplicas
 			// and this value not to be reverted by gardenlet.
 			minReplicas = max(*hpa.Spec.MinReplicas, hpa.Status.DesiredReplicas)
diff --git a/pkg/component/shared/kubeapiserver_test.go b/pkg/component/shared/kubeapiserver_test.go
index b99443f17c2..ed5c0b176c5 100644
--- a/pkg/component/shared/kubeapiserver_test.go
+++ b/pkg/component/shared/kubeapiserver_test.go
@@ -980,7 +980,7 @@ exemptions:
 				apiserver.AutoscalingConfig{},
 				nil,
 			),
-			Entry("set the existing requirements when the deployment is found",
+			Entry("set the existing requirements when the deployment is found and autoscaling mode is HVPA",
 				func() {
 					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
 						ObjectMeta: metav1.ObjectMeta{
@@ -1002,6 +1002,50 @@ exemptions:
 				apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeHVPA},
 				&apiServerResources,
 			),
+			Entry("set the existing requirements when the deployment is found and scale-down is disabled",
+				func() {
+					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      "kube-apiserver",
+							Namespace: namespace,
+						},
+						Spec: appsv1.DeploymentSpec{
+							Template: corev1.PodTemplateSpec{
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{{
+										Name:      "kube-apiserver",
+										Resources: apiServerResources,
+									}},
+								},
+							},
+						},
+					})).To(Succeed())
+				},
+				apiserver.AutoscalingConfig{ScaleDownDisabled: true},
+				&apiServerResources,
+			),
+			Entry("set the existing requirements when the deployment is found and autoscaling mode is VPAAndHPA",
+				func() {
+					Expect(runtimeClient.Create(ctx, &appsv1.Deployment{
+						ObjectMeta: metav1.ObjectMeta{
+							Name:      "kube-apiserver",
+							Namespace: namespace,
+						},
+						Spec: appsv1.DeploymentSpec{
+							Template: corev1.PodTemplateSpec{
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{{
+										Name:      "kube-apiserver",
+										Resources: apiServerResources,
+									}},
+								},
+							},
+						},
+					})).To(Succeed())
+				},
+				apiserver.AutoscalingConfig{Mode: apiserver.AutoscalingModeVPAAndHPA},
+				&apiServerResources,
+			),
 		)
 
 		DescribeTable("should correctly set the autoscaling replicas",

From 5da940109067f830c6e4d41ecf6854a5c2ae3beb Mon Sep 17 00:00:00 2001
From: ialidzhikov <i.alidjikov@gmail.com>
Date: Tue, 14 May 2024 16:23:58 +0300
Subject: [PATCH 10/10] Address review comments from plkokanov

---
 pkg/component/kubernetes/apiserver/hvpa.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pkg/component/kubernetes/apiserver/hvpa.go b/pkg/component/kubernetes/apiserver/hvpa.go
index acb10e13be0..33695e5eb8e 100644
--- a/pkg/component/kubernetes/apiserver/hvpa.go
+++ b/pkg/component/kubernetes/apiserver/hvpa.go
@@ -183,13 +183,12 @@ func (k *kubeAPIServer) reconcileHVPA(ctx context.Context, hvpa *hvpav1alpha1.Hv
 
 func (k *kubeAPIServer) computeVerticalPodAutoscalerContainerResourcePolicies(kubeAPIServerMinAllowed, kubeAPIServerMaxAllowed corev1.ResourceList) []vpaautoscalingv1.ContainerResourcePolicy {
 	var (
-		controlledValues             = vpaautoscalingv1.ContainerControlledValuesRequestsOnly
 		vpaContainerResourcePolicies = []vpaautoscalingv1.ContainerResourcePolicy{
 			{
 				ContainerName:    ContainerNameKubeAPIServer,
 				MinAllowed:       kubeAPIServerMinAllowed,
 				MaxAllowed:       kubeAPIServerMaxAllowed,
-				ControlledValues: &controlledValues,
+				ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
 			},
 		}
 	)
@@ -201,7 +200,7 @@ func (k *kubeAPIServer) computeVerticalPodAutoscalerContainerResourcePolicies(ku
 				MinAllowed: corev1.ResourceList{
 					corev1.ResourceMemory: resource.MustParse("20Mi"),
 				},
-				ControlledValues: &controlledValues,
+				ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
 			})
 		}
 		vpaContainerResourcePolicies = append(vpaContainerResourcePolicies, vpaautoscalingv1.ContainerResourcePolicy{
@@ -209,7 +208,7 @@ func (k *kubeAPIServer) computeVerticalPodAutoscalerContainerResourcePolicies(ku
 			MinAllowed: corev1.ResourceList{
 				corev1.ResourceMemory: resource.MustParse("20Mi"),
 			},
-			ControlledValues: &controlledValues,
+			ControlledValues: ptr.To(vpaautoscalingv1.ContainerControlledValuesRequestsOnly),
 		})
 	}