Skip to content

Commit

Permalink
If the user has specified cluster autoscaling behavior for their (#2754)
Browse files Browse the repository at this point in the history
gameserver then don't overwrite it.
  • Loading branch information
roberthbailey committed Sep 30, 2022
1 parent 29b130f commit acf760b
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 14 deletions.
17 changes: 12 additions & 5 deletions pkg/apis/agones/v1/gameserver.go
Expand Up @@ -99,6 +99,10 @@ const (
// becomes ready, so we can track when restarts should occur and when a GameServer
// should be moved to Unhealthy.
GameServerReadyContainerIDAnnotation = agones.GroupName + "/ready-container-id"
// PodSafeToEvictAnnotation is an annotation that the Kubernetes cluster autoscaler uses to
// determine if a pod can safely be evicted to compact a cluster by moving pods between nodes
// and scaling down nodes.
PodSafeToEvictAnnotation = "cluster-autoscaler.kubernetes.io/safe-to-evict"
)

var (
Expand Down Expand Up @@ -632,7 +636,7 @@ func (gs *GameServer) podObjectMeta(pod *corev1.Pod) {
pod.ObjectMeta.Labels = make(map[string]string, 2)
}
if pod.ObjectMeta.Annotations == nil {
pod.ObjectMeta.Annotations = make(map[string]string, 1)
pod.ObjectMeta.Annotations = make(map[string]string, 2)
}
pod.ObjectMeta.Labels[RoleLabel] = GameServerLabelRole
// store the GameServer name as a label, for easy lookup later on
Expand All @@ -642,10 +646,13 @@ func (gs *GameServer) podObjectMeta(pod *corev1.Pod) {
ref := metav1.NewControllerRef(gs, SchemeGroupVersion.WithKind("GameServer"))
pod.ObjectMeta.OwnerReferences = append(pod.ObjectMeta.OwnerReferences, *ref)

if gs.Spec.Scheduling == apis.Packed {
// This means that the autoscaler cannot remove the Node that this Pod is on.
// (and evict the Pod in the process)
pod.ObjectMeta.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"
// This means that the autoscaler cannot remove the Node that this Pod is on.
// (and evict the Pod in the process). Only set the value if it has not already
// been configured in the pod template (to not override user specified behavior).
// We only set this for packed game servers, under the assumption that if
// game servers are distributed then the cluster autoscaler isn't likely running.
if _, exists := pod.ObjectMeta.Annotations[PodSafeToEvictAnnotation]; !exists && gs.Spec.Scheduling == apis.Packed {
pod.ObjectMeta.Annotations[PodSafeToEvictAnnotation] = "false"
}

// Add Agones version into Pod Annotations
Expand Down
62 changes: 60 additions & 2 deletions pkg/apis/agones/v1/gameserver_test.go
Expand Up @@ -1265,7 +1265,7 @@ func TestGameServerPodObjectMeta(t *testing.T) {
gs.podObjectMeta(pod)
f(t, gs, pod)

assert.Equal(t, "false", pod.ObjectMeta.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"])
assert.Equal(t, "false", pod.ObjectMeta.Annotations[PodSafeToEvictAnnotation])
})

t.Run("distributed", func(t *testing.T) {
Expand All @@ -1276,10 +1276,68 @@ func TestGameServerPodObjectMeta(t *testing.T) {
gs.podObjectMeta(pod)
f(t, gs, pod)

assert.Equal(t, "", pod.ObjectMeta.Annotations["cluster-autoscaler.kubernetes.io/safe-to-evict"])
assert.Equal(t, "", pod.ObjectMeta.Annotations[PodSafeToEvictAnnotation])
})
}

// TestGameServerPodAutoscalerAnnotations checks the value of the
// PodSafeToEvictAnnotation on the Pod returned by GameServer.Pod() for each
// combination of scheduling strategy and user-supplied annotation:
//   - Packed, no user annotation: the annotation is expected to be "false".
//   - Distributed, no user annotation: the annotation is expected to be unset.
//   - Either strategy with the annotation set to "true" in the pod template:
//     the user supplied value is expected to be preserved.
func TestGameServerPodAutoscalerAnnotations(t *testing.T) {
	testCases := []struct {
		description        string
		scheduling         apis.SchedulingStrategy
		setAnnotation      bool
		expectedAnnotation string
	}{
		{
			description:        "Packed",
			scheduling:         apis.Packed,
			expectedAnnotation: "false",
		},
		{
			description:        "Distributed",
			scheduling:         apis.Distributed,
			expectedAnnotation: "",
		},
		{
			description:        "Packed with autoscaler annotation",
			scheduling:         apis.Packed,
			setAnnotation:      true,
			expectedAnnotation: "true",
		},
		{
			description:        "Distributed with autoscaler annotation",
			scheduling:         apis.Distributed,
			setAnnotation:      true,
			expectedAnnotation: "true",
		},
	}

	fixture := &GameServer{
		ObjectMeta: metav1.ObjectMeta{Name: "logan"},
		Spec:       GameServerSpec{Container: "sheep"},
	}
	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			// Work on a copy so that one test case cannot leak state into the next.
			gs := fixture.DeepCopy()
			gs.Spec.Scheduling = tc.scheduling
			if tc.setAnnotation {
				gs.Spec.Template = corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{
					Annotations: map[string]string{PodSafeToEvictAnnotation: "true"},
				}}
			}

			pod, err := gs.Pod()
			// Stop this subtest on error: the assertions below dereference pod,
			// which would otherwise panic rather than report a clean failure.
			if !assert.NoError(t, err, "Pod should not return an error") {
				return
			}

			assert.Equal(t, gs.ObjectMeta.Name, pod.ObjectMeta.Name)
			assert.Equal(t, gs.ObjectMeta.Namespace, pod.ObjectMeta.Namespace)
			assert.Equal(t, GameServerLabelRole, pod.ObjectMeta.Labels[RoleLabel])
			assert.Equal(t, "gameserver", pod.ObjectMeta.Labels[agones.GroupName+"/role"])
			assert.Equal(t, gs.ObjectMeta.Name, pod.ObjectMeta.Labels[GameServerPodLabel])
			assert.Equal(t, "sheep", pod.ObjectMeta.Annotations[GameServerContainerAnnotation])
			assert.True(t, metav1.IsControlledBy(pod, gs))
			assert.Equal(t, tc.expectedAnnotation, pod.ObjectMeta.Annotations[PodSafeToEvictAnnotation])
		})
	}
}

func TestGameServerPodScheduling(t *testing.T) {
fixture := &corev1.Pod{Spec: corev1.PodSpec{}}

Expand Down
44 changes: 37 additions & 7 deletions site/content/en/docs/Advanced/scheduling-and-autoscaling.md
Expand Up @@ -35,7 +35,7 @@ or their cloud specific documentation.
## Fleet Autoscaling

Fleet autoscaling is the only type of autoscaling that exists in Agones. It is currently available as a
buffer autoscaling strategy or as a webhook driven strategy, such that you can provide your own autoscaling logic.
buffer autoscaling strategy or as a webhook driven strategy, such that you can provide your own autoscaling logic.

Have a look at the [Create a Fleet Autoscaler]({{< relref "../Getting Started/create-fleetautoscaler.md" >}}) quickstart, the
[Create a Webhook Fleet Autoscaler]({{< relref "../Getting Started/create-webhook-fleetautoscaler.md" >}}) quickstart,
Expand All @@ -58,7 +58,7 @@ when it is created.

### Fleet Scale Down Strategy

Fleet Scale Down strategy refers to the order in which the `GameServers` that belong to a `Fleet` are deleted,
Fleet Scale Down strategy refers to the order in which the `GameServers` that belong to a `Fleet` are deleted,
when Fleets are shrunk in size.

## Fleet Scheduling
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
image: {{% example-image %}}
```

This is the *default* Fleet scheduling strategy. It is designed for dynamic Kubernetes environments, wherein you wish
This is the *default* Fleet scheduling strategy. It is designed for dynamic Kubernetes environments, wherein you wish
to scale up and down as load increases or decreases, such as in a Cloud environment where you are paying
for the infrastructure you use.

Expand All @@ -97,13 +97,43 @@ This affects the Cluster autoscaler, Allocation Scheduling, Pod Scheduling and F

#### Cluster Autoscaler

{{% feature expiryVersion="1.27.0" %}}
To ensure that the Cluster Autoscaler doesn't attempt to evict and move `GameServer` `Pods` onto new Nodes during
gameplay, Agones adds the annotation [`"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"`](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node)
to the backing Pod.
{{% /feature %}}

{{% feature publishVersion="1.27.0" %}}
When using the “Packed” strategy, Agones will ensure that the Cluster Autoscaler doesn't attempt to evict and move `GameServer` `Pods` onto new Nodes during
gameplay by adding the annotation [`"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"`](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-types-of-pods-can-prevent-ca-from-removing-a-node)
to the backing Pod.

However, if a gameserver can tolerate [being evicted](https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/#how-api-initiated-eviction-works)
(generally in combination with setting an appropriate graceful termination period on the gameserver pod) and you
want the Cluster Autoscaler to compact your cluster by evicting game servers when it would allow the Cluster
Autoscaler to reduce the number of nodes in the cluster, then this behavior can be overridden by explicitly setting the
`"cluster-autoscaler.kubernetes.io/safe-to-evict"` annotation to `"true"` in the metadata for the game server pod, e.g.

```
apiVersion: "agones.dev/v1"
kind: GameServer
metadata:
name: "simple-game-server"
spec:
template:
# pod metadata. Name & Namespace is overwritten
metadata:
annotations:
cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
spec:
containers:
- image: {{< example-image >}}
```
{{% /feature %}}

#### Allocation Scheduling Strategy

Under the "Packed" strategy, allocation will prioritise allocating `GameServers` to nodes that are running on
Under the "Packed" strategy, allocation will prioritise allocating `GameServers` that are running on
Nodes that already have allocated `GameServers` running on them.

#### Pod Scheduling Strategy
Expand All @@ -113,13 +143,13 @@ with a `preferredDuringSchedulingIgnoredDuringExecution` affinity with [hostname
topology. This attempts to group together `GameServer` Pods within as few nodes in the cluster as it can.

{{< alert title="Note" color="info">}}
The default Kubernetes scheduler doesn't do a perfect job of packing, but it's a good enough job for what we need -
at least at this stage.
The default Kubernetes scheduler doesn't do a perfect job of packing, but it's a good enough job for what we need -
at least at this stage.
{{< /alert >}}

#### Fleet Scale Down Strategy

With the "Packed" strategy, Fleets will remove `Ready` `GameServers` from Nodes with the _least_ number of `Ready` and
With the "Packed" strategy, Fleets will remove `Ready` `GameServers` from Nodes with the _least_ number of `Ready` and
`Allocated` `GameServers` on them, attempting to empty Nodes so that they can be safely removed.

### Distributed
Expand Down

0 comments on commit acf760b

Please sign in to comment.