diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aad46870..1cb696d41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - (Bugfix) Remove PDBs if group count is 0 - (Feature) Add SpecPropagated condition - (Bugfix) Recover from locked ShuttingDown state +- (Feature) Add tolerations runtime rotation ## [1.2.22](https://github.com/arangodb/kube-arangodb/tree/1.2.22) (2022-12-13) - (Bugfix) Do not manage ports in managed ExternalAccess mode diff --git a/docs/generated/actions.md b/docs/generated/actions.md index d15c3d335..26ee622e3 100644 --- a/docs/generated/actions.md +++ b/docs/generated/actions.md @@ -57,6 +57,7 @@ | RotateStopMember | no | 15m0s | Community & Enterprise | Finalize member rotation. After this action member is started back | | RuntimeContainerArgsLogLevelUpdate | no | 10m0s | Community & Enterprise | Change ArangoDB Member log levels in runtime | | RuntimeContainerImageUpdate | no | 10m0s | Community & Enterprise | Update Container Image in runtime | +| RuntimeContainerSyncTolerations | no | 10m0s | Community & Enterprise | Update Pod Tolerations in runtime | | SetCondition | no | 10m0s | Community & Enterprise | (Deprecated) Set deployment condition | | SetConditionV2 | no | 10m0s | Community & Enterprise | Set deployment condition | | SetCurrentImage | no | 6h0m0s | Community & Enterprise | Update deployment current image after image discovery | @@ -140,6 +141,7 @@ spec: RotateStopMember: 15m0s RuntimeContainerArgsLogLevelUpdate: 10m0s RuntimeContainerImageUpdate: 10m0s + RuntimeContainerSyncTolerations: 10m0s SetCondition: 10m0s SetConditionV2: 10m0s SetCurrentImage: 6h0m0s diff --git a/internal/actions.yaml b/internal/actions.yaml index ea9efaf45..9402be215 100644 --- a/internal/actions.yaml +++ b/internal/actions.yaml @@ -202,6 +202,8 @@ actions: description: Update Cluster license (3.9+) RuntimeContainerImageUpdate: description: Update Container Image in runtime + RuntimeContainerSyncTolerations: + description: Update Pod Tolerations in runtime RuntimeContainerArgsLogLevelUpdate: description: Change ArangoDB Member log levels in runtime TopologyEnable: diff --git a/pkg/apis/deployment/v1/actions.generated.go b/pkg/apis/deployment/v1/actions.generated.go index 22bce6fe7..e2e24f014 100644 --- a/pkg/apis/deployment/v1/actions.generated.go +++ b/pkg/apis/deployment/v1/actions.generated.go @@ -131,6 +131,8 @@ const ( ActionRuntimeContainerArgsLogLevelUpdateDefaultTimeout time.Duration = ActionsDefaultTimeout // ActionRuntimeContainerImageUpdateDefaultTimeout define default timeout for action ActionRuntimeContainerImageUpdate ActionRuntimeContainerImageUpdateDefaultTimeout time.Duration = ActionsDefaultTimeout + // ActionRuntimeContainerSyncTolerationsDefaultTimeout define default timeout for action ActionRuntimeContainerSyncTolerations + ActionRuntimeContainerSyncTolerationsDefaultTimeout time.Duration = ActionsDefaultTimeout // ActionSetConditionDefaultTimeout define default timeout for action ActionSetCondition ActionSetConditionDefaultTimeout time.Duration = ActionsDefaultTimeout // ActionSetConditionV2DefaultTimeout define default timeout for action ActionSetConditionV2 @@ -284,6 +286,8 @@ const ( ActionTypeRuntimeContainerArgsLogLevelUpdate ActionType = "RuntimeContainerArgsLogLevelUpdate" // ActionTypeRuntimeContainerImageUpdate in scopes Normal. Update Container Image in runtime ActionTypeRuntimeContainerImageUpdate ActionType = "RuntimeContainerImageUpdate" + // ActionTypeRuntimeContainerSyncTolerations in scopes Normal. Update Pod Tolerations in runtime + ActionTypeRuntimeContainerSyncTolerations ActionType = "RuntimeContainerSyncTolerations" // ActionTypeSetCondition in scopes High. (Deprecated) Set deployment condition ActionTypeSetCondition ActionType = "SetCondition" // ActionTypeSetConditionV2 in scopes High. Set deployment condition @@ -438,6 +442,8 @@ func (a ActionType) DefaultTimeout() time.Duration { return ActionRuntimeContainerArgsLogLevelUpdateDefaultTimeout case ActionTypeRuntimeContainerImageUpdate: return ActionRuntimeContainerImageUpdateDefaultTimeout + case ActionTypeRuntimeContainerSyncTolerations: + return ActionRuntimeContainerSyncTolerationsDefaultTimeout case ActionTypeSetCondition: return ActionSetConditionDefaultTimeout case ActionTypeSetConditionV2: @@ -596,6 +602,8 @@ func (a ActionType) Priority() ActionPriority { return ActionPriorityNormal case ActionTypeRuntimeContainerImageUpdate: return ActionPriorityNormal + case ActionTypeRuntimeContainerSyncTolerations: + return ActionPriorityNormal case ActionTypeSetCondition: return ActionPriorityHigh case ActionTypeSetConditionV2: diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index 2dd0a3d92..0f42d7e2b 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -45,6 +45,7 @@ import ( inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/interfaces" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations" ) var _ interfaces.PodCreator = &ImageUpdatePod{} @@ -319,27 +320,27 @@ func (i *ImageUpdatePod) GetFinalizers() []string { } func (i *ImageUpdatePod) GetTolerations() []core.Toleration { - shortDur := k8sutil.TolerationDuration{ + shortDur := tolerations.TolerationDuration{ Forever: false, TimeSpan: time.Second * 5, } - tolerations := make([]core.Toleration, 0, 3+len(i.spec.ID.Get().Tolerations)) + ts := make([]core.Toleration, 0, 3+len(i.spec.ID.Get().Tolerations)) if idTolerations := i.spec.ID.Get().Tolerations; len(idTolerations) > 0 { for _, toleration := range idTolerations { - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, toleration) + ts = tolerations.AddTolerationIfNotFound(ts, toleration) } } - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, shortDur)) - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, shortDur)) - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, shortDur)) + ts = tolerations.AddTolerationIfNotFound(ts, + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeNotReady, shortDur)) + ts = tolerations.AddTolerationIfNotFound(ts, + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeUnreachable, shortDur)) + ts = tolerations.AddTolerationIfNotFound(ts, + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeAlphaUnreachable, shortDur)) - return tolerations + return ts } func (i *ImageUpdatePod) IsDeploymentMode() bool { diff --git a/pkg/deployment/images_test.go b/pkg/deployment/images_test.go index 199dee4fe..8a272713f 100644 --- a/pkg/deployment/images_test.go +++ b/pkg/deployment/images_test.go @@ -38,6 +38,7 @@ import ( "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/constants" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations" ) const ( @@ -482,14 +483,14 @@ func createTestCommandForImageUpdatePod() []string { func getTestTolerations() []core.Toleration { - shortDur := k8sutil.TolerationDuration{ + shortDur := tolerations.TolerationDuration{ Forever: false, TimeSpan: time.Second * 5, } return []core.Toleration{ - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, shortDur), - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, shortDur), - k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, shortDur), + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeNotReady, shortDur), + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeUnreachable, shortDur), + tolerations.NewNoExecuteToleration(tolerations.TolerationKeyNodeAlphaUnreachable, shortDur), } } diff --git a/pkg/deployment/reconcile/action.register.generated.go b/pkg/deployment/reconcile/action.register.generated.go index a62cd8c71..d6d56bcc6 100644 --- a/pkg/deployment/reconcile/action.register.generated.go +++ b/pkg/deployment/reconcile/action.register.generated.go @@ -186,6 +186,9 @@ var ( _ Action = &actionRuntimeContainerImageUpdate{} _ actionFactory = newRuntimeContainerImageUpdateAction + _ Action = &actionRuntimeContainerSyncTolerations{} + _ actionFactory = newRuntimeContainerSyncTolerationsAction + _ Action = &actionSetCondition{} _ actionFactory = newSetConditionAction @@ -894,6 +897,18 @@ func init() { registerAction(action, function) } + // RuntimeContainerSyncTolerations + { + // Get Action defition + function := newRuntimeContainerSyncTolerationsAction + action := api.ActionTypeRuntimeContainerSyncTolerations + + // Wrap action main function + + // Register action + registerAction(action, function) + } + // SetCondition { // Get Action defition diff --git a/pkg/deployment/reconcile/action.register.generated_test.go b/pkg/deployment/reconcile/action.register.generated_test.go index 890f5433f..b3f648e38 100644 --- a/pkg/deployment/reconcile/action.register.generated_test.go +++ b/pkg/deployment/reconcile/action.register.generated_test.go @@ -403,6 +403,13 @@ func Test_Actions(t *testing.T) { }) }) + t.Run("RuntimeContainerSyncTolerations", func(t *testing.T) { + ActionsExistence(t, api.ActionTypeRuntimeContainerSyncTolerations) + t.Run("Internal", func(t *testing.T) { + require.False(t, api.ActionTypeRuntimeContainerSyncTolerations.Internal()) + }) + }) + t.Run("SetCondition", func(t *testing.T) { ActionsExistence(t, api.ActionTypeSetCondition) t.Run("Internal", func(t *testing.T) { diff --git a/pkg/deployment/reconcile/action_runtime_container_image_update.go b/pkg/deployment/reconcile/action_runtime_container_image_update.go index 9077a3fae..8a32b423d 100644 --- a/pkg/deployment/reconcile/action_runtime_container_image_update.go +++ b/pkg/deployment/reconcile/action_runtime_container_image_update.go @@ -121,7 +121,7 @@ func (a actionRuntimeContainerImageUpdate) Post(ctx context.Context) error { return err } - return inspector.WithArangoMemberUpdate(ctx, cache, name, func(in *api.ArangoMember) (bool, error) { + return inspector.WithArangoMemberStatusUpdate(ctx, cache, name, func(in *api.ArangoMember) (bool, error) { if in.Spec.Template == nil || in.Status.Template == nil || in.Spec.Template.PodSpec == nil || in.Status.Template.PodSpec == nil { a.log.Info("Nil Member definition") diff --git a/pkg/deployment/reconcile/action_runtime_sync_tolerations.go b/pkg/deployment/reconcile/action_runtime_sync_tolerations.go new file mode 100644 index 000000000..ff0019bb1 --- /dev/null +++ b/pkg/deployment/reconcile/action_runtime_sync_tolerations.go @@ -0,0 +1,100 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package reconcile + +import ( + "context" + "reflect" + + "github.com/pkg/errors" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/deployment/patch" + "github.com/arangodb/kube-arangodb/pkg/util/globals" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations" +) + +func newRuntimeContainerSyncTolerationsAction(action api.Action, actionCtx ActionContext) Action { + a := &actionRuntimeContainerSyncTolerations{} + + a.actionImpl = newActionImplDefRef(action, actionCtx) + + return a +} + +type actionRuntimeContainerSyncTolerations struct { + // actionImpl implement timeout and member id functions + actionImpl + + actionEmptyCheckProgress +} + +// Start starts the action for changing conditions on the provided member. +func (a actionRuntimeContainerSyncTolerations) Start(ctx context.Context) (bool, error) { + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + a.log.Info("member is gone already") + return true, nil + } + + cache, ok := a.actionCtx.ACS().ClusterCache(m.ClusterID) + if !ok { + return true, errors.Errorf("Client is not ready") + } + + memberName := m.ArangoMemberName(a.actionCtx.GetName(), a.action.Group) + member, ok := cache.ArangoMember().V1().GetSimple(memberName) + if !ok { + return false, errors.Errorf("ArangoMember %s not found", memberName) + } + + pod, ok := cache.Pod().V1().GetSimple(m.Pod.GetName()) + if !ok { + a.log.Str("podName", m.Pod.GetName()).Info("pod is not present") + return true, nil + } + + currentTolerations := pod.Spec.Tolerations + + expectedTolerations := member.Spec.Template.PodSpec.Spec.Tolerations + + calculatedTolerations := tolerations.MergeTolerationsIfNotFound(currentTolerations, expectedTolerations) + + if reflect.DeepEqual(currentTolerations, calculatedTolerations) { + return true, nil + } + + p, err := patch.NewPatch(patch.ItemReplace(patch.NewPath("spec", "tolerations"), calculatedTolerations)).Marshal() + if err != nil { + return false, errors.Wrapf(err, "Unable to create patch") + } + + nctx, c := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx) + defer c() + + if _, err := a.actionCtx.ACS().CurrentClusterCache().PodsModInterface().V1().Patch(nctx, pod.GetName(), types.JSONPatchType, p, meta.PatchOptions{}); err != nil { + return false, errors.Wrapf(err, "Unable to apply patch") + } + + return true, nil +} diff --git a/pkg/deployment/resources/pod_creator.go b/pkg/deployment/resources/pod_creator.go index 488106924..221457d98 100644 --- a/pkg/deployment/resources/pod_creator.go +++ b/pkg/deployment/resources/pod_creator.go @@ -29,7 +29,6 @@ import ( "path/filepath" "strconv" "sync" - "time" core "k8s.io/api/core/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -51,6 +50,7 @@ import ( "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/interfaces" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tls" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/tolerations" ) // createArangodArgsWithUpgrade creates command line arguments for an arangod server upgrade in the given group. @@ -248,38 +248,7 @@ func createArangoSyncArgs(apiObject meta.Object, spec api.DeploymentSpec, group // CreatePodTolerations creates a list of tolerations for a pod created for the given group. func (r *Resources) CreatePodTolerations(group api.ServerGroup, groupSpec api.ServerGroupSpec) []core.Toleration { - notReadyDur := k8sutil.TolerationDuration{Forever: false, TimeSpan: time.Minute} - unreachableDur := k8sutil.TolerationDuration{Forever: false, TimeSpan: time.Minute} - switch group { - case api.ServerGroupAgents: - notReadyDur.Forever = true - unreachableDur.Forever = true - case api.ServerGroupCoordinators: - notReadyDur.TimeSpan = 15 * time.Second - unreachableDur.TimeSpan = 15 * time.Second - case api.ServerGroupDBServers: - notReadyDur.TimeSpan = 5 * time.Minute - unreachableDur.TimeSpan = 5 * time.Minute - case api.ServerGroupSingle: - if r.context.GetSpec().GetMode() == api.DeploymentModeSingle { - notReadyDur.Forever = true - unreachableDur.Forever = true - } else { - notReadyDur.TimeSpan = 5 * time.Minute - unreachableDur.TimeSpan = 5 * time.Minute - } - case api.ServerGroupSyncMasters: - notReadyDur.TimeSpan = 15 * time.Second - unreachableDur.TimeSpan = 15 * time.Second - case api.ServerGroupSyncWorkers: - notReadyDur.TimeSpan = 1 * time.Minute - unreachableDur.TimeSpan = 1 * time.Minute - } - tolerations := groupSpec.GetTolerations() - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, notReadyDur)) - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, unreachableDur)) - tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, unreachableDur)) - return tolerations + return tolerations.MergeTolerationsIfNotFound(tolerations.CreatePodTolerations(r.context.GetMode(), group), groupSpec.GetTolerations()) } func (r *Resources) RenderPodTemplateForMember(ctx context.Context, acs sutil.ACS, spec api.DeploymentSpec, status api.DeploymentStatus, memberID string, imageInfo api.ImageInfo) (*core.PodTemplateSpec, error) { diff --git a/pkg/deployment/rotation/arangod_tolerations.go b/pkg/deployment/rotation/arangod_tolerations.go new file mode 100644 index 000000000..1aa0c1efa --- /dev/null +++ b/pkg/deployment/rotation/arangod_tolerations.go @@ -0,0 +1,44 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package rotation + +import ( + "reflect" + + core "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" +) + +func comparePodTolerations(_ api.DeploymentSpec, _ api.ServerGroup, spec, status *core.PodSpec) comparePodFunc { + return func(builder api.ActionBuilder) (mode Mode, plan api.Plan, err error) { + if !reflect.DeepEqual(spec.Tolerations, status.Tolerations) { + plan = append(plan, builder.NewAction(api.ActionTypeRuntimeContainerSyncTolerations)) + + spec.Tolerations = status.Tolerations + mode = mode.And(InPlaceRotation) + + return + } + + return + } +} diff --git a/pkg/deployment/rotation/compare.go b/pkg/deployment/rotation/compare.go index b771bd320..f06550ad8 100644 --- a/pkg/deployment/rotation/compare.go +++ b/pkg/deployment/rotation/compare.go @@ -94,7 +94,7 @@ func compare(deploymentSpec api.DeploymentSpec, member api.MemberStatus, group a g := podFuncGenerator(deploymentSpec, group, &spec.PodSpec.Spec, &podStatus.Spec) - if m, p, err := comparePod(b, g(podCompare), g(affinityCompare), g(comparePodVolumes), g(containersCompare), g(initContainersCompare)); err != nil { + if m, p, err := comparePod(b, g(podCompare), g(affinityCompare), g(comparePodVolumes), g(containersCompare), g(initContainersCompare), g(comparePodTolerations)); err != nil { log.Err(err).Msg("Error while getting pod diff") return SkippedRotation, nil, err } else { diff --git a/pkg/util/k8sutil/tolerations.go b/pkg/util/k8sutil/tolerations.go deleted file mode 100644 index 851063864..000000000 --- a/pkg/util/k8sutil/tolerations.go +++ /dev/null @@ -1,71 +0,0 @@ -// -// DISCLAIMER -// -// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright holder is ArangoDB GmbH, Cologne, Germany -// - -package k8sutil - -import ( - "time" - - core "k8s.io/api/core/v1" -) - -const ( - TolerationKeyNodeNotReady = "node.kubernetes.io/not-ready" - TolerationKeyNodeAlphaUnreachable = "node.alpha.kubernetes.io/unreachable" - TolerationKeyNodeUnreachable = "node.kubernetes.io/unreachable" -) - -// TolerationDuration is a duration spec for tolerations. -type TolerationDuration struct { - Forever bool - TimeSpan time.Duration -} - -// NewNoExecuteToleration is a helper to create a Toleration with -// Key=key, Operator='Exists' Effect='NoExecute', TolerationSeconds=tolerationDuration.Seconds(). -func NewNoExecuteToleration(key string, duration TolerationDuration) core.Toleration { - t := core.Toleration{ - Key: key, - Operator: "Exists", - Effect: "NoExecute", - } - if !duration.Forever { - tolerationSeconds := int64(duration.TimeSpan.Seconds()) - t.TolerationSeconds = &tolerationSeconds - } - return t -} - -// AddTolerationIfNotFound adds the given tolerations, if no such toleration has been set in the given source. -func AddTolerationIfNotFound(source []core.Toleration, toAdd core.Toleration) []core.Toleration { - if len(source) == 0 { - return []core.Toleration{ - toAdd, - } - } - - for _, t := range source { - if (t.Key == toAdd.Key || len(t.Key) == 0) && (t.Effect == toAdd.Effect || len(t.Effect) == 0) { - // Toleration alread exists, do not add - return source - } - } - return append(source, toAdd) -} diff --git a/pkg/util/k8sutil/tolerations/tolerations.go b/pkg/util/k8sutil/tolerations/tolerations.go new file mode 100644 index 000000000..7ca4597f9 --- /dev/null +++ b/pkg/util/k8sutil/tolerations/tolerations.go @@ -0,0 +1,131 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package tolerations + +import ( + "time" + + core "k8s.io/api/core/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/util" +) + +const ( + TolerationKeyNodeNotReady = "node.kubernetes.io/not-ready" + TolerationKeyNodeAlphaUnreachable = "node.alpha.kubernetes.io/unreachable" + TolerationKeyNodeUnreachable = "node.kubernetes.io/unreachable" +) + +// TolerationDuration is a duration spec for tolerations. +type TolerationDuration struct { + Forever bool + TimeSpan time.Duration +} + +// NewNoExecuteToleration is a helper to create a Toleration with +// Key=key, Operator='Exists' Effect='NoExecute', TolerationSeconds=tolerationDuration.Seconds(). +func NewNoExecuteToleration(key string, duration TolerationDuration) core.Toleration { + t := core.Toleration{ + Key: key, + Operator: "Exists", + Effect: "NoExecute", + } + if !duration.Forever { + tolerationSeconds := int64(duration.TimeSpan.Seconds()) + t.TolerationSeconds = &tolerationSeconds + } + return t +} + +// MergeTolerationsIfNotFound merge the given tolerations lists, if no such toleration has been set in the given source. +func MergeTolerationsIfNotFound(source []core.Toleration, toAdd ...[]core.Toleration) []core.Toleration { + for _, toleration := range toAdd { + source = AddTolerationsIfNotFound(source, toleration...) + } + + return source +} + +// AddTolerationsIfNotFound add the given tolerations, if no such toleration has been set in the given source. +func AddTolerationsIfNotFound(source []core.Toleration, toAdd ...core.Toleration) []core.Toleration { + for _, toleration := range toAdd { + source = AddTolerationIfNotFound(source, toleration) + } + + return source +} + +// AddTolerationIfNotFound adds the given tolerations, if no such toleration has been set in the given source. +func AddTolerationIfNotFound(source []core.Toleration, toAdd core.Toleration) []core.Toleration { + if len(source) == 0 { + return []core.Toleration{ + toAdd, + } + } + + for id, t := range source { + if t.Key == toAdd.Key && t.Effect == toAdd.Effect && t.Operator == toAdd.Operator && t.Value == toAdd.Value { + // We are on same toleration, only value needs to be modified + if !util.CompareInt64p(t.TolerationSeconds, toAdd.TolerationSeconds) { + source[id].TolerationSeconds = util.NewInt64OrNil(toAdd.TolerationSeconds) + } + + return source + } + } + return append(source, toAdd) +} + +// CreatePodTolerations creates a list of tolerations for a pod created for the given group. +func CreatePodTolerations(mode api.DeploymentMode, group api.ServerGroup) []core.Toleration { + notReadyDur := TolerationDuration{Forever: false, TimeSpan: time.Minute} + unreachableDur := TolerationDuration{Forever: false, TimeSpan: time.Minute} + switch group { + case api.ServerGroupAgents: + notReadyDur.Forever = true + unreachableDur.Forever = true + case api.ServerGroupCoordinators: + notReadyDur.TimeSpan = 15 * time.Second + unreachableDur.TimeSpan = 15 * time.Second + case api.ServerGroupDBServers: + notReadyDur.TimeSpan = 5 * time.Minute + unreachableDur.TimeSpan = 5 * time.Minute + case api.ServerGroupSingle: + if mode == api.DeploymentModeSingle { + notReadyDur.Forever = true + unreachableDur.Forever = true + } else { + notReadyDur.TimeSpan = 5 * time.Minute + unreachableDur.TimeSpan = 5 * time.Minute + } + case api.ServerGroupSyncMasters: + notReadyDur.TimeSpan = 15 * time.Second + unreachableDur.TimeSpan = 15 * time.Second + case api.ServerGroupSyncWorkers: + notReadyDur.TimeSpan = 1 * time.Minute + unreachableDur.TimeSpan = 1 * time.Minute + } + return []core.Toleration{NewNoExecuteToleration(TolerationKeyNodeNotReady, notReadyDur), + NewNoExecuteToleration(TolerationKeyNodeUnreachable, unreachableDur), + NewNoExecuteToleration(TolerationKeyNodeAlphaUnreachable, unreachableDur), + } +} diff --git a/pkg/util/k8sutil/tolerations/tolerations_test.go b/pkg/util/k8sutil/tolerations/tolerations_test.go new file mode 100644 index 000000000..e406b5310 --- /dev/null +++ b/pkg/util/k8sutil/tolerations/tolerations_test.go @@ -0,0 +1,126 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package tolerations + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + core "k8s.io/api/core/v1" +) + +const ( + TolerationValid = "" + TolerationUnableToRemove = "Unable to remove toleration" + TolerationUnableToModify = "Unable to modify toleration" +) + +func copyTolerations(in []core.Toleration) []core.Toleration { + out := make([]core.Toleration, len(in)) + + for id := range in { + in[id].DeepCopyInto(&out[id]) + + } + + return out +} + +func areTolerationsValid(a, b []core.Toleration) string { + if len(a) > len(b) { + return TolerationUnableToRemove + } + + for id := range a { + if a[id].Operator != b[id].Operator || + a[id].Key != b[id].Key || + a[id].Effect != b[id].Effect || + a[id].Value != b[id].Value { + return TolerationUnableToModify + } + } + + return TolerationValid +} + +func mergeTolerations(t *testing.T, tolerations []core.Toleration, toAdd ...core.Toleration) []core.Toleration { + return ensureTolerationImmutable(t, tolerations, func(in []core.Toleration) []core.Toleration { + return MergeTolerationsIfNotFound(tolerations, toAdd) + }, func(t *testing.T, change string) { + require.Equal(t, TolerationValid, change) + }) +} + +func ensureTolerationImmutable(t *testing.T, tolerations []core.Toleration, mod func(in []core.Toleration) []core.Toleration, check func(t *testing.T, change string)) []core.Toleration { + param := copyTolerations(tolerations) + param = mod(param) + + r := areTolerationsValid(tolerations, param) + + check(t, r) + return param +} + +func Test_Tolerations(t *testing.T) { + var tolerations []core.Toleration + + t.Run("Add initial toleration", func(t *testing.T) { + tolerations = mergeTolerations(t, tolerations, NewNoExecuteToleration(TolerationKeyNodeNotReady, TolerationDuration{Forever: true})) + + require.Len(t, tolerations, 1) + + require.Nil(t, tolerations[0].TolerationSeconds) + }) + + t.Run("Modify initial toleration", func(t *testing.T) { + tolerations = mergeTolerations(t, tolerations, NewNoExecuteToleration(TolerationKeyNodeNotReady, TolerationDuration{TimeSpan: 5 * time.Second})) + + require.Len(t, tolerations, 1) + + require.NotNil(t, tolerations[0].TolerationSeconds) + require.EqualValues(t, 5, *tolerations[0].TolerationSeconds) + }) + + t.Run("Add second toleration", func(t *testing.T) { + tolerations = mergeTolerations(t, tolerations, NewNoExecuteToleration(TolerationKeyNodeAlphaUnreachable, TolerationDuration{TimeSpan: 5 * time.Second})) + + require.Len(t, tolerations, 2) + + require.NotNil(t, tolerations[0].TolerationSeconds) + require.EqualValues(t, 5, *tolerations[0].TolerationSeconds) + + require.NotNil(t, tolerations[1].TolerationSeconds) + require.EqualValues(t, 5, *tolerations[1].TolerationSeconds) + }) + + t.Run("Modify initial toleration again", func(t *testing.T) { + tolerations = mergeTolerations(t, tolerations, NewNoExecuteToleration(TolerationKeyNodeNotReady, TolerationDuration{TimeSpan: 10 * time.Second})) + + require.Len(t, tolerations, 2) + + require.NotNil(t, tolerations[0].TolerationSeconds) + require.EqualValues(t, 10, *tolerations[0].TolerationSeconds) + + require.NotNil(t, tolerations[1].TolerationSeconds) + require.EqualValues(t, 5, *tolerations[1].TolerationSeconds) + }) +}