diff --git a/CHANGELOG.md b/CHANGELOG.md index a1f54254f..70dd650eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Add debug mode (Golang DLV) - License V2 for ArangoDB 3.9.0+ - Add ArangoClusterSynchronization v1 API +- Add core containers names to follow their terminations ## [1.2.6](https://github.com/arangodb/kube-arangodb/tree/1.2.6) (2021-12-15) - Add ArangoBackup backoff functionality diff --git a/pkg/apis/deployment/v1/deployment_spec.go b/pkg/apis/deployment/v1/deployment_spec.go index d5013fe70..b5fdbce01 100644 --- a/pkg/apis/deployment/v1/deployment_spec.go +++ b/pkg/apis/deployment/v1/deployment_spec.go @@ -17,8 +17,6 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package v1 @@ -28,11 +26,12 @@ import ( "fmt" "reflect" - "github.com/arangodb/kube-arangodb/pkg/util/errors" + core "k8s.io/api/core/v1" + "github.com/arangodb/kube-arangodb/pkg/backup/utils" "github.com/arangodb/kube-arangodb/pkg/util" - - core "k8s.io/api/core/v1" + "github.com/arangodb/kube-arangodb/pkg/util/errors" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" ) var ( @@ -537,3 +536,19 @@ func (s DeploymentSpec) Checksum() (string, error) { return fmt.Sprintf("%0x", sha256.Sum256(data)), nil } + +// GetCoreContainers returns all containers' names which must running in the pod for the given group of servers. +func (s DeploymentSpec) GetCoreContainers(group ServerGroup) utils.StringList { + groupSpec := s.GetServerGroupSpec(group) + if len(groupSpec.SidecarCoreNames) == 0 { + return utils.StringList{k8sutil.ServerContainerName} + } + + result := make(utils.StringList, 0, len(groupSpec.SidecarCoreNames)+1) + if !utils.StringList(groupSpec.SidecarCoreNames).Has(k8sutil.ServerContainerName) { + result = append(result, k8sutil.ServerContainerName) + } + result = append(result, groupSpec.SidecarCoreNames...) 
+ + return result +} diff --git a/pkg/apis/deployment/v1/deployment_spec_test.go b/pkg/apis/deployment/v1/deployment_spec_test.go index fe0ef8726..c6b80548a 100644 --- a/pkg/apis/deployment/v1/deployment_spec_test.go +++ b/pkg/apis/deployment/v1/deployment_spec_test.go @@ -17,17 +17,17 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package v1 import ( "testing" - "github.com/arangodb/kube-arangodb/pkg/util" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" + + "github.com/arangodb/kube-arangodb/pkg/backup/utils" + "github.com/arangodb/kube-arangodb/pkg/util" ) func TestDeploymentSpecValidate(t *testing.T) { @@ -122,3 +122,80 @@ func TestDeploymentSpecResetImmutableFields(t *testing.T) { assert.Equal(t, test.Expected, test.Target) } } + +func TestDeploymentSpec_GetCoreContainers(t *testing.T) { + type fields struct { + Single ServerGroupSpec + Agents ServerGroupSpec + DBServers ServerGroupSpec + Coordinators ServerGroupSpec + SyncMasters ServerGroupSpec + SyncWorkers ServerGroupSpec + } + + type args struct { + group ServerGroup + } + + tests := map[string]struct { + fields fields + args args + want utils.StringList + }{ + "one sidecar container": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: []string{"other"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other"}, + }, + "one predefined container and one sidecar container": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: []string{"server", "other"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other"}, + }, + "zero core containers": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: nil, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server"}, + }, + "two non-core containers": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: 
[]string{"other1", "other2"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other1", "other2"}, + }, + } + for testName, test := range tests { + t.Run(testName, func(t *testing.T) { + s := DeploymentSpec{ + DBServers: test.fields.DBServers, + } + + got := s.GetCoreContainers(test.args.group) + assert.Equal(t, test.want, got) + + }) + } +} diff --git a/pkg/apis/deployment/v1/deployment_status_members.go b/pkg/apis/deployment/v1/deployment_status_members.go index 2bcaab428..b563c038a 100644 --- a/pkg/apis/deployment/v1/deployment_status_members.go +++ b/pkg/apis/deployment/v1/deployment_status_members.go @@ -128,7 +128,8 @@ func (ds DeploymentStatusMembers) ForServerGroup(cb func(group ServerGroup, list } // MemberStatusByPodName returns a reference to the element in the given set of lists that has the given pod name. -// If no such element exists, nil is returned. +// Returns member status and group which the pod belong to. +// If no such element exists, false is returned. func (ds DeploymentStatusMembers) MemberStatusByPodName(podName string) (MemberStatus, ServerGroup, bool) { if result, found := ds.Single.ElementByPodName(podName); found { return result, ServerGroupSingle, true diff --git a/pkg/apis/deployment/v1/server_group_spec.go b/pkg/apis/deployment/v1/server_group_spec.go index d5ddd8bd4..cd9f7915f 100644 --- a/pkg/apis/deployment/v1/server_group_spec.go +++ b/pkg/apis/deployment/v1/server_group_spec.go @@ -126,6 +126,9 @@ type ServerGroupSpec struct { Affinity *core.PodAffinity `json:"affinity,omitempty"` // NodeAffinity specified additional nodeAffinity settings in ArangoDB Pod definitions NodeAffinity *core.NodeAffinity `json:"nodeAffinity,omitempty"` + // SidecarCoreNames is a list of sidecar containers which must run in the pod. + // Some names (e.g.: "server", "worker") are reserved, and they don't have any impact. 
+ SidecarCoreNames []string `json:"sidecarCoreNames,omitempty"` // Sidecars specifies a list of additional containers to be started Sidecars []core.Container `json:"sidecars,omitempty"` // SecurityContext specifies security context for group diff --git a/pkg/apis/deployment/v1/zz_generated.deepcopy.go b/pkg/apis/deployment/v1/zz_generated.deepcopy.go index 2d09ef397..4b0fb0020 100644 --- a/pkg/apis/deployment/v1/zz_generated.deepcopy.go +++ b/pkg/apis/deployment/v1/zz_generated.deepcopy.go @@ -1976,6 +1976,11 @@ func (in *ServerGroupSpec) DeepCopyInto(out *ServerGroupSpec) { *out = new(corev1.NodeAffinity) (*in).DeepCopyInto(*out) } + if in.SidecarCoreNames != nil { + in, out := &in.SidecarCoreNames, &out.SidecarCoreNames + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.Sidecars != nil { in, out := &in.Sidecars, &out.Sidecars *out = make([]corev1.Container, len(*in)) diff --git a/pkg/apis/deployment/v2alpha1/deployment_spec.go b/pkg/apis/deployment/v2alpha1/deployment_spec.go index 0c33217d1..5325227cf 100644 --- a/pkg/apis/deployment/v2alpha1/deployment_spec.go +++ b/pkg/apis/deployment/v2alpha1/deployment_spec.go @@ -17,8 +17,6 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package v2alpha1 @@ -28,11 +26,12 @@ import ( "fmt" "reflect" - "github.com/arangodb/kube-arangodb/pkg/util/errors" + core "k8s.io/api/core/v1" + "github.com/arangodb/kube-arangodb/pkg/backup/utils" "github.com/arangodb/kube-arangodb/pkg/util" - - core "k8s.io/api/core/v1" + "github.com/arangodb/kube-arangodb/pkg/util/errors" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" ) var ( @@ -537,3 +536,19 @@ func (s DeploymentSpec) Checksum() (string, error) { return fmt.Sprintf("%0x", sha256.Sum256(data)), nil } + +// GetCoreContainers returns all containers' names which must running in the pod for the given group of servers. 
+func (s DeploymentSpec) GetCoreContainers(group ServerGroup) utils.StringList { + groupSpec := s.GetServerGroupSpec(group) + if len(groupSpec.SidecarCoreNames) == 0 { + return utils.StringList{k8sutil.ServerContainerName} + } + + result := make(utils.StringList, 0, len(groupSpec.SidecarCoreNames)+1) + if !utils.StringList(groupSpec.SidecarCoreNames).Has(k8sutil.ServerContainerName) { + result = append(result, k8sutil.ServerContainerName) + } + result = append(result, groupSpec.SidecarCoreNames...) + + return result +} diff --git a/pkg/apis/deployment/v2alpha1/deployment_spec_test.go b/pkg/apis/deployment/v2alpha1/deployment_spec_test.go index 70693609f..51f991d5c 100644 --- a/pkg/apis/deployment/v2alpha1/deployment_spec_test.go +++ b/pkg/apis/deployment/v2alpha1/deployment_spec_test.go @@ -17,17 +17,17 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package v2alpha1 import ( "testing" - "github.com/arangodb/kube-arangodb/pkg/util" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" + + "github.com/arangodb/kube-arangodb/pkg/backup/utils" + "github.com/arangodb/kube-arangodb/pkg/util" ) func TestDeploymentSpecValidate(t *testing.T) { @@ -122,3 +122,80 @@ func TestDeploymentSpecResetImmutableFields(t *testing.T) { assert.Equal(t, test.Expected, test.Target) } } + +func TestDeploymentSpec_GetCoreContainers(t *testing.T) { + type fields struct { + Single ServerGroupSpec + Agents ServerGroupSpec + DBServers ServerGroupSpec + Coordinators ServerGroupSpec + SyncMasters ServerGroupSpec + SyncWorkers ServerGroupSpec + } + + type args struct { + group ServerGroup + } + + tests := map[string]struct { + fields fields + args args + want utils.StringList + }{ + "one sidecar container": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: []string{"other"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other"}, + }, + "one predefined container and one sidecar 
container": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: []string{"server", "other"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other"}, + }, + "zero core containers": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: nil, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server"}, + }, + "two non-core containers": { + fields: fields{ + DBServers: ServerGroupSpec{ + SidecarCoreNames: []string{"other1", "other2"}, + }, + }, + args: args{ + group: ServerGroupDBServers, + }, + want: utils.StringList{"server", "other1", "other2"}, + }, + } + for testName, test := range tests { + t.Run(testName, func(t *testing.T) { + s := DeploymentSpec{ + DBServers: test.fields.DBServers, + } + + got := s.GetCoreContainers(test.args.group) + assert.Equal(t, test.want, got) + + }) + } +} diff --git a/pkg/apis/deployment/v2alpha1/deployment_status_members.go b/pkg/apis/deployment/v2alpha1/deployment_status_members.go index c984c9b93..96b8915bd 100644 --- a/pkg/apis/deployment/v2alpha1/deployment_status_members.go +++ b/pkg/apis/deployment/v2alpha1/deployment_status_members.go @@ -128,7 +128,8 @@ func (ds DeploymentStatusMembers) ForServerGroup(cb func(group ServerGroup, list } // MemberStatusByPodName returns a reference to the element in the given set of lists that has the given pod name. -// If no such element exists, nil is returned. +// Returns member status and group which the pod belong to. +// If no such element exists, false is returned. 
func (ds DeploymentStatusMembers) MemberStatusByPodName(podName string) (MemberStatus, ServerGroup, bool) { if result, found := ds.Single.ElementByPodName(podName); found { return result, ServerGroupSingle, true diff --git a/pkg/apis/deployment/v2alpha1/server_group_spec.go b/pkg/apis/deployment/v2alpha1/server_group_spec.go index b22e6f993..603191605 100644 --- a/pkg/apis/deployment/v2alpha1/server_group_spec.go +++ b/pkg/apis/deployment/v2alpha1/server_group_spec.go @@ -126,6 +126,9 @@ type ServerGroupSpec struct { Affinity *core.PodAffinity `json:"affinity,omitempty"` // NodeAffinity specified additional nodeAffinity settings in ArangoDB Pod definitions NodeAffinity *core.NodeAffinity `json:"nodeAffinity,omitempty"` + // SidecarCoreNames is a list of sidecar containers which must run in the pod. + // Some names (e.g.: "server", "worker") are reserved, and they don't have any impact. + SidecarCoreNames []string `json:"sidecarCoreNames,omitempty"` // Sidecars specifies a list of additional containers to be started Sidecars []core.Container `json:"sidecars,omitempty"` // SecurityContext specifies security context for group diff --git a/pkg/apis/deployment/v2alpha1/zz_generated.deepcopy.go b/pkg/apis/deployment/v2alpha1/zz_generated.deepcopy.go index f4592306e..9aa4530c6 100644 --- a/pkg/apis/deployment/v2alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/deployment/v2alpha1/zz_generated.deepcopy.go @@ -1976,6 +1976,11 @@ func (in *ServerGroupSpec) DeepCopyInto(out *ServerGroupSpec) { *out = new(v1.NodeAffinity) (*in).DeepCopyInto(*out) } + if in.SidecarCoreNames != nil { + in, out := &in.SidecarCoreNames, &out.SidecarCoreNames + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.Sidecars != nil { in, out := &in.Sidecars, &out.Sidecars *out = make([]v1.Container, len(*in)) diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index ef96405c6..6bc5b3238 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -27,18 +27,18 @@ import ( 
"strings" "time" - "github.com/arangodb/kube-arangodb/pkg/util/globals" - "github.com/rs/zerolog" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/backup/utils" "github.com/arangodb/kube-arangodb/pkg/deployment/pod" "github.com/arangodb/kube-arangodb/pkg/deployment/resources" "github.com/arangodb/kube-arangodb/pkg/util/arangod" "github.com/arangodb/kube-arangodb/pkg/util/constants" "github.com/arangodb/kube-arangodb/pkg/util/errors" + "github.com/arangodb/kube-arangodb/pkg/util/globals" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/interfaces" @@ -141,7 +141,7 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, cac pod, err := ib.Context.GetCachedStatus().PodReadInterface().Get(ctxChild, podName, metav1.GetOptions{}) if err == nil { // Pod found - if k8sutil.IsPodFailed(pod) { + if k8sutil.IsPodFailed(pod, utils.StringList{k8sutil.ServerContainerName}) { // Wait some time before deleting the pod if time.Now().After(pod.GetCreationTimestamp().Add(30 * time.Second)) { err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error { diff --git a/pkg/deployment/resources/pod_cleanup.go b/pkg/deployment/resources/pod_cleanup.go index ec3e4b8b0..9952475fa 100644 --- a/pkg/deployment/resources/pod_cleanup.go +++ b/pkg/deployment/resources/pod_cleanup.go @@ -17,8 +17,6 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package resources @@ -26,14 +24,13 @@ import ( "context" "time" - "github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector" v1 "k8s.io/api/core/v1" + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + 
"github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector" "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" - - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" ) const ( @@ -55,15 +52,19 @@ func (r *Resources) CleanupTerminatedPods(ctx context.Context, cachedStatus insp return nil } - if !(k8sutil.IsPodSucceeded(pod) || k8sutil.IsPodFailed(pod) || k8sutil.IsPodTerminating(pod)) { - return nil - } - // Find member status memberStatus, group, found := status.Members.MemberStatusByPodName(pod.GetName()) if !found { log.Debug().Str("pod", pod.GetName()).Msg("no memberstatus found for pod. Performing cleanup") } else { + spec := r.context.GetSpec() + coreContainers := spec.GetCoreContainers(group) + if !(k8sutil.IsPodSucceeded(pod, coreContainers) || k8sutil.IsPodFailed(pod, coreContainers) || + k8sutil.IsPodTerminating(pod)) { + // The pod is not being terminated or failed or succeeded. 
+ return nil + } + // Check member termination condition if !memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) { if !group.IsStateless() { diff --git a/pkg/deployment/resources/pod_creator_arangod.go b/pkg/deployment/resources/pod_creator_arangod.go index 56cd7e3b7..f72bed33a 100644 --- a/pkg/deployment/resources/pod_creator_arangod.go +++ b/pkg/deployment/resources/pod_creator_arangod.go @@ -289,6 +289,10 @@ func (m *MemberArangoDPod) Validate(cachedStatus interfaces.Inspector) error { return err } + if err := validateSidecars(m.groupSpec.SidecarCoreNames, m.groupSpec.GetSidecars()); err != nil { + return err + } + return nil } @@ -364,6 +368,7 @@ func (m *MemberArangoDPod) GetSidecars(pod *core.Pod) error { // A sidecar provided by the user sidecars := m.groupSpec.GetSidecars() if len(sidecars) > 0 { + addLifecycleSidecar(m.groupSpec.SidecarCoreNames, sidecars) pod.Spec.Containers = append(pod.Spec.Containers, sidecars...) } @@ -634,3 +639,60 @@ func (a *ArangoVersionCheckContainer) GetName() string { func (a *ArangoVersionCheckContainer) GetProbes() (*core.Probe, *core.Probe, *core.Probe, error) { return nil, nil, nil, nil } + +// validateSidecars checks if all core names are in the sidecar list. +// It returns error when at least one core name is missing. +func validateSidecars(coreNames []string, sidecars []core.Container) error { + for _, coreName := range coreNames { + if api.IsReservedServerGroupContainerName(coreName) { + return fmt.Errorf("sidecar core name \"%s\" can not be used because it is reserved", coreName) + } + + found := false + for _, sidecar := range sidecars { + if sidecar.Name == coreName { + found = true + break + } + } + + if !found { + return fmt.Errorf("sidecar core name \"%s\" does not exist on the sidecars' list", coreName) + } + } + + return nil + +} + +// addLifecycleSidecar adds lifecycle to all core sidecar unless the sidecar contains its own custom lifecycle. 
+func addLifecycleSidecar(coreNames []string, sidecars []core.Container) error { + for _, coreName := range coreNames { + for i, sidecar := range sidecars { + if coreName != sidecar.Name { + continue + } + + if sidecar.Lifecycle != nil && sidecar.Lifecycle.PreStop != nil { + // A user provided a custom lifecycle preStop, so break and check next core name container. + break + } + + lifecycle, err := k8sutil.NewLifecycleFinalizers() + if err != nil { + return err + } + + if sidecar.Lifecycle == nil { + sidecars[i].Lifecycle = lifecycle + } else { + // Set only preStop, because user can provide postStart lifecycle. + sidecars[i].Lifecycle.PreStop = lifecycle.PreStop + } + + break + } + } + + return nil +} diff --git a/pkg/deployment/resources/pod_creator_sync.go b/pkg/deployment/resources/pod_creator_sync.go index 78ec63a10..a132f8372 100644 --- a/pkg/deployment/resources/pod_creator_sync.go +++ b/pkg/deployment/resources/pod_creator_sync.go @@ -251,6 +251,7 @@ func (m *MemberSyncPod) GetSidecars(pod *core.Pod) error { // A sidecar provided by the user sidecars := m.groupSpec.GetSidecars() if len(sidecars) > 0 { + addLifecycleSidecar(m.groupSpec.SidecarCoreNames, sidecars) pod.Spec.Containers = append(pod.Spec.Containers, sidecars...) 
} @@ -361,6 +362,10 @@ func (m *MemberSyncPod) Init(ctx context.Context, cachedStatus interfaces.Inspec } func (m *MemberSyncPod) Validate(_ interfaces.Inspector) error { + if err := validateSidecars(m.groupSpec.SidecarCoreNames, m.groupSpec.GetSidecars()); err != nil { + return err + } + return nil } diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go index 3f5a2493f..a41f343ba 100644 --- a/pkg/deployment/resources/pod_finalizers.go +++ b/pkg/deployment/resources/pod_finalizers.go @@ -52,6 +52,8 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu log := r.log.With().Str("pod-name", p.GetName()).Logger() var removalList []string + // When the main container is terminated, then the whole pod should be terminated, + // so sidecar core containers' names should not be checked here. isServerContainerDead := !k8sutil.IsPodServerContainerRunning(p) for _, f := range p.ObjectMeta.GetFinalizers() { diff --git a/pkg/deployment/resources/pod_inspector.go b/pkg/deployment/resources/pod_inspector.go index 7da8117b2..bfc5089d7 100644 --- a/pkg/deployment/resources/pod_inspector.go +++ b/pkg/deployment/resources/pod_inspector.go @@ -96,9 +96,12 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter return nil } + spec := r.context.GetSpec() + coreContainers := spec.GetCoreContainers(group) + // Update state updateMemberStatusNeeded := false - if k8sutil.IsPodSucceeded(pod) { + if k8sutil.IsPodSucceeded(pod, coreContainers) { // Pod has terminated with exit code 0. 
wasTerminated := memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) if memberStatus.Conditions.Update(api.ConditionTypeTerminated, true, "Pod Succeeded", "") { @@ -112,7 +115,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter r.InvalidateSyncStatus() } } - } else if k8sutil.IsPodFailed(pod) { + } else if k8sutil.IsPodFailed(pod, coreContainers) { // Pod has terminated with at least 1 container with a non-zero exit code. wasTerminated := memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) if memberStatus.Conditions.Update(api.ConditionTypeTerminated, true, "Pod Failed", "") { @@ -121,11 +124,9 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter switch container { case api.ServerGroupReservedInitContainerNameVersionCheck: if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok { - if t := c.State.Terminated; t != nil { - if t := c.State.Terminated; t != nil && t.ExitCode == 11 { - memberStatus.Upgrade = true - updateMemberStatusNeeded = true - } + if t := c.State.Terminated; t != nil && t.ExitCode == 11 { + memberStatus.Upgrade = true + updateMemberStatusNeeded = true } } case api.ServerGroupReservedInitContainerNameUpgrade: @@ -133,20 +134,18 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter } if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok { - if t := c.State.Terminated; t != nil { - if t := c.State.Terminated; t != nil && t.ExitCode != 0 { - log.Warn().Str("member", memberStatus.ID). - Str("pod", pod.GetName()). - Str("container", container). - Str("uid", string(pod.GetUID())). - Int32("exit-code", t.ExitCode). - Str("reason", t.Reason). - Str("message", t.Message). - Int32("signal", t.Signal). - Time("started", t.StartedAt.Time). - Time("finished", t.FinishedAt.Time). 
- Msgf("Pod failed in unexpected way: Init Container failed") - } + if t := c.State.Terminated; t != nil && t.ExitCode != 0 { + log.Warn().Str("member", memberStatus.ID). + Str("pod", pod.GetName()). + Str("container", container). + Str("uid", string(pod.GetUID())). + Int32("exit-code", t.ExitCode). + Str("reason", t.Reason). + Str("message", t.Message). + Int32("signal", t.Signal). + Time("started", t.StartedAt.Time). + Time("finished", t.FinishedAt.Time). + Msgf("Pod failed in unexpected way: Init Container failed") } } } @@ -155,20 +154,18 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter if containers := k8sutil.GetFailedContainerNames(pod.Status.ContainerStatuses); len(containers) > 0 { for _, container := range containers { if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.ContainerStatuses, container); ok { - if t := c.State.Terminated; t != nil { - if t := c.State.Terminated; t != nil && t.ExitCode != 0 { - log.Warn().Str("member", memberStatus.ID). - Str("pod", pod.GetName()). - Str("container", container). - Str("uid", string(pod.GetUID())). - Int32("exit-code", t.ExitCode). - Str("reason", t.Reason). - Str("message", t.Message). - Int32("signal", t.Signal). - Time("started", t.StartedAt.Time). - Time("finished", t.FinishedAt.Time). - Msgf("Pod failed in unexpected way: Core Container failed") - } + if t := c.State.Terminated; t != nil && t.ExitCode != 0 { + log.Warn().Str("member", memberStatus.ID). + Str("pod", pod.GetName()). + Str("container", container). + Str("uid", string(pod.GetUID())). + Int32("exit-code", t.ExitCode). + Str("reason", t.Reason). + Str("message", t.Message). + Int32("signal", t.Signal). + Time("started", t.StartedAt.Time). + Time("finished", t.FinishedAt.Time). 
+ Msgf("Pod failed in unexpected way: Core Container failed") } } } @@ -222,7 +219,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter } // End of Topology labels - if k8sutil.IsContainerReady(pod, k8sutil.ServerContainerName) { + if k8sutil.AreContainersReady(pod, coreContainers) { // Pod is now ready if memberStatus.Conditions.Update(api.ConditionTypeReady, true, "Pod Ready", "") { log.Debug().Str("pod-name", pod.GetName()).Msg("Updating member condition Ready & Initialised to true") diff --git a/pkg/util/k8sutil/pods.go b/pkg/util/k8sutil/pods.go index ffad91243..afe34f7ae 100644 --- a/pkg/util/k8sutil/pods.go +++ b/pkg/util/k8sutil/pods.go @@ -17,9 +17,6 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// Author Tomasz Mielech -// package k8sutil @@ -30,21 +27,17 @@ import ( "strings" "time" - "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod" + core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/json" + "github.com/arangodb/kube-arangodb/pkg/backup/utils" "github.com/arangodb/kube-arangodb/pkg/util" - "github.com/arangodb/kube-arangodb/pkg/util/errors" - + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/interfaces" - - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/json" - - "k8s.io/apimachinery/pkg/api/resource" - - core "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -102,9 +95,10 @@ func IsPodReady(pod *core.Pod) bool { return condition != nil && condition.Status == core.ConditionTrue } -// IsContainerReady returns true if the PodReady condition on -// the given pod is set to true. -func IsContainerReady(pod *core.Pod, container string) bool { +// AreContainersReady checks whether Pod is considered as ready. 
+// Returns true if the PodReady condition on the given pod is set to true, +// or all provided containers' names are running and are not in the list of failed containers. +func AreContainersReady(pod *core.Pod, coreContainers utils.StringList) bool { condition := getPodCondition(&pod.Status, core.PodReady) if condition == nil { return false @@ -114,21 +108,32 @@ return true } - if !IsContainerRunning(pod, container) { - return false + // Check if all required containers are running. + for _, c := range coreContainers { + if !IsContainerRunning(pod, c) { + return false + } } + // From here on all required containers are running, but unready condition must be checked additionally. switch condition.Reason { case ServerContainerConditionContainersNotReady: - if strings.HasPrefix(condition.Message, ServerContainerConditionPrefix) { - n := strings.TrimPrefix(condition.Message, ServerContainerConditionPrefix) + if !strings.HasPrefix(condition.Message, ServerContainerConditionPrefix) { + return false + } - return !strings.Contains(n, container) + unreadyContainers := strings.TrimPrefix(condition.Message, ServerContainerConditionPrefix) + for _, c := range coreContainers { + if strings.Contains(unreadyContainers, c) { + // The container is on the list with unready containers. + return false + } } - return false - default: - return false + + return true } + + return false } // GetPodByName returns pod if it exists among the pods' list @@ -163,45 +168,77 @@ return false } -// IsPodSucceeded returns true if the arangodb container of the pod -// has terminated with exit code 0. -func IsPodSucceeded(pod *core.Pod) bool { +// IsPodSucceeded returns true when all core containers are terminated with a zero exit code, +// or the whole pod has succeeded.
+func IsPodSucceeded(pod *core.Pod, coreContainers utils.StringList) bool { if pod.Status.Phase == core.PodSucceeded { return true - } else { - for _, c := range pod.Status.ContainerStatuses { - if c.Name != ServerContainerName { - continue - } + } - t := c.State.Terminated - if t != nil { - return t.ExitCode == 0 - } + core, succeeded := 0, 0 + for _, c := range pod.Status.ContainerStatuses { + if !coreContainers.Has(c.Name) { + // It is not core container, so check next one status. + continue + } + + core++ + if t := c.State.Terminated; t != nil && t.ExitCode == 0 { + succeeded++ } - return false } + + if core > 0 && core == succeeded { + // If there are some core containers and all of them succeeded then return that the whole pod succeeded. + return true + } + + return false } -// IsPodFailed returns true if the arangodb container of the pod -// has terminated wih a non-zero exit code. -func IsPodFailed(pod *core.Pod) bool { +// IsPodFailed returns true when one of the core containers is terminated with a non-zero exit code, +// or the whole pod has failed. +func IsPodFailed(pod *core.Pod, coreContainers utils.StringList) bool { if pod.Status.Phase == core.PodFailed { return true - } else { - for _, c := range pod.Status.ContainerStatuses { - if c.Name != ServerContainerName { - continue - } + } + + allCore, succeeded, failed := 0, 0, 0 + for _, c := range pod.Status.ContainerStatuses { + if !coreContainers.Has(c.Name) { + // It is not core container, so check next one status. + continue + } - t := c.State.Terminated - if t != nil { - return t.ExitCode != 0 + allCore++ + if t := c.State.Terminated; t != nil { + // A core container is terminated. + if t.ExitCode != 0 { + failed++ + } else { + succeeded++ + } } } + } + if failed == 0 && succeeded == 0 { + // All core containers are not terminated. return false } + + if failed > 0 { + // Some (or all) core containers have been terminated.
+ // Some other core containers can be still running or succeeded, + // but the whole pod is considered as failed. + return true + } else if allCore == succeeded { + // All core containers are succeeded, so the pod is not failed. + // The function `IsPodSucceeded` should recognize it in next iteration. + return false + } + + // Some core containers are succeeded, but not all of them. + return true } // IsContainerFailed returns true if the arangodb container diff --git a/pkg/util/k8sutil/pods_test.go b/pkg/util/k8sutil/pods_test.go index d055216d5..7c517d44a 100644 --- a/pkg/util/k8sutil/pods_test.go +++ b/pkg/util/k8sutil/pods_test.go @@ -17,8 +17,6 @@ // // Copyright holder is ArangoDB GmbH, Cologne, Germany // -// Author Ewout Prangsma -// package k8sutil @@ -27,6 +25,8 @@ import ( "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" + + "github.com/arangodb/kube-arangodb/pkg/backup/utils" ) // TestIsPodReady tests IsPodReady. @@ -56,30 +56,265 @@ func TestIsPodReady(t *testing.T) { // TestIsPodFailed tests IsPodFailed. 
func TestIsPodFailed(t *testing.T) { - assert.False(t, IsPodFailed(&v1.Pod{})) - assert.False(t, IsPodFailed(&v1.Pod{ - Status: v1.PodStatus{ - Phase: v1.PodRunning, + type args struct { + pod *v1.Pod + coreContainers utils.StringList + } + tests := map[string]struct { + args args + want bool + }{ + "empty pod": { + args: args{ + pod: &v1.Pod{}, + }, }, - })) - assert.True(t, IsPodFailed(&v1.Pod{ - Status: v1.PodStatus{ - Phase: v1.PodFailed, + "pod is running": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + }, + }, + }, }, - })) + "pod is failed": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodFailed, + }, + }, + }, + want: true, + }, + "one core container failed": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"something", "core_container"}, + }, + want: true, + }, + "one non-core container failed": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "non_core_container", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 1, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"something", "core_container"}, + }, + }, + "one core container succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"something", "core_container"}, + }, + }, + "first core container succeeded and second is still running": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container1", + State: 
v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + Name: "core_container2", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"core_container1", "core_container2"}, + }, + want: true, + }, + "all containers succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container1", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + { + Name: "core_container2", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"core_container1", "core_container2"}, + }, + }, + } + + for testName, test := range tests { + t.Run(testName, func(t *testing.T) { + got := IsPodFailed(test.args.pod, test.args.coreContainers) + assert.Equal(t, test.want, got) + }) + } } -// TestIsPodSucceeded tests IsPodSucceeded. 
func TestIsPodSucceeded(t *testing.T) { - assert.False(t, IsPodSucceeded(&v1.Pod{})) - assert.False(t, IsPodSucceeded(&v1.Pod{ - Status: v1.PodStatus{ - Phase: v1.PodRunning, + type args struct { + pod *v1.Pod + coreContainers utils.StringList + } + tests := map[string]struct { + args args + want bool + }{ + "empty pod": { + args: args{ + pod: &v1.Pod{}, + }, }, - })) - assert.True(t, IsPodSucceeded(&v1.Pod{ - Status: v1.PodStatus{ - Phase: v1.PodSucceeded, + "pod is succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + }, + }, + }, + want: true, }, - })) + "all core containers succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container1", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + { + Name: "core_container2", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + { + Name: "non-core_container", + }, + }, + }, + }, + coreContainers: utils.StringList{"core_container1", "core_container2"}, + }, + want: true, + }, + "non-core container succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container1", + }, + { + Name: "non-core_container", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + }, + }, + }, + coreContainers: utils.StringList{"core_container1"}, + }, + }, + "the only one core container succeeded": { + args: args{ + pod: &v1.Pod{ + Status: v1.PodStatus{ + ContainerStatuses: []v1.ContainerStatus{ + { + Name: "core_container1", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + { + Name: "non-core_container", + }, + }, + }, + }, + coreContainers: utils.StringList{"core_container1"}, + }, + want: true, + }, + } + + for testName, test := range tests { + 
t.Run(testName, func(t *testing.T) { + got := IsPodSucceeded(test.args.pod, test.args.coreContainers) + assert.Equal(t, test.want, got) + }) + } }