Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- (Bugfix) Fix GetClient lock system
- (Feature) Backup InProgress Agency key discovery
- (Feature) Backup & Maintenance Conditions
- (Bugfix) Disable member removal in case of health failure

## [1.2.9](https://github.com/arangodb/kube-arangodb/tree/1.2.9) (2022-03-30)
- (Feature) Improve Kubernetes clientsets management
Expand Down
4 changes: 2 additions & 2 deletions pkg/deployment/deployment_inspector.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
return minInspectionInterval, errors.Wrapf(err, "AccessPackage creation failed")
}

// Inspect deployment for obsolete members
if err := d.resources.CleanupRemovedMembers(ctx, d.GetMembersState().Health()); err != nil {
// Sync the cluster-membership conditions of the deployment members
if err := d.resources.SyncMembersInCluster(ctx, d.GetMembersState().Health()); err != nil {
return minInspectionInterval, errors.Wrapf(err, "Removed member cleanup failed")
}

Expand Down
51 changes: 10 additions & 41 deletions pkg/deployment/resources/member_cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
memberState "github.com/arangodb/kube-arangodb/pkg/deployment/member"
"github.com/arangodb/kube-arangodb/pkg/metrics"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
arangomemberv1 "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember/v1"
)

Expand All @@ -47,8 +46,8 @@ var (
cleanupRemovedMembersCounters = metrics.MustRegisterCounterVec(metricsComponent, "cleanup_removed_members", "Number of cleanup-removed-members actions", metrics.DeploymentName, metrics.Result)
)

// CleanupRemovedMembers removes all arangod members that are no longer part of ArangoDB deployment.
func (r *Resources) CleanupRemovedMembers(ctx context.Context, health memberState.Health) error {
// SyncMembersInCluster sets the proper condition for all arangod members that belong to the deployment.
func (r *Resources) SyncMembersInCluster(ctx context.Context, health memberState.Health) error {
if health.Error != nil {
r.log.Info().Err(health.Error).Msg("Health of the cluster is missing")
return nil
Expand All @@ -58,7 +57,7 @@ func (r *Resources) CleanupRemovedMembers(ctx context.Context, health memberStat
switch r.context.GetSpec().GetMode() {
case api.DeploymentModeCluster:
deploymentName := r.context.GetAPIObject().GetName()
if err := r.cleanupRemovedClusterMembers(ctx, health); err != nil {
if err := r.syncMembersInCluster(ctx, health); err != nil {
cleanupRemovedMembersCounters.WithLabelValues(deploymentName, metrics.Failed).Inc()
return errors.WithStack(err)
}
Expand All @@ -70,29 +69,25 @@ func (r *Resources) CleanupRemovedMembers(ctx context.Context, health memberStat
}
}

// cleanupRemovedClusterMembers removes all arangod members that are no longer part of the cluster.
func (r *Resources) cleanupRemovedClusterMembers(ctx context.Context, health memberState.Health) error {
// syncMembersInCluster sets proper condition for all arangod members that are part of the cluster.
func (r *Resources) syncMembersInCluster(ctx context.Context, health memberState.Health) error {
log := r.log

serverFound := func(id string) bool {
_, found := health.Members[driver.ServerID(id)]
return found
}

// Loop over all members that can be removed
status, lastVersion := r.context.GetStatus()
updateStatusNeeded := false
var podNamesToRemove, pvcNamesToRemove []string

status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error {
if group != api.ServerGroupCoordinators && group != api.ServerGroupDBServers {
// We're not interested in these other groups
return nil
}
for _, m := range list {
log := log.With().
Str("member", m.ID).
Str("role", group.AsRole()).
Logger()
log := log.With().Str("member", m.ID).Str("role", group.AsRole()).Logger()
if serverFound(m.ID) {
// Member is (still) found, skip it
if m.Conditions.Update(api.ConditionTypeMemberOfCluster, true, "", "") {
Expand All @@ -104,25 +99,13 @@ func (r *Resources) cleanupRemovedClusterMembers(ctx context.Context, health mem
}
continue
} else if !m.Conditions.IsTrue(api.ConditionTypeMemberOfCluster) {
// Member is not yet recorded as member of cluster
if m.Age() < minMemberAge {
log.Debug().Dur("age", m.Age()).Msg("Member age is below minimum for removal")
log.Debug().Dur("age", m.Age()).Msg("Member is not yet recorded as member of cluster")
continue
}
log.Info().Msg("Member has never been part of the cluster for a long time. Removing it.")
log.Warn().Msg("Member can not be found in cluster")
} else {
// Member no longer part of cluster, remove it
log.Info().Msg("Member is no longer part of the ArangoDB cluster. Removing it.")
}
log.Info().Msg("Removing member")
status.Members.RemoveByID(m.ID, group)
updateStatusNeeded = true
// Remove Pod & PVC (if any)
if m.PodName != "" {
podNamesToRemove = append(podNamesToRemove, m.PodName)
}
if m.PersistentVolumeClaimName != "" {
pvcNamesToRemove = append(pvcNamesToRemove, m.PersistentVolumeClaimName)
log.Info().Msg("Member is no longer part of the ArangoDB cluster")
}
}
return nil
Expand All @@ -137,20 +120,6 @@ func (r *Resources) cleanupRemovedClusterMembers(ctx context.Context, health mem
}
}

for _, podName := range podNamesToRemove {
log.Info().Str("pod", podName).Msg("Removing obsolete member pod")
if err := r.context.DeletePod(ctx, podName, metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) {
log.Warn().Err(err).Str("pod", podName).Msg("Failed to remove obsolete pod")
}
}

for _, pvcName := range pvcNamesToRemove {
log.Info().Str("pvc", pvcName).Msg("Removing obsolete member PVC")
if err := r.context.DeletePvc(ctx, pvcName); err != nil && !k8sutil.IsNotFound(err) {
log.Warn().Err(err).Str("pvc", pvcName).Msg("Failed to remove obsolete PVC")
}
}

return nil
}

Expand Down