diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f4182049..16608f6ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - (Feature) (AT) Add ArangoTask API - (Bugfix) Fix NPE in State fetcher - (Refactor) Configurable throttle inspector +- (Bugfix) Skip Replace operation on DBServers if they need to be scaled down ## [1.2.8](https://github.com/arangodb/kube-arangodb/tree/1.2.8) (2022-02-24) - Do not check License V2 on Community images diff --git a/pkg/deployment/reconcile/plan_builder_normal.go b/pkg/deployment/reconcile/plan_builder_normal.go index c67a14fdc..70157704e 100644 --- a/pkg/deployment/reconcile/plan_builder_normal.go +++ b/pkg/deployment/reconcile/plan_builder_normal.go @@ -98,6 +98,12 @@ func createMemberFailedRestorePlan(ctx context.Context, // Check for members in failed state status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + failed := 0 + for _, m := range members { + if m.Phase == api.MemberPhaseFailed { + failed++ + } + } for _, m := range members { if m.Phase != api.MemberPhaseFailed || len(plan) > 0 { continue @@ -112,6 +118,11 @@ func createMemberFailedRestorePlan(ctx context.Context, continue } + if c := spec.DBServers.GetCount(); c <= len(members)-failed { + // The number of non-failed members already meets or exceeds the count in the spec, so this member should not be recreated + continue + } + if agencyState.Plan.Collections.IsDBServerInDatabases(m.ID) { // DBServer still exists in agency plan!
Will not be removed, but needs to be recreated memberLog.Msg("Recreating DBServer - it cannot be removed gracefully") diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index b743b2980..1edaf3f7b 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -1058,13 +1058,13 @@ func TestCreatePlan(t *testing.T) { ExpectedLog: "Creating member replacement plan because member has failed", }, { - Name: "DBServer in failed state", + Name: "DBServer in failed state - recreate", context: &testContext{ ArangoDeployment: deploymentTemplate.DeepCopy(), }, Helper: func(ad *api.ArangoDeployment) { ad.Spec.DBServers = api.ServerGroupSpec{ - Count: util.NewInt(2), + Count: util.NewInt(3), } ad.Status.Members.DBServers[0].Phase = api.MemberPhaseFailed ad.Status.Members.DBServers[0].ID = "id" @@ -1076,6 +1076,26 @@ func TestCreatePlan(t *testing.T) { }, ExpectedLog: "Creating member replacement plan because member has failed", }, + { + Name: "DBServer in failed state - remove", + context: &testContext{ + ArangoDeployment: deploymentTemplate.DeepCopy(), + }, + Helper: func(ad *api.ArangoDeployment) { + ad.Spec.DBServers = api.ServerGroupSpec{ + Count: util.NewInt(2), + } + ad.Status.Members.DBServers[2].Phase = api.MemberPhaseFailed + ad.Status.Members.DBServers[2].ID = "id3" + }, + ExpectedPlan: []api.Action{ + actions.NewAction(api.ActionTypeCleanOutMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeShutdownMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + }, + ExpectedLog: "Creating scale-down plan", + }, { Name: "CleanOut DBserver", context: &testContext{ @@ -1101,6 +1121,32 @@ func TestCreatePlan(t 
*testing.T) { }, ExpectedLog: "Creating dbserver replacement plan because server is cleanout in created phase", }, + { + Name: "CleanOut DBserver - scale down", + context: &testContext{ + ArangoDeployment: deploymentTemplate.DeepCopy(), + }, + Helper: func(ad *api.ArangoDeployment) { + ad.Spec.DBServers = api.ServerGroupSpec{ + Count: util.NewInt(2), + } + ad.Status.Members.DBServers[2].ID = "id3" + ad.Status.Members.DBServers[2].Phase = api.MemberPhaseCreated + ad.Status.Members.DBServers[2].Conditions = api.ConditionList{ + { + Type: api.ConditionTypeCleanedOut, + Status: core.ConditionTrue, + }, + } + }, + ExpectedPlan: []api.Action{ + actions.NewAction(api.ActionTypeCleanOutMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeShutdownMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + actions.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, withPredefinedMember("id3")), + }, + ExpectedLog: "Creating scale-down plan", + }, { Name: "Scale down DBservers", context: &testContext{