From 3c73baa707520df085d6024df13140da37b6197f Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 06:44:25 +0000 Subject: [PATCH 1/7] Do not recreate Agency and Coordinator members --- .../reconcile/action_recreate_member.go | 2 +- pkg/deployment/reconcile/plan_builder.go | 27 +++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pkg/deployment/reconcile/action_recreate_member.go b/pkg/deployment/reconcile/action_recreate_member.go index ca9b8ab95..967bbe752 100644 --- a/pkg/deployment/reconcile/action_recreate_member.go +++ b/pkg/deployment/reconcile/action_recreate_member.go @@ -63,7 +63,7 @@ func (a *actionRecreateMember) Start(ctx context.Context) (bool, error) { _, err := a.actionCtx.GetPvc(m.PersistentVolumeClaimName) if err != nil { if kubeErrors.IsNotFound(err) { - return false, fmt.Errorf("PVC is missing %s. DBServer wont be recreated without old PV", m.PersistentVolumeClaimName) + return false, fmt.Errorf("PVC is missing %s. Member wont be recreated without old PV", m.PersistentVolumeClaimName) } return false, maskAny(err) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 97401dd8f..ee43cdf70 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -155,15 +155,26 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, // Everything is fine, proceed } - memberLog.Msg("Creating member replacement plan because member has failed") - newID := "" - if group == api.ServerGroupAgents { - newID = m.ID // Agents cannot (yet) be replaced with new IDs + switch group { + case api.ServerGroupAgents: + // For agents just recreate member do not rotate ID, do not remove PVC or service + memberLog.Msg("Restoring old member. For agency members recreation of PVC is not supported - to prevent DataLoss") + plan = append(plan, + api.NewAction(api.ActionTypeRecreateMember, group, m.ID)) + case api.ServerGroupCoordinators: + // For coordinators rotation of ID does not make sense at all - they are anyway stateless. + // This will prevent zombie coordinators creation + memberLog.Msg("Restoring old member. For coordinator rotation of ID does not make sense at all") + plan = append(plan, + api.NewAction(api.ActionTypeRecreateMember, group, m.ID)) + default: + memberLog.Msg("Creating member replacement plan because member has failed") + plan = append(plan, + api.NewAction(api.ActionTypeRemoveMember, group, m.ID), + api.NewAction(api.ActionTypeAddMember, group, ""), + ) + } - plan = append(plan, - api.NewAction(api.ActionTypeRemoveMember, group, m.ID), - api.NewAction(api.ActionTypeAddMember, group, newID), - ) } return nil }) From f281b71655945e6e9a421f1ff42422d6a9cb06b0 Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 07:03:06 +0000 Subject: [PATCH 2/7] Do not remove Coordinators and Agency from members --- pkg/deployment/reconcile/plan_builder_test.go | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index c0763dd26..eb005a274 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -681,7 +681,7 @@ func TestCreatePlan(t *testing.T) { ExpectedLog: "Creating rotation plan", }, { - Name: "Member in failed state", + Name: "Agent in failed state", context: &testContext{ ArangoDeployment: deploymentTemplate.DeepCopy(), }, @@ -690,10 +690,45 @@ func TestCreatePlan(t *testing.T) { Count: util.NewInt(2), } ad.Status.Members.Agents[0].Phase = api.MemberPhaseFailed + ad.Status.Members.Agents[0].ID = "id" }, ExpectedPlan: []api.Action{ - api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupAgents, ""), - api.NewAction(api.ActionTypeAddMember, api.ServerGroupAgents, ""), + api.NewAction(api.ActionTypeRecreateMember, api.ServerGroupAgents, "id"), + }, + ExpectedLog: "Restoring old member. For agency members recreation of PVC is not supported - to prevent DataLoss", + }, + { + Name: "Coordinator in failed state", + context: &testContext{ + ArangoDeployment: deploymentTemplate.DeepCopy(), + }, + Helper: func(ad *api.ArangoDeployment) { + ad.Spec.Coordinators = api.ServerGroupSpec{ + Count: util.NewInt(2), + } + ad.Status.Members.Coordinators[0].Phase = api.MemberPhaseFailed + ad.Status.Members.Coordinators[0].ID = "id" + }, + ExpectedPlan: []api.Action{ + api.NewAction(api.ActionTypeRecreateMember, api.ServerGroupCoordinators, "id"), + }, + ExpectedLog: "Restoring old member. For coordinator rotation of ID does not make sense at all", + }, + { + Name: "DBServer in failed state", + context: &testContext{ + ArangoDeployment: deploymentTemplate.DeepCopy(), + }, + Helper: func(ad *api.ArangoDeployment) { + ad.Spec.DBServers = api.ServerGroupSpec{ + Count: util.NewInt(2), + } + ad.Status.Members.DBServers[0].Phase = api.MemberPhaseFailed + ad.Status.Members.DBServers[0].ID = "id" + }, + ExpectedPlan: []api.Action{ + api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, "id"), + api.NewAction(api.ActionTypeAddMember, api.ServerGroupDBServers, ""), }, ExpectedLog: "Creating member replacement plan because member has failed", }, From 1b2b176d27bfef0ea89e6bdaab8b7665ace50440 Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 07:15:34 +0000 Subject: [PATCH 3/7] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27ac61ab2..8eca6eff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Change Log ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) +- Prevent Coordinator and Agency Members recreation - Added Customizable Volumes and VolumeMounts for ArangoDB server container - Added MemoryOverride flag for ArangoDB >= 3.6.3 - Improved Rotation discovery process From ae9b93a54c89882951b8d4b6389883474d4632e8 Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 08:31:13 +0000 Subject: [PATCH 4/7] Fix typo --- pkg/deployment/reconcile/action_recreate_member.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/deployment/reconcile/action_recreate_member.go b/pkg/deployment/reconcile/action_recreate_member.go index 967bbe752..2d360898f 100644 --- a/pkg/deployment/reconcile/action_recreate_member.go +++ b/pkg/deployment/reconcile/action_recreate_member.go @@ -63,7 +63,7 @@ func (a *actionRecreateMember) Start(ctx context.Context) (bool, error) { _, err := a.actionCtx.GetPvc(m.PersistentVolumeClaimName) if err != nil { if kubeErrors.IsNotFound(err) { - return false, fmt.Errorf("PVC is missing %s. Member wont be recreated without old PV", m.PersistentVolumeClaimName) + return false, fmt.Errorf("PVC is missing %s. Members won't be recreated without old PV", m.PersistentVolumeClaimName) } return false, maskAny(err) From caf80112a1d05cf05609f983cc68a7418379b16e Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 10:55:52 +0000 Subject: [PATCH 5/7] restore old coordinator logic --- pkg/deployment/reconcile/plan_builder.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index ee43cdf70..89ac6fa78 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -161,12 +161,6 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, memberLog.Msg("Restoring old member. For agency members recreation of PVC is not supported - to prevent DataLoss") plan = append(plan, api.NewAction(api.ActionTypeRecreateMember, group, m.ID)) - case api.ServerGroupCoordinators: - // For coordinators rotation of ID does not make sense at all - they are anyway stateless. - // This will prevent zombie coordinators creation - memberLog.Msg("Restoring old member. For coordinator rotation of ID does not make sense at all") - plan = append(plan, - api.NewAction(api.ActionTypeRecreateMember, group, m.ID)) default: memberLog.Msg("Creating member replacement plan because member has failed") plan = append(plan, From 49c4b223f84a929a3ba5404c8c03f4592c6cca3e Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 10:58:37 +0000 Subject: [PATCH 6/7] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eca6eff4..54dc8e5ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Change Log ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) -- Prevent Coordinator and Agency Members recreation +- Prevent Agency Members recreation - Added Customizable Volumes and VolumeMounts for ArangoDB server container - Added MemoryOverride flag for ArangoDB >= 3.6.3 - Improved Rotation discovery process From e62b9bf2468a4ad561eb77c4eb39a817b677e665 Mon Sep 17 00:00:00 2001 From: ajanikow Date: Mon, 23 Mar 2020 19:36:44 +0000 Subject: [PATCH 7/7] Fix tests --- pkg/deployment/reconcile/plan_builder_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index eb005a274..2a3907b56 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -710,9 +710,10 @@ func TestCreatePlan(t *testing.T) { ad.Status.Members.Coordinators[0].ID = "id" }, ExpectedPlan: []api.Action{ - api.NewAction(api.ActionTypeRecreateMember, api.ServerGroupCoordinators, "id"), + api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupCoordinators, "id"), + api.NewAction(api.ActionTypeAddMember, api.ServerGroupCoordinators, ""), }, - ExpectedLog: "Restoring old member. For coordinator rotation of ID does not make sense at all", + ExpectedLog: "Creating member replacement plan because member has failed", }, { Name: "DBServer in failed state",