Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run CAPI Backup on workload cluster during upgrade #7364

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions pkg/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,29 @@ type Task interface {

// Command context maintains the mutable and shared entities.
type CommandContext struct {
ClientFactory interfaces.ClientFactory
Bootstrapper interfaces.Bootstrapper
Provider providers.Provider
ClusterManager interfaces.ClusterManager
GitOpsManager interfaces.GitOpsManager
Validations interfaces.Validator
Writer filewriter.FileWriter
EksdInstaller interfaces.EksdInstaller
PackageInstaller interfaces.PackageInstaller
EksdUpgrader interfaces.EksdUpgrader
ClusterUpgrader interfaces.ClusterUpgrader
ClusterCreator interfaces.ClusterCreator
CAPIManager interfaces.CAPIManager
ClusterSpec *cluster.Spec
CurrentClusterSpec *cluster.Spec
UpgradeChangeDiff *types.ChangeDiff
BootstrapCluster *types.Cluster
ManagementCluster *types.Cluster
WorkloadCluster *types.Cluster
Profiler *Profiler
OriginalError error
ManagementClusterStateDir string
ForceCleanup bool
ClientFactory interfaces.ClientFactory
Bootstrapper interfaces.Bootstrapper
Provider providers.Provider
ClusterManager interfaces.ClusterManager
GitOpsManager interfaces.GitOpsManager
Validations interfaces.Validator
Writer filewriter.FileWriter
EksdInstaller interfaces.EksdInstaller
PackageInstaller interfaces.PackageInstaller
EksdUpgrader interfaces.EksdUpgrader
ClusterUpgrader interfaces.ClusterUpgrader
ClusterCreator interfaces.ClusterCreator
CAPIManager interfaces.CAPIManager
ClusterSpec *cluster.Spec
CurrentClusterSpec *cluster.Spec
UpgradeChangeDiff *types.ChangeDiff
BootstrapCluster *types.Cluster
ManagementCluster *types.Cluster
WorkloadCluster *types.Cluster
Profiler *Profiler
OriginalError error
BackupClusterStateDir string
ForceCleanup bool
}

func (c *CommandContext) SetError(err error) {
Expand Down Expand Up @@ -132,7 +132,7 @@ func (tr *taskRunner) RunTask(ctx context.Context, commandContext *CommandContex
var checkpointInfo CheckpointInfo
var err error

commandContext.ManagementClusterStateDir = fmt.Sprintf("cluster-state-backup-%s", time.Now().Format("2006-01-02T15_04_05"))
commandContext.BackupClusterStateDir = fmt.Sprintf("%s-backup-%s", commandContext.ClusterSpec.Cluster.Name, time.Now().Format("2006-01-02T15_04_05"))
commandContext.Profiler = &Profiler{
metrics: make(map[string]map[string]time.Duration),
starts: make(map[string]map[string]time.Time),
Expand Down
2 changes: 1 addition & 1 deletion pkg/workflows/management/post_cluster_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func (s *postClusterUpgrade) Run(ctx context.Context, commandContext *task.Comma
}

logger.Info("Cleaning up backup resources")
capiObjectFile := filepath.Join(commandContext.ManagementCluster.Name, commandContext.ManagementClusterStateDir)
capiObjectFile := filepath.Join(commandContext.ManagementCluster.Name, commandContext.BackupClusterStateDir)
if err := os.RemoveAll(capiObjectFile); err != nil {
logger.Info(fmt.Sprintf("management cluster CAPI backup file not found: %v", err))
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/workflows/management/pre_cluster_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ type preClusterUpgrade struct{}

// Run preClusterUpgrade implements steps to be performed before management cluster's upgrade.
func (s *preClusterUpgrade) Run(ctx context.Context, commandContext *task.CommandContext) task.Task {
// Take best effort CAPI backup of workload cluster without filter.
// If that errors, then take CAPI backup filtering on only workload cluster.
// Take best effort CAPI backup of management cluster without filter.
// If that errors, then take CAPI backup filtering on only management cluster.
logger.Info("Backing up management cluster's resources before upgrading")
err := commandContext.ClusterManager.BackupCAPI(ctx, commandContext.ManagementCluster, commandContext.ManagementClusterStateDir, "")
err := commandContext.ClusterManager.BackupCAPI(ctx, commandContext.ManagementCluster, commandContext.BackupClusterStateDir, "")
if err != nil {
err = commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.ManagementCluster, commandContext.ManagementClusterStateDir, commandContext.ManagementCluster.Name)
err = commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.ManagementCluster, commandContext.BackupClusterStateDir, commandContext.ManagementCluster.Name)
if err != nil {
commandContext.SetError(err)
return &workflows.CollectMgmtClusterDiagnosticsTask{}
Expand Down
2 changes: 1 addition & 1 deletion pkg/workflows/management/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func newUpgradeManagementTest(t *testing.T) *upgradeManagementTestSetup {
s.Cluster.Name = "management"
s.Cluster.Spec.DatacenterRef.Kind = v1alpha1.VSphereDatacenterKind
}),
managementStatePath: fmt.Sprintf("cluster-state-backup-%s", time.Now().Format("2006-01-02T15_04_05")),
managementStatePath: fmt.Sprintf("%s-backup-%s", "management", time.Now().Format("2006-01-02T15_04_05")),
}
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/workflows/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,9 +459,9 @@ func (s *moveManagementToBootstrapTask) Run(ctx context.Context, commandContext
// Take best effort CAPI backup of workload cluster without filter.
// If that errors, then take CAPI backup filtering on only workload cluster.
logger.Info("Backing up workload cluster's management resources before moving to bootstrap cluster")
err := commandContext.ClusterManager.BackupCAPI(ctx, commandContext.WorkloadCluster, commandContext.ManagementClusterStateDir, "")
err := commandContext.ClusterManager.BackupCAPI(ctx, commandContext.WorkloadCluster, commandContext.BackupClusterStateDir, "")
if err != nil {
err = commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.WorkloadCluster, commandContext.ManagementClusterStateDir, commandContext.WorkloadCluster.Name)
err = commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.WorkloadCluster, commandContext.BackupClusterStateDir, commandContext.WorkloadCluster.Name)
if err != nil {
commandContext.SetError(err)
return &CollectDiagnosticsTask{}
Expand Down Expand Up @@ -521,7 +521,7 @@ func (s *upgradeWorkloadClusterTask) Run(ctx context.Context, commandContext *ta
// Take backup of bootstrap cluster capi components
if commandContext.BootstrapCluster != nil {
logger.Info("Backing up management components from bootstrap cluster")
err := commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.BootstrapCluster, fmt.Sprintf("bootstrap-%s", commandContext.ManagementClusterStateDir), commandContext.WorkloadCluster.Name)
err := commandContext.ClusterManager.BackupCAPIWaitForInfrastructure(ctx, commandContext.BootstrapCluster, fmt.Sprintf("bootstrap-%s", commandContext.BackupClusterStateDir), commandContext.WorkloadCluster.Name)
if err != nil {
logger.Info("Bootstrap management component backup failed, use existing workload cluster backup", "error", err)
}
Expand Down Expand Up @@ -701,7 +701,7 @@ func (s *deleteBootstrapClusterTask) Run(ctx context.Context, commandContext *ta
logger.Info(fmt.Sprintf("%v", err))
}

capiObjectFile := filepath.Join(commandContext.BootstrapCluster.Name, commandContext.ManagementClusterStateDir)
capiObjectFile := filepath.Join(commandContext.BootstrapCluster.Name, commandContext.BackupClusterStateDir)
if err := os.RemoveAll(capiObjectFile); err != nil {
logger.Info(fmt.Sprintf("management cluster CAPI backup file not found: %v", err))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/workflows/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func newUpgradeTest(t *testing.T) *upgradeTestSetup {
ctx: context.Background(),
newClusterSpec: test.NewClusterSpec(func(s *cluster.Spec) { s.Cluster.Name = "cluster-name" }),
workloadCluster: &types.Cluster{Name: "workload"},
managementStatePath: fmt.Sprintf("cluster-state-backup-%s", time.Now().Format("2006-01-02T15_04_05")),
managementStatePath: fmt.Sprintf("%s-backup-%s", "cluster-name", time.Now().Format("2006-01-02T15_04_05")),
}
}

Expand Down
38 changes: 38 additions & 0 deletions pkg/workflows/workload/post_cluster_upgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package workload

import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/aws/eks-anywhere/pkg/logger"
"github.com/aws/eks-anywhere/pkg/task"
)

// postClusterUpgrade is the workload upgrade workflow task whose Run method
// removes the CAPI backup directory from local disk.
type postClusterUpgrade struct{}

// Run postClusterUpgrade implements steps to be performed after the upgrade process.
func (s *postClusterUpgrade) Run(ctx context.Context, commandContext *task.CommandContext) task.Task {
logger.Info("Cleaning up backup resources")
capiObjectFile := filepath.Join(commandContext.ManagementCluster.Name, commandContext.BackupClusterStateDir)
if err := os.RemoveAll(capiObjectFile); err != nil {
logger.Info(fmt.Sprintf("workload cluster CAPI backup file not found: %v", err))
}

Check warning on line 21 in pkg/workflows/workload/post_cluster_upgrade.go

View check run for this annotation

Codecov / codecov/patch

pkg/workflows/workload/post_cluster_upgrade.go#L20-L21

Added lines #L20 - L21 were not covered by tests

return nil
}

// Name returns the identifier of this task.
func (s *postClusterUpgrade) Name() string {
	const taskName = "post-cluster-upgrade"
	return taskName
}

// Checkpoint returns a completed-task record that carries no checkpoint data.
func (s *postClusterUpgrade) Checkpoint() *task.CompletedTask {
	completed := new(task.CompletedTask)
	completed.Checkpoint = nil
	return completed
}

func (s *postClusterUpgrade) Restore(ctx context.Context, commandContext *task.CommandContext, completedTask *task.CompletedTask) (task.Task, error) {
return nil, nil

Check warning on line 37 in pkg/workflows/workload/post_cluster_upgrade.go

View check run for this annotation

Codecov / codecov/patch

pkg/workflows/workload/post_cluster_upgrade.go#L36-L37

Added lines #L36 - L37 were not covered by tests
}
38 changes: 38 additions & 0 deletions pkg/workflows/workload/pre_cluster_upgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package workload

import (
"context"

"github.com/aws/eks-anywhere/pkg/logger"
"github.com/aws/eks-anywhere/pkg/task"
"github.com/aws/eks-anywhere/pkg/workflows"
)

// preClusterUpgrade is the workload upgrade workflow task whose Run method
// takes a CAPI backup before the upgradeCluster task runs.
type preClusterUpgrade struct{}

// Run takes a CAPI backup before the workload cluster is upgraded. On success
// the upgradeCluster task is returned; on failure the error is recorded on the
// command context and the management-cluster diagnostics task is returned.
func (s *preClusterUpgrade) Run(ctx context.Context, commandContext *task.CommandContext) task.Task {
	logger.Info("Backing up workload cluster's resources before upgrading")

	// The backup is requested from the management cluster, filtered on the
	// current workload cluster's name.
	if backupErr := commandContext.ClusterManager.BackupCAPI(
		ctx,
		commandContext.ManagementCluster,
		commandContext.BackupClusterStateDir,
		commandContext.WorkloadCluster.Name,
	); backupErr != nil {
		commandContext.SetError(backupErr)
		return &workflows.CollectMgmtClusterDiagnosticsTask{}
	}

	return &upgradeCluster{}
}

// Name returns the identifier of this task.
func (s *preClusterUpgrade) Name() string {
	const taskName = "pre-cluster-upgrade"
	return taskName
}

// Checkpoint returns a completed-task record that carries no checkpoint data.
func (s *preClusterUpgrade) Checkpoint() *task.CompletedTask {
	completed := new(task.CompletedTask)
	completed.Checkpoint = nil
	return completed
}

func (s *preClusterUpgrade) Restore(ctx context.Context, commandContext *task.CommandContext, completedTask *task.CompletedTask) (task.Task, error) {
return &upgradeCluster{}, nil

Check warning on line 37 in pkg/workflows/workload/pre_cluster_upgrade.go

View check run for this annotation

Codecov / codecov/patch

pkg/workflows/workload/pre_cluster_upgrade.go#L36-L37

Added lines #L36 - L37 were not covered by tests
}
64 changes: 47 additions & 17 deletions pkg/workflows/workload/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package workload_test

import (
"context"
"errors"
"fmt"
"os"
"testing"
"time"

"github.com/golang/mock/gomock"

Expand All @@ -21,22 +23,23 @@ import (
)

type upgradeTestSetup struct {
t *testing.T
clusterManager *mocks.MockClusterManager
gitOpsManager *mocks.MockGitOpsManager
provider *providermocks.MockProvider
writer *writermocks.MockFileWriter
validator *mocks.MockValidator
eksd *mocks.MockEksdInstaller
packageInstaller *mocks.MockPackageInstaller
clusterUpgrader *mocks.MockClusterUpgrader
datacenterConfig providers.DatacenterConfig
machineConfigs []providers.MachineConfig
ctx context.Context
currentClusterSpec *cluster.Spec
clusterSpec *cluster.Spec
workloadCluster *types.Cluster
workload *workload.Upgrade
t *testing.T
clusterManager *mocks.MockClusterManager
gitOpsManager *mocks.MockGitOpsManager
provider *providermocks.MockProvider
writer *writermocks.MockFileWriter
validator *mocks.MockValidator
eksd *mocks.MockEksdInstaller
packageInstaller *mocks.MockPackageInstaller
clusterUpgrader *mocks.MockClusterUpgrader
datacenterConfig providers.DatacenterConfig
machineConfigs []providers.MachineConfig
ctx context.Context
currentClusterSpec *cluster.Spec
clusterSpec *cluster.Spec
workloadCluster *types.Cluster
workload *workload.Upgrade
backupClusterStateDir string
}

func newUpgradeTest(t *testing.T) *upgradeTestSetup {
Expand Down Expand Up @@ -96,7 +99,8 @@ func newUpgradeTest(t *testing.T) *upgradeTestSetup {
s.ManagementCluster = &types.Cluster{Name: "management"}
s.Cluster.Spec.KubernetesVersion = v1alpha1.Kube128
}),
workloadCluster: &types.Cluster{Name: "workload"},
workloadCluster: &types.Cluster{Name: "workload"},
backupClusterStateDir: fmt.Sprintf("%s-backup-%s", "workload", time.Now().Format("2006-01-02T15_04_05")),
}
}

Expand Down Expand Up @@ -137,6 +141,12 @@ func (c *upgradeTestSetup) expectPreflightValidationsToPass() {
c.validator.EXPECT().PreflightValidations(c.ctx).Return(nil)
}

// expectBackupWorkloadFromCluster registers the expectation that BackupCAPI is
// called against the management cluster with the test's backup directory and
// the workload cluster's name, and makes that call return err.
func (c *upgradeTestSetup) expectBackupWorkloadFromCluster(err error) {
	// A single expectation needs no ordering wrapper.
	backupCall := c.clusterManager.EXPECT().BackupCAPI(
		c.ctx,
		c.clusterSpec.ManagementCluster,
		c.backupClusterStateDir,
		c.workloadCluster.Name,
	)
	backupCall.Return(err)
}

func (c *upgradeTestSetup) expectSaveLogsManagement() {
c.clusterManager.EXPECT().SaveLogsManagementCluster(c.ctx, c.clusterSpec, c.clusterSpec.ManagementCluster)
c.expectWrite()
Expand All @@ -154,6 +164,7 @@ func TestUpgradeRunSuccess(t *testing.T) {
test.expectPreflightValidationsToPass()
test.expectDatacenterConfig()
test.expectMachineConfigs()
test.expectBackupWorkloadFromCluster(nil)
test.expectUpgradeWorkloadCluster(nil)
test.expectWriteWorkloadClusterConfig(nil)

Expand All @@ -171,6 +182,7 @@ func TestUpgradeRunUpgradeFail(t *testing.T) {
test.expectPreflightValidationsToPass()
test.expectDatacenterConfig()
test.expectMachineConfigs()
test.expectBackupWorkloadFromCluster(nil)
test.expectUpgradeWorkloadCluster(fmt.Errorf("boom"))
test.expectSaveLogsManagement()

Expand Down Expand Up @@ -210,6 +222,23 @@ func TestUpgradeRunValidateFail(t *testing.T) {
}
}

// TestUpgradeWorkloadRunBackupFailed verifies that the upgrade run reports an
// error when the pre-upgrade CAPI backup fails, and that management cluster
// logs are saved in that case.
func TestUpgradeWorkloadRunBackupFailed(t *testing.T) {
	features.ClearCache()
	os.Setenv(features.UseControllerForCli, "true")
	test := newUpgradeTest(t)
	test.expectSetup()
	test.expectPreflightValidationsToPass()
	test.expectDatacenterConfig()
	test.expectMachineConfigs()
	test.expectBackupWorkloadFromCluster(errors.New(""))
	test.expectSaveLogsManagement()

	// The backup failure must surface as an error from the run.
	if err := test.run(); err == nil {
		t.Fatal("Upgrade.Run() err = nil, want non-nil error")
	}
}

func TestUpgradeRunWriteClusterConfigFail(t *testing.T) {
features.ClearCache()
os.Setenv(features.UseControllerForCli, "true")
Expand All @@ -218,6 +247,7 @@ func TestUpgradeRunWriteClusterConfigFail(t *testing.T) {
test.expectPreflightValidationsToPass()
test.expectDatacenterConfig()
test.expectMachineConfigs()
test.expectBackupWorkloadFromCluster(nil)
test.expectUpgradeWorkloadCluster(nil)
test.expectWriteWorkloadClusterConfig(fmt.Errorf("boom"))
test.expectWrite()
Expand Down
2 changes: 1 addition & 1 deletion pkg/workflows/workload/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (s *setAndValidateUpgradeWorkloadTask) Run(ctx context.Context, commandCont
commandContext.SetError(err)
return nil
}
return &upgradeCluster{}
return &preClusterUpgrade{}
}

func (s *setAndValidateUpgradeWorkloadTask) providerValidation(ctx context.Context, commandContext *task.CommandContext) []validations.Validation {
Expand Down
6 changes: 6 additions & 0 deletions pkg/workflows/workload/writeclusterconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
if commandContext.OriginalError == nil {
logger.MarkSuccess(successMsg)
}
if commandContext.CurrentClusterSpec != nil {
return &postClusterUpgrade{}
}
return nil
}

Expand All @@ -42,5 +45,8 @@
}

func (s *writeClusterConfig) Restore(ctx context.Context, commandContext *task.CommandContext, completedTask *task.CompletedTask) (task.Task, error) {
if commandContext.CurrentClusterSpec == nil {
return &postClusterUpgrade{}, nil
}

Check warning on line 50 in pkg/workflows/workload/writeclusterconfig.go

View check run for this annotation

Codecov / codecov/patch

pkg/workflows/workload/writeclusterconfig.go#L48-L50

Added lines #L48 - L50 were not covered by tests
return nil, nil
}
Loading