From 6a3396dbc8b8410982a3fe0c860fb910a9f6671a Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Mon, 27 Jun 2022 20:41:46 -0400 Subject: [PATCH 1/6] Rework DevWorkspace status handling in the finalize section Rework how we update workspace status to avoid multiple calls that update the workspace status. Previously, every time we enter the finalize function, we would set the status to terminating, and then potentially set it to errored. Instead, we use the same deferred-function handling of updating the workspace status from main reconcile function -- defer a function that updates the status and pass around a currentStatus struct reference that needs to be updated. This moves all calls that update the workspace status into one place. Signed-off-by: Angel Misevski --- controllers/workspace/finalize.go | 46 ++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/controllers/workspace/finalize.go b/controllers/workspace/finalize.go index cffc3baf7..9cbe7b23f 100644 --- a/controllers/workspace/finalize.go +++ b/controllers/workspace/finalize.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "github.com/devfile/devworkspace-operator/pkg/conditions" "github.com/devfile/devworkspace-operator/pkg/constants" dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2" @@ -26,7 +27,6 @@ import ( "github.com/go-logr/logr" coputil "github.com/redhat-cop/operator-utils/pkg/util" corev1 "k8s.io/api/core/v1" - k8sErrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -46,28 +46,30 @@ func (r *DevWorkspaceReconciler) workspaceNeedsFinalize(workspace *dw.DevWorkspa return false } -func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) { - if workspace.Status.Phase != dw.DevWorkspaceStatusError { - workspace.Status.Message = "Cleaning up resources for deletion" - workspace.Status.Phase 
= devworkspacePhaseTerminating - err := r.Client.Status().Update(ctx, workspace) - if err != nil && !k8sErrors.IsConflict(err) { - return reconcile.Result{}, err - } +func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (finalizeResult reconcile.Result, finalizeErr error) { + // Tracked state for the finalize process; we update the workspace status in a deferred function (and pass the + // named return value for finalize()) to update the workspace's status with whatever is in finalizeStatus + // when this function returns. + finalizeStatus := &currentStatus{phase: devworkspacePhaseTerminating} + finalizeStatus.setConditionTrue(conditions.Started, "Cleaning up resources for deletion") + defer func() (reconcile.Result, error) { + return r.updateWorkspaceStatus(workspace, log, finalizeStatus, finalizeResult, finalizeErr) + }() + if workspace.Status.Phase != dw.DevWorkspaceStatusError { for _, finalizer := range workspace.Finalizers { switch finalizer { case constants.StorageCleanupFinalizer: - return r.finalizeStorage(ctx, log, workspace) + return r.finalizeStorage(ctx, log, workspace, finalizeStatus) case constants.ServiceAccountCleanupFinalizer: - return r.finalizeServiceAccount(ctx, log, workspace) + return r.finalizeServiceAccount(ctx, log, workspace, finalizeStatus) } } } return reconcile.Result{}, nil } -func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) { +func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace, finalizeStatus *currentStatus) (reconcile.Result, error) { // Need to make sure Deployment is cleaned up before starting job to avoid mounting issues for RWO PVCs wait, err := wsprovision.DeleteWorkspaceDeployment(ctx, workspace, r.Client) if err != nil { @@ -90,9 +92,9 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L
storageProvisioner, err := storage.GetProvisioner(workspace) if err != nil { log.Error(err, "Failed to clean up DevWorkspace storage") - failedStatus := currentStatus{phase: dw.DevWorkspaceStatusError} - failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) - return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil) + finalizeStatus.phase = dw.DevWorkspaceStatusError + finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) + return reconcile.Result{}, nil } err = storageProvisioner.CleanupWorkspaceStorage(workspace, sync.ClusterAPI{ Ctx: ctx, @@ -107,9 +109,9 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L return reconcile.Result{RequeueAfter: storageErr.RequeueAfter}, nil case *storage.ProvisioningError: log.Error(storageErr, "Failed to clean up DevWorkspace storage") - failedStatus := currentStatus{phase: dw.DevWorkspaceStatusError} - failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) - return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil) + finalizeStatus.phase = dw.DevWorkspaceStatusError + finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) + return reconcile.Result{}, nil default: return reconcile.Result{}, storageErr } @@ -119,13 +121,13 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L return reconcile.Result{}, r.Update(ctx, workspace) } -func (r *DevWorkspaceReconciler) finalizeServiceAccount(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) { +func (r *DevWorkspaceReconciler) finalizeServiceAccount(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace, finalizeStatus *currentStatus) (reconcile.Result, error) { retry, err := wsprovision.FinalizeServiceAccount(workspace, ctx, r.NonCachingClient) if err != nil { log.Error(err, "Failed to finalize workspace ServiceAccount") - failedStatus := currentStatus{phase: 
dw.DevWorkspaceStatusError} - failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) - return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil) + finalizeStatus.phase = dw.DevWorkspaceStatusError + finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) + return reconcile.Result{}, nil } if retry { return reconcile.Result{Requeue: true}, nil From d0b1e23fdb26703edb8da4266eb62f1896e8c4ae Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Mon, 27 Jun 2022 20:53:47 -0400 Subject: [PATCH 2/6] Reconcile terminating workspaces that have an error status set With the update to terminating workspace status handling (see previous commit), it turns out it's no longer necessary to stop reconciling workspaces once their job encounters an error. The previous bug where DWO would enter a tight loop reconciling failed workspaces turns out to be due to each finalize call updating the workspace to Terminating status, then to Errored status, triggering a new reconcile.
Signed-off-by: Angel Misevski --- controllers/workspace/finalize.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/controllers/workspace/finalize.go b/controllers/workspace/finalize.go index 9cbe7b23f..98f33ea63 100644 --- a/controllers/workspace/finalize.go +++ b/controllers/workspace/finalize.go @@ -56,14 +56,12 @@ func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, return r.updateWorkspaceStatus(workspace, log, finalizeStatus, finalizeResult, finalizeErr) }() - if workspace.Status.Phase != dw.DevWorkspaceStatusError { - for _, finalizer := range workspace.Finalizers { - switch finalizer { - case constants.StorageCleanupFinalizer: - return r.finalizeStorage(ctx, log, workspace, finalizeStatus) - case constants.ServiceAccountCleanupFinalizer: - return r.finalizeServiceAccount(ctx, log, workspace, finalizeStatus) - } + for _, finalizer := range workspace.Finalizers { + switch finalizer { + case constants.StorageCleanupFinalizer: + return r.finalizeStorage(ctx, log, workspace, finalizeStatus) + case constants.ServiceAccountCleanupFinalizer: + return r.finalizeServiceAccount(ctx, log, workspace, finalizeStatus) } } return reconcile.Result{}, nil From 08ed8263226fff7fe09193c728fcdbfd0b681ed5 Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Mon, 27 Jun 2022 21:07:22 -0400 Subject: [PATCH 3/6] Avoid logging error from updating status of a deleted workspace When the last finalizer on a DevWorkspace is cleared, the cluster may garbage-collect that workspace immediately, which can result in the object being deleted before we reach the point of attempting to update the workspace's status. This results in logging a harmless but confusing error, so we don't try to update the status for terminating workspaces with no finalizers. 
Signed-off-by: Angel Misevski --- controllers/workspace/finalize.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/controllers/workspace/finalize.go b/controllers/workspace/finalize.go index 98f33ea63..bd9ae22f1 100644 --- a/controllers/workspace/finalize.go +++ b/controllers/workspace/finalize.go @@ -53,6 +53,13 @@ func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, finalizeStatus := &currentStatus{phase: devworkspacePhaseTerminating} finalizeStatus.setConditionTrue(conditions.Started, "Cleaning up resources for deletion") defer func() (reconcile.Result, error) { + if len(workspace.Finalizers) == 0 { + // If there are no finalizers on the workspace, the workspace may be garbage collected before we get to update + // its status. This avoids potentially logging a confusing error due to trying to set the status on a deleted + // workspace. This check has to be in the deferred function since updateWorkspaceStatus will be called after the + // client.Update() call that removes the last finalizer. + return finalizeResult, finalizeErr + } return r.updateWorkspaceStatus(workspace, log, finalizeStatus, finalizeResult, finalizeErr) }() From 14cacb5c943911d8239b279d73736903e5764365 Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Thu, 30 Jun 2022 18:21:01 -0600 Subject: [PATCH 4/6] Avoid logging failure message repeatedly in PVC cleanup Since setting a workspace to the errored status will queue another reconcile, we only log the "failed to clean up common PVC" message if the workspace status is not already set to errored.
Signed-off-by: Angel Misevski --- controllers/workspace/finalize.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/controllers/workspace/finalize.go b/controllers/workspace/finalize.go index bd9ae22f1..201c176f1 100644 --- a/controllers/workspace/finalize.go +++ b/controllers/workspace/finalize.go @@ -113,7 +113,10 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L log.Info(storageErr.Message) return reconcile.Result{RequeueAfter: storageErr.RequeueAfter}, nil case *storage.ProvisioningError: - log.Error(storageErr, "Failed to clean up DevWorkspace storage") + if workspace.Status.Phase != dw.DevWorkspaceStatusError { + // Avoid repeatedly logging error unless it's relevant + log.Error(storageErr, "Failed to clean up DevWorkspace storage") + } finalizeStatus.phase = dw.DevWorkspaceStatusError finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error()) return reconcile.Result{}, nil From 65a005fac8ed51e8f5a88c243013055a435e66dc Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Thu, 30 Jun 2022 18:46:12 -0600 Subject: [PATCH 5/6] Queue reconcile for all common storage-type workspaces on PVC deletion To make sure workspaces that are in an errored state due to PVC cleanup failing are removed when the common PVC is deleted (e.g. when all DevWorkspaces are deleted), add a watch and enqueue reconciles for all common-storage workspaces when the common PVC is deleted. 
Signed-off-by: Angel Misevski --- .../workspace/devworkspace_controller.go | 56 +++++++++++++------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/controllers/workspace/devworkspace_controller.go b/controllers/workspace/devworkspace_controller.go index 786e02065..7e2529ef0 100644 --- a/controllers/workspace/devworkspace_controller.go +++ b/controllers/workspace/devworkspace_controller.go @@ -636,28 +636,49 @@ func (r *DevWorkspaceReconciler) getWorkspaceId(ctx context.Context, workspace * } // Mapping the pod to the devworkspace -func dwRelatedPodsHandler() handler.EventHandler { - podToDW := func(obj client.Object) []reconcile.Request { - labels := obj.GetLabels() - if _, ok := labels[constants.DevWorkspaceNameLabel]; !ok { - return nil - } +func dwRelatedPodsHandler(obj client.Object) []reconcile.Request { + labels := obj.GetLabels() + if _, ok := labels[constants.DevWorkspaceNameLabel]; !ok { + return []reconcile.Request{} + } - //If the dewworkspace label does not exist, do no reconcile - if _, ok := labels[constants.DevWorkspaceIDLabel]; !ok { - return nil - } + //If the dewworkspace label does not exist, do no reconcile + if _, ok := labels[constants.DevWorkspaceIDLabel]; !ok { + return []reconcile.Request{} + } - return []reconcile.Request{ - { + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Name: labels[constants.DevWorkspaceNameLabel], + Namespace: obj.GetNamespace(), + }, + }, + } +} + +func (r *DevWorkspaceReconciler) dwPVCHandler(obj client.Object) []reconcile.Request { + if obj.GetName() != config.Workspace.PVCName || obj.GetDeletionTimestamp() == nil { + // We're looking for a deleted common PVC + return []reconcile.Request{} + } + dwList := &dw.DevWorkspaceList{} + if err := r.Client.List(context.Background(), dwList); err != nil { + return []reconcile.Request{} + } + var reconciles []reconcile.Request + for _, workspace := range dwList.Items { + storageType := 
workspace.Spec.Template.Attributes.GetString(constants.DevWorkspaceStorageTypeAttribute, nil) + if storageType == constants.CommonStorageClassType || storageType == "" { + reconciles = append(reconciles, reconcile.Request{ NamespacedName: types.NamespacedName{ - Name: labels[constants.DevWorkspaceNameLabel], - Namespace: obj.GetNamespace(), + Name: workspace.GetName(), + Namespace: workspace.GetNamespace(), }, - }, + }) } } - return handler.EnqueueRequestsFromMapFunc(podToDW) + return reconciles } func (r *DevWorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error { @@ -688,7 +709,8 @@ func (r *DevWorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error { Owns(&corev1.ConfigMap{}). Owns(&corev1.Secret{}). Owns(&corev1.ServiceAccount{}). - Watches(&source.Kind{Type: &corev1.Pod{}}, dwRelatedPodsHandler()). + Watches(&source.Kind{Type: &corev1.Pod{}}, handler.EnqueueRequestsFromMapFunc(dwRelatedPodsHandler)). + Watches(&source.Kind{Type: &corev1.PersistentVolumeClaim{}}, handler.EnqueueRequestsFromMapFunc(r.dwPVCHandler)). Watches(&source.Kind{Type: &controllerv1alpha1.DevWorkspaceOperatorConfig{}}, handler.EnqueueRequestsFromMapFunc(emptyMapper), configWatcher). WithEventFilter(predicates). WithEventFilter(podPredicates). From ed85d8451e9d93a6971ab76a56fad41f09143688 Mon Sep 17 00:00:00 2001 From: Angel Misevski Date: Thu, 30 Jun 2022 19:00:00 -0600 Subject: [PATCH 6/6] Enqueue reconciles for per-workspace PVC events Update the watch for PVCs to also enqueue reconciles for per-workspace PVC events. Previously, the per-workspace PVC could be modified/deleted without the DevWorkspace that owns that PVC noticing. 
Signed-off-by: Angel Misevski --- controllers/workspace/devworkspace_controller.go | 16 ++++++++++++++++ pkg/provision/storage/perWorkspaceStorage.go | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/controllers/workspace/devworkspace_controller.go b/controllers/workspace/devworkspace_controller.go index 7e2529ef0..7ad15e7f6 100644 --- a/controllers/workspace/devworkspace_controller.go +++ b/controllers/workspace/devworkspace_controller.go @@ -658,6 +658,22 @@ func dwRelatedPodsHandler(obj client.Object) []reconcile.Request { } func (r *DevWorkspaceReconciler) dwPVCHandler(obj client.Object) []reconcile.Request { + // Check if PVC is owned by a DevWorkspace (per-workspace storage case) + for _, ownerref := range obj.GetOwnerReferences() { + if ownerref.Kind != "DevWorkspace" { + continue + } + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Name: ownerref.Name, + Namespace: obj.GetNamespace(), + }, + }, + } + } + + // Otherwise, check if common PVC is deleted to make sure all DevWorkspaces see it happen if obj.GetName() != config.Workspace.PVCName || obj.GetDeletionTimestamp() == nil { // We're looking for a deleted common PVC return []reconcile.Request{} diff --git a/pkg/provision/storage/perWorkspaceStorage.go b/pkg/provision/storage/perWorkspaceStorage.go index 4dafe8bdd..db6af9a01 100644 --- a/pkg/provision/storage/perWorkspaceStorage.go +++ b/pkg/provision/storage/perWorkspaceStorage.go @@ -63,6 +63,13 @@ func (p *PerWorkspaceStorageProvisioner) ProvisionStorage(podAdditions *v1alpha1 } pvcName := perWorkspacePVC.Name + // If PVC is being deleted, we need to fail workspace startup as a running pod will block deletion. 
+ if perWorkspacePVC.DeletionTimestamp != nil { + return &ProvisioningError{ + Message: "DevWorkspace PVC is being deleted", + } + } + // Rewrite container volume mounts if err := p.rewriteContainerVolumeMounts(workspace.Status.DevWorkspaceId, pvcName, podAdditions, &workspace.Spec.Template); err != nil { return &ProvisioningError{