diff --git a/build/Dockerfile b/build/Dockerfile
index 50190b932..f9d209bc5 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -14,7 +14,7 @@
 #
 
 # https://access.redhat.com/containers/?tab=tags#/registry.access.redhat.com/ubi8/go-toolset
-FROM registry.access.redhat.com/ubi8/go-toolset:1.17.7-13 as builder
+FROM registry.access.redhat.com/ubi8/go-toolset:1.17.10-4 as builder
 ENV GOPATH=/go/
 USER root
 WORKDIR /devworkspace-operator
@@ -34,7 +34,7 @@ RUN make compile-devworkspace-controller
 RUN make compile-webhook-server
 
 # https://access.redhat.com/containers/?tab=tags#/registry.access.redhat.com/ubi8-minimal
-FROM registry.access.redhat.com/ubi8-minimal:8.6-751
+FROM registry.access.redhat.com/ubi8-minimal:8.6-854
 RUN microdnf -y update && microdnf clean all && rm -rf /var/cache/yum && echo "Installed Packages" && rpm -qa | sort -V && echo "End Of Installed Packages"
 WORKDIR /
 COPY --from=builder /devworkspace-operator/_output/bin/devworkspace-controller /usr/local/bin/devworkspace-controller
diff --git a/controllers/workspace/devworkspace_controller.go b/controllers/workspace/devworkspace_controller.go
index 51dad51f2..968ab54ec 100644
--- a/controllers/workspace/devworkspace_controller.go
+++ b/controllers/workspace/devworkspace_controller.go
@@ -631,28 +631,65 @@ func (r *DevWorkspaceReconciler) getWorkspaceId(ctx context.Context, workspace *
 }
 
 // Mapping the pod to the devworkspace
-func dwRelatedPodsHandler() handler.EventHandler {
-	podToDW := func(obj client.Object) []reconcile.Request {
-		labels := obj.GetLabels()
-		if _, ok := labels[constants.DevWorkspaceNameLabel]; !ok {
-			return nil
-		}
+func dwRelatedPodsHandler(obj client.Object) []reconcile.Request {
+	labels := obj.GetLabels()
+	if _, ok := labels[constants.DevWorkspaceNameLabel]; !ok {
+		return []reconcile.Request{}
+	}
 
-		//If the dewworkspace label does not exist, do no reconcile
-		if _, ok := labels[constants.DevWorkspaceIDLabel]; !ok {
-			return nil
-		}
+	// If the devworkspace ID label does not exist, do not reconcile
+	if _, ok := labels[constants.DevWorkspaceIDLabel]; !ok {
+		return []reconcile.Request{}
+	}
+
+	return []reconcile.Request{
+		{
+			NamespacedName: types.NamespacedName{
+				Name:      labels[constants.DevWorkspaceNameLabel],
+				Namespace: obj.GetNamespace(),
+			},
+		},
+	}
+}
+
+func (r *DevWorkspaceReconciler) dwPVCHandler(obj client.Object) []reconcile.Request {
+	// Check if PVC is owned by a DevWorkspace (per-workspace storage case)
+	for _, ownerref := range obj.GetOwnerReferences() {
+		if ownerref.Kind != "DevWorkspace" {
+			continue
+		}
 		return []reconcile.Request{
 			{
 				NamespacedName: types.NamespacedName{
-					Name:      labels[constants.DevWorkspaceNameLabel],
+					Name:      ownerref.Name,
 					Namespace: obj.GetNamespace(),
 				},
 			},
 		}
 	}
-	return handler.EnqueueRequestsFromMapFunc(podToDW)
+
+	// Otherwise, check if common PVC is deleted to make sure all DevWorkspaces see it happen
+	if obj.GetName() != config.Workspace.PVCName || obj.GetDeletionTimestamp() == nil {
+		// We're looking for a deleted common PVC
+		return []reconcile.Request{}
+	}
+	dwList := &dw.DevWorkspaceList{}
+	if err := r.Client.List(context.Background(), dwList); err != nil {
+		return []reconcile.Request{}
+	}
+	var reconciles []reconcile.Request
+	for _, workspace := range dwList.Items {
+		storageType := workspace.Spec.Template.Attributes.GetString(constants.DevWorkspaceStorageTypeAttribute, nil)
+		if storageType == constants.CommonStorageClassType || storageType == "" {
+			reconciles = append(reconciles, reconcile.Request{
+				NamespacedName: types.NamespacedName{
+					Name:      workspace.GetName(),
+					Namespace: workspace.GetNamespace(),
+				},
+			})
+		}
+	}
+	return reconciles
 }
 
 func (r *DevWorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
@@ -683,7 +720,8 @@ func (r *DevWorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		Owns(&corev1.ConfigMap{}).
 		Owns(&corev1.Secret{}).
 		Owns(&corev1.ServiceAccount{}).
-		Watches(&source.Kind{Type: &corev1.Pod{}}, dwRelatedPodsHandler()).
+		Watches(&source.Kind{Type: &corev1.Pod{}}, handler.EnqueueRequestsFromMapFunc(dwRelatedPodsHandler)).
+		Watches(&source.Kind{Type: &corev1.PersistentVolumeClaim{}}, handler.EnqueueRequestsFromMapFunc(r.dwPVCHandler)).
 		Watches(&source.Kind{Type: &controllerv1alpha1.DevWorkspaceOperatorConfig{}}, handler.EnqueueRequestsFromMapFunc(emptyMapper), configWatcher).
 		WithEventFilter(predicates).
 		WithEventFilter(podPredicates).
diff --git a/controllers/workspace/finalize.go b/controllers/workspace/finalize.go
index cffc3baf7..201c176f1 100644
--- a/controllers/workspace/finalize.go
+++ b/controllers/workspace/finalize.go
@@ -18,6 +18,7 @@ package controllers
 
 import (
 	"context"
+	"github.com/devfile/devworkspace-operator/pkg/conditions"
 	"github.com/devfile/devworkspace-operator/pkg/constants"
 
 	dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
@@ -26,7 +27,6 @@ import (
 	"github.com/go-logr/logr"
 	coputil "github.com/redhat-cop/operator-utils/pkg/util"
 	corev1 "k8s.io/api/core/v1"
-	k8sErrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
@@ -46,28 +46,35 @@ func (r *DevWorkspaceReconciler) workspaceNeedsFinalize(workspace *dw.DevWorkspa
 	return false
 }
 
-func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) {
-	if workspace.Status.Phase != dw.DevWorkspaceStatusError {
-		workspace.Status.Message = "Cleaning up resources for deletion"
-		workspace.Status.Phase = devworkspacePhaseTerminating
-		err := r.Client.Status().Update(ctx, workspace)
-		if err != nil && !k8sErrors.IsConflict(err) {
-			return reconcile.Result{}, err
+func (r *DevWorkspaceReconciler) finalize(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (finalizeResult reconcile.Result, finalizeErr error) {
+	// Tracked state for the finalize process; we update the workspace status in a deferred function (and pass the
+	// named return value for finalize()) to update the workspace's status with whatever is in finalizeStatus
+	// when this function returns.
+	finalizeStatus := &currentStatus{phase: devworkspacePhaseTerminating}
+	finalizeStatus.setConditionTrue(conditions.Started, "Cleaning up resources for deletion")
+	defer func() (reconcile.Result, error) {
+		if len(workspace.Finalizers) == 0 {
+			// If there are no finalizers on the workspace, the workspace may be garbage collected before we get to update
+			// its status. This avoids potentially logging a confusing error due to trying to set the status on a deleted
+			// workspace. This check has to be in the deferred function since updateWorkspaceStatus will be called after the
+			// client.Update() call that removes the last finalizer.
+			return finalizeResult, finalizeErr
 		}
+		return r.updateWorkspaceStatus(workspace, log, finalizeStatus, finalizeResult, finalizeErr)
+	}()
 
-		for _, finalizer := range workspace.Finalizers {
-			switch finalizer {
-			case constants.StorageCleanupFinalizer:
-				return r.finalizeStorage(ctx, log, workspace)
-			case constants.ServiceAccountCleanupFinalizer:
-				return r.finalizeServiceAccount(ctx, log, workspace)
-			}
+	for _, finalizer := range workspace.Finalizers {
+		switch finalizer {
+		case constants.StorageCleanupFinalizer:
+			return r.finalizeStorage(ctx, log, workspace, finalizeStatus)
+		case constants.ServiceAccountCleanupFinalizer:
+			return r.finalizeServiceAccount(ctx, log, workspace, finalizeStatus)
 		}
 	}
 	return reconcile.Result{}, nil
 }
 
-func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) {
+func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace, finalizeStatus *currentStatus) (reconcile.Result, error) {
 	// Need to make sure Deployment is cleaned up before starting job to avoid mounting issues for RWO PVCs
 	wait, err := wsprovision.DeleteWorkspaceDeployment(ctx, workspace, r.Client)
 	if err != nil {
@@ -90,9 +97,9 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L
 	storageProvisioner, err := storage.GetProvisioner(workspace)
 	if err != nil {
 		log.Error(err, "Failed to clean up DevWorkspace storage")
-		failedStatus := currentStatus{phase: dw.DevWorkspaceStatusError}
-		failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
-		return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil)
+		finalizeStatus.phase = dw.DevWorkspaceStatusError
+		finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
+		return reconcile.Result{}, nil
 	}
 	err = storageProvisioner.CleanupWorkspaceStorage(workspace, sync.ClusterAPI{
 		Ctx: ctx,
@@ -106,10 +113,13 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L
 			log.Info(storageErr.Message)
 			return reconcile.Result{RequeueAfter: storageErr.RequeueAfter}, nil
 		case *storage.ProvisioningError:
-			log.Error(storageErr, "Failed to clean up DevWorkspace storage")
-			failedStatus := currentStatus{phase: dw.DevWorkspaceStatusError}
-			failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
-			return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil)
+			if workspace.Status.Phase != dw.DevWorkspaceStatusError {
+				// Avoid repeatedly logging error unless it's relevant
+				log.Error(storageErr, "Failed to clean up DevWorkspace storage")
+			}
+			finalizeStatus.phase = dw.DevWorkspaceStatusError
+			finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
+			return reconcile.Result{}, nil
 		default:
 			return reconcile.Result{}, storageErr
 		}
@@ -119,13 +129,13 @@ func (r *DevWorkspaceReconciler) finalizeStorage(ctx context.Context, log logr.L
 	return reconcile.Result{}, r.Update(ctx, workspace)
 }
 
-func (r *DevWorkspaceReconciler) finalizeServiceAccount(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace) (reconcile.Result, error) {
+func (r *DevWorkspaceReconciler) finalizeServiceAccount(ctx context.Context, log logr.Logger, workspace *dw.DevWorkspace, finalizeStatus *currentStatus) (reconcile.Result, error) {
 	retry, err := wsprovision.FinalizeServiceAccount(workspace, ctx, r.NonCachingClient)
 	if err != nil {
 		log.Error(err, "Failed to finalize workspace ServiceAccount")
-		failedStatus := currentStatus{phase: dw.DevWorkspaceStatusError}
-		failedStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
-		return r.updateWorkspaceStatus(workspace, r.Log, &failedStatus, reconcile.Result{}, nil)
+		finalizeStatus.phase = dw.DevWorkspaceStatusError
+		finalizeStatus.setConditionTrue(dw.DevWorkspaceError, err.Error())
+		return reconcile.Result{}, nil
 	}
 	if retry {
 		return reconcile.Result{Requeue: true}, nil
diff --git a/pkg/config/sync.go b/pkg/config/sync.go
index ca9c63c38..fcf6a250b 100644
--- a/pkg/config/sync.go
+++ b/pkg/config/sync.go
@@ -91,9 +91,7 @@ func SetupControllerConfig(client crclient.Client) error {
 		return err
 	}
 	defaultConfig.Routing.ProxyConfig = clusterProxy
-	if internalConfig.Routing.ProxyConfig == nil {
-		internalConfig.Routing.ProxyConfig = clusterProxy
-	}
+	internalConfig.Routing.ProxyConfig = proxy.MergeProxyConfigs(clusterProxy, internalConfig.Routing.ProxyConfig)
 
 	updatePublicConfig()
 	return nil
diff --git a/pkg/constants/attributes.go b/pkg/constants/attributes.go
index 9ce5f65fe..71994662f 100644
--- a/pkg/constants/attributes.go
+++ b/pkg/constants/attributes.go
@@ -20,11 +20,12 @@ const (
 	// DevWorkspaceStorageTypeAttribute defines the strategy used for provisioning storage for the workspace.
 	// If empty, the common PVC strategy is used.
 	// Supported options:
-	// - "common": Create one PVC per namespace, and store data for all workspaces in that namespace in that PVC
-	// - "async" : Create one PVC per namespace, and create a remote server that syncs data from workspaces to the PVC.
-	//             All volumeMounts used for devworkspaces are emptyDir
-	// - "ephemeral": Use emptyDir volumes for all volumes in the DevWorkspace. All data is lost when the workspace is
-	//                stopped.
+	// - "common":        Create one PVC per namespace, and store data for all workspaces in that namespace in that PVC
+	// - "async" :        Create one PVC per namespace, and create a remote server that syncs data from workspaces to the PVC.
+	//                    All volumeMounts used for devworkspaces are emptyDir
+	// - "per-workspace": Create one PVC per workspace, delete that PVC when the workspace is deleted.
+	// - "ephemeral":     Use emptyDir volumes for all volumes in the DevWorkspace. All data is lost when the workspace is
+	//                    stopped.
 	DevWorkspaceStorageTypeAttribute = "controller.devfile.io/storage-type"
 
 	// RuntimeClassNameAttribute is an attribute added to a DevWorkspace to specify a runtimeClassName for container
diff --git a/pkg/library/status/check.go b/pkg/library/status/check.go
new file mode 100644
index 000000000..d36dc9d1f
--- /dev/null
+++ b/pkg/library/status/check.go
@@ -0,0 +1,167 @@
+//
+// Copyright (c) 2019-2022 Red Hat, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//	http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package status
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/devfile/devworkspace-operator/pkg/common"
+	"github.com/devfile/devworkspace-operator/pkg/config"
+	"github.com/devfile/devworkspace-operator/pkg/infrastructure"
+	"github.com/devfile/devworkspace-operator/pkg/provision/sync"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/fields"
+	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+var containerFailureStateReasons = []string{
+	"CrashLoopBackOff",
+	"ImagePullBackOff",
+	"CreateContainerError",
+	"RunContainerError",
+}
+
+// unrecoverablePodEventReasons contains Kubernetes events that should fail workspace startup
+// if they occur related to a workspace pod. Events are stored as a map with event names as keys
+// and values representing the threshold of how many times we can see an event before it is considered
+// unrecoverable.
+var unrecoverablePodEventReasons = map[string]int32{
+	"FailedPostStartHook":   1,
+	"FailedMount":           3,
+	"FailedScheduling":      1,
+	"FailedCreate":          1,
+	"ReplicaSetCreateError": 1,
+}
+
+var unrecoverableDeploymentConditionReasons = []string{
+	"FailedCreate",
+}
+
+func CheckDeploymentStatus(deployment *appsv1.Deployment) (ready bool) {
+	return deployment.Status.ReadyReplicas > 0
+}
+
+func CheckDeploymentConditions(deployment *appsv1.Deployment) (healthy bool, errorMsg string) {
+	conditions := deployment.Status.Conditions
+	for _, condition := range conditions {
+		for _, unrecoverableReason := range unrecoverableDeploymentConditionReasons {
+			if condition.Reason == unrecoverableReason {
+				return false, fmt.Sprintf("Detected unrecoverable deployment condition: %s %s", condition.Reason, condition.Message)
+			}
+		}
+	}
+	return true, ""
+}
+
+// CheckPodsState checks if workspace-related pods are in an unrecoverable state. A pod is considered to be unrecoverable
+// if it has a container with one of the containerFailureStateReasons states, or if an unrecoverable event (with reason
+// matching unrecoverablePodEventReasons) has the pod as the involved object.
+// Returns an optional message with details about the detected unrecoverable state, and
+// an error if one occurs during the check.
+func CheckPodsState(workspaceID string, namespace string, labelSelector k8sclient.MatchingLabels,
+	clusterAPI sync.ClusterAPI) (stateMsg string, checkFailure error) {
+	podList := &corev1.PodList{}
+	if err := clusterAPI.Client.List(context.TODO(), podList, k8sclient.InNamespace(namespace), labelSelector); err != nil {
+		return "", err
+	}
+
+	for _, pod := range podList.Items {
+		for _, containerStatus := range pod.Status.ContainerStatuses {
+			ok, reason := CheckContainerStatusForFailure(&containerStatus)
+			if !ok {
+				return fmt.Sprintf("Container %s has state %s", containerStatus.Name, reason), nil
+			}
+		}
+		for _, initContainerStatus := range pod.Status.InitContainerStatuses {
+			ok, reason := CheckContainerStatusForFailure(&initContainerStatus)
+			if !ok {
+				return fmt.Sprintf("Init Container %s has state %s", initContainerStatus.Name, reason), nil
+			}
+		}
+		if msg, err := CheckPodEvents(&pod, workspaceID, clusterAPI); err != nil || msg != "" {
+			return msg, err
+		}
+	}
+	return "", nil
+}
+
+func CheckPodEvents(pod *corev1.Pod, workspaceID string, clusterAPI sync.ClusterAPI) (msg string, err error) {
+	evs := &corev1.EventList{}
+	selector, err := fields.ParseSelector(fmt.Sprintf("involvedObject.name=%s", pod.Name))
+	if err != nil {
+		return "", fmt.Errorf("failed to parse field selector: %s", err)
+	}
+	if err := clusterAPI.Client.List(clusterAPI.Ctx, evs, k8sclient.InNamespace(pod.Namespace), k8sclient.MatchingFieldsSelector{Selector: selector}); err != nil {
+		return "", fmt.Errorf("failed to list events in namespace %s: %w", pod.Namespace, err)
+	}
+	for _, ev := range evs.Items {
+		if ev.InvolvedObject.Kind != "Pod" {
+			continue
+		}
+
+		// On OpenShift, it's possible to see "FailedMount" events when using a routingClass that depends on the service-ca
+		// operator. To avoid this, we always ignore FailedMount events if the message refers to the DWO-provisioned volume
+		if infrastructure.IsOpenShift() &&
+			ev.Reason == "FailedMount" &&
+			strings.Contains(ev.Message, common.ServingCertVolumeName(common.ServiceName(workspaceID))) {
+			continue
+		}
+
+		if maxCount, isUnrecoverableEvent := unrecoverablePodEventReasons[ev.Reason]; isUnrecoverableEvent {
+			if !checkIfUnrecoverableEventIgnored(ev.Reason) && ev.Count >= maxCount {
+				var msg string
+				if ev.Count > 1 {
+					msg = fmt.Sprintf("Detected unrecoverable event %s %d times: %s.", ev.Reason, ev.Count, ev.Message)
+				} else {
+					msg = fmt.Sprintf("Detected unrecoverable event %s: %s.", ev.Reason, ev.Message)
+				}
+				return msg, nil
+			}
+		}
+	}
+	return "", nil
+}
+
+func CheckContainerStatusForFailure(containerStatus *corev1.ContainerStatus) (ok bool, reason string) {
+	if containerStatus.State.Waiting != nil {
+		for _, failureReason := range containerFailureStateReasons {
+			if containerStatus.State.Waiting.Reason == failureReason {
+				return checkIfUnrecoverableEventIgnored(containerStatus.State.Waiting.Reason), containerStatus.State.Waiting.Reason
+			}
+		}
+	}
+
+	if containerStatus.State.Terminated != nil {
+		for _, failureReason := range containerFailureStateReasons {
+			if containerStatus.State.Terminated.Reason == failureReason {
+				return checkIfUnrecoverableEventIgnored(containerStatus.State.Terminated.Reason), containerStatus.State.Terminated.Reason
+			}
+		}
+	}
+	return true, ""
+}
+
+func checkIfUnrecoverableEventIgnored(reason string) (ignored bool) {
+	for _, ignoredReason := range config.Workspace.IgnoredUnrecoverableEvents {
+		if ignoredReason == reason {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/provision/storage/cleanup.go b/pkg/provision/storage/cleanup.go
index 6aa9f0463..ac81e22be 100644
--- a/pkg/provision/storage/cleanup.go
+++ b/pkg/provision/storage/cleanup.go
@@ -21,6 +21,7 @@ import (
 	"time"
 
 	dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
+	"github.com/devfile/devworkspace-operator/pkg/library/status"
 	nsconfig "github.com/devfile/devworkspace-operator/pkg/provision/config"
 	"github.com/devfile/devworkspace-operator/pkg/provision/sync"
 	batchv1 "k8s.io/api/batch/v1"
@@ -29,6 +30,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 
 	"github.com/devfile/devworkspace-operator/internal/images"
@@ -91,6 +93,21 @@ func runCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.ClusterA
 			}
 		}
 	}
+
+	msg, err := status.CheckPodsState(workspace.Status.DevWorkspaceId, clusterJob.Namespace, k8sclient.MatchingLabels{"job-name": common.PVCCleanupJobName(workspace.Status.DevWorkspaceId)}, clusterAPI)
+	if err != nil {
+		return &ProvisioningError{
+			Err: err,
+		}
+	}
+
+	if msg != "" {
+		errMsg := fmt.Sprintf("DevWorkspace common PVC cleanup job failed: see logs for job %q for details. Additional information: %s", clusterJob.Name, msg)
+		return &ProvisioningError{
+			Message: errMsg,
+		}
+	}
+
 	// Requeue at least each 10 seconds to check if PVC is not removed by someone else
 	return &NotReadyError{
 		Message: "Cleanup job is not in completed state",
@@ -110,7 +127,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
 	}
 
 	jobLabels := map[string]string{
-		constants.DevWorkspaceIDLabel: workspaceId,
+		constants.DevWorkspaceIDLabel:      workspaceId,
+		constants.DevWorkspaceNameLabel:    workspace.Name,
+		constants.DevWorkspaceCreatorLabel: workspace.Labels[constants.DevWorkspaceCreatorLabel],
 	}
 	if restrictedAccess, needsRestrictedAccess := workspace.Annotations[constants.DevWorkspaceRestrictedAccessAnnotation]; needsRestrictedAccess {
 		jobLabels[constants.DevWorkspaceRestrictedAccessAnnotation] = restrictedAccess
@@ -126,6 +145,9 @@ func getSpecCommonPVCCleanupJob(workspace *dw.DevWorkspace, clusterAPI sync.Clus
 			Completions:  &cleanupJobCompletions,
 			BackoffLimit: &cleanupJobBackoffLimit,
 			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: jobLabels,
+				},
 				Spec: corev1.PodSpec{
 					RestartPolicy:   "Never",
 					SecurityContext: wsprovision.GetDevWorkspaceSecurityContext(),
diff --git a/pkg/provision/storage/perWorkspaceStorage.go b/pkg/provision/storage/perWorkspaceStorage.go
index 4dafe8bdd..db6af9a01 100644
--- a/pkg/provision/storage/perWorkspaceStorage.go
+++ b/pkg/provision/storage/perWorkspaceStorage.go
@@ -63,6 +63,13 @@ func (p *PerWorkspaceStorageProvisioner) ProvisionStorage(podAdditions *v1alpha1
 	}
 	pvcName := perWorkspacePVC.Name
 
+	// If PVC is being deleted, we need to fail workspace startup as a running pod will block deletion.
+	if perWorkspacePVC.DeletionTimestamp != nil {
+		return &ProvisioningError{
+			Message: "DevWorkspace PVC is being deleted",
+		}
+	}
+
 	// Rewrite container volume mounts
 	if err := p.rewriteContainerVolumeMounts(workspace.Status.DevWorkspaceId, pvcName, podAdditions, &workspace.Spec.Template); err != nil {
 		return &ProvisioningError{
diff --git a/pkg/provision/workspace/deployment.go b/pkg/provision/workspace/deployment.go
index c5779e3ca..bb3a35a79 100644
--- a/pkg/provision/workspace/deployment.go
+++ b/pkg/provision/workspace/deployment.go
@@ -19,11 +19,10 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"strings"
 
+	"github.com/devfile/devworkspace-operator/pkg/library/status"
 	nsconfig "github.com/devfile/devworkspace-operator/pkg/provision/config"
 	"github.com/devfile/devworkspace-operator/pkg/provision/sync"
-	"k8s.io/apimachinery/pkg/fields"
 
 	dw "github.com/devfile/api/v2/pkg/apis/workspaces/v1alpha2"
 	"github.com/devfile/devworkspace-operator/apis/controller/v1alpha1"
@@ -44,29 +43,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 )
 
-var containerFailureStateReasons = []string{
-	"CrashLoopBackOff",
-	"ImagePullBackOff",
-	"CreateContainerError",
-	"RunContainerError",
-}
-
-// unrecoverablePodEventReasons contains Kubernetes events that should fail workspace startup
-// if they occur related to a workspace pod. Events are stored as a map with event names as keys
-// and values representing the threshold of how many times we can see an event before it is considered
-// unrecoverable.
-var unrecoverablePodEventReasons = map[string]int32{
-	"FailedPostStartHook":   1,
-	"FailedMount":           3,
-	"FailedScheduling":      1,
-	"FailedCreate":          1,
-	"ReplicaSetCreateError": 1,
-}
-
-var unrecoverableDeploymentConditionReasons = []string{
-	"FailedCreate",
-}
-
 type DeploymentProvisioningStatus struct {
 	ProvisioningStatus
 }
@@ -121,7 +97,7 @@ func SyncDeploymentToCluster(
 	}
 
 	clusterDeployment := clusterObj.(*appsv1.Deployment)
-	deploymentReady := checkDeploymentStatus(clusterDeployment)
+	deploymentReady := status.CheckDeploymentStatus(clusterDeployment)
 	if deploymentReady {
 		return DeploymentProvisioningStatus{
 			ProvisioningStatus: ProvisioningStatus{
@@ -130,7 +106,7 @@ func SyncDeploymentToCluster(
 		}
 	}
 
-	deploymentHealthy, deploymentErrMsg := checkDeploymentConditions(clusterDeployment)
+	deploymentHealthy, deploymentErrMsg := status.CheckDeploymentConditions(clusterDeployment)
 	if !deploymentHealthy {
 		return DeploymentProvisioningStatus{
 			ProvisioningStatus: ProvisioningStatus{
@@ -140,7 +116,9 @@ func SyncDeploymentToCluster(
 		}
 	}
 
-	failureMsg, checkErr := checkPodsState(workspace, clusterAPI)
+	failureMsg, checkErr := status.CheckPodsState(workspace.Status.DevWorkspaceId, workspace.Namespace, k8sclient.MatchingLabels{
+		constants.DevWorkspaceIDLabel: workspace.Status.DevWorkspaceId,
+	}, clusterAPI)
 	if checkErr != nil {
 		return DeploymentProvisioningStatus{
 			ProvisioningStatus: ProvisioningStatus{
@@ -201,22 +179,6 @@ func GetDevWorkspaceSecurityContext() *corev1.PodSecurityContext {
 	return config.Workspace.PodSecurityContext
 }
 
-func checkDeploymentStatus(deployment *appsv1.Deployment) (ready bool) {
-	return deployment.Status.ReadyReplicas > 0
-}
-
-func checkDeploymentConditions(deployment *appsv1.Deployment) (healthy bool, errorMsg string) {
-	conditions := deployment.Status.Conditions
-	for _, condition := range conditions {
-		for _, unrecoverableReason := range unrecoverableDeploymentConditionReasons {
-			if condition.Reason == unrecoverableReason {
-				return false, fmt.Sprintf("Detected unrecoverable deployment condition: %s %s", condition.Reason, condition.Message)
-			}
-		}
-	}
-	return true, ""
-}
-
 func getSpecDeployment(
 	workspace *dw.DevWorkspace,
 	podAdditionsList []v1alpha1.PodAdditions,
@@ -337,46 +299,6 @@ func getSpecDeployment(
 	return deployment, nil
 }
 
-func getPods(workspace *dw.DevWorkspace, client runtimeClient.Client) (*corev1.PodList, error) {
-	pods := &corev1.PodList{}
-	if err := client.List(context.TODO(), pods, k8sclient.InNamespace(workspace.Namespace), k8sclient.MatchingLabels{
-		constants.DevWorkspaceIDLabel: workspace.Status.DevWorkspaceId,
-	}); err != nil {
-		return nil, err
-	}
-	return pods, nil
-}
-
-// checkPodsState checks if workspace-related pods are in an unrecoverable state. A pod is considered to be unrecoverable
-// if it has a container with one of the containerStateFailureReasons states, or if an unrecoverable event (with reason
-// matching unrecoverablePodEventReasons) has the pod as the involved object.
-// Returns optional message with detected unrecoverable state details
-// error if any happens during check
-func checkPodsState(workspace *dw.DevWorkspace,
-	clusterAPI sync.ClusterAPI) (stateMsg string, checkFailure error) {
-	podList, err := getPods(workspace, clusterAPI.Client)
-	if err != nil {
-		return "", err
-	}
-
-	for _, pod := range podList.Items {
-		for _, containerStatus := range pod.Status.ContainerStatuses {
-			if !checkContainerStatusForFailure(&containerStatus) {
-				return fmt.Sprintf("Container %s has state %s", containerStatus.Name, containerStatus.State.Waiting.Reason), nil
-			}
-		}
-		for _, initContainerStatus := range pod.Status.InitContainerStatuses {
-			if !checkContainerStatusForFailure(&initContainerStatus) {
-				return fmt.Sprintf("Init Container %s has state %s", initContainerStatus.Name, initContainerStatus.State.Waiting.Reason), nil
-			}
-		}
-		if msg, err := checkPodEvents(&pod, workspace.Status.DevWorkspaceId, clusterAPI); err != nil || msg != "" {
-			return msg, err
-		}
-	}
-	return "", nil
-}
-
 func mergePodAdditions(toMerge []v1alpha1.PodAdditions) (*v1alpha1.PodAdditions, error) {
 	podAdditions := &v1alpha1.PodAdditions{}
 
@@ -476,60 +398,3 @@ func getAdditionalAnnotations(workspace *dw.DevWorkspace) (map[string]string, er
 
 	return annotations, nil
 }
-
-func checkPodEvents(pod *corev1.Pod, workspaceID string, clusterAPI sync.ClusterAPI) (msg string, err error) {
-	evs := &corev1.EventList{}
-	selector, err := fields.ParseSelector(fmt.Sprintf("involvedObject.name=%s", pod.Name))
-	if err != nil {
-		return "", fmt.Errorf("failed to parse field selector: %s", err)
-	}
-	if err := clusterAPI.Client.List(clusterAPI.Ctx, evs, k8sclient.InNamespace(pod.Namespace), k8sclient.MatchingFieldsSelector{Selector: selector}); err != nil {
-		return "", fmt.Errorf("failed to list events in namespace %s: %w", pod.Namespace, err)
-	}
-	for _, ev := range evs.Items {
-		if ev.InvolvedObject.Kind != "Pod" {
-			continue
-		}
-
-		// On OpenShift, it's possible see "FailedMount" events when using a routingClass that depends on the service-ca
-		// operator. To avoid this, we always ignore FailedMount events if the message refers to the DWO-provisioned volume
-		if infrastructure.IsOpenShift() &&
-			ev.Reason == "FailedMount" &&
-			strings.Contains(ev.Message, common.ServingCertVolumeName(common.ServiceName(workspaceID))) {
-			continue
-		}
-
-		if maxCount, isUnrecoverableEvent := unrecoverablePodEventReasons[ev.Reason]; isUnrecoverableEvent {
-			if !checkIfUnrecoverableEventIgnored(ev.Reason) && ev.Count >= maxCount {
-				var msg string
-				if ev.Count > 1 {
-					msg = fmt.Sprintf("Detected unrecoverable event %s %d times: %s", ev.Reason, ev.Count, ev.Message)
-				} else {
-					msg = fmt.Sprintf("Detected unrecoverable event %s: %s", ev.Reason, ev.Message)
-				}
-				return msg, nil
-			}
-		}
-	}
-	return "", nil
-}
-
-func checkContainerStatusForFailure(containerStatus *corev1.ContainerStatus) (ok bool) {
-	if containerStatus.State.Waiting != nil {
-		for _, failureReason := range containerFailureStateReasons {
-			if containerStatus.State.Waiting.Reason == failureReason {
-				return checkIfUnrecoverableEventIgnored(containerStatus.State.Waiting.Reason)
-			}
-		}
-	}
-	return true
-}
-
-func checkIfUnrecoverableEventIgnored(reason string) (ignored bool) {
-	for _, ignoredReason := range config.Workspace.IgnoredUnrecoverableEvents {
-		if ignoredReason == reason {
-			return true
-		}
-	}
-	return false
-}
diff --git a/project-clone/Dockerfile b/project-clone/Dockerfile
index dca26adcf..7e12d57b2 100644
--- a/project-clone/Dockerfile
+++ b/project-clone/Dockerfile
@@ -15,7 +15,7 @@
 
 # Build the manager binary
 # https://access.redhat.com/containers/?tab=tags#/registry.access.redhat.com/ubi8/go-toolset
-FROM registry.access.redhat.com/ubi8/go-toolset:1.17.7-13 as builder
+FROM registry.access.redhat.com/ubi8/go-toolset:1.17.10-4 as builder
 ENV GOPATH=/go/
 USER root
 WORKDIR /project-clone
@@ -37,7 +37,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build \
 	project-clone/main.go
 
 # https://access.redhat.com/containers/?tab=tags#/registry.access.redhat.com/ubi8-minimal
-FROM registry.access.redhat.com/ubi8-minimal:8.6-751
+FROM registry.access.redhat.com/ubi8-minimal:8.6-854
 RUN microdnf -y update && microdnf install -y time git git-lfs && microdnf clean all && rm -rf /var/cache/yum && echo "Installed Packages" && rpm -qa | sort -V && echo "End Of Installed Packages"
 WORKDIR /
 COPY --from=builder /project-clone/_output/bin/project-clone /usr/local/bin/project-clone