diff --git a/components/ws-daemon/pkg/content/service.go b/components/ws-daemon/pkg/content/service.go
index 976ce6139ce7f1..c769db6d804b4b 100644
--- a/components/ws-daemon/pkg/content/service.go
+++ b/components/ws-daemon/pkg/content/service.go
@@ -5,17 +5,13 @@
 package content
 
 import (
-	"bufio"
-	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"math"
 	"os"
 	"path/filepath"
-	"strings"
 	"syscall"
 	"time"
 
@@ -24,12 +20,9 @@ import (
 	"github.com/opentracing/opentracing-go"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
-	"golang.org/x/sys/unix"
 	"golang.org/x/xerrors"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
-	"k8s.io/apimachinery/pkg/util/wait"
-	"k8s.io/client-go/util/retry"
 
 	"github.com/gitpod-io/gitpod/common-go/log"
 	"github.com/gitpod-io/gitpod/common-go/tracing"
@@ -263,12 +256,17 @@ func (s *WorkspaceService) creator(req *api.InitWorkspaceRequest) session.Worksp
 			ContentManifest:       req.ContentManifest,
 			RemoteStorageDisabled: req.RemoteStorageDisabled,
 
-			ServiceLocDaemon: filepath.Join(s.config.WorkingArea, req.Id+"-daemon"),
-			ServiceLocNode:   filepath.Join(s.config.WorkingAreaNode, req.Id+"-daemon"),
+			ServiceLocDaemon: filepath.Join(s.config.WorkingArea, ServiceDirName(req.Id)),
+			ServiceLocNode:   filepath.Join(s.config.WorkingAreaNode, ServiceDirName(req.Id)),
 		}, nil
 	}
 }
 
+// ServiceDirName produces the directory name for a workspace
+func ServiceDirName(instanceID string) string {
+	return instanceID + "-daemon"
+}
+
 // getCheckoutLocation returns the first checkout location found of any Git initializer configured by this request
 func getCheckoutLocation(req *api.InitWorkspaceRequest) string {
 	spec := req.Initializer.Spec
@@ -370,16 +368,6 @@ func (s *WorkspaceService) DisposeWorkspace(ctx context.Context, req *api.Dispos
 		return nil, status.Error(codes.Internal, "cannot delete workspace from store")
 	}
 
-	// Important:
-	// If ws-daemon is killed or restarts, the new ws-daemon pod mount table contains the
-	// node's current state, i.e., it has all the running workspaces running in the node.
-	// Unmounting the mark in Teardown (nsenter) works from inside the workspace,
-	// but the mount is still present in ws-daemon mount table. Unmounting the mark from
-	// ws-daemon is required to ensure the proper termination of the workspace pod.
-	if err := unmountMark(sess.ServiceLocDaemon); err != nil {
-		log.WithError(err).WithField("workspaceId", req.Id).Error("cannot unmount mark mount")
-	}
-
 	// remove workspace daemon directory in the node
 	if err := os.RemoveAll(sess.ServiceLocDaemon); err != nil {
 		log.WithError(err).WithField("workspaceId", req.Id).Error("cannot delete workspace daemon directory")
@@ -787,48 +775,3 @@ func workspaceLifecycleHooks(cfg Config, kubernetesNamespace string, workspaceEx
 		session.WorkspaceDisposed: {iws.StopServingWorkspace},
 	}
 }
-
-// if the mark mount still exists in /proc/mounts it means we failed to unmount it and
-// we cannot remove the content. As a side effect the pod will stay in Terminating state
-func unmountMark(dir string) error {
-	mounts, err := ioutil.ReadFile("/proc/mounts")
-	if err != nil {
-		return xerrors.Errorf("cannot read /proc/mounts: %w", err)
-	}
-
-	path := fromPartialMount(filepath.Join(dir, "mark"), mounts)
-	// empty path means no mount found
-	if path == "" {
-		return nil
-	}
-
-	// in some scenarios we need to wait for the unmount
-	var errorFn = func(err error) bool {
-		return strings.Contains(err.Error(), "device or resource busy")
-	}
-
-	return retry.OnError(wait.Backoff{
-		Steps:    5,
-		Duration: 1 * time.Second,
-		Factor:   5.0,
-		Jitter:   0.1,
-	}, errorFn, func() error {
-		return unix.Unmount(path, 0)
-	})
-}
-
-func fromPartialMount(path string, info []byte) string {
-	scanner := bufio.NewScanner(bytes.NewReader(info))
-	for scanner.Scan() {
-		mount := strings.Split(scanner.Text(), " ")
-		if len(mount) < 2 {
-			continue
-		}
-
-		if strings.Contains(mount[1], path) {
-			return mount[1]
-		}
-	}
-
-	return ""
-}
diff --git a/components/ws-daemon/pkg/daemon/daemon.go b/components/ws-daemon/pkg/daemon/daemon.go
index 8b4fa816f3f58b..f1de532b90d8d5 100644
--- a/components/ws-daemon/pkg/daemon/daemon.go
+++ b/components/ws-daemon/pkg/daemon/daemon.go
@@ -48,9 +48,14 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
 	}
 	cgCustomizer := &CgroupCustomizer{}
 	cgCustomizer.WithCgroupBasePath(config.Resources.CGroupsBasePath)
+	markUnmountFallback, err := NewMarkUnmountFallback(reg)
+	if err != nil {
+		return nil, err
+	}
 	dsptch, err := dispatch.NewDispatch(containerRuntime, clientset, config.Runtime.KubernetesNamespace, nodename,
 		resources.NewDispatchListener(&config.Resources, reg),
 		cgCustomizer,
+		markUnmountFallback,
 	)
 	if err != nil {
 		return nil, err
diff --git a/components/ws-daemon/pkg/daemon/markunmount.go b/components/ws-daemon/pkg/daemon/markunmount.go
new file mode 100644
index 00000000000000..dbd8703d3832aa
--- /dev/null
+++ b/components/ws-daemon/pkg/daemon/markunmount.go
@@ -0,0 +1,183 @@
+// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
+// Licensed under the GNU Affero General Public License (AGPL).
+// See License-AGPL.txt in the project root for license information.
+
+package daemon
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"io/ioutil"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/sync/errgroup"
+	"golang.org/x/sys/unix"
+	"golang.org/x/xerrors"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/util/retry"
+
+	"github.com/gitpod-io/gitpod/common-go/log"
+	"github.com/gitpod-io/gitpod/ws-daemon/pkg/content"
+	"github.com/gitpod-io/gitpod/ws-daemon/pkg/dispatch"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+	// propagationGracePeriod is the time we allow on top of a container's deletionGracePeriod
+	// to make sure the changes propagate on the data plane.
+	propagationGracePeriod = 10 * time.Second
+)
+
+// NewMarkUnmountFallback produces a new MarkUnmountFallback. reg can be nil
+func NewMarkUnmountFallback(reg prometheus.Registerer) (*MarkUnmountFallback, error) {
+	counter := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Name: "markunmountfallback_active_total",
+		Help: "counts how often the mark unmount fallback was active",
+	}, []string{"successful"})
+	if reg != nil {
+		err := reg.Register(counter)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &MarkUnmountFallback{
+		activityCounter: counter,
+	}, nil
+}
+
+// MarkUnmountFallback works around the mount propagation of the ring1 FS mark mount.
+// When ws-daemon restarts, runc propagates all rootfs mounts to ws-daemon's mount namespace.
+// This prevents proper unmounting of the mark mount, hence the rootfs of the workspace container.
+//
+// To work around this issue we wait pod.terminationGracePeriod + propagationGracePeriod,
+// after which we attempt to unmount the mark mount.
+//
+// Some clusters might run an older version of containerd, for which we build this workaround.
+type MarkUnmountFallback struct {
+	mu      sync.Mutex
+	handled map[string]struct{}
+
+	activityCounter *prometheus.CounterVec
+}
+
+// WorkspaceAdded does nothing but implement the dispatch.Listener interface
+func (c *MarkUnmountFallback) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspace) error {
+	return nil
+}
+
+// WorkspaceUpdated gets called when a workspace pod is updated. For containers being deleted, we'll check
+// if they're still running after their terminationGracePeriod and if Kubernetes still knows about them.
+func (c *MarkUnmountFallback) WorkspaceUpdated(ctx context.Context, ws *dispatch.Workspace) error {
+	if ws.Pod.DeletionTimestamp == nil {
+		return nil
+	}
+
+	err := func() error {
+		c.mu.Lock()
+		defer c.mu.Unlock()
+
+		if c.handled == nil {
+			c.handled = make(map[string]struct{})
+		}
+		if _, exists := c.handled[ws.InstanceID]; exists {
+			return nil
+		}
+		c.handled[ws.InstanceID] = struct{}{}
+		return nil
+	}()
+	if err != nil {
+		return err
+	}
+
+	var gracePeriod int64
+	if ws.Pod.DeletionGracePeriodSeconds != nil {
+		gracePeriod = *ws.Pod.DeletionGracePeriodSeconds
+	} else {
+		gracePeriod = 30
+	}
+	ttl := time.Duration(gracePeriod)*time.Second + propagationGracePeriod
+
+	go func() {
+		time.Sleep(ttl)
+
+		dsp := dispatch.GetFromContext(ctx)
+		if !dsp.WorkspaceExistsOnNode(ws.InstanceID) {
+			// container is already gone - all is well
+			return
+		}
+
+		err := unmountMark(ws.InstanceID)
+		if err != nil {
+			log.WithFields(ws.OWI()).WithError(err).Error("cannot unmount mark mount from within ws-daemon")
+			c.activityCounter.WithLabelValues("false").Inc()
+		} else {
+			c.activityCounter.WithLabelValues("true").Inc()
+		}
+
+		// We expect the container to be gone now. Don't keep its reference in memory.
+		c.mu.Lock()
+		delete(c.handled, ws.InstanceID)
+		c.mu.Unlock()
+	}()
+
+	return nil
+}
+
+// if the mark mount still exists in /proc/mounts it means we failed to unmount it and
+// we cannot remove the content. As a side effect the pod will stay in Terminating state
+func unmountMark(instanceID string) error {
+	mounts, err := ioutil.ReadFile("/proc/mounts")
+	if err != nil {
+		return xerrors.Errorf("cannot read /proc/mounts: %w", err)
+	}
+
+	dir := content.ServiceDirName(instanceID)
+	path := fromPartialMount(filepath.Join(dir, "mark"), mounts)
+	// an empty path list means no mount was found
+	if len(path) == 0 {
+		return nil
+	}
+
+	// in some scenarios we need to wait for the unmount
+	var errorFn = func(err error) bool {
+		return strings.Contains(err.Error(), "device or resource busy")
+	}
+
+	var eg errgroup.Group
+	for _, p := range path {
+		// capture p so that we can use it inside the goroutine
+		p := p
+		eg.Go(func() error {
+			return retry.OnError(wait.Backoff{
+				Steps:    5,
+				Duration: 1 * time.Second,
+				Factor:   5.0,
+				Jitter:   0.1,
+			}, errorFn, func() error {
+				return unix.Unmount(p, 0)
+			})
+		})
+	}
+	return eg.Wait()
+}
+
+func fromPartialMount(path string, info []byte) (res []string) {
+	scanner := bufio.NewScanner(bytes.NewReader(info))
+	for scanner.Scan() {
+		mount := strings.Split(scanner.Text(), " ")
+		if len(mount) < 2 {
+			continue
+		}
+
+		if strings.Contains(mount[1], path) {
+			res = append(res, mount[1])
+		}
+	}
+
+	return res
+}
diff --git a/components/ws-manager/pkg/manager/monitor.go b/components/ws-manager/pkg/manager/monitor.go
index 1915551cbaf47b..737b1d906ad020 100644
--- a/components/ws-manager/pkg/manager/monitor.go
+++ b/components/ws-manager/pkg/manager/monitor.go
@@ -363,22 +363,9 @@ func actOnPodEvent(ctx context.Context, m actingManager, status *api.WorkspaceSt
 
 		_, gone := wso.Pod.Annotations[wsk8s.ContainerIsGoneAnnotation]
 		if terminated || gone {
-			// workaround for https://github.com/containerd/containerd/pull/4214 which can prevent pod status
-			// propagation. ws-daemon observes the pods and propagates this state out-of-band via the annotation.
+			// We start finalizing the workspace content only after the container is gone. This way we ensure there's
+			// no process modifying the workspace content as we create the backup.
 			go m.finalizeWorkspaceContent(ctx, wso)
-		} else {
-			// add an additional wait time on top of a deletionGracePeriod
-			// to make sure the changes propagate on the data plane.
-			var gracePeriod int64 = 30
-			if wso.Pod.DeletionGracePeriodSeconds != nil {
-				gracePeriod = *wso.Pod.DeletionGracePeriodSeconds
-			}
-			ttl := time.Duration(gracePeriod) * time.Second * 2
-
-			go func() {
-				time.Sleep(ttl)
-				m.finalizeWorkspaceContent(ctx, wso)
-			}()
 		}
 	}
 
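Note (illustrative, not part of the patch): a minimal sketch of how the substring matching in fromPartialMount could be exercised in a unit test within the same daemon package. The file name markunmount_test.go and the /proc/mounts lines below are hypothetical.

// markunmount_test.go (hypothetical): exercises fromPartialMount with made-up /proc/mounts content.
package daemon

import "testing"

func TestFromPartialMount(t *testing.T) {
	// Only the first line's mount point contains the partial path we search for.
	procMounts := []byte(
		"shiftfs /run/gitpod/abc123-daemon/mark shiftfs rw,relatime 0 0\n" +
			"tmpfs /run/other tmpfs rw,nosuid 0 0\n")

	got := fromPartialMount("abc123-daemon/mark", procMounts)
	if len(got) != 1 || got[0] != "/run/gitpod/abc123-daemon/mark" {
		t.Fatalf("unexpected mount points: %v", got)
	}
}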