[ws-daemon] Properly handle mark unmount

csweichel committed Sep 28, 2021
1 parent 60a93e7 commit 78dcf4a
Showing 3 changed files with 186 additions and 79 deletions.
71 changes: 7 additions & 64 deletions components/ws-daemon/pkg/content/service.go
@@ -5,17 +5,13 @@
package content

import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
"strings"
"syscall"
"time"

@@ -24,12 +20,9 @@ import (
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"

"github.com/gitpod-io/gitpod/common-go/log"
"github.com/gitpod-io/gitpod/common-go/tracing"
@@ -263,12 +256,17 @@ func (s *WorkspaceService) creator(req *api.InitWorkspaceRequest) session.Worksp
ContentManifest: req.ContentManifest,
RemoteStorageDisabled: req.RemoteStorageDisabled,

ServiceLocDaemon: filepath.Join(s.config.WorkingArea, req.Id+"-daemon"),
ServiceLocNode: filepath.Join(s.config.WorkingAreaNode, req.Id+"-daemon"),
ServiceLocDaemon: filepath.Join(s.config.WorkingArea, ServiceDirName(req.Id)),
ServiceLocNode: filepath.Join(s.config.WorkingAreaNode, ServiceDirName(req.Id)),
}, nil
}
}

// ServiceDirName produces the directory name for a workspace
func ServiceDirName(instanceID string) string {
return instanceID + "-daemon"
}

// getCheckoutLocation returns the first checkout location found of any Git initializer configured by this request
func getCheckoutLocation(req *api.InitWorkspaceRequest) string {
spec := req.Initializer.Spec
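
Both call sites above now derive the daemon directory from the new ServiceDirName helper. A minimal, self-contained sketch of the resulting layout — the paths and instance ID are illustrative, not from the commit, and serviceDirName mirrors content.ServiceDirName introduced above:

package main

import (
	"fmt"
	"path/filepath"
)

// serviceDirName mirrors content.ServiceDirName from this commit.
func serviceDirName(instanceID string) string {
	return instanceID + "-daemon"
}

func main() {
	// workingArea and workingAreaNode stand in for the daemon's configured paths.
	workingArea := "/mnt/workingarea"           // the working area inside the ws-daemon container
	workingAreaNode := "/var/gitpod/workspaces" // the same area as seen from the node
	instanceID := "abc123"

	fmt.Println(filepath.Join(workingArea, serviceDirName(instanceID)))     // /mnt/workingarea/abc123-daemon
	fmt.Println(filepath.Join(workingAreaNode, serviceDirName(instanceID))) // /var/gitpod/workspaces/abc123-daemon
}

Because both sides agree on the directory name, code that only knows the instance ID (such as the new unmount fallback below) can reconstruct the mount location without a session object.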
@@ -370,16 +368,6 @@ func (s *WorkspaceService) DisposeWorkspace(ctx context.Context, req *api.Dispos
return nil, status.Error(codes.Internal, "cannot delete workspace from store")
}

// Important:
// If ws-daemon is killed or restarts, the new ws-daemon pod mount table contains the
// node's current state, i.e., it has all the running workspaces running in the node.
// Unmounting the mark in Teardown (nsenter) works from inside the workspace,
// but the mount is still present in ws-daemon mount table. Unmounting the mark from
// ws-daemon is required to ensure the proper termination of the workspace pod.
if err := unmountMark(sess.ServiceLocDaemon); err != nil {
log.WithError(err).WithField("workspaceId", req.Id).Error("cannot unmount mark mount")
}

// remove workspace daemon directory in the node
if err := os.RemoveAll(sess.ServiceLocDaemon); err != nil {
log.WithError(err).WithField("workspaceId", req.Id).Error("cannot delete workspace daemon directory")
@@ -787,48 +775,3 @@ func workspaceLifecycleHooks(cfg Config, kubernetesNamespace string, workspaceEx
session.WorkspaceDisposed: {iws.StopServingWorkspace},
}
}

// if the mark mount still exists in /proc/mounts it means we failed to unmount it and
// we cannot remove the content. As a side effect the pod will stay in Terminating state
func unmountMark(dir string) error {
mounts, err := ioutil.ReadFile("/proc/mounts")
if err != nil {
return xerrors.Errorf("cannot read /proc/mounts: %w", err)
}

path := fromPartialMount(filepath.Join(dir, "mark"), mounts)
// empty path means no mount found
if path == "" {
return nil
}

// in some scenarios we need to wait for the unmount
var errorFn = func(err error) bool {
return strings.Contains(err.Error(), "device or resource busy")
}

return retry.OnError(wait.Backoff{
Steps: 5,
Duration: 1 * time.Second,
Factor: 5.0,
Jitter: 0.1,
}, errorFn, func() error {
return unix.Unmount(path, 0)
})
}

func fromPartialMount(path string, info []byte) string {
scanner := bufio.NewScanner(bytes.NewReader(info))
for scanner.Scan() {
mount := strings.Split(scanner.Text(), " ")
if len(mount) < 2 {
continue
}

if strings.Contains(mount[1], path) {
return mount[1]
}
}

return ""
}
177 changes: 177 additions & 0 deletions components/ws-daemon/pkg/daemon/markunmount.go
@@ -0,0 +1,177 @@
// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package daemon

import (
"bufio"
"bytes"
"context"
"io/ioutil"
"path/filepath"
"strings"
"sync"
"time"

"golang.org/x/sync/errgroup"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"

"github.com/gitpod-io/gitpod/common-go/log"
"github.com/gitpod-io/gitpod/ws-daemon/pkg/content"
"github.com/gitpod-io/gitpod/ws-daemon/pkg/dispatch"
"github.com/prometheus/client_golang/prometheus"
)

const (
// propagationGracePeriod is the time we allow on top of a container's deletionGracePeriod
// to make sure the changes propagate on the data plane.
propagationGracePeriod = 10 * time.Second
)

// NewMarkUnmountFallback produces a new MarkUnmountFallback. reg can be nil
func NewMarkUnmountFallback(reg prometheus.Registerer) (*MarkUnmountFallback, error) {
counter := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "markunmountfallback_active_total",
Help: "counts how often the mark unmount fallback was active",
}, []string{"successful"})
if reg != nil {
err := reg.Register(counter)
if err != nil {
return nil, err
}
}

return &MarkUnmountFallback{
activityCounter: counter,
}, nil
}

// MarkUnmountFallback works around the mount propagation of the ring1 FS mark mount.
// When ws-daemon restarts runc propagates all rootfs mounts to ws-daemon's mount namespace.
// This prevents proper unmounting of the mark mount, and hence of the workspace container's rootfs.
//
// To work around this issue we wait pod.terminationGracePeriod + propagationGracePeriod,
// after which we attempt to unmount the mark mount.
//
// Some clusters might run an older version of containerd, for which we build this workaround.
type MarkUnmountFallback struct {
mu sync.Mutex
handled map[string]struct{}

activityCounter *prometheus.CounterVec
}

// WorkspaceAdded does nothing but is required to implement the dispatch.Listener interface
func (c *MarkUnmountFallback) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspace) error {
return nil
}

// WorkspaceUpdated gets called when a workspace pod is updated. For containers being deleted, we'll check
// if they're still running after their terminationGracePeriod and if Kubernetes still knows about them.
func (c *MarkUnmountFallback) WorkspaceUpdated(ctx context.Context, ws *dispatch.Workspace) error {
if ws.Pod.DeletionTimestamp == nil {
return nil
}

c.mu.Lock()
if c.handled == nil {
c.handled = make(map[string]struct{})
}
if _, exists := c.handled[ws.InstanceID]; exists {
c.mu.Unlock()
return nil
}
c.handled[ws.InstanceID] = struct{}{}
c.mu.Unlock()

var gracePeriod int64
if ws.Pod.DeletionGracePeriodSeconds != nil {
gracePeriod = *ws.Pod.DeletionGracePeriodSeconds
} else {
gracePeriod = 30
}
ttl := time.Duration(gracePeriod)*time.Second + propagationGracePeriod
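// With Kubernetes' default deletionGracePeriod of 30s this yields
// ttl = 30s + 10s (propagationGracePeriod) = 40s before the fallback checks the node.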

go func() {
time.Sleep(ttl)

dsp := dispatch.GetFromContext(ctx)
if !dsp.WorkspaceExistsOnNode(ws.InstanceID) {
// container is already gone - all is well
return
}

err := unmountMark(ws.InstanceID)
if err != nil {
log.WithFields(ws.OWI()).WithError(err).Error("cannot unmount mark mount from within ws-daemon")
c.activityCounter.WithLabelValues("false").Inc()
} else {
c.activityCounter.WithLabelValues("true").Inc()
}

// We expect the container to be gone now. Don't keep its reference in memory.
c.mu.Lock()
delete(c.handled, ws.InstanceID)
c.mu.Unlock()
}()

return nil
}

// if the mark mount still exists in /proc/mounts it means we failed to unmount it and
// cannot remove the content. As a side effect the pod will stay in Terminating state.
func unmountMark(instanceID string) error {
mounts, err := ioutil.ReadFile("/proc/mounts")
if err != nil {
return xerrors.Errorf("cannot read /proc/mounts: %w", err)
}

dir := content.ServiceDirName(instanceID)
path := fromPartialMount(filepath.Join(dir, "mark"), mounts)
// an empty result means no mark mount was found
if len(path) == 0 {
return nil
}

// in some scenarios we need to wait for the unmount
var errorFn = func(err error) bool {
return strings.Contains(err.Error(), "device or resource busy")
}

var eg errgroup.Group
for _, p := range path {
// capture the loop variable by value so it can be used safely inside the goroutine
p := p
eg.Go(func() error {
return retry.OnError(wait.Backoff{
Steps: 5,
Duration: 1 * time.Second,
Factor: 5.0,
Jitter: 0.1,
}, errorFn, func() error {
return unix.Unmount(p, 0)
})
})
}
return eg.Wait()
}
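
// Note on the backoff above: with Steps 5, Duration 1s and Factor 5.0 the retry
// sleeps roughly 1s, 5s, 25s and 125s (plus up to 10% jitter) between attempts,
// so a busy mark mount is retried for about 2.5 minutes before we give up.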

func fromPartialMount(path string, info []byte) (res []string) {
scanner := bufio.NewScanner(bytes.NewReader(info))
for scanner.Scan() {
mount := strings.Split(scanner.Text(), " ")
if len(mount) < 2 {
continue
}

if strings.Contains(mount[1], path) {
res = append(res, mount[1])
}
}

return res
}
17 changes: 2 additions & 15 deletions components/ws-manager/pkg/manager/monitor.go
@@ -363,22 +363,9 @@ func actOnPodEvent(ctx context.Context, m actingManager, status *api.WorkspaceSt
_, gone := wso.Pod.Annotations[wsk8s.ContainerIsGoneAnnotation]

if terminated || gone {
// workaround for https://github.com/containerd/containerd/pull/4214 which can prevent pod status
// propagation. ws-daemon observes the pods and propagates this state out-of-band via the annotation.
// We start finalizing the workspace content only after the container is gone. This way we ensure there's
// no process modifying the workspace content as we create the backup.
go m.finalizeWorkspaceContent(ctx, wso)
} else {
// add an additional wait time on top of a deletionGracePeriod
// to make sure the changes propagate on the data plane.
var gracePeriod int64 = 30
if wso.Pod.DeletionGracePeriodSeconds != nil {
gracePeriod = *wso.Pod.DeletionGracePeriodSeconds
}
ttl := time.Duration(gracePeriod) * time.Second * 2

go func() {
time.Sleep(ttl)
m.finalizeWorkspaceContent(ctx, wso)
}()
}
}
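
Taken together with the daemon-side fallback above, the effective timing changes. A small sketch of the difference, assuming the default 30s grace period (constants taken from the diffs above):

package main

import (
	"fmt"
	"time"
)

func main() {
	gracePeriod := 30 * time.Second

	// Removed ws-manager behaviour: always sleep twice the grace period before finalizing.
	oldWait := gracePeriod * 2 // 60s, even when the container was long gone

	// New behaviour: ws-manager finalizes as soon as the container is terminated or gone;
	// the ws-daemon fallback waits gracePeriod + propagationGracePeriod and then only
	// acts if the container still exists on the node.
	fallbackWait := gracePeriod + 10*time.Second // 40s

	fmt.Println(oldWait, fallbackWait) // 1m0s 40s
}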
