Skip to content

Commit

Permalink
Merge pull request #77 from Random-Liu/unmount-dev-shm
Browse files Browse the repository at this point in the history
Unmount dev shm and cleanup container when stop/remove sandbox
  • Loading branch information
Random-Liu committed Jun 16, 2017
2 parents cc43c86 + 57b8b43 commit 9658159
Show file tree
Hide file tree
Showing 25 changed files with 1,131 additions and 23 deletions.
5 changes: 5 additions & 0 deletions Godeps/Godeps.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion pkg/os/os.go
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/containerd/fifo"
"github.com/docker/docker/pkg/mount"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -90,7 +91,12 @@ func (RealOS) Mount(source string, target string, fstype string, flags uintptr,
return unix.Mount(source, target, fstype, flags, data)
}

// Unmount will call unix.Unmount to unmount the file.
// Unmount will call unix.Unmount to unmount the file. The function doesn't
// return error if target is not mounted.
func (RealOS) Unmount(target string, flags int) error {
// TODO(random-liu): Follow symlink to make sure the result is correct.
if mounted, err := mount.Mounted(target); err != nil || !mounted {
return err
}
return unix.Unmount(target, flags)
}
33 changes: 21 additions & 12 deletions pkg/server/container_stop.go
Expand Up @@ -55,59 +55,68 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto
if err != nil {
return nil, fmt.Errorf("an error occurred when try to find container %q: %v", r.GetContainerId(), err)
}
id := r.GetContainerId()

if err := c.stopContainer(ctx, meta, time.Duration(r.GetTimeout())*time.Second); err != nil {
return nil, err
}

return &runtime.StopContainerResponse{}, nil
}

// stopContainer stops a container based on the container metadata.
func (c *criContainerdService) stopContainer(ctx context.Context, meta *metadata.ContainerMetadata, timeout time.Duration) error {
id := meta.ID

// Return without error if container is not running. This makes sure that
// stop only takes real action after the container is started.
if meta.State() != runtime.ContainerState_CONTAINER_RUNNING {
glog.V(2).Infof("Container to stop %q is not running, current state %q",
id, criContainerStateToString(meta.State()))
return &runtime.StopContainerResponse{}, nil
return nil
}

if r.GetTimeout() > 0 {
if timeout > 0 {
// TODO(random-liu): [P1] Get stop signal from image config.
stopSignal := unix.SIGTERM
glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
ContainerID: id,
Signal: uint32(stopSignal),
PidOrAll: &execution.KillRequest_All{All: true},
})
if err != nil {
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
return nil, fmt.Errorf("failed to stop container %q: %v", id, err)
return fmt.Errorf("failed to stop container %q: %v", id, err)
}
// Move on to make sure container status is updated.
}

err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second)
err = c.waitContainerStop(ctx, id, timeout)
if err == nil {
return &runtime.StopContainerResponse{}, nil
return nil
}
glog.Errorf("Stop container %q timed out: %v", id, err)
}

// Event handler will Delete the container from containerd after it handles the Exited event.
glog.V(2).Infof("Kill container %q", id)
_, err = c.taskService.Kill(ctx, &execution.KillRequest{
_, err := c.taskService.Kill(ctx, &execution.KillRequest{
ContainerID: id,
Signal: uint32(unix.SIGKILL),
PidOrAll: &execution.KillRequest_All{All: true},
})
if err != nil {
if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
return nil, fmt.Errorf("failed to kill container %q: %v", id, err)
return fmt.Errorf("failed to kill container %q: %v", id, err)
}
// Move on to make sure container status is updated.
}

// Wait for a fixed timeout until container stop is observed by event monitor.
if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
return nil, fmt.Errorf("an error occurs during waiting for container %q to stop: %v",
id, err)
return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err)
}
return &runtime.StopContainerResponse{}, nil
return nil
}

// waitContainerStop polls container state until timeout exceeds or container is stopped.
Expand Down
22 changes: 19 additions & 3 deletions pkg/server/sandbox_remove.go
Expand Up @@ -53,8 +53,6 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
// Use the full sandbox id.
id := sandbox.ID

// TODO(random-liu): [P2] Remove all containers in the sandbox.

// Return error if sandbox container is not fully stopped.
// TODO(random-liu): [P0] Make sure network is torn down, may need to introduce a state.
_, err = c.taskService.Info(ctx, &execution.InfoRequest{ContainerID: id})
Expand All @@ -73,7 +71,25 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime.
glog.V(5).Infof("Remove called for snapshot %q that does not exist", id)
}

// TODO(random-liu): [P0] Cleanup shm created in RunPodSandbox.
// Remove all containers inside the sandbox.
// NOTE(random-liu): container could still be created after this point, Kubelet should
// not rely on this behavior.
// TODO(random-liu): Introduce an intermediate state to avoid container creation after
// this point.
cntrs, err := c.containerStore.List()
if err != nil {
return nil, fmt.Errorf("failed to list all containers: %v", err)
}
for _, cntr := range cntrs {
if cntr.SandboxID != id {
continue
}
_, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID})
if err != nil {
return nil, fmt.Errorf("failed to remove container %q: %v", cntr.ID, err)
}
}

// TODO(random-liu): [P1] Remove permanent namespace once used.

// Cleanup the sandbox root directory.
Expand Down
55 changes: 55 additions & 0 deletions pkg/server/sandbox_remove_test.go
Expand Up @@ -17,6 +17,7 @@ package server
import (
"fmt"
"testing"
"time"

"github.com/containerd/containerd/api/services/containers"
snapshotapi "github.com/containerd/containerd/api/services/snapshot"
Expand Down Expand Up @@ -176,3 +177,57 @@ func TestRemovePodSandbox(t *testing.T) {
assert.NotNil(t, res, "remove should be idempotent")
}
}

func TestRemoveContainersInSandbox(t *testing.T) {
testID := "test-id"
testName := "test-name"
testMetadata := metadata.SandboxMetadata{
ID: testID,
Name: testName,
}
testContainersMetadata := []*metadata.ContainerMetadata{
{
ID: "test-cid-1",
Name: "test-cname-1",
SandboxID: testID,
FinishedAt: time.Now().UnixNano(),
},
{
ID: "test-cid-2",
Name: "test-cname-2",
SandboxID: testID,
FinishedAt: time.Now().UnixNano(),
},
{
ID: "test-cid-3",
Name: "test-cname-3",
SandboxID: "other-sandbox-id",
FinishedAt: time.Now().UnixNano(),
},
}

c := newTestCRIContainerdService()
WithFakeSnapshotClient(c)
assert.NoError(t, c.sandboxNameIndex.Reserve(testName, testID))
assert.NoError(t, c.sandboxIDIndex.Add(testID))
assert.NoError(t, c.sandboxStore.Create(testMetadata))
for _, cntr := range testContainersMetadata {
assert.NoError(t, c.containerNameIndex.Reserve(cntr.Name, cntr.ID))
assert.NoError(t, c.containerStore.Create(*cntr))
}

res, err := c.RemovePodSandbox(context.Background(), &runtime.RemovePodSandboxRequest{
PodSandboxId: testID,
})
assert.NoError(t, err)
assert.NotNil(t, res)

meta, err := c.sandboxStore.Get(testID)
assert.Error(t, err)
assert.True(t, metadata.IsNotExistError(err))
assert.Nil(t, meta, "sandbox metadata should be removed")

cntrs, err := c.containerStore.List()
assert.NoError(t, err)
assert.Equal(t, testContainersMetadata[2:], cntrs, "container metadata should be removed")
}
18 changes: 12 additions & 6 deletions pkg/server/sandbox_run.go
Expand Up @@ -183,7 +183,10 @@ func (c *criContainerdService) RunPodSandbox(ctx context.Context, r *runtime.Run
}
defer func() {
if retErr != nil {
c.cleanupSandboxFiles(sandboxRootDir, config)
if err = c.unmountSandboxFiles(sandboxRootDir, config); err != nil {
glog.Errorf("Failed to unmount sandbox files in %q: %v",
sandboxRootDir, err)
}
}
}()

Expand Down Expand Up @@ -403,12 +406,15 @@ func parseDNSOptions(servers, searches, options []string) (string, error) {
return resolvContent, nil
}

// cleanupSandboxFiles only unmount files, we rely on the removal of sandbox root directory to remove files.
// Each cleanup task should log error instead of returning, so as to keep on cleanup on error.
func (c *criContainerdService) cleanupSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) {
// unmountSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error when:
// 1) The mount point is already unmounted.
// 2) The mount point doesn't exist.
func (c *criContainerdService) unmountSandboxFiles(rootDir string, config *runtime.PodSandboxConfig) error {
if !config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetHostIpc() {
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && os.IsNotExist(err) {
glog.Errorf("failed to unmount sandbox shm: %v", err)
if err := c.os.Unmount(getSandboxDevShm(rootDir), unix.MNT_DETACH); err != nil && !os.IsNotExist(err) {
return err
}
}
return nil
}
25 changes: 24 additions & 1 deletion pkg/server/sandbox_stop.go
Expand Up @@ -46,6 +46,25 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
// Use the full sandbox id.
id := sandbox.ID

// Stop all containers inside the sandbox. This terminates the container forcibly,
// and container may still be so production should not rely on this behavior.
// TODO(random-liu): Delete the sandbox container before this after permanent network namespace
// is introduced, so that no container will be started after that.
containers, err := c.containerStore.List()
if err != nil {
return nil, fmt.Errorf("failed to list all containers: %v", err)
}
for _, container := range containers {
if container.SandboxID != id {
continue
}
// Forcibly stop the container. Do not use `StopContainer`, because it introduces a race
// if a container is removed after list.
if err = c.stopContainer(ctx, container, 0); err != nil {
return nil, fmt.Errorf("failed to stop container %q: %v", container.ID, err)
}
}

// Teardown network for sandbox.
_, err = c.os.Stat(sandbox.NetNS)
if err == nil {
Expand All @@ -58,13 +77,17 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St
}
glog.V(2).Infof("TearDown network for sandbox %q successfully", id)

sandboxRoot := getSandboxRootDir(c.rootDir, id)
if err = c.unmountSandboxFiles(sandboxRoot, sandbox.Config); err != nil {
return nil, fmt.Errorf("failed to unmount sandbox files in %q: %v", sandboxRoot, err)
}

// TODO(random-liu): [P1] Handle sandbox container graceful deletion.
// Delete the sandbox container from containerd.
_, err = c.taskService.Delete(ctx, &execution.DeleteRequest{ContainerID: id})
if err != nil && !isContainerdGRPCNotFoundError(err) {
return nil, fmt.Errorf("failed to delete sandbox container %q: %v", id, err)
}

// TODO(random-liu): [P2] Stop all containers inside the sandbox.
return &runtime.StopPodSandboxResponse{}, nil
}

0 comments on commit 9658159

Please sign in to comment.