Skip to content

Commit

Permalink
Merge pull request moby#40174 from AkihiroSuda/cgroup2
Browse files: browse the repository at this point in the history
support cgroup2
  • Loading branch information
thaJeztah committed Jan 9, 2020
2 parents a9507c6 + 491531c commit e6c1820
Show file tree
Hide file tree
Showing 15 changed files with 161 additions and 35 deletions.
7 changes: 6 additions & 1 deletion cmd/dockerd/config_unix.go
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/docker/docker/opts"
"github.com/docker/docker/rootless"
units "github.com/docker/go-units"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/pkg/errors"
"github.com/spf13/pflag"
)
Expand Down Expand Up @@ -64,6 +65,10 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) error {
// rootless needs to be explicitly specified for running "rootful" dockerd in rootless dockerd (#38702)
// Note that defaultUserlandProxyPath and honorXDG are configured according to the value of rootless.RunningWithRootlessKit, not the value of --rootless.
flags.BoolVar(&conf.Rootless, "rootless", rootless.RunningWithRootlessKit(), "Enable rootless mode; typically used with RootlessKit (experimental)")
flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", config.DefaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
defaultCgroupNamespaceMode := "host"
if cgroups.IsCgroup2UnifiedMode() {
defaultCgroupNamespaceMode = "private"
}
flags.StringVar(&conf.CgroupNamespaceMode, "default-cgroupns-mode", defaultCgroupNamespaceMode, `Default mode for containers cgroup namespace ("host" | "private")`)
return nil
}
2 changes: 0 additions & 2 deletions daemon/config/config_unix.go
Expand Up @@ -11,8 +11,6 @@ import (
)

const (
// DefaultCgroupNamespaceMode is the default for a container's CgroupnsMode, if not set otherwise
DefaultCgroupNamespaceMode = "host" // TODO: change to private
// DefaultIpcMode is default for container's IpcMode, if not set otherwise
DefaultIpcMode = "private"
)
Expand Down
5 changes: 3 additions & 2 deletions daemon/daemon.go
Expand Up @@ -794,6 +794,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
PluginStore: pluginStore,
startupDone: make(chan struct{}),
}

// Ensure the daemon is properly shutdown if there is a failure during
// initialization
defer func() {
Expand Down Expand Up @@ -914,7 +915,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
}
}

return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m)
return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m, d.useShimV2())
}

// Plugin system initialization should happen before restore. Do not change order.
Expand Down Expand Up @@ -1063,7 +1064,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S

go d.execCommandGC()

d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d)
d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d, d.useShimV2())
if err != nil {
return nil, err
}
Expand Down
21 changes: 17 additions & 4 deletions daemon/daemon_unix.go
Expand Up @@ -364,10 +364,15 @@ func (daemon *Daemon) adaptContainerSettings(hostConfig *containertypes.HostConf

// Set default cgroup namespace mode, if unset for container
if hostConfig.CgroupnsMode.IsEmpty() {
if hostConfig.Privileged {
// for cgroup v2: unshare cgroupns even for privileged containers
// https://github.com/containers/libpod/pull/4374#issuecomment-549776387
if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
hostConfig.CgroupnsMode = containertypes.CgroupnsMode("host")
} else {
m := config.DefaultCgroupNamespaceMode
m := "host"
if cgroups.IsCgroup2UnifiedMode() {
m = "private"
}
if daemon.configStore != nil {
m = daemon.configStore.CgroupNamespaceMode
}
Expand Down Expand Up @@ -708,8 +713,8 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
warnings = append(warnings, "Your kernel does not support cgroup namespaces. Cgroup namespace setting discarded.")
}

if hostConfig.Privileged {
return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode")
if hostConfig.Privileged && !cgroups.IsCgroup2UnifiedMode() {
return warnings, fmt.Errorf("privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode")
}
}

Expand Down Expand Up @@ -1594,6 +1599,10 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
return nil
}

if cgroups.IsCgroup2UnifiedMode() {
return fmt.Errorf("daemon-scoped cpu-rt-period and cpu-rt-runtime are not implemented for cgroup v2")
}

// Recursively create cgroup to ensure that the system and all parent cgroups have values set
// for the period and runtime as this limits what the children can be set to.
daemon.initCgroupsPath(filepath.Dir(path))
Expand Down Expand Up @@ -1639,3 +1648,7 @@ func (daemon *Daemon) setupSeccompProfile() error {
}
return nil
}

// useShimV2 reports whether the daemon should use the containerd shim v2
// runtime. On this platform the answer is exactly the result of
// cgroups.IsCgroup2UnifiedMode: shim v2 is used when the host runs in
// cgroup v2 unified mode, and not otherwise.
func (daemon *Daemon) useShimV2() bool {
	unified := cgroups.IsCgroup2UnifiedMode()
	return unified
}
4 changes: 4 additions & 0 deletions daemon/daemon_windows.go
Expand Up @@ -653,3 +653,7 @@ func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {

func setupResolvConf(config *config.Config) {
}

// useShimV2 reports whether the daemon should use the containerd shim v2
// runtime. This implementation returns true unconditionally.
func (daemon *Daemon) useShimV2() bool {
return true
}
4 changes: 3 additions & 1 deletion daemon/oci_linux.go
Expand Up @@ -316,7 +316,9 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
return fmt.Errorf("invalid cgroup namespace mode: %v", cgroupNsMode)
}

if cgroupNsMode.IsPrivate() && !c.HostConfig.Privileged {
// for cgroup v2: unshare cgroupns even for privileged containers
// https://github.com/containers/libpod/pull/4374#issuecomment-549776387
if cgroupNsMode.IsPrivate() && (cgroups.IsCgroup2UnifiedMode() || !c.HostConfig.Privileged) {
nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
setNamespace(s, nsCgroup)
}
Expand Down
15 changes: 15 additions & 0 deletions daemon/start_unix.go
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"

"github.com/containerd/containerd/runtime/linux/runctypes"
v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/pkg/errors"
Expand Down Expand Up @@ -43,6 +44,20 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
if err != nil {
return nil, err
}
if daemon.useShimV2() {
opts := &v2runcoptions.Options{
BinaryName: path,
Root: filepath.Join(daemon.configStore.ExecRoot,
fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
}

if UsingSystemd(daemon.configStore) {
opts.SystemdCgroup = true
}

return opts, nil

}
opts := &runctypes.RuncOptions{
Runtime: path,
RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,
Expand Down
2 changes: 1 addition & 1 deletion integration/container/run_cgroupns_linux_test.go
Expand Up @@ -115,7 +115,7 @@ func TestCgroupNamespacesRunPrivilegedAndPrivate(t *testing.T) {
skip.If(t, !requirement.CgroupNamespacesEnabled())

// Running with both privileged and cgroupns=private is not allowed
errStr := "privileged mode is incompatible with private cgroup namespaces. You must run the container in the host cgroup namespace when running privileged mode"
errStr := "privileged mode is incompatible with private cgroup namespaces on cgroup v1 host. You must run the container in the host cgroup namespace when running privileged mode"
testCreateFailureWithCgroupNs(t, "private", errStr, container.WithPrivileged(true), container.WithCgroupnsMode("private"))
}

Expand Down
4 changes: 2 additions & 2 deletions libcontainerd/libcontainerd_linux.go
Expand Up @@ -9,6 +9,6 @@ import (
)

// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
return remote.NewClient(ctx, cli, stateDir, ns, b)
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
}
5 changes: 3 additions & 2 deletions libcontainerd/libcontainerd_windows.go
Expand Up @@ -11,9 +11,10 @@ import (
)

// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
if !system.ContainerdRuntimeSupported() {
// useShimV2 is ignored for windows
return local.NewClient(ctx, cli, stateDir, ns, b)
}
return remote.NewClient(ctx, cli, stateDir, ns, b)
return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
}
68 changes: 52 additions & 16 deletions libcontainerd/remote/client.go
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/runtime/linux/runctypes"
v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/typeurl"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libcontainerd/queue"
Expand All @@ -45,21 +46,27 @@ type client struct {
logger *logrus.Entry
ns string

backend libcontainerdtypes.Backend
eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
backend libcontainerdtypes.Backend
eventQ queue.Queue
oomMu sync.Mutex
oom map[string]bool
useShimV2 bool
v2runcoptionsMu sync.Mutex
// v2runcoptions is used for copying options specified on Create() to Start()
v2runcoptions map[string]v2runcoptions.Options
}

// NewClient creates a new libcontainerd client from a containerd client
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
c := &client{
client: cli,
stateDir: stateDir,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns,
backend: b,
oom: make(map[string]bool),
client: cli,
stateDir: stateDir,
logger: logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
ns: ns,
backend: b,
oom: make(map[string]bool),
useShimV2: useShimV2,
v2runcoptions: make(map[string]v2runcoptions.Options),
}

go c.processEventStream(ctx, ns)
Expand Down Expand Up @@ -126,9 +133,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
bdir := c.bundleDir(id)
c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")

rt := runtimeName
if c.useShimV2 {
rt = shimV2RuntimeName
}
newOpts := []containerd.NewContainerOpts{
containerd.WithSpec(ociSpec),
containerd.WithRuntime(runtimeName, runtimeOptions),
containerd.WithRuntime(rt, runtimeOptions),
WithBundle(bdir, ociSpec),
}
opts = append(opts, newOpts...)
Expand All @@ -140,6 +151,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
}
return wrapError(err)
}
if c.useShimV2 {
if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
c.v2runcoptionsMu.Lock()
c.v2runcoptions[id] = *x
c.v2runcoptionsMu.Unlock()
}
}
return nil
}

Expand Down Expand Up @@ -200,11 +218,26 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin

if runtime.GOOS != "windows" {
taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
info.Options = &runctypes.CreateOptions{
IoUid: uint32(uid),
IoGid: uint32(gid),
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
if c.useShimV2 {
// For v2, we need to inherit options specified on Create
c.v2runcoptionsMu.Lock()
opts, ok := c.v2runcoptions[id]
c.v2runcoptionsMu.Unlock()
if !ok {
opts = v2runcoptions.Options{}
}
opts.IoUid = uint32(uid)
opts.IoGid = uint32(gid)
opts.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
info.Options = &opts
} else {
info.Options = &runctypes.CreateOptions{
IoUid: uint32(uid),
IoGid: uint32(gid),
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
}
}

return nil
})
} else {
Expand Down Expand Up @@ -466,6 +499,9 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
c.oomMu.Lock()
delete(c.oom, containerID)
c.oomMu.Unlock()
c.v2runcoptionsMu.Lock()
delete(c.v2runcoptions, containerID)
c.v2runcoptionsMu.Unlock()
if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
if err := os.RemoveAll(bundle); err != nil {
c.logger.WithError(err).WithFields(logrus.Fields{
Expand Down
5 changes: 4 additions & 1 deletion libcontainerd/remote/client_linux.go
Expand Up @@ -16,7 +16,10 @@ import (
"github.com/sirupsen/logrus"
)

const runtimeName = "io.containerd.runtime.v1.linux"
// Containerd runtime identifiers handed to containerd.WithRuntime when a
// container is created.
const (
// runtimeName is the default runtime identifier (the v1 Linux runtime).
runtimeName = "io.containerd.runtime.v1.linux"
// shimV2RuntimeName is the runtime identifier selected instead of
// runtimeName when the client was constructed with useShimV2 set.
shimV2RuntimeName = "io.containerd.runc.v2"
)

func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
return &libcontainerdtypes.Summary{}, nil
Expand Down
5 changes: 4 additions & 1 deletion libcontainerd/remote/client_windows.go
Expand Up @@ -16,7 +16,10 @@ import (
"github.com/sirupsen/logrus"
)

const runtimeName = "io.containerd.runhcs.v1"
// Containerd runtime identifiers handed to containerd.WithRuntime when a
// container is created.
const (
// runtimeName is the default runtime identifier (runhcs).
runtimeName = "io.containerd.runhcs.v1"
// shimV2RuntimeName aliases runtimeName here, so the runtime chosen does
// not depend on whether useShimV2 is set.
shimV2RuntimeName = runtimeName
)

func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
switch pd := i.(type) {
Expand Down

0 comments on commit e6c1820

Please sign in to comment.