Skip to content

Commit

Permalink
Add systemd-cgroup support to runsc.
Browse files Browse the repository at this point in the history
This change adds systemd-cgroup support to cgroupv2 and modifies existing
cgroup tests for systemd support.

Fixes #193

PiperOrigin-RevId: 435457762
  • Loading branch information
manninglucas authored and gvisor-bot committed Mar 17, 2022
1 parent 4989629 commit bf86207
Show file tree
Hide file tree
Showing 20 changed files with 719 additions and 225 deletions.
5 changes: 2 additions & 3 deletions .buildkite/hooks/pre-command
Expand Up @@ -25,9 +25,8 @@ export TOTAL_PARTITIONS=${BUILDKITE_PARALLEL_JOB_COUNT:-1}

# Ensure Docker has experimental enabled.
EXPERIMENTAL=$(sudo docker version --format='{{.Server.Experimental}}')
CGDRIVER=$(sudo docker info --format='{{.CgroupDriver}}')
if test "${EXPERIMENTAL}" != "true" || test "${CGDRIVER}" != "cgroupfs"; then
make sudo TARGETS=//runsc:runsc ARGS="install --experimental=true --cgroupdriver=cgroupfs"
if test "${EXPERIMENTAL}" != "true"; then
make sudo TARGETS=//runsc:runsc ARGS="install --experimental=true"
sudo systemctl restart docker
fi

Expand Down
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -161,6 +161,7 @@ dev: $(RUNTIME_BIN) ## Installs a set of local runtimes. Requires sudo.
@$(call configure_noreload,$(RUNTIME)-fuse-d,--net-raw --debug --strace --log-packets --fuse)
@$(call configure_noreload,$(RUNTIME)-cgroup-d,--net-raw --debug --strace --log-packets --cgroupfs)
@$(call configure_noreload,$(RUNTIME)-lisafs-d,--net-raw --debug --strace --log-packets --lisafs)
@$(call configure_noreload,$(RUNTIME)-systemd-d,--net-raw --debug --strace --log-packets --systemd-cgroup)
@$(call reload_docker)
.PHONY: dev

Expand Down
11 changes: 9 additions & 2 deletions WORKSPACE
Expand Up @@ -1491,8 +1491,8 @@ go_repository(
go_repository(
name = "com_github_coreos_go_systemd_v22",
importpath = "github.com/coreos/go-systemd/v22",
sum = "h1:kq/SbG2BCKLkDKkjQf5OWwKWUKj1lgs3lFI4PxnR5lg=",
version = "v22.1.0",
sum = "h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=",
version = "v22.3.2",
)

go_repository(
Expand Down Expand Up @@ -1896,3 +1896,10 @@ go_repository(
sum = "h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=",
version = "v1.2.0",
)

go_repository(
name = "com_github_bits_and_blooms_bitset",
importpath = "github.com/bits-and-blooms/bitset",
sum = "h1:M+/hrU9xlMp7t4TyTDQW97d3tRPVuKFC6zBEK16QnXY=",
version = "v1.2.1",
)
5 changes: 3 additions & 2 deletions go.mod
Expand Up @@ -5,13 +5,15 @@ go 1.17
require (
github.com/BurntSushi/toml v0.3.1
github.com/bazelbuild/rules_go v0.30.0
github.com/bits-and-blooms/bitset v1.2.0
github.com/cenkalti/backoff v1.1.1-0.20190506075156-2146c9339422
github.com/containerd/cgroups v1.0.1
github.com/containerd/console v1.0.1
github.com/containerd/containerd v1.3.9
github.com/containerd/fifo v1.0.0
github.com/containerd/go-runc v1.0.0
github.com/containerd/typeurl v1.0.2
github.com/coreos/go-systemd/v22 v22.3.2
github.com/gofrs/flock v0.8.0
github.com/gogo/protobuf v1.3.2
github.com/google/btree v1.0.1
Expand Down Expand Up @@ -39,10 +41,9 @@ require (
github.com/cilium/ebpf v0.4.0 // indirect
github.com/containerd/continuity v0.2.1 // indirect
github.com/containerd/ttrpc v1.0.2 // indirect
github.com/coreos/go-systemd/v22 v22.1.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/godbus/dbus/v5 v5.0.3 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/go-cmp v0.5.6 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Expand Up @@ -61,6 +61,8 @@ github.com/bazelbuild/rules_go v0.30.0 h1:kX4jVcstqrsRqKPJSn2mq2o+TI21edRzEJSrEO
github.com/bazelbuild/rules_go v0.30.0/go.mod h1:MC23Dc/wkXEyk3Wpq6lCqz0ZAYOZDw2DR5y3N1q2i7M=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/cenkalti/backoff v1.1.1-0.20190506075156-2146c9339422 h1:8eZxmY1yvxGHzdzTEhI09npjMVGzNAdrqzruTX6jcK4=
github.com/cenkalti/backoff v1.1.1-0.20190506075156-2146c9339422/go.mod h1:b6Nc7NRH5C4aCISLry0tLnTjcuTEvoiqcWDdsU0sOGM=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
Expand Down Expand Up @@ -112,6 +114,8 @@ github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7
github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk=
github.com/coreos/go-systemd/v22 v22.1.0 h1:kq/SbG2BCKLkDKkjQf5OWwKWUKj1lgs3lFI4PxnR5lg=
github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk=
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
Expand Down Expand Up @@ -155,6 +159,8 @@ github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dp
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME=
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofrs/flock v0.8.0 h1:MSdYClljsF3PbENUUEx85nkWfJSGfzYI9yEBZOJz6CY=
github.com/gofrs/flock v0.8.0/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
Expand Down
1 change: 1 addition & 0 deletions pkg/test/dockerutil/BUILD
Expand Up @@ -15,6 +15,7 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/test/testutil",
"//runsc/cgroup",
"@com_github_docker_docker//api/types:go_default_library",
"@com_github_docker_docker//api/types/container:go_default_library",
"@com_github_docker_docker//api/types/mount:go_default_library",
Expand Down
39 changes: 39 additions & 0 deletions pkg/test/dockerutil/dockerutil.go
Expand Up @@ -29,6 +29,7 @@ import (
"time"

"gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/runsc/cgroup"
)

var (
Expand All @@ -54,6 +55,11 @@ var (
pprofCPU = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug")
pprofHeap = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug")
pprofMutex = flag.Bool("pprof-mutex", false, "enables mutex profiling with runsc debug")

// This matches the string "native.cgroupdriver=systemd" (including optional
// whitespace), which can be found in a docker daemon configuration file's
// exec-opts field.
useSystemdRgx = regexp.MustCompile("\\s*(native\\.cgroupdriver)\\s*=\\s*(systemd)\\s*")
)

// EnsureSupportedDockerVersion checks if correct docker is installed.
Expand Down Expand Up @@ -92,6 +98,39 @@ func RuntimePath() (string, error) {
return p, nil
}

// UsingSystemdCgroup reports whether docker is configured to use the systemd
// cgroup driver.
//
// It reads the docker daemon configuration file named by the config flag and
// inspects its "exec-opts" entry:
//   - If "exec-opts" is absent, the result is cgroup.IsOnlyV2(), because
//     systemd is the default driver on cgroupv2 and not otherwise.
//   - If "exec-opts" is present, the result is true only when one of its
//     string entries matches native.cgroupdriver=systemd.
//
// An error is returned if the configuration file cannot be read, is not valid
// JSON, or has an "exec-opts" value that is not an array.
func UsingSystemdCgroup() (bool, error) {
	// Read the configuration data; the file must exist.
	configBytes, err := ioutil.ReadFile(*config)
	if err != nil {
		return false, err
	}
	// Unmarshal the configuration.
	c := make(map[string]interface{})
	if err := json.Unmarshal(configBytes, &c); err != nil {
		return false, err
	}
	// Decode the expected configuration.
	e, ok := c["exec-opts"]
	if !ok {
		// No exec-opts. Default is true on cgroupv2, false otherwise.
		return cgroup.IsOnlyV2(), nil
	}
	eos, ok := e.([]interface{})
	if !ok {
		// The exec opts are not an array. Report the raw value: eos is
		// always nil when the assertion fails, so printing it would hide
		// what was actually found in the configuration.
		return false, fmt.Errorf("unexpected format: %+v", e)
	}
	for _, opt := range eos {
		// Non-string entries are ignored rather than treated as errors.
		if optStr, ok := opt.(string); ok && useSystemdRgx.MatchString(optStr) {
			return true, nil
		}
	}
	return false, nil
}

func runtimeMap() (map[string]interface{}, error) {
// Read the configuration data; the file must exist.
configBytes, err := ioutil.ReadFile(*config)
Expand Down
1 change: 1 addition & 0 deletions runsc/cgroup/BUILD
Expand Up @@ -13,6 +13,7 @@ go_library(
deps = [
"//pkg/cleanup",
"//pkg/log",
"@com_github_bits_and_blooms_bitset//:go_default_library",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_coreos_go_systemd_v22//dbus:go_default_library",
"@com_github_godbus_dbus_v5//:go_default_library",
Expand Down
51 changes: 41 additions & 10 deletions runsc/cgroup/cgroup.go
Expand Up @@ -332,25 +332,34 @@ type cgroupV1 struct {

// NewFromSpec creates a new Cgroup instance if the spec includes a cgroup path.
// Returns nil otherwise. Cgroup paths are loaded based on the current process.
func NewFromSpec(spec *specs.Spec) (Cgroup, error) {
// If useSystemd is true, the Cgroup will be created and managed with
// systemd. This requires systemd (>=v244) to be running on the host and the
// cgroup path to be in the form `slice:prefix:name`.
func NewFromSpec(spec *specs.Spec, useSystemd bool) (Cgroup, error) {
if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
return nil, nil
}
return NewFromPath(spec.Linux.CgroupsPath)
return NewFromPath(spec.Linux.CgroupsPath, useSystemd)
}

// NewFromPath creates a new Cgroup instance from the specified relative path.
// Cgroup paths are loaded based on the current process.
func NewFromPath(cgroupsPath string) (Cgroup, error) {
return new("self", cgroupsPath)
// If useSystemd is true, the Cgroup will be created and managed with
// systemd. This requires systemd (>=v244) to be running on the host and the
// cgroup path to be in the form `slice:prefix:name`.
func NewFromPath(cgroupsPath string, useSystemd bool) (Cgroup, error) {
return new("self", cgroupsPath, useSystemd)
}

// NewFromPid loads cgroup for the given process.
func NewFromPid(pid int) (Cgroup, error) {
return new(strconv.Itoa(pid), "")
// If useSystemd is true, the Cgroup will be created and managed with
// systemd. This requires systemd (>=v244) to be running on the host and the
// cgroup path to be in the form `slice:prefix:name`.
func NewFromPid(pid int, useSystemd bool) (Cgroup, error) {
return new(strconv.Itoa(pid), "", useSystemd)
}

func new(pid, cgroupsPath string) (Cgroup, error) {
func new(pid, cgroupsPath string, useSystemd bool) (Cgroup, error) {
var (
parents map[string]string
err error
Expand All @@ -367,13 +376,15 @@ func new(pid, cgroupsPath string) (Cgroup, error) {
}

if IsOnlyV2() {
if p, ok := parents[cgroup2Key]; ok {
// The cgroupsPath is in a special `slice:prefix:name` format for systemd
// that should not be modified.
if p, ok := parents[cgroup2Key]; ok && !useSystemd {
// The cgroup of the current pid will have tasks in it, so we can't use
// it; instead, use its parent, which should not have tasks in it.
cgroupsPath = filepath.Join(filepath.Dir(p), cgroupsPath)
}
// Assume that for v2, cgroup is always mounted at cgroupRoot.
cg, err = newCgroupV2(cgroupRoot, cgroupsPath)
cg, err = newCgroupV2(cgroupRoot, cgroupsPath, useSystemd)
if err != nil {
return nil, err
}
Expand All @@ -390,7 +401,8 @@ func new(pid, cgroupsPath string) (Cgroup, error) {

// CgroupJSON is a wrapper for Cgroup that can be encoded to JSON.
type CgroupJSON struct {
Cgroup Cgroup `json:"cgroup"`
Cgroup Cgroup `json:"cgroup"`
UseSystemd bool `json:"useSystemd"`
}

type cgroupJSONv1 struct {
Expand All @@ -401,8 +413,23 @@ type cgroupJSONv2 struct {
Cgroup *cgroupV2 `json:"cgroup"`
}

// cgroupJSONSystemd is the concrete JSON shape used by CgroupJSON when the
// wrapped Cgroup is a *cgroupSystemd. It lets encoding/json marshal and
// unmarshal the "cgroup" field through a concrete pointer type instead of
// the Cgroup interface.
type cgroupJSONSystemd struct {
Cgroup *cgroupSystemd `json:"cgroup"`
}

// UnmarshalJSON implements json.Unmarshaler.UnmarshalJSON
func (c *CgroupJSON) UnmarshalJSON(data []byte) error {
if c.UseSystemd {
systemd := cgroupJSONSystemd{}
if err := json.Unmarshal(data, &systemd); err != nil {
return err
}
if systemd.Cgroup != nil {
c.Cgroup = systemd.Cgroup
}
return nil
}

if IsOnlyV2() {
v2 := cgroupJSONv2{}
err := json.Unmarshal(data, &v2)
Expand All @@ -426,6 +453,10 @@ func (c *CgroupJSON) MarshalJSON() ([]byte, error) {
return json.Marshal(&v1)
}
if IsOnlyV2() {
if c.UseSystemd {
systemd := cgroupJSONSystemd{Cgroup: c.Cgroup.(*cgroupSystemd)}
return json.Marshal(&systemd)
}
v2 := cgroupJSONv2{Cgroup: c.Cgroup.(*cgroupV2)}
return json.Marshal(&v2)
}
Expand Down

0 comments on commit bf86207

Please sign in to comment.