Skip to content

Commit

Permalink
support cgroup2
Browse files Browse the repository at this point in the history
* only shim v2 runc v2 ("io.containerd.runc.v2") is supported
* only PID metrics is implemented. Others should be implemented in separate PRs.
* lots of code duplication in v1 metrics and v2 metrics. Dedupe should be separate PR.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
  • Loading branch information
AkihiroSuda committed Dec 11, 2019
1 parent f01665a commit 8f870c2
Show file tree
Hide file tree
Showing 69 changed files with 10,619 additions and 160 deletions.
1 change: 1 addition & 0 deletions cmd/containerd/builtins_linux.go
Expand Up @@ -19,6 +19,7 @@ package main
import (
_ "github.com/containerd/aufs"
_ "github.com/containerd/containerd/metrics/cgroups"
_ "github.com/containerd/containerd/metrics/cgroups/v2"
_ "github.com/containerd/containerd/runtime/v1/linux"
_ "github.com/containerd/containerd/runtime/v2"
_ "github.com/containerd/containerd/runtime/v2/runc/options"
Expand Down
16 changes: 15 additions & 1 deletion cmd/ctr/commands/tasks/metrics.go
Expand Up @@ -27,6 +27,7 @@ import (

wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
v1 "github.com/containerd/cgroups/stats/v1"
v2 "github.com/containerd/cgroups/v2/stats"
"github.com/containerd/containerd/cmd/ctr/commands"
"github.com/containerd/typeurl"
"github.com/urfave/cli"
Expand Down Expand Up @@ -80,11 +81,14 @@ var metricsCommand = cli.Command{
}
var (
data *v1.Metrics
data2 *v2.Metrics
windowsStats *wstats.Statistics
)
switch v := anydata.(type) {
case *v1.Metrics:
data = v
case *v2.Metrics:
data2 = v
case *wstats.Statistics:
windowsStats = v
default:
Expand All @@ -98,6 +102,8 @@ var metricsCommand = cli.Command{
fmt.Fprintf(w, "%s\t%s\t\n\n", metric.ID, metric.Timestamp)
if data != nil {
printCgroupMetricsTable(w, data)
} else if data2 != nil {
printCgroup2MetricsTable(w, data2)
} else {
if windowsStats.GetLinux() != nil {
printCgroupMetricsTable(w, windowsStats.GetLinux())
Expand All @@ -111,7 +117,7 @@ var metricsCommand = cli.Command{
}
return w.Flush()
case formatJSON:
marshaledJSON, err := json.MarshalIndent(data, "", " ")
marshaledJSON, err := json.MarshalIndent(anydata, "", " ")
if err != nil {
return err
}
Expand Down Expand Up @@ -140,6 +146,14 @@ func printCgroupMetricsTable(w *tabwriter.Writer, data *v1.Metrics) {
}
}

func printCgroup2MetricsTable(w *tabwriter.Writer, data *v2.Metrics) {
fmt.Fprintf(w, "METRIC\tVALUE\t\n")
if data.Pids != nil {
fmt.Fprintf(w, "pids.current\t%v\t\n", data.Pids.Current)
fmt.Fprintf(w, "pids.limit\t%v\t\n", data.Pids.Limit)
}
}

func printWindowsContainerStatistics(w *tabwriter.Writer, stats *wstats.WindowsContainerStatistics) {
fmt.Fprintf(w, "METRIC\tVALUE\t\n")
fmt.Fprintf(w, "timestamp\t%s\t\n", stats.Timestamp)
Expand Down
95 changes: 95 additions & 0 deletions metrics/cgroups/v2/cgroups.go
@@ -0,0 +1,95 @@
// +build linux

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v2

import (
"context"

"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events"
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/runtime/v1/linux"
metrics "github.com/docker/go-metrics"
)

// Config for the cgroups monitor
type Config struct {
NoPrometheus bool `toml:"no_prometheus"`
}

func init() {
plugin.Register(&plugin.Registration{
Type: plugin.TaskMonitorPlugin,
ID: "cgroups-v2",
InitFn: New,
Config: &Config{},
})
}

// New returns a new cgroups monitor
func New(ic *plugin.InitContext) (interface{}, error) {
var ns *metrics.Namespace
config := ic.Config.(*Config)
if !config.NoPrometheus {
ns = metrics.NewNamespace("container", "", nil)
}
collector := newCollector(ns)
if ns != nil {
metrics.Register(ns)
}
ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())
return &cgroupsMonitor{
collector: collector,
context: ic.Context,
publisher: ic.Events,
}, nil
}

type cgroupsMonitor struct {
collector *collector
context context.Context
publisher events.Publisher
}

func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
if err := m.collector.Add(c); err != nil {
return err
}
t, ok := c.(*linux.Task)
if !ok {
return nil
}
cg, err := t.Cgroup()
if err != nil {
if errdefs.IsNotFound(err) {
return nil
}
return err
}
// OOM handler is not implemented yet
_ = cg
return nil
}

func (m *cgroupsMonitor) Stop(c runtime.Task) error {
m.collector.Remove(c)
return nil
}
61 changes: 61 additions & 0 deletions metrics/cgroups/v2/metric.go
@@ -0,0 +1,61 @@
// +build linux

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v2

import (
v2 "github.com/containerd/containerd/metrics/types/v2"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)

type value struct {
v float64
l []string
}

type metric struct {
name string
help string
unit metrics.Unit
vt prometheus.ValueType
labels []string
// getValues returns the value and labels for the data
getValues func(stats *v2.Metrics) []value
}

func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
// the namespace label is for containerd namespaces
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"container_id", "namespace"}, m.labels...)...)
}

func (m *metric) collect(id, namespace string, stats *v2.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric, block bool) {
values := m.getValues(stats)
for _, v := range values {
// block signals to block on the sending the metrics so none are missed
if block {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
continue
}
// non-blocking metrics can be dropped if the chan is full
select {
case ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...):
default:
}
}
}
141 changes: 141 additions & 0 deletions metrics/cgroups/v2/metrics.go
@@ -0,0 +1,141 @@
// +build linux

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v2

import (
"context"
"fmt"
"sync"

"github.com/containerd/containerd/log"
v2 "github.com/containerd/containerd/metrics/types/v2"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/runtime"
"github.com/containerd/typeurl"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)

// newCollector registers the collector with the provided namespace and returns it so
// that cgroups can be added for collection
func newCollector(ns *metrics.Namespace) *collector {
if ns == nil {
return &collector{}
}
c := &collector{
ns: ns,
tasks: make(map[string]runtime.Task),
}
c.metrics = append(c.metrics, pidMetrics...)
c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
ns.Add(c)
return c
}

func taskID(id, namespace string) string {
return fmt.Sprintf("%s-%s", id, namespace)
}

// collector provides the ability to collect container stats and export
// them in the prometheus format
type collector struct {
mu sync.RWMutex

tasks map[string]runtime.Task
ns *metrics.Namespace
metrics []*metric
storedMetrics chan prometheus.Metric
}

func (c *collector) Describe(ch chan<- *prometheus.Desc) {
for _, m := range c.metrics {
ch <- m.desc(c.ns)
}
}

func (c *collector) Collect(ch chan<- prometheus.Metric) {
c.mu.RLock()
wg := &sync.WaitGroup{}
for _, t := range c.tasks {
wg.Add(1)
go c.collect(t, ch, true, wg)
}
storedLoop:
for {
// read stored metrics until the channel is flushed
select {
case m := <-c.storedMetrics:
ch <- m
default:
break storedLoop
}
}
c.mu.RUnlock()
wg.Wait()
}

func (c *collector) collect(t runtime.Task, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done()
}
ctx := namespaces.WithNamespace(context.Background(), t.Namespace())
stats, err := t.Stats(ctx)
if err != nil {
log.L.WithError(err).Errorf("stat task %s", t.ID())
return
}
data, err := typeurl.UnmarshalAny(stats)
if err != nil {
log.L.WithError(err).Errorf("unmarshal stats for %s", t.ID())
return
}
s, ok := data.(*v2.Metrics)
if !ok {
log.L.WithError(err).Errorf("invalid metric type for %s", t.ID())
return
}
for _, m := range c.metrics {
m.collect(t.ID(), t.Namespace(), s, c.ns, ch, block)
}
}

// Add adds the provided cgroup and id so that metrics are collected and exported
func (c *collector) Add(t runtime.Task) error {
if c.ns == nil {
return nil
}
c.mu.Lock()
defer c.mu.Unlock()
id := taskID(t.ID(), t.Namespace())
if _, ok := c.tasks[id]; ok {
return nil // requests to collect metrics should be idempotent
}
c.tasks[id] = t
return nil
}

// Remove removes the provided cgroup by id from the collector
func (c *collector) Remove(t runtime.Task) {
if c.ns == nil {
return
}
c.mu.Lock()
defer c.mu.Unlock()
delete(c.tasks, taskID(t.ID(), t.Namespace()))
}

0 comments on commit 8f870c2

Please sign in to comment.