From 6c6cc5ec107f10ccf4d4acbfe89d572a52d58a92 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 22 Dec 2022 19:27:20 +0000 Subject: [PATCH] add network plugin metrics Add network plugin metrics. The metrics are the same ones that dockershim/kubelet exposed up to Kubernetes 1.23, the last release that shipped dockershim before it was removed: https://github.com/kubernetes/kubernetes/blob/release-1.23/pkg/kubelet/dockershim/network/metrics/metrics.go Signed-off-by: Antonio Ojea --- pkg/cri/server/metrics.go | 16 ++++++++++++++++ pkg/cri/server/sandbox_run.go | 4 ++++ pkg/cri/server/sandbox_stop.go | 10 +++++++++- pkg/cri/server/update_runtime_config.go | 12 ++++++++++-- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pkg/cri/server/metrics.go b/pkg/cri/server/metrics.go index 9712bb1128d1..0e285fe20ddb 100644 --- a/pkg/cri/server/metrics.go +++ b/pkg/cri/server/metrics.go @@ -34,6 +34,10 @@ var ( containerCreateTimer metrics.LabeledTimer containerStopTimer metrics.LabeledTimer containerStartTimer metrics.LabeledTimer + + networkPluginOperations metrics.LabeledCounter + networkPluginOperationsErrors metrics.LabeledCounter + networkPluginOperationsLatency metrics.LabeledTimer ) func init() { @@ -54,5 +58,17 @@ func init() { containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") + networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") + networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. 
Broken down by operation type", "operation_type") + metrics.Register(ns) } + +// for backwards compatibility with kubelet/dockershim metrics +// https://github.com/containerd/containerd/issues/7801 +const ( + networkStatusOp = "get_pod_network_status" + networkSetUpOp = "set_up_pod" + networkTearDownOp = "tear_down_pod" +) diff --git a/pkg/cri/server/sandbox_run.go b/pkg/cri/server/sandbox_run.go index 082344022894..5f19ffb62f13 100644 --- a/pkg/cri/server/sandbox_run.go +++ b/pkg/cri/server/sandbox_run.go @@ -444,8 +444,12 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore. return fmt.Errorf("get cni namespace options: %w", err) } log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") + netStart := time.Now() result, err := netPlugin.Setup(ctx, id, path, opts...) + networkPluginOperations.WithValues(networkSetUpOp).Inc() + networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart) if err != nil { + networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc() return err } logDebugCNIResult(ctx, id, result) diff --git a/pkg/cri/server/sandbox_stop.go b/pkg/cri/server/sandbox_stop.go index a3231631070d..f2d4e6b84160 100644 --- a/pkg/cri/server/sandbox_stop.go +++ b/pkg/cri/server/sandbox_stop.go @@ -185,7 +185,15 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor return fmt.Errorf("get cni namespace options: %w", err) } - return netPlugin.Remove(ctx, id, path, opts...) + netStart := time.Now() + err = netPlugin.Remove(ctx, id, path, opts...) + networkPluginOperations.WithValues(networkTearDownOp).Inc() + networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart) + if err != nil { + networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc() + return err + } + return nil } // cleanupUnknownSandbox cleanup stopped sandbox in unknown state. 
diff --git a/pkg/cri/server/update_runtime_config.go b/pkg/cri/server/update_runtime_config.go index 3cf0b5529b67..ad991c674e87 100644 --- a/pkg/cri/server/update_runtime_config.go +++ b/pkg/cri/server/update_runtime_config.go @@ -23,6 +23,7 @@ import ( "path/filepath" "strings" "text/template" + "time" "github.com/containerd/containerd/log" "golang.org/x/net/context" @@ -74,10 +75,17 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil } - if err := netPlugin.Status(); err == nil { + + netStart := time.Now() + err = netPlugin.Status() + networkPluginOperations.WithValues(networkStatusOp).Inc() + networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart) + if err == nil { log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil - } else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { + } + networkPluginOperationsErrors.WithValues(networkStatusOp).Inc() + if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) return &runtime.UpdateRuntimeConfigResponse{}, nil }