Skip to content

Commit

Permalink
[cri] add sandbox and container latency metrics
Browse files Browse the repository at this point in the history
These are simple metrics that allow users to view more fine grained metrics on
internal operations.

Signed-off-by: Michael Crosby <michael@thepasture.io>
  • Loading branch information
crosbymichael committed Nov 9, 2021
1 parent 432ddec commit 91bbaf6
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pkg/cri/server/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
}

start := time.Now()
// Run container using the same runtime with sandbox.
sandboxInfo, err := sandbox.Container.Info(ctx)
if err != nil {
Expand Down Expand Up @@ -278,6 +279,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
}

containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start)

return &runtime.CreateContainerResponse{ContainerId: id}, nil
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/cri/server/container_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package server

import (
"time"

"golang.org/x/net/context"

runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
Expand All @@ -26,6 +28,7 @@ import (

// ListContainers lists all containers matching the filter.
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
start := time.Now()
// List all containers from store.
containersInStore := c.containerStore.List()

Expand All @@ -35,6 +38,8 @@ func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContaine
}

containers = c.filterCRIContainers(containers, r.GetFilter())

containerListTimer.UpdateSince(start)
return &runtime.ListContainersResponse{Containers: containers}, nil
}

Expand Down
9 changes: 9 additions & 0 deletions pkg/cri/server/container_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package server

import (
"time"

"github.com/containerd/containerd"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
Expand All @@ -30,6 +32,7 @@ import (

// RemoveContainer removes the container.
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
start := time.Now()
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
if !errdefs.IsNotFound(err) {
Expand All @@ -40,6 +43,10 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
return &runtime.RemoveContainerResponse{}, nil
}
id := container.ID
i, err := container.Container.Info(ctx)
if err != nil {
return nil, errors.Wrap(err, "get container info")
}

// Forcibly stop the containers if they are in running or unknown state
state := container.Status.Get().State()
Expand Down Expand Up @@ -99,6 +106,8 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta

c.containerNameIndex.ReleaseByKey(id)

containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)

return &runtime.RemoveContainerResponse{}, nil
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/cri/server/container_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ import (

// StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
start := time.Now()
cntr, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
}

info, err := cntr.Container.Info(ctx)
if err != nil {
return nil, errors.Wrap(err, "get container info")
}

id := cntr.ID
meta := cntr.Metadata
container := cntr.Container
Expand Down Expand Up @@ -162,6 +168,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
// It handles the TaskExit event and update container state after this.
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)

containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)

return &runtime.StartContainerResponse{}, nil
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/cri/server/container_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (

// StopContainer stops a running container with a grace period (i.e., timeout).
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
start := time.Now()
// Get container config from container store.
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
Expand All @@ -45,6 +46,13 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
return nil, err
}

i, err := container.Container.Info(ctx)
if err != nil {
return nil, errors.Wrap(err, "get container info")
}

containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start)

return &runtime.StopContainerResponse{}, nil
}

Expand Down
58 changes: 58 additions & 0 deletions pkg/cri/server/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package server

import (
metrics "github.com/docker/go-metrics"
)

var (
sandboxListTimer metrics.Timer
sandboxCreateNetworkTimer metrics.Timer
sandboxDeleteNetwork metrics.Timer

sandboxRuntimeCreateTimer metrics.LabeledTimer
sandboxRuntimeStopTimer metrics.LabeledTimer
sandboxRemoveTimer metrics.LabeledTimer

containerListTimer metrics.Timer
containerRemoveTimer metrics.LabeledTimer
containerCreateTimer metrics.LabeledTimer
containerStopTimer metrics.LabeledTimer
containerStartTimer metrics.LabeledTimer
)

func init() {
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
ns := metrics.NewNamespace("containerd", "cri", nil)

sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes")
sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox")
sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network")

sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime")
sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime")
sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime")

containerListTimer = ns.NewTimer("container_list", "time to list containers")
containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime")
containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime")
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")

metrics.Register(ns)
}
5 changes: 5 additions & 0 deletions pkg/cri/server/sandbox_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package server

import (
"time"

"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"

Expand All @@ -25,6 +27,7 @@ import (

// ListPodSandbox returns a list of Sandbox.
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
start := time.Now()
// List all sandboxes from store.
sandboxesInStore := c.sandboxStore.List()
var sandboxes []*runtime.PodSandbox
Expand All @@ -36,6 +39,8 @@ func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
}

sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())

sandboxListTimer.UpdateSince(start)
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/cri/server/sandbox_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package server

import (
"time"

"github.com/containerd/containerd"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
Expand All @@ -30,6 +32,7 @@ import (
// RemovePodSandbox removes the sandbox. If there are running containers in the
// sandbox, they should be forcibly removed.
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
start := time.Now()
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
if err != nil {
if !errdefs.IsNotFound(err) {
Expand Down Expand Up @@ -108,5 +111,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
// Release the sandbox name reserved for the sandbox.
c.sandboxNameIndex.ReleaseByKey(id)

sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start)

return &runtime.RemovePodSandboxResponse{}, nil
}
6 changes: 6 additions & 0 deletions pkg/cri/server/sandbox_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"path/filepath"
goruntime "runtime"
"strings"
"time"

"github.com/containerd/containerd"
containerdio "github.com/containerd/containerd/cio"
Expand Down Expand Up @@ -123,6 +124,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}

if podNetwork {
netStart := time.Now()
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
Expand Down Expand Up @@ -163,8 +165,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
}
sandboxCreateNetworkTimer.UpdateSince(netStart)
}

runtimeStart := time.Now()
// Create sandbox container.
// NOTE: sandboxContainerSpec SHOULD NOT have side
// effect, e.g. accessing/creating files, so that we can test
Expand Down Expand Up @@ -345,6 +349,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
// but we don't care about sandbox TaskOOM right now, so it is fine.
c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)

sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart)

return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/cri/server/sandbox_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
// Stop all containers inside the sandbox. This terminates the container forcibly,
// and container may still be created, so production should not rely on this behavior.
// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
stop := time.Now()
containers := c.containerStore.List()
for _, container := range containers {
if container.SandboxID != id {
Expand All @@ -77,9 +78,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
}
}
sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop)

// Teardown network for sandbox.
if sandbox.NetNS != nil {
netStop := time.Now()
// Use empty netns path if netns is not available. This is defined in:
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
if closed, err := sandbox.NetNS.Closed(); err != nil {
Expand All @@ -93,6 +96,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
if err := sandbox.NetNS.Remove(); err != nil {
return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
}
sandboxDeleteNetwork.UpdateSince(netStop)
}

log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
Expand Down

0 comments on commit 91bbaf6

Please sign in to comment.