Skip to content

Commit

Permalink
Add a check to skip stats for containers that are not running
Browse files Browse the repository at this point in the history
When a container is just created, exited state the container will not have stats. A common case for this in k8s is the init containers for a pod. The will be present in the listed containers but will not have a running task and there for no stats.

Signed-off-by: James Sturtevant <jstur@microsoft.com>
  • Loading branch information
jsturtevant committed Jun 7, 2023
1 parent 147aea0 commit 58b6b99
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 47 deletions.
5 changes: 5 additions & 0 deletions pkg/cri/sbserver/sandbox_stats_windows.go
Expand Up @@ -129,6 +129,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
for _, cntr := range containers {
containerMetric := statsMap[cntr.ID]

if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
// containers that are just created, in a failed state or exited (init containers) will not have stats
continue
}

if containerMetric == nil {
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
}
Expand Down
156 changes: 133 additions & 23 deletions pkg/cri/sbserver/sandbox_stats_windows_test.go
Expand Up @@ -112,7 +112,99 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
{Metadata: containerstore.Metadata{ID: "c1"}},
newContainer("c1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
"pod stats will include the init container stats": {
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"i1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 600,
UsageNanoCores: 0,
WorkingSetBytes: 60,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
"pod stats will not include the init container stats if it is stopped": {
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedValid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
"pod stats will not include the init container stats if it is stopped in failed state": {
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedInvalid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
Expand All @@ -139,13 +231,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 400),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 800,
Expand All @@ -170,13 +259,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
Expand All @@ -201,13 +287,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
Expand All @@ -230,7 +313,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
assert.NotNil(t, err)
return
}

assert.Nil(t, err)
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)

Expand Down Expand Up @@ -266,6 +349,33 @@ func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statist
}
}

func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container {
cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status))
if err != nil {
panic(err)
}
if stats != nil {
cntr.Stats = stats
}
return cntr
}

var exitedValid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 0,
}

var exitedInvalid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 1,
}

var running = containerstore.Status{
StartedAt: time.Now().UnixNano(),
}

func Test_criService_saveSandBoxMetrics(t *testing.T) {

timestamp := time.Now()
Expand Down
6 changes: 5 additions & 1 deletion pkg/cri/server/sandbox_stats_windows.go
Expand Up @@ -71,7 +71,6 @@ func (c *criService) podSandboxStats(
podSandboxStats.Windows.Cpu = podCPU.Cpu
podSandboxStats.Windows.Memory = podCPU.Memory
podSandboxStats.Windows.Containers = containerStats

podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)

pidCount, err := c.getSandboxPidCount(ctx, sandbox)
Expand Down Expand Up @@ -128,6 +127,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
for _, cntr := range containers {
containerMetric := statsMap[cntr.ID]

if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
// containers that are just created, in a failed state or exited (init containers) will not have stats
continue
}

if containerMetric == nil {
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
}
Expand Down

0 comments on commit 58b6b99

Please sign in to comment.