From 9611ef697e34aceebe7a3505a8d3761dc71311fa Mon Sep 17 00:00:00 2001 From: Paul Thiele Date: Thu, 4 Dec 2025 12:34:40 +0100 Subject: [PATCH 1/2] Fix docker health state --- container/docker/handler.go | 24 ++++++++++++++------- integration/tests/api/docker_test.go | 31 ++++++++++++++++++++++++++++ metrics/prometheus.go | 29 ++++++++++++++------------ metrics/prometheus_test.go | 20 ++++++++++++++++++ 4 files changed, 83 insertions(+), 21 deletions(-) diff --git a/container/docker/handler.go b/container/docker/handler.go index 1d6d4c0b7c..b9936de3c1 100644 --- a/container/docker/handler.go +++ b/container/docker/handler.go @@ -68,9 +68,8 @@ type dockerContainerHandler struct { creationTime time.Time // Metadata associated with the container. - envs map[string]string - labels map[string]string - healthStatus string + envs map[string]string + labels map[string]string // Image name used for this container. image string @@ -93,6 +92,9 @@ type dockerContainerHandler struct { reference info.ContainerReference libcontainerHandler *containerlibcontainer.Handler + + // the docker client is needed to inspect the container and get the health status + client docker.APIClient } var _ container.ContainerHandler = &dockerContainerHandler{} @@ -201,10 +203,7 @@ func newDockerContainerHandler( labels: ctnr.Config.Labels, includedMetrics: metrics, zfsParent: zfsParent, - } - // Health status may be nil if no health check is configured - if ctnr.State.Health != nil { - handler.healthStatus = ctnr.State.Health.Status + client: client, } // Timestamp returned by Docker is in time.RFC3339Nano format. handler.creationTime, err = time.Parse(time.RFC3339Nano, ctnr.Created) @@ -331,7 +330,16 @@ func (h *dockerContainerHandler) GetStats() (*info.ContainerStats, error) { if err != nil { return stats, err } - stats.Health.Status = h.healthStatus + + // We assume that if Inspect fails then the container is not known to docker. + ctnr, err := h.client.ContainerInspect(context.Background(), h.reference.Id) + if err != nil { + return nil, fmt.Errorf("failed to inspect container %q: %v", h.reference.Id, err) + } + + if ctnr.State.Health != nil { + stats.Health.Status = ctnr.State.Health.Status + } // Get filesystem stats. err = FsStats(stats, h.machineInfoFactory, h.includedMetrics, h.storageDriver, diff --git a/integration/tests/api/docker_test.go b/integration/tests/api/docker_test.go index a331e31e58..e9a606fc26 100644 --- a/integration/tests/api/docker_test.go +++ b/integration/tests/api/docker_test.go @@ -398,3 +398,34 @@ func TestDockerFilesystemStats(t *testing.T) { t.Fail() } } + +func TestDockerHealthState(t *testing.T) { + fm := framework.New(t) + defer fm.Cleanup() + + containerID := fm.Docker().Run(framework.DockerRunArgs{ + Image: "registry.k8s.io/busybox:1.27", + Args: []string{ + "--health-cmd", "exit 0", + "--health-interval", "1s", + }, + }, "sh", "-c", "sleep 10") + + // Wait for the container to show up. + waitForContainer(containerID, fm) + + getHealth := func() string { + containerInfo, err := fm.Cadvisor().Client().DockerContainer(containerID, &info.ContainerInfoRequest{NumStats: 1}) + require.NoError(t, err) + require.Len(t, containerInfo.Stats, 1) + return containerInfo.Stats[0].Health.Status + } + + // Initially the container is in starting state. + require.Equal(t, "starting", getHealth()) + + // Eventually the container should be in healthy state. + require.Eventually(t, func() bool { + return getHealth() == "healthy" + }, 10*time.Second, 100*time.Millisecond) +} diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 7346469125..cc85b873e4 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -139,19 +139,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri name: "container_health_state", help: "The result of the container's health check", valueType: prometheus.GaugeValue, - getValues: func(s *info.ContainerStats) metricValues { - return metricValues{{ - // inline if to check if s.health.status = healthy - value: func(s *info.ContainerStats) float64 { - if s.Health.Status == "healthy" { - return 1 - } else { - return 0 - } - }(s), - timestamp: s.Timestamp, - }} - }, + getValues: getContainerHealthState, }, }, includedMetrics: includedMetrics, @@ -2109,3 +2097,18 @@ func getMinCoreScalingRatio(s *info.ContainerStats) metricValues { } return values } + +func getContainerHealthState(s *info.ContainerStats) metricValues { + value := float64(0) + switch s.Health.Status { + case "healthy": + value = 1 + case "": // if container has no health check defined + value = -1 + default: // starting or unhealthy + } + return metricValues{{ + value: value, + timestamp: s.Timestamp, + }} +} diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index 11c44054e6..df05cd298a 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -336,3 +336,23 @@ func TestGetMinCoreScalingRatio(t *testing.T) { assert.Contains(t, values, 0.5) assert.Contains(t, values, 0.3) } + +func TestGetContainerHealthState(t *testing.T) { + testCases := []struct { + name string + containerStats *info.ContainerStats + expectedValue float64 + }{ + {name: "healthy", expectedValue: 1.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "healthy"}}}, + {name: "unhealthy", expectedValue: 0.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "unhealthy"}}}, + {name: "starting", expectedValue: 0.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "unknown"}}}, + {name: "empty", expectedValue: -1.0, containerStats: &info.ContainerStats{}}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + metricVals := getContainerHealthState(tc.containerStats) + assert.Equal(t, 1, len(metricVals)) + assert.Equal(t, tc.expectedValue, metricVals[0].value) + }) + } +} From dbbcf2c32dc619bb116595d671b061b9ae3af954 Mon Sep 17 00:00:00 2001 From: Paul Thiele Date: Fri, 5 Dec 2025 17:06:48 +0100 Subject: [PATCH 2/2] use v20250702-52f5173c3a instead of latest for gcr.io/k8s-staging-test-infra/bootstrap --- build/integration-in-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/integration-in-docker.sh b/build/integration-in-docker.sh index 62b8f86181..479e59df07 100755 --- a/build/integration-in-docker.sh +++ b/build/integration-in-docker.sh @@ -58,7 +58,7 @@ function run_tests() { --privileged \ --cap-add="sys_admin" \ --entrypoint="" \ - gcr.io/k8s-staging-test-infra/bootstrap \ + gcr.io/k8s-staging-test-infra/bootstrap:v20250702-52f5173c3a \ bash -c "export DEBIAN_FRONTEND=noninteractive && \ apt update && \ apt install -y $PACKAGES && \