From 0ce995ab61c4c9c1d0ff4a26fe84fa9754fb596c Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Fri, 2 Dec 2022 16:21:42 +0200 Subject: [PATCH] balloons: add sharedidlecpus and balloon topology to metrics --- .../policy/builtin/balloons/metrics.go | 47 +++++++++++++++++++ .../test02-prometheus-metrics/code.var.sh | 2 +- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/pkg/cri/resource-manager/policy/builtin/balloons/metrics.go b/pkg/cri/resource-manager/policy/builtin/balloons/metrics.go index 96501cc6d..93ed3f086 100644 --- a/pkg/cri/resource-manager/policy/builtin/balloons/metrics.go +++ b/pkg/cri/resource-manager/policy/builtin/balloons/metrics.go @@ -40,6 +40,17 @@ var descriptors = []*prometheus.Desc{ "cpus_max", "balloon", "cpus", + "cpus_count", + "numas", + "numas_count", + "dies", + "dies_count", + "packages", + "packages_count", + "sharedidlecpus", + "sharedidlecpus_count", + "cpus_allowed", + "cpus_allowed_count", "mems", "containers", "tot_req_millicpu", @@ -62,6 +73,17 @@ type BalloonMetrics struct { // Balloon instance metrics PrettyName string Cpus cpuset.CPUSet + CpusCount int + Numas []string + NumasCount int + Dies []string + DiesCount int + Packages []string + PackagesCount int + SharedIdleCpus cpuset.CPUSet + SharedIdleCpusCount int + CpusAllowed cpuset.CPUSet + CpusAllowedCount int Mems string ContainerNames string ContainerReqMilliCpus int @@ -78,6 +100,7 @@ func (p *balloons) PollMetrics() policy.Metrics { policyMetrics := &Metrics{} policyMetrics.Balloons = make([]*BalloonMetrics, len(p.balloons)) for index, bln := range p.balloons { + cpuLoc := p.cpuTree.CpuLocations(bln.Cpus) bm := &BalloonMetrics{} policyMetrics.Balloons[index] = bm bm.DefName = bln.Def.Name @@ -86,6 +109,19 @@ func (p *balloons) PollMetrics() policy.Metrics { bm.MaxCpus = bln.Def.MaxCpus bm.PrettyName = bln.PrettyName() bm.Cpus = bln.Cpus + bm.CpusCount = bm.Cpus.Size() + if len(cpuLoc) > 3 { + bm.Numas = cpuLoc[3] + bm.NumasCount = len(bm.Numas) + bm.Dies = cpuLoc[2] + bm.DiesCount = len(bm.Dies) + bm.Packages = cpuLoc[1] + bm.PackagesCount = len(bm.Packages) + } + bm.SharedIdleCpus = bln.SharedIdleCpus + bm.SharedIdleCpusCount = bm.SharedIdleCpus.Size() + bm.CpusAllowed = bm.Cpus.Union(bm.SharedIdleCpus) + bm.CpusAllowedCount = bm.CpusAllowed.Size() bm.Mems = bln.Mems.String() cNames := []string{} // Get container names and total requested milliCPUs. @@ -123,6 +159,17 @@ func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error) strconv.Itoa(bm.MaxCpus), bm.PrettyName, bm.Cpus.String(), + strconv.Itoa(bm.CpusCount), + strings.Join(bm.Numas, ","), + strconv.Itoa(bm.NumasCount), + strings.Join(bm.Dies, ","), + strconv.Itoa(bm.DiesCount), + strings.Join(bm.Packages, ","), + strconv.Itoa(bm.PackagesCount), + bm.SharedIdleCpus.String(), + strconv.Itoa(bm.SharedIdleCpusCount), + bm.CpusAllowed.String(), + strconv.Itoa(bm.CpusAllowedCount), bm.Mems, bm.ContainerNames, strconv.Itoa(bm.ContainerReqMilliCpus)) diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test02-prometheus-metrics/code.var.sh b/test/e2e/policies.test-suite/balloons/n4c16/test02-prometheus-metrics/code.var.sh index 49efc567f..1a0dee383 100644 --- a/test/e2e/policies.test-suite/balloons/n4c16/test02-prometheus-metrics/code.var.sh +++ b/test/e2e/policies.test-suite/balloons/n4c16/test02-prometheus-metrics/code.var.sh @@ -24,7 +24,7 @@ POD_ANNOTATION="balloon.balloons.cri-resource-manager.intel.com: full-core" CONT report allowed verify-metrics-has-line 'balloon="default\[0\]"' verify-metrics-has-line 'balloon="reserved\[0\]"' -verify-metrics-has-line 'balloons{balloon="full-core\[0\]",balloon_type="full-core",containers="pod0:pod0c0,pod0:pod0c1",cpu_class="normal",cpus="2-3",cpus_max="2",cpus_min="2",mems="0",tot_req_millicpu="(199|200)"} 2' +verify-metrics-has-line 'balloons{balloon="full-core\[0\]",balloon_type="full-core",containers="pod0:pod0c0,pod0:pod0c1",cpu_class="normal",cpus=".*",cpus_allowed=".*",cpus_allowed_count="2",cpus_count="2",cpus_max="2",cpus_min="2",dies="p[01]d0",dies_count="1",mems="0",numas="p[01]d0n[0-3]",numas_count="1",packages="p[01]",packages_count="1",sharedidlecpus="",sharedidlecpus_count="0",tot_req_millicpu="(199|200)"} 2' # pod1 in fast-dualcore[0] CPUREQ="200m" MEMREQ="" CPULIM="200m" MEMLIM=""