From a73e1df557592f52ae84a0d7c97480b37985b0d3 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Fri, 4 Jul 2025 10:49:00 +0100 Subject: [PATCH 1/3] added non-systemd cgroup mode --- pkg/collector/cgroup.go | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/pkg/collector/cgroup.go b/pkg/collector/cgroup.go index 42a11e0e..981715fc 100644 --- a/pkg/collector/cgroup.go +++ b/pkg/collector/cgroup.go @@ -73,6 +73,11 @@ const ( netSubsystem = "net_cls,net_prio" ) +const ( + systemdSlicesName = "machine.slice" + nonSystemdSlicesName = "machine" +) + // Regular expressions of cgroup paths for different resource managers. // ^.*/(?:(.*?)_)?slurm(?:_(.*?)/)?(?:.*?)/job_([0-9]+)(?:.*$) // ^.*/slurm(?:_(.*?))?/(?:.*?)/job_([0-9]+)(?:.*$) @@ -98,9 +103,12 @@ var ( For v2 possibilities are /machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope /machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope/libvirt + + Non systemd: machine/qemu-1-instance1.libvirt-qemu */ var ( libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") + libvirtCgroupNoSystemdPathRegex = regex.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") ) // Ref: https://linuxera.org/cpu-memory-management-kubernetes-cgroupsv2/ @@ -137,8 +145,27 @@ var ( "collector.cgroups.force-version", "Set cgroups version manually. 
Used only for testing.", ).Hidden().Enum("v1", "v2") + + noSystemdMode = CEEMSExporterApp.Flag( + "collector.cgroups.no-systemd-mode", + "Set if running on a non-systemd host", + ).Default("false").Bool() ) +func resolveSlices(nonSystemdMode bool) string { + if nonSystemdMode: + return nonSystemdSlicesName + else + return systemdSlicesName +} + +func resolveLibvirtRegex(nonSystemdMode bool) *Regexp { + if nonSystemdMode: + return libvirtCgroupNoSystemdPathRegex + else + return libvirtCgroupPathRegex +} + // resolveSubsystem returns the resolved cgroups v1 subsystem. func resolveSubsystem(subsystem string) string { switch subsystem { @@ -311,7 +338,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) fs: fs, mode: cgroups.Unified, root: *cgroupfsPath, - slices: []string{"machine.slice"}, + slices: []string{resolveSlices(*nonSystemdMode)}, } } else { var mode cgroups.CGMode @@ -330,7 +357,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) mode: mode, root: *cgroupfsPath, activeController: activeSubsystem, - slices: []string{"machine.slice"}, + slices: []string{resolveSlices(*nonSystemdMode)}, } } @@ -339,10 +366,10 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) manager.name = rmNames[name] // Add path regex - manager.idRegex = libvirtCgroupPathRegex + manager.idRegex = resolveLibvirtRegex(*nonSystemdMode) // Identify child cgroup - // In cgroups v1, all the child cgroups like emulator, vcpu* are flat whereas + // In cgroups v1 or on a non-systemd host, all the child cgroups like emulator, vcpu* are flat whereas // in v2 they are all inside libvirt child manager.isChild = func(p string) bool { return strings.Contains(p, "/libvirt") || strings.Contains(p, "/emulator") || strings.Contains(p, "/vcpu") @@ -1078,10 +1105,9 @@ func (c *cgroupCollector) cpusFromChildren(path string) (int, error) { // In cgroup v1, they are flat whereas in cgroup v2 they are inside libvirt 
folder var vcpuPath string - switch c.cgroupManager.mode { //nolint:exhaustive - case cgroups.Unified: + if c.cgroupManager.mode == cgroups.Unified && !(*noSystemdMode){ vcpuPath = fmt.Sprintf("%s%s/libvirt/vcpu*", c.cgroupManager.root, path) - default: + } else { vcpuPath = fmt.Sprintf("%s%s/vcpu*", c.cgroupManager.root, path) } From d623547f73645eef38311ffcff62c8b2d180d7ed Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Fri, 4 Jul 2025 11:06:55 +0100 Subject: [PATCH 2/3] fixes and formatting --- pkg/collector/cgroup.go | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/pkg/collector/cgroup.go b/pkg/collector/cgroup.go index 981715fc..571a4547 100644 --- a/pkg/collector/cgroup.go +++ b/pkg/collector/cgroup.go @@ -74,7 +74,7 @@ const ( ) const ( - systemdSlicesName = "machine.slice" + systemdSlicesName = "machine.slice" nonSystemdSlicesName = "machine" ) @@ -107,8 +107,8 @@ var ( Non systemd: machine/qemu-1-instance1.libvirt-qemu */ var ( - libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") - libvirtCgroupNoSystemdPathRegex = regex.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") + libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") + libvirtCgroupNoSystemdPathRegex = regexp.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") ) // Ref: https://linuxera.org/cpu-memory-management-kubernetes-cgroupsv2/ @@ -153,17 +153,19 @@ var ( ) func resolveSlices(nonSystemdMode bool) string { - if nonSystemdMode: - return nonSystemdSlicesName - else - return systemdSlicesName + if nonSystemdMode { + return nonSystemdSlicesName + } else { + return systemdSlicesName + } } -func resolveLibvirtRegex(nonSystemdMode bool) *Regexp { - if nonSystemdMode: - return libvirtCgroupNoSystemdPathRegex - else - return libvirtCgroupPathRegex +func resolveLibvirtRegex(nonSystemdMode bool) 
*regexp.Regexp { + if nonSystemdMode { + return libvirtCgroupNoSystemdPathRegex + } else { + return libvirtCgroupPathRegex + } } // resolveSubsystem returns the resolved cgroups v1 subsystem. @@ -338,7 +340,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) fs: fs, mode: cgroups.Unified, root: *cgroupfsPath, - slices: []string{resolveSlices(*nonSystemdMode)}, + slices: []string{resolveSlices(*noSystemdMode)}, } } else { var mode cgroups.CGMode @@ -357,7 +359,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) mode: mode, root: *cgroupfsPath, activeController: activeSubsystem, - slices: []string{resolveSlices(*nonSystemdMode)}, + slices: []string{resolveSlices(*noSystemdMode)}, } } @@ -366,7 +368,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) manager.name = rmNames[name] // Add path regex - manager.idRegex = resolveLibvirtRegex(*nonSystemdMode) + manager.idRegex = resolveLibvirtRegex(*noSystemdMode) // Identify child cgroup // In cgroups v1 or on a non-systemd host, all the child cgroups like emulator, vcpu* are flat whereas @@ -1105,7 +1107,7 @@ func (c *cgroupCollector) cpusFromChildren(path string) (int, error) { // In cgroup v1, they are flat whereas in cgroup v2 they are inside libvirt folder var vcpuPath string - if c.cgroupManager.mode == cgroups.Unified && !(*noSystemdMode){ + if c.cgroupManager.mode == cgroups.Unified && !(*noSystemdMode) { vcpuPath = fmt.Sprintf("%s%s/libvirt/vcpu*", c.cgroupManager.root, path) } else { vcpuPath = fmt.Sprintf("%s%s/vcpu*", c.cgroupManager.root, path) From 7e84cb7e92bf5dce9a4ea1e0ff58e8582c903892 Mon Sep 17 00:00:00 2001 From: wtripp180901 Date: Tue, 15 Jul 2025 14:57:53 +0100 Subject: [PATCH 3/3] autodetect cgroup layout + tweaks --- pkg/collector/cgroup.go | 61 +++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/pkg/collector/cgroup.go b/pkg/collector/cgroup.go 
index 571a4547..be28ad00 100644 --- a/pkg/collector/cgroup.go +++ b/pkg/collector/cgroup.go @@ -73,11 +73,6 @@ const ( netSubsystem = "net_cls,net_prio" ) -const ( - systemdSlicesName = "machine.slice" - nonSystemdSlicesName = "machine" -) - // Regular expressions of cgroup paths for different resource managers. // ^.*/(?:(.*?)_)?slurm(?:_(.*?)/)?(?:.*?)/job_([0-9]+)(?:.*$) // ^.*/slurm(?:_(.*?))?/(?:.*?)/job_([0-9]+)(?:.*$) @@ -104,11 +99,10 @@ var ( For v2 possibilities are /machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope /machine.slice/machine-qemu\x2d2\x2dinstance\x2d00000001.scope/libvirt - Non systemd: machine/qemu-1-instance1.libvirt-qemu + For non-systemd layouts: machine/qemu-1-instance-00000001.libvirt-qemu */ var ( - libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)-qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") - libvirtCgroupNoSystemdPathRegex = regexp.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?Pinstance-[0-9a-f]+)(?:.*$)") + libvirtCgroupPathRegex = regexp.MustCompile("^.*/(?:.+?)qemu-(?:[0-9]+)-(?P<id>instance-[0-9a-f]+)(?:.*$)") ) // Ref: https://linuxera.org/cpu-memory-management-kubernetes-cgroupsv2/ @@ -145,29 +139,8 @@ var ( "collector.cgroups.force-version", "Set cgroups version manually. Used only for testing.", ).Hidden().Enum("v1", "v2") - - noSystemdMode = CEEMSExporterApp.Flag( - "collector.cgroups.no-systemd-mode", - "Set if running on a non-systemd host", - ).Default("false").Bool() ) -func resolveSlices(nonSystemdMode bool) string { - if nonSystemdMode { - return nonSystemdSlicesName - } else { - return systemdSlicesName - } -} - -func resolveLibvirtRegex(nonSystemdMode bool) *regexp.Regexp { - if nonSystemdMode { - return libvirtCgroupNoSystemdPathRegex - } else { - return libvirtCgroupPathRegex - } -} - // resolveSubsystem returns the resolved cgroups v1 subsystem. 
func resolveSubsystem(subsystem string) string { switch subsystem { @@ -228,6 +201,7 @@ type cgroupManager struct { idRegex *regexp.Regexp // Regular expression to capture cgroup ID set by resource manager isChild func(string) bool // Function to identify child cgroup paths. Function must return true if cgroup is a child to root cgroup ignoreProc func(string) bool // Function to filter processes in cgroup based on cmdline. Function must return true if process must be ignored + nonSystemdLayout bool // Libvirt collector only. Whether Libvirt is using a non-systemd cgroup layout } // NewCgroupManager returns an instance of cgroupManager based on resource manager. @@ -340,7 +314,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) fs: fs, mode: cgroups.Unified, root: *cgroupfsPath, - slices: []string{resolveSlices(*noSystemdMode)}, + slices: []string{}, } } else { var mode cgroups.CGMode @@ -359,7 +333,28 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) mode: mode, root: *cgroupfsPath, activeController: activeSubsystem, - slices: []string{resolveSlices(*noSystemdMode)}, + slices: []string{}, + } + } + + // Discover cgroup layout depending on whether nova-libvirt uses systemd + var slicesPrefix string + + switch manager.mode { //nolint:exhaustive + case cgroups.Unified: + slicesPrefix = *cgroupfsPath + default: + slicesPrefix = filepath.Join(*cgroupfsPath, manager.activeController) + } + + for _, slice := range []string{"machine", "machine.slice"} { + if _, err := os.Stat(filepath.Join(slicesPrefix, slice)); err == nil { + manager.slices = append(manager.slices, slice) + if slice == "machine" { + manager.nonSystemdLayout = true + } + + break // Stop at the first match: at most one of machine or machine.slice exists at any given time } } @@ -368,7 +363,7 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) manager.name = rmNames[name] // Add path regex - manager.idRegex 
= resolveLibvirtRegex(*noSystemdMode) + manager.idRegex = libvirtCgroupPathRegex // Identify child cgroup // In cgroups v1 or on a non-systemd host, all the child cgroups like emulator, vcpu* are flat whereas @@ -1107,7 +1102,7 @@ func (c *cgroupCollector) cpusFromChildren(path string) (int, error) { // In cgroup v1, they are flat whereas in cgroup v2 they are inside libvirt folder var vcpuPath string - if c.cgroupManager.mode == cgroups.Unified && !(*noSystemdMode) { + if c.cgroupManager.mode == cgroups.Unified && !(c.cgroupManager.nonSystemdLayout) { vcpuPath = fmt.Sprintf("%s%s/libvirt/vcpu*", c.cgroupManager.root, path) } else { vcpuPath = fmt.Sprintf("%s%s/vcpu*", c.cgroupManager.root, path)