diff --git a/docs/changelog/77128.yaml b/docs/changelog/77128.yaml new file mode 100644 index 0000000000000..63133b6c76acc --- /dev/null +++ b/docs/changelog/77128.yaml @@ -0,0 +1,7 @@ +pr: 77128 +summary: Handle cgroups v2 in `OsProbe` +area: Infra/Core +type: enhancement +issues: + - 77126 + - 76812 diff --git a/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java b/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java index d5fd91a427bac..45689f0fed691 100644 --- a/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java +++ b/qa/os/src/test/java/org/elasticsearch/packaging/test/DockerTests.java @@ -849,7 +849,6 @@ public void test131InitProcessHasCorrectPID() { /** * Check that Elasticsearch reports per-node cgroup information. */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812") public void test140CgroupOsStatsAreAvailable() throws Exception { waitForElasticsearch(installation, USERNAME, PASSWORD); diff --git a/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java b/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java index 20cbaeac464f9..db07be0d1eb86 100644 --- a/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java +++ b/qa/os/src/test/java/org/elasticsearch/packaging/util/Packages.java @@ -247,12 +247,18 @@ private static void verifyDefaultInstallation(Installation es, Distribution dist /** * Starts Elasticsearch, without checking that startup is successful. 
*/ - public static Shell.Result runElasticsearchStartCommand(Shell sh) throws IOException { + public static Shell.Result runElasticsearchStartCommand(Shell sh) { if (isSystemd()) { + Packages.JournaldWrapper journald = new Packages.JournaldWrapper(sh); sh.run("systemctl daemon-reload"); sh.run("systemctl enable elasticsearch.service"); sh.run("systemctl is-enabled elasticsearch.service"); - return sh.runIgnoreExitCode("systemctl start elasticsearch.service"); + Result exitCode = sh.runIgnoreExitCode("systemctl start elasticsearch.service"); + if (exitCode.isSuccess() == false) { + logger.warn(sh.runIgnoreExitCode("systemctl status elasticsearch.service").stdout); + logger.warn(journald.getLogs().stdout); + } + return exitCode; } return sh.runIgnoreExitCode("service elasticsearch start"); } diff --git a/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java index a20b7c7e27396..8ee5a945d490d 100644 --- a/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java +++ b/server/src/main/java/org/elasticsearch/monitor/os/OsProbe.java @@ -30,25 +30,31 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * The {@link OsProbe} class retrieves information about the physical and swap size of the machine * memory, as well as the system load average and cpu load. * - * In some exceptional cases, it's possible the underlying native methods used by + *

<p>In some exceptional cases, it's possible the underlying native methods used by * {@link #getFreePhysicalMemorySize()}, {@link #getTotalPhysicalMemorySize()}, * {@link #getFreeSwapSpaceSize()}, and {@link #getTotalSwapSpaceSize()} can return a * negative value. Because of this, we prevent those methods from returning negative values, * returning 0 instead. * - * The OS can report a negative number in a number of cases: - * - Non-supported OSes (HP-UX, or AIX) - * - A failure of macOS to initialize host statistics - * - An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call - * - An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE} - * - An error case retrieving these values from a linux kernel - * - A non-standard libc implementation not implementing the required values - * For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725 + *
<p>The OS can report a negative number in a number of cases: + * + *
<ul> + * <li>Non-supported OSes (HP-UX, or AIX) + * <li>A failure of macOS to initialize host statistics + * <li>An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel + * call + * <li>An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE} + * <li>An error case retrieving these values from a linux kernel + * <li>A non-standard libc implementation not implementing the required values + * </ul>
+ * + *
<p>
For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725 */ public class OsProbe { @@ -178,7 +184,7 @@ final double[] getSystemLoadAverage() { final String procLoadAvg = readProcLoadavg(); assert procLoadAvg.matches("(\\d+\\.\\d+\\s+){3}\\d+/\\d+\\s+\\d+"); final String[] fields = procLoadAvg.split("\\s+"); - return new double[]{Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2])}; + return new double[] { Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2]) }; } catch (final IOException e) { if (logger.isDebugEnabled()) { logger.debug("error reading /proc/loadavg", e); @@ -192,7 +198,7 @@ final double[] getSystemLoadAverage() { } try { final double oneMinuteLoadAverage = (double) getSystemLoadAverage.invoke(osMxBean); - return new double[]{oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1}; + return new double[] { oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1 }; } catch (IllegalAccessException | InvocationTargetException e) { if (logger.isDebugEnabled()) { logger.debug("error reading one minute load average from operating system", e); @@ -318,6 +324,23 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage")); } + private long[] getCgroupV2CpuLimit(String controlGroup) throws IOException { + String entry = readCgroupV2CpuLimit(controlGroup); + String[] parts = entry.split("\\s+"); + assert parts.length == 2 : "Expected 2 fields in [cpu.max]"; + + long[] values = new long[2]; + + values[0] = "max".equals(parts[0]) ? 
-1L : Long.parseLong(parts[0]); + values[1] = Long.parseLong(parts[1]); + return values; + } + + @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.max") + String readCgroupV2CpuLimit(String controlGroup) throws IOException { + return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "cpu.max")); + } + /** * The total period of time in microseconds for how frequently the Elasticsearch control group's access to CPU resources will be * reallocated. @@ -454,6 +477,35 @@ String readSysFsCgroupMemoryLimitInBytes(final String controlGroup) throws IOExc return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.limit_in_bytes")); } + /** + * The maximum amount of user memory (including file cache). + * If there is no limit then some Linux versions return the maximum value that can be stored in an + * unsigned 64 bit number, and this will overflow a long, hence the result type is String. + * (The alternative would have been BigInteger but then it would not be possible to index + * the OS stats document into Elasticsearch without losing information, as BigInteger is + * not a supported Elasticsearch type.) + * + * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem + * @return the maximum amount of user memory (including file cache) + * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group + */ + private String getCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException { + return readSysFsCgroupV2MemoryLimitInBytes(controlGroup); + } + + /** + * Returns the line from {@code memory.max} for the control group to which the Elasticsearch process belongs for the + * {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache). 
+ * + * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem + * @return the line from {@code memory.max} + * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group + */ + @SuppressForbidden(reason = "access /sys/fs/cgroup/memory.max") + String readSysFsCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException { + return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.max")); + } + /** * The total current memory usage by processes in the cgroup (in bytes). * If there is no limit then some Linux versions return the maximum value that can be stored in an @@ -483,6 +535,35 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.usage_in_bytes")); } + /** + * The total current memory usage by processes in the cgroup (in bytes). + * If there is no limit then some Linux versions return the maximum value that can be stored in an + * unsigned 64 bit number, and this will overflow a long, hence the result type is String. + * (The alternative would have been BigInteger but then it would not be possible to index + * the OS stats document into Elasticsearch without losing information, as BigInteger is + * not a supported Elasticsearch type.) 
+ * + * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem + * @return the total current memory usage by processes in the cgroup (in bytes) + * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group + */ + private String getCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException { + return readSysFsCgroupV2MemoryUsageInBytes(controlGroup); + } + + /** + * Returns the line from {@code memory.current} for the control group to which the Elasticsearch process belongs for the + * {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes). + * + * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem + * @return the line from {@code memory.current} + * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group + */ + @SuppressForbidden(reason = "access /sys/fs/cgroup/memory.current") + String readSysFsCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException { + return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.current")); + } + /** * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu}, * {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory}. 
@@ -490,20 +571,60 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc * @return {@code true} if the stats are available, otherwise {@code false} */ @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory") - boolean areCgroupStatsAvailable() { + boolean areCgroupStatsAvailable() throws IOException { if (Files.exists(PathUtils.get("/proc/self/cgroup")) == false) { return false; } - if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpu")) == false) { - return false; - } - if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct")) == false) { - return false; + + List lines = readProcSelfCgroup(); + + // cgroup v2 + if (lines.size() == 1 && lines.get(0).startsWith("0::")) { + return Stream.of("/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/memory.stat").allMatch(path -> Files.exists(PathUtils.get(path))); } - if (Files.exists(PathUtils.get("/sys/fs/cgroup/memory")) == false) { - return false; + + return Stream.of("/sys/fs/cgroup/cpu", "/sys/fs/cgroup/cpuacct", "/sys/fs/cgroup/memory") + .allMatch(path -> Files.exists(PathUtils.get(path))); + } + + /** + * The CPU statistics for all tasks in the Elasticsearch control group. 
+ * + * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code cpu} subsystem + * @return the CPU statistics + * @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group + */ + @SuppressForbidden(reason = "Uses PathUtils.get to generate meaningful assertion messages") + private Map getCgroupV2CpuStats(String controlGroup) throws IOException { + final List lines = readCgroupV2CpuStats(controlGroup); + final Map stats = new HashMap<>(); + + for (String line : lines) { + String[] parts = line.split("\\s+"); + assert parts.length == 2 : "Corrupt cpu.stat line: [" + line + "]"; + stats.put(parts[0], Long.parseLong(parts[1])); } - return true; + + final List expectedKeys = List.of("system_usec", "usage_usec", "user_usec"); + expectedKeys.forEach(key -> { + assert stats.containsKey(key) : "[" + key + "] missing from " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat"); + assert stats.get(key) != -1 : stats.get(key); + }); + + final List optionalKeys = List.of("nr_periods", "nr_throttled", "throttled_usec"); + optionalKeys.forEach(key -> { + if (stats.containsKey(key) == false) { + stats.put(key, 0L); + } + assert stats.get(key) != -1L : "[" + key + "] in " + PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat") + " is -1"; + }); + + return stats; + } + + @SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.stat") + List readCgroupV2CpuStats(final String controlGroup) throws IOException { + return Files.readAllLines(PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat")); } /** @@ -515,45 +636,79 @@ private OsStats.Cgroup getCgroup() { try { if (areCgroupStatsAvailable() == false) { return null; - } else { - final Map controllerMap = getControlGroups(); - assert controllerMap.isEmpty() == false; + } + + final Map controllerMap = getControlGroups(); + assert controllerMap.isEmpty() == false; - final String cpuAcctControlGroup = controllerMap.get("cpuacct"); + final String 
cpuAcctControlGroup; + final long cgroupCpuAcctUsageNanos; + final long cgroupCpuAcctCpuCfsPeriodMicros; + final long cgroupCpuAcctCpuCfsQuotaMicros; + final String cpuControlGroup; + final OsStats.Cgroup.CpuStat cpuStat; + final String memoryControlGroup; + final String cgroupMemoryLimitInBytes; + final String cgroupMemoryUsageInBytes; + + if (controllerMap.size() == 1 && controllerMap.containsKey("")) { + // There's a single hierarchy for all controllers + cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap.get(""); + + // `cpuacct` was merged with `cpu` in v2 + final Map cpuStatsMap = getCgroupV2CpuStats(cpuControlGroup); + + cgroupCpuAcctUsageNanos = cpuStatsMap.get("usage_usec"); + + long[] cpuLimits = getCgroupV2CpuLimit(cpuControlGroup); + cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits[0]; + cgroupCpuAcctCpuCfsPeriodMicros = cpuLimits[1]; + + cpuStat = new OsStats.Cgroup.CpuStat( + cpuStatsMap.get("nr_periods"), + cpuStatsMap.get("nr_throttled"), + cpuStatsMap.get("throttled_usec") + ); + + cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes(memoryControlGroup); + cgroupMemoryUsageInBytes = getCgroupV2MemoryUsageInBytes(memoryControlGroup); + } else { + cpuAcctControlGroup = controllerMap.get("cpuacct"); if (cpuAcctControlGroup == null) { logger.debug("no [cpuacct] data found in cgroup stats"); return null; } - final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); + cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup); - final String cpuControlGroup = controllerMap.get("cpu"); + cpuControlGroup = controllerMap.get("cpu"); if (cpuControlGroup == null) { logger.debug("no [cpu] data found in cgroup stats"); return null; } - final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); - final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); - final OsStats.Cgroup.CpuStat cpuStat = 
getCgroupCpuAcctCpuStat(cpuControlGroup); + cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup); + cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup); + cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup); - final String memoryControlGroup = controllerMap.get("memory"); + memoryControlGroup = controllerMap.get("memory"); if (memoryControlGroup == null) { logger.debug("no [memory] data found in cgroup stats"); return null; } - final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup); - final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup); - - return new OsStats.Cgroup( - cpuAcctControlGroup, - cgroupCpuAcctUsageNanos, - cpuControlGroup, - cgroupCpuAcctCpuCfsPeriodMicros, - cgroupCpuAcctCpuCfsQuotaMicros, - cpuStat, - memoryControlGroup, - cgroupMemoryLimitInBytes, - cgroupMemoryUsageInBytes); + cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup); + cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup); } + + return new OsStats.Cgroup( + cpuAcctControlGroup, + cgroupCpuAcctUsageNanos, + cpuControlGroup, + cgroupCpuAcctCpuCfsPeriodMicros, + cgroupCpuAcctCpuCfsQuotaMicros, + cpuStat, + memoryControlGroup, + cgroupMemoryLimitInBytes, + cgroupMemoryUsageInBytes + ); } catch (final IOException e) { logger.debug("error reading control group stats", e); return null; @@ -576,13 +731,14 @@ public static OsProbe getInstance() { OsInfo osInfo(long refreshInterval, int allocatedProcessors) throws IOException { return new OsInfo( - refreshInterval, - Runtime.getRuntime().availableProcessors(), - allocatedProcessors, - Constants.OS_NAME, - getPrettyName(), - Constants.OS_ARCH, - Constants.OS_VERSION); + refreshInterval, + Runtime.getRuntime().availableProcessors(), + allocatedProcessors, + Constants.OS_NAME, + getPrettyName(), + Constants.OS_ARCH, + Constants.OS_VERSION + ); } private String getPrettyName() 
throws IOException { @@ -594,11 +750,13 @@ private String getPrettyName() throws IOException { * wrapped in single- or double-quotes. */ final List etcOsReleaseLines = readOsRelease(); - final List prettyNameLines = - etcOsReleaseLines.stream().filter(line -> line.startsWith("PRETTY_NAME")).collect(Collectors.toList()); + final List prettyNameLines = etcOsReleaseLines.stream() + .filter(line -> line.startsWith("PRETTY_NAME")) + .collect(Collectors.toList()); assert prettyNameLines.size() <= 1 : prettyNameLines; - final Optional maybePrettyNameLine = - prettyNameLines.size() == 1 ? Optional.of(prettyNameLines.get(0)) : Optional.empty(); + final Optional maybePrettyNameLine = prettyNameLines.size() == 1 + ? Optional.of(prettyNameLines.get(0)) + : Optional.empty(); if (maybePrettyNameLine.isPresent()) { // we trim since some OS contain trailing space, for example, Oracle Linux Server 6.9 has a trailing space after the quote final String trimmedPrettyNameLine = maybePrettyNameLine.get().trim(); @@ -695,11 +853,15 @@ boolean isDebian8() throws IOException { return Constants.LINUX && getPrettyName().equals("Debian GNU/Linux 8 (jessie)"); } + OsStats.Cgroup getCgroup(boolean isLinux) { + return isLinux ? getCgroup() : null; + } + public OsStats osStats() { final OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage()); final OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize()); final OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize()); - final OsStats.Cgroup cgroup = Constants.LINUX ? 
getCgroup() : null; + final OsStats.Cgroup cgroup = getCgroup(Constants.LINUX); return new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup); } diff --git a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy index 8ffc0d0eea47d..5ae15e74ec2d4 100644 --- a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy +++ b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy @@ -144,14 +144,11 @@ grant { permission java.io.FilePermission "/proc/self/mountinfo", "read"; permission java.io.FilePermission "/proc/diskstats", "read"; - // control group stats on Linux + // control group stats on Linux. cgroup v2 stats are in an unpredicable + // location under `/sys/fs/cgroup`, so unfortunately we have to allow + // read access to the entire directory hierarchy. permission java.io.FilePermission "/proc/self/cgroup", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/cpu", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/memory", "read"; - permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read"; + permission java.io.FilePermission "/sys/fs/cgroup/-", "read"; // system memory on Linux systems affected by JDK bug (#66629) permission java.io.FilePermission "/proc/meminfo", "read"; diff --git a/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java b/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java index 8be3723d72cc3..ac802cf738500 100644 --- a/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java +++ b/server/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java @@ -43,7 +43,7 @@ public void testOsInfo() throws IOException { final OsProbe osProbe = new OsProbe() 
{ @Override - List readOsRelease() throws IOException { + List readOsRelease() { assert Constants.LINUX : Constants.OS_NAME; if (prettyName != null) { final String quote = randomFrom("\"", "'", ""); @@ -78,8 +78,10 @@ public void testOsStats() { OsStats stats = osProbe.osStats(); assertNotNull(stats); assertThat(stats.getTimestamp(), greaterThan(0L)); - assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1), - is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))); + assertThat( + stats.getCpu().getPercent(), + anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))) + ); double[] loadAverage = stats.getCpu().getLoadAverage(); if (loadAverage != null) { assertThat(loadAverage.length, equalTo(3)); @@ -173,16 +175,14 @@ String readProcLoadavg() { } public void testCgroupProbe() { - assumeTrue("test runs on Linux only", Constants.LINUX); - - final boolean areCgroupStatsAvailable = randomBoolean(); + final int availableCgroupsVersion = randomFrom(0, 1, 2); final String hierarchy = randomAlphaOfLength(16); - final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy); + final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy); final OsStats.Cgroup cgroup = probe.osStats().getCgroup(); - if (areCgroupStatsAvailable) { + if (availableCgroupsVersion > 0) { assertNotNull(cgroup); assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy)); assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L)); @@ -200,17 +200,14 @@ public void testCgroupProbe() { } public void testCgroupProbeWithMissingCpuAcct() { - assumeTrue("test runs on Linux only", Constants.LINUX); - final String hierarchy = randomAlphaOfLength(16); // This cgroup data is missing a line about cpuacct - List procSelfCgroupLines = getProcSelfGroupLines(hierarchy) - .stream() + List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream() .map(line -> 
line.replaceFirst(",cpuacct", "")) .collect(Collectors.toList()); - final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines); + final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines); final OsStats.Cgroup cgroup = probe.osStats().getCgroup(); @@ -218,18 +215,14 @@ public void testCgroupProbeWithMissingCpuAcct() { } public void testCgroupProbeWithMissingCpu() { - assumeTrue("test runs on Linux only", Constants.LINUX); - final String hierarchy = randomAlphaOfLength(16); // This cgroup data is missing a line about cpu - List procSelfCgroupLines = getProcSelfGroupLines(hierarchy) - .stream() + List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream() .map(line -> line.replaceFirst(":cpu,", ":")) .collect(Collectors.toList()); - - final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines); + final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines); final OsStats.Cgroup cgroup = probe.osStats().getCgroup(); @@ -237,17 +230,14 @@ public void testCgroupProbeWithMissingCpu() { } public void testCgroupProbeWithMissingMemory() { - assumeTrue("test runs on Linux only", Constants.LINUX); - final String hierarchy = randomAlphaOfLength(16); // This cgroup data is missing a line about memory - List procSelfCgroupLines = getProcSelfGroupLines(hierarchy) - .stream() + List procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream() .filter(line -> line.contains(":memory:") == false) .collect(Collectors.toList()); - final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines); + final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines); final OsStats.Cgroup cgroup = probe.osStats().getCgroup(); @@ -255,6 +245,8 @@ public void testCgroupProbeWithMissingMemory() { } public void testGetTotalMemFromProcMeminfo() throws Exception { + int cgroupsVersion = randomFrom(1, 2); + // missing MemTotal line var meminfoLines = Arrays.asList( "MemFree: 8467692 kB", @@ -265,7 
+257,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception { "Active: 43637908 kB", "Inactive: 8130280 kB" ); - OsProbe probe = buildStubOsProbe(true, "", List.of(), meminfoLines); + OsProbe probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines); assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L)); // MemTotal line with invalid value @@ -279,7 +271,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception { "Active: 43637908 kB", "Inactive: 8130280 kB" ); - probe = buildStubOsProbe(true, "", List.of(), meminfoLines); + probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines); assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L)); // MemTotal line with invalid unit @@ -293,7 +285,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception { "Active: 43637908 kB", "Inactive: 8130280 kB" ); - probe = buildStubOsProbe(true, "", List.of(), meminfoLines); + probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines); assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L)); // MemTotal line with random valid value @@ -308,7 +300,7 @@ public void testGetTotalMemFromProcMeminfo() throws Exception { "Active: 43637908 kB", "Inactive: 8130280 kB" ); - probe = buildStubOsProbe(true, "", List.of(), meminfoLines); + probe = buildStubOsProbe(cgroupsVersion, "", List.of(), meminfoLines); assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L)); } @@ -319,7 +311,13 @@ public void testGetTotalMemoryOnDebian8() throws Exception { assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L)); } - private static List getProcSelfGroupLines(String hierarchy) { + private static List getProcSelfGroupLines(int cgroupsVersion, String hierarchy) { + // It doesn't really matter if cgroupsVersion == 0 here + + if (cgroupsVersion == 2) { + return List.of("0::/" + hierarchy); + } + return Arrays.asList( "10:freezer:/", "9:net_cls,net_prio:/", @@ -331,32 +329,40 @@ private static List 
getProcSelfGroupLines(String hierarchy) { "3:perf_event:/", "2:cpu,cpuacct,cpuset:/" + hierarchy, "1:name=systemd:/user.slice/user-1000.slice/session-2359.scope", - "0::/cgroup2"); + "0::/cgroup2" + ); } - private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) { - List procSelfCgroupLines = getProcSelfGroupLines(hierarchy); + private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) { + List procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy); - return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines); + return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines); } /** * Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon * the arguments to this method. * - * @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself. + * @param availableCgroupsVersion what version of cgroups are available, 1 or 2, or 0 for no cgroups. Normally OsProbe establishes this + * for itself. * @param hierarchy a mock value used to generate a cgroup hierarchy. 
* @param procSelfCgroupLines the lines that will be used as the content of /proc/self/cgroup * @param procMeminfoLines lines that will be used as the content of /proc/meminfo * @return a test instance */ private static OsProbe buildStubOsProbe( - final boolean areCgroupStatsAvailable, + final int availableCgroupsVersion, final String hierarchy, List procSelfCgroupLines, List procMeminfoLines ) { return new OsProbe() { + @Override + OsStats.Cgroup getCgroup(boolean isLinux) { + // Pretend we're always on Linux so that we can run the cgroup tests + return super.getCgroup(true); + } + @Override List readProcSelfCgroup() { return procSelfCgroupLines; @@ -382,10 +388,7 @@ String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) { @Override List readSysFsCgroupCpuAcctCpuStat(String controlGroup) { - return Arrays.asList( - "nr_periods 17992", - "nr_throttled 1311", - "throttled_time 139298645489"); + return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489"); } @Override @@ -403,22 +406,50 @@ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) { @Override boolean areCgroupStatsAvailable() { - return areCgroupStatsAvailable; + return availableCgroupsVersion > 0; } @Override - List readProcMeminfo() throws IOException { + List readProcMeminfo() { return procMeminfoLines; } + + @Override + String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) { + assertThat(controlGroup, equalTo("/" + hierarchy)); + // This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long + return "18446744073709551615"; + } + + @Override + String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) { + assertThat(controlGroup, equalTo("/" + hierarchy)); + return "4796416"; + } + + @Override + List readCgroupV2CpuStats(String controlGroup) { + assertThat(controlGroup, equalTo("/" + hierarchy)); + return List.of( + "usage_usec 364869866063112", + "user_usec 34636", + "system_usec 9896", + 
"nr_periods 17992", + "nr_throttled 1311", + "throttled_usec 139298645489" + ); + } + + @Override + String readCgroupV2CpuLimit(String controlGroup) { + assertThat(controlGroup, equalTo("/" + hierarchy)); + return "50000 100000"; + } }; } - private static OsProbe buildStubOsProbe( - final boolean areCgroupStatsAvailable, - final String hierarchy, - List procSelfCgroupLines - ) { - return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines, List.of()); + private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy, List procSelfCgroupLines) { + return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines, List.of()); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 864eefdb567cd..226aabc9bb02c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -1260,7 +1260,7 @@ static long machineMemoryFromStats(OsStats stats) { OsStats.Cgroup cgroup = stats.getCgroup(); if (cgroup != null) { String containerLimitStr = cgroup.getMemoryLimitInBytes(); - if (containerLimitStr != null) { + if (containerLimitStr != null && containerLimitStr.equals("max") == false) { BigInteger containerLimit = new BigInteger(containerLimitStr); if ((containerLimit.compareTo(BigInteger.valueOf(mem)) < 0 && containerLimit.compareTo(BigInteger.ZERO) > 0) // mem <= 0 means the value couldn't be obtained for some reason