From e106f0cdc8386be98364fda1aad06fb97093589b Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Fri, 20 Aug 2021 18:36:01 +0300 Subject: [PATCH 01/15] utils: Add run_process(communicate) param --- gprofiler/utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gprofiler/utils.py b/gprofiler/utils.py index 63fa80f06..4471274b3 100644 --- a/gprofiler/utils.py +++ b/gprofiler/utils.py @@ -164,13 +164,23 @@ def run_process( check: bool = True, timeout: int = None, kill_signal: signal.Signals = signal.SIGKILL, + communicate: bool = True, **kwargs, ) -> CompletedProcess: + stdout = None + stderr = None with start_process(cmd, via_staticx, **kwargs) as process: try: if stop_event is None: - stdout, stderr = process.communicate(timeout=timeout) + assert timeout is None + if communicate: + # wait for stderr & stdout to be closed + stdout, stderr = process.communicate(timeout=timeout) + else: + # just wait for the process to exit + process.wait() else: + assert communicate end_time = (time.monotonic() + timeout) if timeout is not None else None while True: try: From 647d4a6db2c095d17306e81938584ff86d2b3c52 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Fri, 20 Aug 2021 18:36:49 +0300 Subject: [PATCH 02/15] java: Add AP CPU mode support --- Dockerfile | 1 + gprofiler/profilers/java.py | 42 ++++++++++++++++++++++++++++++------- tests/test_java.py | 2 +- tests/test_sanity.py | 2 +- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7426bb794..9f519f03d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -108,6 +108,7 @@ COPY --from=phpspy-builder /binutils/binutils-2.25/bin/bin/strings gprofiler/res COPY --from=async-profiler-builder /async-profiler/build/jattach gprofiler/resources/java/jattach COPY --from=async-profiler-builder /async-profiler/build/async-profiler-version gprofiler/resources/java/async-profiler-version COPY --from=async-profiler-builder /async-profiler/build/libasyncProfiler.so gprofiler/resources/java/libasyncProfiler.so +COPY --from=async-profiler-builder /async-profiler/build/fdtransfer gprofiler/resources/java/fdtransfer COPY --from=rbspy-builder /rbspy/rbspy gprofiler/resources/ruby/rbspy diff --git a/gprofiler/profilers/java.py b/gprofiler/profilers/java.py index aff11240a..5c115ef8e 100644 --- a/gprofiler/profilers/java.py +++ b/gprofiler/profilers/java.py @@ -57,6 +57,11 @@ def jattach_path() -> str: return resource_path("java/jattach") +@functools.lru_cache(maxsize=1) +def fdtransfer_path() -> str: + return resource_path("java/fdtransfer") + + @functools.lru_cache(maxsize=1) def get_ap_version() -> str: return Path(resource_path("java/async-profiler-version")).read_text() @@ -67,12 +72,11 @@ class AsyncProfiledProcess: Represents a process profiled with async-profiler. """ - AP_EVENT_TYPE = "itimer" FORMAT_PARAMS = "ann,sig" OUTPUT_FORMAT = "collapsed" OUTPUTS_MODE = 0o622 # readable by root, writable by all - def __init__(self, process: Process, storage_dir: str, buildids: bool): + def __init__(self, process: Process, storage_dir: str, buildids: bool, mode: str): self.process = process self._process_root = f"/proc/{process.pid}/root" # not using storage_dir for AP itself on purpose: this path should remain constant for the lifetime @@ -98,6 +102,8 @@ def __init__(self, process: Process, storage_dir: str, buildids: bool): self._log_path_process = remove_prefix(self._log_path_host, self._process_root) self._buildids = buildids + assert mode in ("cpu", "itimer"), f"unexpected mode: {mode}" + self._mode = mode def __enter__(self): os.makedirs(self._ap_dir_host, 0o755, exist_ok=True) @@ -178,9 +184,10 @@ def _get_base_cmd(self) -> List[str]: def _get_start_cmd(self, interval: int) -> List[str]: return self._get_base_cmd() + [ - f"start,event={self.AP_EVENT_TYPE},file={self._output_path_process},{self.OUTPUT_FORMAT}," - f"{self.FORMAT_PARAMS},interval={interval},framebuf=2000000,log={self._log_path_process}" - f"{',buildids' if self._buildids else ''}", + f"start,event={self._mode},file={self._output_path_process}," + f"{self.OUTPUT_FORMAT},{self.FORMAT_PARAMS},interval={interval},framebuf=2000000," + f"log={self._log_path_process}{',buildids' if self._buildids else ''}" + f"{',fdtransfer' if self._mode == 'cpu' else ''}", ] def _get_stop_cmd(self, with_output: bool) -> List[str]: @@ -208,10 +215,19 @@ def _run_async_profiler(self, cmd: List[str]) -> None: raise JattachException(e.returncode, e.cmd, e.stdout, e.stderr, self.process.pid, ap_log) from None - def start_async_profiler(self, interval: int) -> bool: + def _run_fdtransfer(self) -> None: + """ + Start fdtransfer; it will fork & exit once ready, so we can continue with jattach. + """ + run_process([fdtransfer_path(), str(self.process.pid)], communicate=False) + + def start_async_profiler(self, interval: int, second_try: bool = False) -> bool: """ Returns True if profiling was started; False if it was already started. """ + if self._mode == "cpu" and not second_try: + self._run_fdtransfer() + start_cmd = self._get_start_cmd(interval) try: self._run_async_profiler(start_cmd) @@ -264,6 +280,14 @@ def read_output(self) -> Optional[str]: action="store_false", help="Skip the JDK version check (that is done before invoking async-profiler)", ), + ProfilerArgument( + "--java-async-profiler-mode", + dest="java_async_profiler_mode", + choices=["cpu", "itimer"], + default="cpu", + help="Select async-profiler's mode: 'cpu' (based on perf_events & fdtransfer) or 'itimer' (no perf_events)." + " Defaults to 'cpu'.", + ), ], ) class JavaProfiler(ProcessProfilerBase): @@ -277,6 +301,7 @@ def __init__( storage_dir: str, java_async_profiler_buildids: bool, java_version_check: bool, + java_async_profiler_mode: str, java_mode: str, ): assert java_mode == "ap", "Java profiler should not be initialized, wrong java_mode value given" @@ -286,6 +311,7 @@ def __init__( self._interval = int((1 / frequency) * 1000_000_000) self._buildids = java_async_profiler_buildids self._version_check = java_version_check + self._mode = java_async_profiler_mode def _is_jdk_version_supported(self, java_version_cmd_output: str) -> bool: return all(exclusion not in java_version_cmd_output for exclusion in self.JDK_EXCLUSIONS) @@ -342,7 +368,7 @@ def _profile_process(self, process: Process) -> Optional[StackToSampleCount]: logger.warning(f"Process {process.pid} running unsupported Java version, skipping...") return None - with AsyncProfiledProcess(process, self._storage_dir, self._buildids) as ap_proc: + with AsyncProfiledProcess(process, self._storage_dir, self._buildids, self._mode) as ap_proc: return self._profile_ap_process(ap_proc) def _profile_ap_process(self, ap_proc: AsyncProfiledProcess) -> Optional[StackToSampleCount]: @@ -355,7 +381,7 @@ def _profile_ap_process(self, ap_proc: AsyncProfiledProcess) -> Optional[StackTo # not using the "resume" action because I'm not sure it properly reconfigures all settings; while stop;start # surely does. ap_proc.stop_async_profiler(with_output=False) - started = ap_proc.start_async_profiler(self._interval) + started = ap_proc.start_async_profiler(self._interval, second_try=True) if not started: raise Exception( f"async-profiler is still running in {ap_proc.process.pid}, even after trying to stop it!" diff --git a/tests/test_java.py b/tests/test_java.py index efc6dab26..8a5880cc0 100644 --- a/tests/test_java.py +++ b/tests/test_java.py @@ -75,7 +75,7 @@ def test_java_async_profiler_stopped( # run "status" proc = psutil.Process(application_pid) - with AsyncProfiledProcessForTests(proc, tmp_path, False) as ap_proc: + with AsyncProfiledProcessForTests(proc, tmp_path, False, "cpu") as ap_proc: ap_proc.status_async_profiler() # printed the process' stdout, see ACTION_STATUS case in async-profiler/profiler.cpp expected_message = b"Profiling is running for " diff --git a/tests/test_sanity.py b/tests/test_sanity.py index 487792259..b127aada2 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -26,7 +26,7 @@ def test_java_from_host( application_pid: int, assert_collapsed, ) -> None: - with JavaProfiler(1000, 1, Event(), str(tmp_path), False, False, "ap") as profiler: + with JavaProfiler(1000, 1, Event(), str(tmp_path), False, "itimer", "ap") as profiler: process_collapsed = profiler.snapshot().get(application_pid) assert_collapsed(process_collapsed, check_comm=True) From 6cfdad568313e14a8689a6a552494ca9c8d484a6 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Mon, 4 Oct 2021 10:39:14 +0300 Subject: [PATCH 03/15] tests: Fix param --- tests/test_sanity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sanity.py b/tests/test_sanity.py index b127aada2..d4a320bd3 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -26,7 +26,7 @@ def test_java_from_host( application_pid: int, assert_collapsed, ) -> None: - with JavaProfiler(1000, 1, Event(), str(tmp_path), False, "itimer", "ap") as profiler: + with JavaProfiler(1000, 1, Event(), str(tmp_path), False, True, "itimer", "ap") as profiler: process_collapsed = profiler.snapshot().get(application_pid) assert_collapsed(process_collapsed, check_comm=True) From 762194add8dc106969fb653b1f5c86280440fa6a Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Mon, 4 Oct 2021 10:39:25 +0300 Subject: [PATCH 04/15] Update AP version --- scripts/async_profiler_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/async_profiler_build.sh b/scripts/async_profiler_build.sh index e79d22ed0..770abe3c1 100755 --- a/scripts/async_profiler_build.sh +++ b/scripts/async_profiler_build.sh @@ -5,8 +5,8 @@ # set -euo pipefail -VERSION=v2.0g5 -GIT_REV=3f1505bbf4a4ec1575b54804e1fbc7b9f38ad96b +VERSION=v2.5g1 +GIT_REV="02cb785f7cc0024581a1802126f51ed94d5b0475" git clone --depth 1 -b "$VERSION" https://github.com/Granulate/async-profiler.git && cd async-profiler && git reset --hard "$GIT_REV" set +eu # this funny script has errors :shrug: From d7d12bda0e0ae54a624c8e07d78c60daeb75163d Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Mon, 4 Oct 2021 10:41:16 +0300 Subject: [PATCH 05/15] java: Default to itimer, not cpu --- gprofiler/profilers/java.py | 4 ++-- tests/containers/java/Fibonacci.java | 14 ++++++++++++++ tests/test_java.py | 28 +++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/gprofiler/profilers/java.py b/gprofiler/profilers/java.py index 5c115ef8e..0f7404ed5 100644 --- a/gprofiler/profilers/java.py +++ b/gprofiler/profilers/java.py @@ -284,9 +284,9 @@ def read_output(self) -> Optional[str]: "--java-async-profiler-mode", dest="java_async_profiler_mode", choices=["cpu", "itimer"], - default="cpu", + default="itimer", help="Select async-profiler's mode: 'cpu' (based on perf_events & fdtransfer) or 'itimer' (no perf_events)." - " Defaults to 'cpu'.", + " Defaults to '%(default)s'.", ), ], ) diff --git a/tests/containers/java/Fibonacci.java b/tests/containers/java/Fibonacci.java index 75a50fccc..8f66ac5d7 100644 --- a/tests/containers/java/Fibonacci.java +++ b/tests/containers/java/Fibonacci.java @@ -2,12 +2,26 @@ // Copyright (c) Granulate. All rights reserved. // Licensed under the AGPL3 License. See LICENSE.md in the project root for license information. // +import java.io.File; + public class Fibonacci { private static long fibonacci(final int n) { return n <= 1 ? n : fibonacci(n - 1) + fibonacci(n - 2); } public static void main(final String[] args) { + Thread thread = new Thread() { + public void run() { + while (true) { + try { + new File("/").list(); + } catch (Exception e) { + } + } + } + }; + thread.start(); + while (true) { fibonacci(30); } diff --git a/tests/test_java.py b/tests/test_java.py index 8a5880cc0..fdfbc12d3 100644 --- a/tests/test_java.py +++ b/tests/test_java.py @@ -5,6 +5,7 @@ import time from pathlib import Path from subprocess import Popen +from threading import Event from typing import Callable, List, Mapping, Optional import psutil @@ -14,8 +15,8 @@ from docker.models.images import Image from gprofiler.merge import parse_one_collapsed -from gprofiler.profilers.java import AsyncProfiledProcess -from tests.utils import run_gprofiler_in_container +from gprofiler.profilers.java import AsyncProfiledProcess, JavaProfiler +from tests.utils import assert_function_in_collapsed, run_gprofiler_in_container # adds the "status" command to AsyncProfiledProcess from gProfiler. @@ -24,7 +25,11 @@ def status_async_profiler(self): self._run_async_profiler(self._get_base_cmd() + [f"status,log={self._log_path_process}"]) -@pytest.mark.parametrize("runtime", ["java"]) +@pytest.fixture +def runtime() -> str: + return "java" + + def test_java_async_profiler_stopped( docker_client: DockerClient, application_pid: int, @@ -101,3 +106,20 @@ def test_java_async_profiler_stopped( collapsed = parse_one_collapsed(Path(output_directory / "last_profile.col").read_text()) assert_collapsed(collapsed) + + +@pytest.mark.parametrize("in_container", [True]) +def test_java_async_profiler_cpu_mode( + tmp_path: Path, + application_pid: int, + assert_collapsed, +) -> None: + """ + Run Java in a container and enable async-profiler in CPU mode, make sure we get kernel stacks. + """ + with JavaProfiler(1000, 1, Event(), str(tmp_path), False, True, "cpu", "ap") as profiler: + process_collapsed = profiler.snapshot().get(application_pid) + assert_collapsed(process_collapsed, check_comm=True) + assert_function_in_collapsed( + "do_syscall_64_[k]", "java", process_collapsed, True + ) # ensure kernels stacks exist From 78e89a3c1d7f5653610caec5c64d170769a09c7a Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Mon, 4 Oct 2021 13:36:32 +0300 Subject: [PATCH 06/15] Add fdtransfer in exe --- pyi.Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/pyi.Dockerfile b/pyi.Dockerfile index 49d483864..fdb113e22 100644 --- a/pyi.Dockerfile +++ b/pyi.Dockerfile @@ -126,6 +126,7 @@ COPY --from=centos:6 /usr/bin/xargs gprofiler/resources/php/xargs COPY --from=async-profiler-builder /async-profiler/build/jattach gprofiler/resources/java/jattach COPY --from=async-profiler-builder /async-profiler/build/async-profiler-version gprofiler/resources/java/async-profiler-version COPY --from=async-profiler-builder /async-profiler/build/libasyncProfiler.so gprofiler/resources/java/libasyncProfiler.so +COPY --from=async-profiler-builder /async-profiler/build/fdtransfer gprofiler/resources/java/fdtransfer COPY --from=burn-builder /go/burn/burn gprofiler/resources/burn From 57d779650918934577ea5b2186ef836ec779657a Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 5 Oct 2021 11:19:24 +0300 Subject: [PATCH 07/15] perf: Always print its stderr If perf fails, then wait_for_file_by_prefix() will raise. This way, we'll see the output in either case. --- gprofiler/profilers/perf.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gprofiler/profilers/perf.py b/gprofiler/profilers/perf.py index 8b7fa68ed..c5c574ff6 100644 --- a/gprofiler/profilers/perf.py +++ b/gprofiler/profilers/perf.py @@ -84,12 +84,14 @@ def switch_output(self) -> None: self._process.send_signal(signal.SIGUSR2) def wait_and_script(self) -> str: - perf_data = wait_for_file_by_prefix(f"{self._output_path}.", self._dump_timeout_s, self._stop_event) - - # using read1() which performs just a single read() call and doesn't read until EOF - # (unlike Popen.communicate()) - assert self._process is not None - logger.debug(f"perf stderr: {self._process.stderr.read1(4096)}") + try: + perf_data = wait_for_file_by_prefix(f"{self._output_path}.", self._dump_timeout_s, self._stop_event) + finally: + # always read its stderr + # using read1() which performs just a single read() call and doesn't read until EOF + # (unlike Popen.communicate()) + assert self._process is not None + logger.debug(f"perf stderr: {self._process.stderr.read1(4096)}") if self._inject_jit: inject_data = Path(f"{str(perf_data)}.inject") From eb5d74055a7df85e098e35419bf0a2fac6351ab5 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 5 Oct 2021 12:08:54 +0300 Subject: [PATCH 08/15] perf: Explicitly pass "-m" --- gprofiler/profilers/perf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gprofiler/profilers/perf.py b/gprofiler/profilers/perf.py index c5c574ff6..25466dc97 100644 --- a/gprofiler/profilers/perf.py +++ b/gprofiler/profilers/perf.py @@ -27,6 +27,8 @@ def perf_path() -> str: class PerfProcess: _dump_timeout_s = 5 _poll_timeout_s = 5 + _mmap_size_fp = 129 # default number of pages used by "perf record" when perf_event_mlock_kb=516 + _mmap_size_dwarf = 257 # doubling the previous def __init__( self, @@ -56,6 +58,12 @@ def _get_perf_cmd(self) -> List[str]: "-o", self._output_path, "--switch-output=signal", + # explicitly pass '-m', otherwise perf defaults to deriving this number from perf_event_mlock_kb, + # and it ends up using it entirely (and we want to spare some for async-profiler) + # this number scales linearly with the number of active cores (so we don't need to do this calculation + # here) + "-m", + str(self._mmap_size_fp) if self._type == "fp" else str(self._mmap_size_dwarf), ] + self._extra_args def start(self) -> None: From b44ad82fccd7dd0ab82834e23a4c13549d4f19e4 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 5 Oct 2021 12:09:44 +0300 Subject: [PATCH 09/15] utils: Add utils to read/write /proc/sys/kernel/perf_event_mlock_kb --- gprofiler/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/gprofiler/utils.py b/gprofiler/utils.py index 4471274b3..dda395632 100644 --- a/gprofiler/utils.py +++ b/gprofiler/utils.py @@ -501,3 +501,14 @@ def process_comm(process: Process) -> str: name_line = status.splitlines()[0] assert name_line.startswith("Name:\t") return name_line.split("\t", 1)[1] + + +PERF_EVENT_MLOCK_KB = "/proc/sys/kernel/perf_event_mlock_kb" + + +def read_perf_event_mlock_kb() -> int: + return int(Path(PERF_EVENT_MLOCK_KB).read_text()) + + +def write_perf_event_mlock_kb(value: int) -> None: + Path(PERF_EVENT_MLOCK_KB).write_text(str(value)) From a813d1cb372108b79c900e216ce1b6b7f7632aac Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 5 Oct 2021 12:10:00 +0300 Subject: [PATCH 10/15] java: Update perf_event_mlock_kb in async-profiler CPU mode --- gprofiler/profilers/java.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/gprofiler/profilers/java.py b/gprofiler/profilers/java.py index 0f7404ed5..b875e8fbf 100644 --- a/gprofiler/profilers/java.py +++ b/gprofiler/profilers/java.py @@ -24,6 +24,7 @@ get_process_nspid, pgrep_maps, process_comm, + read_perf_event_mlock_kb, remove_path, remove_prefix, resolve_proc_root_links, @@ -31,6 +32,7 @@ run_in_ns, run_process, touch_path, + write_perf_event_mlock_kb, ) logger = get_logger_adapter(__name__) @@ -293,6 +295,8 @@ def read_output(self) -> Optional[str]: class JavaProfiler(ProcessProfilerBase): JDK_EXCLUSIONS = ["OpenJ9", "Zing"] + _new_perf_event_mlock_kb = 8192 + def __init__( self, frequency: int, @@ -312,6 +316,7 @@ def __init__( self._buildids = java_async_profiler_buildids self._version_check = java_version_check self._mode = java_async_profiler_mode + self._saved_mlock: Optional[int] = None def _is_jdk_version_supported(self, java_version_cmd_output: str) -> bool: return all(exclusion not in java_version_cmd_output for exclusion in self.JDK_EXCLUSIONS) @@ -412,3 +417,33 @@ def _profile_ap_process(self, ap_proc: AsyncProfiledProcess) -> Optional[StackTo def _select_processes_to_profile(self) -> List[Process]: return pgrep_maps(r"^.+/libjvm\.so$") + + def start(self) -> None: + super().start() + if self._mode == "cpu": + # short tech dive: + # perf has this accounting logic when mmaping perf_event_open fds (from kernel/events/core.c:perf_mmap()) + # 1. if the *user* locked pages have not exceeded the limit of perf_event_mlock_kb, charge from + # that limit. + # 2. after exceeding, charge from ulimit (mlock) + # 3. if both are expended, fail the mmap unless task is privileged / perf_event_paranoid is + # permissive (-1). + # + # in our case, we run "perf" alongside and it starts before async-profiler. so its mmaps get charged from + # the user (root) locked pages. + # then, when async-profiler starts, and we profile a Java process running in a container as root (which is + # common), it is treated as the same user, and since its limit is expended, and the container is not + # privileged & has low mlock ulimit (Docker defaults to 64) - async-profiler fails to mmap! + # instead, we update perf_event_mlock_kb here for the lifetime of gProfiler, leaving some room + # for async-profiler (and also make sure "perf" doesn't use it in its entirety) + # + # (alternatively, we could change async-profiler's fdtransfer to do the mmap as well as the perf_event_open; + # this way, the privileged program gets charged, and when async-profiler mmaps it again, it will use the + # same pages and won't get charged). + self._saved_mlock = read_perf_event_mlock_kb() + write_perf_event_mlock_kb(self._new_perf_event_mlock_kb) + + def stop(self) -> None: + super().stop() + if self._saved_mlock is not None: + write_perf_event_mlock_kb(self._saved_mlock) From e44d4982988caa080a2fedb9ba951e213acbde56 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Tue, 5 Oct 2021 12:30:30 +0300 Subject: [PATCH 11/15] Add assert messages --- gprofiler/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gprofiler/utils.py b/gprofiler/utils.py index 1d8ac3bd3..98cce7aba 100644 --- a/gprofiler/utils.py +++ b/gprofiler/utils.py @@ -175,7 +175,7 @@ def run_process( try: communicate_kwargs = dict(input=stdin) if stdin is not None else {} if stop_event is None: - assert timeout is None + assert timeout is None, f"expected no timeout, got {timeout!r}" if communicate: # wait for stderr & stdout to be closed stdout, stderr = process.communicate(timeout=timeout, **communicate_kwargs) @@ -183,7 +183,7 @@ def run_process( # just wait for the process to exit process.wait() else: - assert communicate + assert communicate, "expected communicate=True if stop_event is given" end_time = (time.monotonic() + timeout) if timeout is not None else None while True: try: From 0f23b09e89b2693457a138b124c8783ef516c558 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 6 Oct 2021 11:05:08 +0300 Subject: [PATCH 12/15] Empty From aa3fc8a1bf746abeabdfd62cb311338e103678f5 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 6 Oct 2021 18:51:24 +0300 Subject: [PATCH 13/15] Nicer with dict --- gprofiler/profilers/perf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gprofiler/profilers/perf.py b/gprofiler/profilers/perf.py index 25466dc97..f96af5fb0 100644 --- a/gprofiler/profilers/perf.py +++ b/gprofiler/profilers/perf.py @@ -27,8 +27,9 @@ def perf_path() -> str: class PerfProcess: _dump_timeout_s = 5 _poll_timeout_s = 5 - _mmap_size_fp = 129 # default number of pages used by "perf record" when perf_event_mlock_kb=516 - _mmap_size_dwarf = 257 # doubling the previous + # default number of pages used by "perf record" when perf_event_mlock_kb=516 + # we use double for dwarf. + _mmap_sizes = {"fp": 129, "dwarf": 257} def __init__( self, @@ -63,7 +64,7 @@ def _get_perf_cmd(self) -> List[str]: # this number scales linearly with the number of active cores (so we don't need to do this calculation # here) "-m", - str(self._mmap_size_fp) if self._type == "fp" else str(self._mmap_size_dwarf), + str(self._mmap_sizes[self._type]), ] + self._extra_args def start(self) -> None: From 65ecd364ff8af09b3ac2c0fc403c06325928ad13 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Wed, 6 Oct 2021 19:14:57 +0300 Subject: [PATCH 14/15] tests: Print logs upon test failure --- tests/test_java.py | 1 + tests/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_java.py b/tests/test_java.py index fdfbc12d3..d1314fac1 100644 --- a/tests/test_java.py +++ b/tests/test_java.py @@ -76,6 +76,7 @@ def test_java_async_profiler_stopped( container.kill("SIGKILL") finally: if container is not None: + print("gProfiler container logs:", container.logs().decode(), sep="\n") container.remove(force=True) # run "status" diff --git a/tests/utils.py b/tests/utils.py index 1e973f247..8709488ad 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -36,7 +36,11 @@ def run_privileged_container( assert isinstance(container_or_logs, bytes), container_or_logs container, logs = None, container_or_logs.decode() - print("Container logs:", logs) # print, so failing tests display it + # print, so failing tests display it + print( + "Container logs:", + logs if len(logs) > 0 else "(empty, possibly because container was detached and is running now)", + ) return container, logs From b86fc796d17b11340d15fa9b5cfbd4dd03c18453 Mon Sep 17 00:00:00 2001 From: Yonatan Goldschmidt Date: Thu, 7 Oct 2021 12:49:31 +0300 Subject: [PATCH 15/15] tests: java: Write AP's "status" output to a file --- tests/test_java.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/tests/test_java.py b/tests/test_java.py index d1314fac1..173881e9e 100644 --- a/tests/test_java.py +++ b/tests/test_java.py @@ -22,7 +22,9 @@ # adds the "status" command to AsyncProfiledProcess from gProfiler. class AsyncProfiledProcessForTests(AsyncProfiledProcess): def status_async_profiler(self): - self._run_async_profiler(self._get_base_cmd() + [f"status,log={self._log_path_process}"]) + self._run_async_profiler( + self._get_base_cmd() + [f"status,log={self._log_path_process},file={self._output_path_process}"] + ) @pytest.fixture @@ -79,24 +81,15 @@ def test_java_async_profiler_stopped( print("gProfiler container logs:", container.logs().decode(), sep="\n") container.remove(force=True) - # run "status" proc = psutil.Process(application_pid) - with AsyncProfiledProcessForTests(proc, tmp_path, False, "cpu") as ap_proc: - ap_proc.status_async_profiler() - # printed the process' stdout, see ACTION_STATUS case in async-profiler/profiler.cpp - expected_message = b"Profiling is running for " - assert any("libasyncProfiler.so" in m.path for m in proc.memory_maps()) - # container case - if application_docker_container is not None: - assert expected_message in application_docker_container.logs() - # else, process - else: - assert application_process is not None - assert application_process.stdout is not None - # mypy complains about read1 - assert expected_message in application_process.stdout.read1(4096) # type: ignore + # run "status" + with AsyncProfiledProcessForTests(proc, tmp_path, False, "itimer") as ap_proc: + ap_proc.status_async_profiler() + + # printed the output file, see ACTION_STATUS case in async-profiler/profiler.cpp\ + assert "Profiling is running for " in ap_proc.read_output() # then start again, with 1 second assert args[2] == "1000"