diff --git a/gprofiler/merge.py b/gprofiler/merge.py index 2508bc154..4c3404fc3 100644 --- a/gprofiler/merge.py +++ b/gprofiler/merge.py @@ -150,9 +150,9 @@ def add_highest_avg_depth_stacks_per_process( merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters continue - fp_frame_count_average = _get_average_frame_count(fp_collapsed_stacks_counters.keys()) + fp_frame_count_average = get_average_frame_count(fp_collapsed_stacks_counters.keys()) dwarf_collapsed_stacks_counters = dwarf_perf[pid] - dwarf_frame_count_average = _get_average_frame_count(dwarf_collapsed_stacks_counters.keys()) + dwarf_frame_count_average = get_average_frame_count(dwarf_collapsed_stacks_counters.keys()) if fp_frame_count_average > dwarf_frame_count_average: merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters else: @@ -179,8 +179,27 @@ def scale_sample_counts(stacks: StackToSampleCount, ratio: float) -> StackToSamp return scaled_stacks -def _get_average_frame_count(stacks: Iterable[str]) -> float: - frame_count_per_samples = [sample.count(";") for sample in stacks] +def get_average_frame_count(samples: Iterable[str]) -> float: + """ + Get the average frame count for all samples. + Avoids counting kernel frames because this function is used to determine whether FP stacks + or DWARF stacks are to be used. Kernel stacks are collected regardless of FP or DWARF, so we don't + count them in this heuristic. + """ + frame_count_per_samples = [] + for sample in samples: + kernel_split = sample.split("_[k];", 1) + if len(kernel_split) == 1: + kernel_split = sample.split("_[k] ", 1) + + # Do we have any kernel frames in this sample? + if len(kernel_split) > 1: + # example: "a;b;c;d_[k];e_[k] 1" should return the same value as "a;b;c 1", so we don't + # add 1 to the frames count like we do in the other branch. + frame_count_per_samples.append(kernel_split[0].count(";")) + else: + # no kernel frames, so e.g "a;b;c 1" and frame count is one more than ";" count. 
+ frame_count_per_samples.append(kernel_split[0].count(";") + 1) return sum(frame_count_per_samples) / len(frame_count_per_samples) diff --git a/gprofiler/profilers/perf.py b/gprofiler/profilers/perf.py index 8ac90c794..f3be3a5f5 100644 --- a/gprofiler/profilers/perf.py +++ b/gprofiler/profilers/perf.py @@ -27,6 +27,8 @@ logger = get_logger_adapter(__name__) +DEFAULT_PERF_DWARF_STACK_SIZE = 8192 + # TODO: automatically disable this profiler if can_i_use_perf_events() returns False? class PerfProcess: @@ -145,7 +147,7 @@ def wait_and_script(self) -> str: help="The max stack size for the Dwarf perf, in bytes. Must be <=65528." " Relevant for --perf-mode dwarf|smart. Default: %(default)s", type=int, - default=8192, + default=DEFAULT_PERF_DWARF_STACK_SIZE, dest="perf_dwarf_stack_size", ) ], diff --git a/tests/conftest.py b/tests/conftest.py index c8c7bb906..d06a2abc5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,6 +94,10 @@ def command_line(runtime: str, java_command_line: List[str]) -> List[str]: "--interpreted-frames-native-stack", str(CONTAINERS_DIRECTORY / "nodejs/fibonacci.js"), ], + # these runtimes do not have a non-container application, so it will result in an error if the + # command line is used. 
+ "native_fp": ["/bin/false"], + "native_dwarf": ["/bin/false"], }[runtime] @@ -157,6 +161,16 @@ def gprofiler_docker_image(docker_client: DockerClient) -> Iterable[Image]: def application_docker_images(docker_client: DockerClient) -> Iterable[Mapping[str, Image]]: images = {} for runtime in os.listdir(str(CONTAINERS_DIRECTORY)): + if runtime == "native": + path = CONTAINERS_DIRECTORY / runtime + images[runtime + "_fp"], _ = docker_client.images.build( + path=str(path), dockerfile=str(path / "fp.Dockerfile"), rm=True + ) + images[runtime + "_dwarf"], _ = docker_client.images.build( + path=str(path), dockerfile=str(path / "dwarf.Dockerfile"), rm=True + ) + continue + images[runtime], _ = docker_client.images.build(path=str(CONTAINERS_DIRECTORY / runtime), rm=True) # for java - add additional images diff --git a/tests/containers/native/dwarf.Dockerfile b/tests/containers/native/dwarf.Dockerfile new file mode 100644 index 000000000..0ec605311 --- /dev/null +++ b/tests/containers/native/dwarf.Dockerfile @@ -0,0 +1,10 @@ +FROM gcc:8 + +COPY native.c . + +RUN gcc -g -fomit-frame-pointer native.c -o native + +# ensure it's built with debug info +RUN file native | grep -q "with debug_info" + +CMD ["./native"] diff --git a/tests/containers/native/fp.Dockerfile b/tests/containers/native/fp.Dockerfile new file mode 100644 index 000000000..94382d68f --- /dev/null +++ b/tests/containers/native/fp.Dockerfile @@ -0,0 +1,10 @@ +FROM gcc:8 + +COPY native.c . 
+ +RUN gcc -fno-omit-frame-pointer native.c -o native + +# ensure it's built without debug info +RUN file native | grep -zvq "with debug_info" + +CMD ["./native"] diff --git a/tests/containers/native/native.c b/tests/containers/native/native.c new file mode 100644 index 000000000..21b4ff721 --- /dev/null +++ b/tests/containers/native/native.c @@ -0,0 +1,14 @@ +// This one isn't fibonacci like the others, because it's used in tests for perf smart mode, +// which require consistent stacktraces (and fibonacci is not consistent; these stacks are) +static void recursive(unsigned int n) { + if (n > 0) { + recursive(n - 1); + } + + while (1) ; +} + +int main(void) { + recursive(10); + return 0; +} diff --git a/tests/test_merge.py b/tests/test_merge.py new file mode 100644 index 000000000..90900d18e --- /dev/null +++ b/tests/test_merge.py @@ -0,0 +1,28 @@ +# +# Copyright (c) Granulate. All rights reserved. +# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information. +# + +""" +Tests for the logic from gprofiler/merge.py +""" + +import pytest + +from gprofiler.merge import get_average_frame_count + + +@pytest.mark.parametrize( + "samples,count", + [ + (["a 1"], 1), + (["d_[k] 1"], 0), + (["d_[k];e_[k] 1"], 0), + (["a;b;c;d_[k] 1"], 3), + (["a;b;c;d_[k];e_[k] 1"], 3), + (["a 1", "a;b 1"], 1.5), + (["d_[k] 1", "a;d_[k] 1"], 0.5), + ], +) +def test_get_average_frame_count(samples: str, count: float) -> None: + assert get_average_frame_count(samples) == count diff --git a/tests/test_perf.py b/tests/test_perf.py new file mode 100644 index 000000000..d8375d908 --- /dev/null +++ b/tests/test_perf.py @@ -0,0 +1,51 @@ +# +# Copyright (c) Granulate. All rights reserved. +# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information. 
+# + +from pathlib import Path +from threading import Event + +import pytest + +from gprofiler.profilers.perf import DEFAULT_PERF_DWARF_STACK_SIZE, SystemProfiler +from tests.utils import assert_function_in_collapsed, is_function_in_collapsed, snapshot_pid_collapsed + + +@pytest.mark.parametrize("runtime", ["native_fp", "native_dwarf"]) +@pytest.mark.parametrize("perf_mode", ["fp", "dwarf", "smart"]) +@pytest.mark.parametrize("in_container", [True]) # native app is built only for container +def test_perf( + tmp_path: Path, + application_pid: int, + runtime: str, + perf_mode: str, +) -> None: + """Profile the native test app (FP or DWARF build) with perf in each perf mode and check which stacks are unwound.""" + with SystemProfiler( + 99, + 3, + Event(), + str(tmp_path), + False, + perf_mode=perf_mode, + perf_inject=False, + perf_dwarf_stack_size=DEFAULT_PERF_DWARF_STACK_SIZE, + ) as profiler: + process_collapsed = snapshot_pid_collapsed(profiler, application_pid) + + if runtime == "native_dwarf": + # app is built with DWARF info and without FP, so we expect to see a callstack only in DWARF or smart modes. + assert is_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed) ^ bool( + perf_mode not in ("dwarf", "smart") + ) + else: + # app is built with FP and without DWARF info, but DWARF mode is able to do FP unwinding, + # so it should always succeed. + assert runtime == "native_fp" + assert_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed) + + # expect to see libc stacks only when collecting with DWARF or smart. 
+ assert is_function_in_collapsed(";_start;__libc_start_main;main;", process_collapsed) ^ bool( + perf_mode not in ("dwarf", "smart") + ) diff --git a/tests/utils.py b/tests/utils.py index fbbbbf51e..014b369bb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -124,11 +124,13 @@ def chmod_path_parts(path: Path, add_mode: int) -> None: os.chmod(subpath, os.stat(subpath).st_mode | add_mode) +def is_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> bool: + return any((function_name in record) for record in collapsed.keys()) + + def assert_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> None: print(f"collapsed: {collapsed}") - assert any( - (function_name in record) for record in collapsed.keys() - ), f"function {function_name!r} missing in collapsed data!" + assert is_function_in_collapsed(function_name, collapsed), f"function {function_name!r} missing in collapsed data!" def snapshot_one_profile(profiler: ProfilerInterface) -> ProfileData: