intel · Jongy · Jul 24, 2022 · Jul 23, 2022 · Jul 23, 2022 · Jul 23, 2022
diff --git a/gprofiler/merge.py b/gprofiler/merge.py
@@ -150,9 +150,9 @@ def add_highest_avg_depth_stacks_per_process(
             merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters
             continue
 
-        fp_frame_count_average = _get_average_frame_count(fp_collapsed_stacks_counters.keys())
+        fp_frame_count_average = get_average_frame_count(fp_collapsed_stacks_counters.keys())
         dwarf_collapsed_stacks_counters = dwarf_perf[pid]
-        dwarf_frame_count_average = _get_average_frame_count(dwarf_collapsed_stacks_counters.keys())
+        dwarf_frame_count_average = get_average_frame_count(dwarf_collapsed_stacks_counters.keys())
         if fp_frame_count_average > dwarf_frame_count_average:
             merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters
         else:
@@ -179,8 +179,37 @@ def scale_sample_counts(stacks: StackToSampleCount, ratio: float) -> StackToSamp
     return scaled_stacks
 
 
-def _get_average_frame_count(stacks: Iterable[str]) -> float:
-    frame_count_per_samples = [sample.count(";") for sample in stacks]
+def get_average_frame_count(samples: Iterable[str]) -> float:
+    """
+    Get the average count of user (non-kernel) frames per sample.
+
+    Kernel frames (suffixed "_[k]") are not counted because this function is used to determine
+    whether FP stacks or DWARF stacks are to be used; kernel stacks are presumably collected the
+    same way in both modes, so they are left out of this heuristic.
+
+    Each sample is a collapsed stack ("a;b;c"), optionally followed by " <count>" ("a;b;c 1").
+    Returns 0.0 if there are no samples.
+    """
+    frame_count_per_samples = []
+    for sample in samples:
+        kernel_split = sample.split("_[k];", 1)
+        if len(kernel_split) == 1:
+            kernel_split = sample.split("_[k] ", 1)
+        if len(kernel_split) == 1 and sample.endswith("_[k]"):
+            # a single kernel frame at the very end of a stack that carries no " <count>" suffix.
+            kernel_split = [sample[: -len("_[k]")], ""]
+
+        # Do we have any kernel frames in this sample?
+        if len(kernel_split) > 1:
+            # example: "a;b;c;d_[k];e_[k] 1" should return the same value as "a;b;c 1", so we don't
+            # add 1 to the frames count like we do in the other branch.
+            frame_count_per_samples.append(kernel_split[0].count(";"))
+        else:
+            # no kernel frames, so e.g "a;b;c 1" and frame count is one more than ";" count.
+            frame_count_per_samples.append(kernel_split[0].count(";") + 1)
+    if not frame_count_per_samples:
+        # nothing to average; avoid a ZeroDivisionError on empty input.
+        return 0.0
     return sum(frame_count_per_samples) / len(frame_count_per_samples)
 
 

diff --git a/gprofiler/profilers/perf.py b/gprofiler/profilers/perf.py
@@ -27,6 +27,8 @@
 
 logger = get_logger_adapter(__name__)
 
+DEFAULT_PERF_DWARF_STACK_SIZE = 8192
+
 
 # TODO: automatically disable this profiler if can_i_use_perf_events() returns False?
 class PerfProcess:
@@ -145,7 +147,7 @@ def wait_and_script(self) -> str:
             help="The max stack size for the Dwarf perf, in bytes. Must be <=65528."
             " Relevant for --perf-mode dwarf|smart. Default: %(default)s",
             type=int,
-            default=8192,
+            default=DEFAULT_PERF_DWARF_STACK_SIZE,
             dest="perf_dwarf_stack_size",
         )
     ],

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -94,6 +94,10 @@ def command_line(runtime: str, java_command_line: List[str]) -> List[str]:
             "--interpreted-frames-native-stack",
             str(CONTAINERS_DIRECTORY / "nodejs/fibonacci.js"),
         ],
+        # these runtimes have no non-container application, so their command line is a command
+        # that fails immediately, in case it is ever used.
+        "native_fp": ["/bin/false"],
+        "native_dwarf": ["/bin/false"],
     }[runtime]
 
 
@@ -157,6 +161,16 @@ def gprofiler_docker_image(docker_client: DockerClient) -> Iterable[Image]:
 def application_docker_images(docker_client: DockerClient) -> Iterable[Mapping[str, Image]]:
     images = {}
     for runtime in os.listdir(str(CONTAINERS_DIRECTORY)):
+        if runtime == "native":
+            path = CONTAINERS_DIRECTORY / runtime
+            images[runtime + "_fp"], _ = docker_client.images.build(
+                path=str(path), dockerfile=str(path / "fp.Dockerfile"), rm=True
+            )
+            images[runtime + "_dwarf"], _ = docker_client.images.build(
+                path=str(path), dockerfile=str(path / "dwarf.Dockerfile"), rm=True
+            )
+            continue
+
         images[runtime], _ = docker_client.images.build(path=str(CONTAINERS_DIRECTORY / runtime), rm=True)
 
         # for java - add additional images

diff --git a/tests/containers/native/dwarf.Dockerfile b/tests/containers/native/dwarf.Dockerfile
@@ -0,0 +1,10 @@
+FROM gcc:8
+
+COPY native.c .
+
+RUN gcc -g -fomit-frame-pointer native.c -o native
+
+# ensure it's built with debug info; the image build fails if "file" does not report it
+RUN file native | grep -q "with debug_info"
+
+CMD ["./native"]
diff --git a/tests/containers/native/fp.Dockerfile b/tests/containers/native/fp.Dockerfile
@@ -0,0 +1,10 @@
+FROM gcc:8
+
+COPY native.c .
+
+RUN gcc -fno-omit-frame-pointer native.c -o native
+
+# ensure it's built without debug info; -z makes grep read the whole "file" output as one record, so -v fails on a match
+RUN file native | grep -zvq "with debug_info"
+
+CMD ["./native"]
diff --git a/tests/containers/native/native.c b/tests/containers/native/native.c
@@ -0,0 +1,14 @@
+// This one isn't fibonacci like the others, because it's used in tests for perf smart mode,
+// which require consistent stacktraces (and fibonacci is not consistent; these stacks are)
+static void recursive(unsigned int n) {
+    if (n > 0) {
+        recursive(n - 1);
+    }
+
+    while (1) ;  // reached only by the innermost call (n == 0), which spins here and never returns
+}
+
+int main(void) {
+    recursive(10);
+    return 0;  // not reached: recursive() never returns
+}
diff --git a/tests/test_merge.py b/tests/test_merge.py
@@ -0,0 +1,31 @@
+#
+# Copyright (c) Granulate. All rights reserved.
+# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
+#
+
+"""
+Tests for the logic from gprofiler/merge.py
+"""
+
+from typing import List
+
+import pytest
+
+from gprofiler.merge import get_average_frame_count
+
+
+@pytest.mark.parametrize(
+    "samples,count",
+    [
+        (["a 1"], 1),
+        (["d_[k] 1"], 0),
+        (["d_[k];e_[k] 1"], 0),
+        (["a;b;c;d_[k] 1"], 3),
+        (["a;b;c;d_[k];e_[k] 1"], 3),
+        (["a 1", "a;b 1"], 1.5),
+        (["d_[k] 1", "a;d_[k] 1"], 0.5),
+    ],
+)
+def test_get_average_frame_count(samples: List[str], count: float) -> None:
+    """Kernel ("_[k]") frames must not be counted; the result is the mean over all samples."""
+    assert get_average_frame_count(samples) == count
diff --git a/tests/test_perf.py b/tests/test_perf.py
@@ -0,0 +1,51 @@
+#
+# Copyright (c) Granulate. All rights reserved.
+# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
+#
+
+from pathlib import Path
+from threading import Event
+
+import pytest
+
+from gprofiler.profilers.perf import DEFAULT_PERF_DWARF_STACK_SIZE, SystemProfiler
+from tests.utils import assert_function_in_collapsed, is_function_in_collapsed, snapshot_pid_collapsed
+
+
+@pytest.mark.parametrize("runtime", ["native_fp", "native_dwarf"])
+@pytest.mark.parametrize("perf_mode", ["fp", "dwarf", "smart"])
+@pytest.mark.parametrize("in_container", [True])  # native app is built only for container
+def test_perf(
+    tmp_path: Path,
+    application_pid: int,
+    runtime: str,
+    perf_mode: str,
+) -> None:
+    """Profile the native FP and DWARF apps with SystemProfiler in each perf mode and check the expected frames."""
+    with SystemProfiler(
+        99,
+        3,
+        Event(),
+        str(tmp_path),
+        False,
+        perf_mode=perf_mode,
+        perf_inject=False,
+        perf_dwarf_stack_size=DEFAULT_PERF_DWARF_STACK_SIZE,
+    ) as profiler:
+        process_collapsed = snapshot_pid_collapsed(profiler, application_pid)
+
+        if runtime == "native_dwarf":
+            # app is built with DWARF info and without FP, so we expect to see a callstack only in DWARF or smart modes.
+            assert is_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed) ^ bool(
+                perf_mode not in ("dwarf", "smart")
+            )
+        else:
+            # app is built with FP and without DWARF info, but DWARF mode is able to do FP unwinding,
+            # so it should always succeed.
+            assert runtime == "native_fp"
+            assert_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed)
+
+        # expect to see libc stacks only when collecting with DWARF or smart ("found ^ not expected" == "found iff expected").
+        assert is_function_in_collapsed(";_start;__libc_start_main;main;", process_collapsed) ^ bool(
+            perf_mode not in ("dwarf", "smart")
+        )
diff --git a/tests/utils.py b/tests/utils.py
@@ -124,11 +124,15 @@ def chmod_path_parts(path: Path, add_mode: int) -> None:
         os.chmod(subpath, os.stat(subpath).st_mode | add_mode)
 
 
+def is_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> bool:
+    """Return whether function_name is a substring of any stack (key) in collapsed."""
+    return any(function_name in record for record in collapsed)
+
+
 def assert_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> None:
+    """Assert that function_name appears in some stack of collapsed; prints collapsed before asserting."""
     print(f"collapsed: {collapsed}")
-    assert any(
-        (function_name in record) for record in collapsed.keys()
-    ), f"function {function_name!r} missing in collapsed data!"
+    assert is_function_in_collapsed(function_name, collapsed), f"function {function_name!r} missing in collapsed data!"
 
 
 def snapshot_one_profile(profiler: ProfilerInterface) -> ProfileData: