Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions gprofiler/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ def add_highest_avg_depth_stacks_per_process(
merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters
continue

fp_frame_count_average = _get_average_frame_count(fp_collapsed_stacks_counters.keys())
fp_frame_count_average = get_average_frame_count(fp_collapsed_stacks_counters.keys())
dwarf_collapsed_stacks_counters = dwarf_perf[pid]
dwarf_frame_count_average = _get_average_frame_count(dwarf_collapsed_stacks_counters.keys())
dwarf_frame_count_average = get_average_frame_count(dwarf_collapsed_stacks_counters.keys())
if fp_frame_count_average > dwarf_frame_count_average:
merged_pid_to_stacks_counters[pid] = fp_collapsed_stacks_counters
else:
Expand All @@ -179,8 +179,27 @@ def scale_sample_counts(stacks: StackToSampleCount, ratio: float) -> StackToSamp
return scaled_stacks


def _get_average_frame_count(stacks: Iterable[str]) -> float:
frame_count_per_samples = [sample.count(";") for sample in stacks]
def get_average_frame_count(samples: Iterable[str]) -> float:
    """
    Get the average number of user-space frames per sample.

    Each sample is a collapsed stack: frame names joined by ";", optionally followed by
    " <count>" (e.g. "a;b;c;d_[k];e_[k] 1"). Frames whose name ends with "_[k]" are kernel frames.

    Kernel frames are not counted, because this average is used to decide whether the FP stacks
    or the DWARF stacks of a process are to be used. Per the original author's note, the kernel
    part of a stack is collected regardless of FP or DWARF, so it carries no signal for that choice.

    :param samples: collapsed stacks, with or without the trailing sample count.
    :return: the mean user-frame count over all samples, or 0.0 if there are no samples.
    """
    kernel_suffix = "_[k]"
    frame_counts = []
    for sample in samples:
        # Everything before the first kernel marker is the user part of the stack, followed by
        # the name of the first kernel frame.
        user_part, marker, _ = sample.partition(kernel_suffix + ";")
        if not marker:
            user_part, marker, _ = sample.partition(kernel_suffix + " ")
        if not marker and sample.endswith(kernel_suffix):
            # Bare stack (no trailing " <count>") whose only kernel frame is the last frame.
            user_part, marker = sample[: -len(kernel_suffix)], kernel_suffix

        if marker:
            # example: "a;b;c;d_[k];e_[k] 1" should return the same value as "a;b;c 1". The last
            # ";" in the user part belongs to the first kernel frame, so we don't add 1 here.
            frame_counts.append(user_part.count(";"))
        else:
            # no kernel frames, so e.g "a;b;c 1" and frame count is one more than ";" count.
            frame_counts.append(user_part.count(";") + 1)

    if not frame_counts:
        # No samples at all: report an average of 0 instead of dividing by zero.
        return 0.0
    return sum(frame_counts) / len(frame_counts)


Expand Down
4 changes: 3 additions & 1 deletion gprofiler/profilers/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

logger = get_logger_adapter(__name__)

DEFAULT_PERF_DWARF_STACK_SIZE = 8192


# TODO: automatically disable this profiler if can_i_use_perf_events() returns False?
class PerfProcess:
Expand Down Expand Up @@ -145,7 +147,7 @@ def wait_and_script(self) -> str:
help="The max stack size for the Dwarf perf, in bytes. Must be <=65528."
" Relevant for --perf-mode dwarf|smart. Default: %(default)s",
type=int,
default=8192,
default=DEFAULT_PERF_DWARF_STACK_SIZE,
dest="perf_dwarf_stack_size",
)
],
Expand Down
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ def command_line(runtime: str, java_command_line: List[str]) -> List[str]:
"--interpreted-frames-native-stack",
str(CONTAINERS_DIRECTORY / "nodejs/fibonacci.js"),
],
# These runtimes have no non-container application, so using this command line results in
# an error.
"native_fp": ["/bin/false"],
"native_dwarf": ["/bin/false"],
}[runtime]


Expand Down Expand Up @@ -157,6 +161,16 @@ def gprofiler_docker_image(docker_client: DockerClient) -> Iterable[Image]:
def application_docker_images(docker_client: DockerClient) -> Iterable[Mapping[str, Image]]:
images = {}
for runtime in os.listdir(str(CONTAINERS_DIRECTORY)):
if runtime == "native":
path = CONTAINERS_DIRECTORY / runtime
images[runtime + "_fp"], _ = docker_client.images.build(
path=str(path), dockerfile=str(path / "fp.Dockerfile"), rm=True
)
images[runtime + "_dwarf"], _ = docker_client.images.build(
path=str(path), dockerfile=str(path / "dwarf.Dockerfile"), rm=True
)
continue

images[runtime], _ = docker_client.images.build(path=str(CONTAINERS_DIRECTORY / runtime), rm=True)

# for java - add additional images
Expand Down
10 changes: 10 additions & 0 deletions tests/containers/native/dwarf.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Test image for the native application, built with debug info and without frame pointers.
# NOTE(review): presumably this makes its stacks unwindable only via DWARF - confirm against
# the perf tests that use this image.
FROM gcc:8

COPY native.c .

# -g emits debug info; -fomit-frame-pointer asks gcc not to keep a frame pointer in functions
# that do not need one. No optimization level is passed.
RUN gcc -g -fomit-frame-pointer native.c -o native

# ensure it's built with debug info
# (grep -q exits non-zero when the string is missing, which fails the image build)
RUN file native | grep -q "with debug_info"

# Run the application as the container's main process.
CMD ["./native"]
10 changes: 10 additions & 0 deletions tests/containers/native/fp.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Test image for the native application, built with frame pointers and without debug info.
FROM gcc:8

COPY native.c .

# -fno-omit-frame-pointer keeps the frame pointer in every function; -g is not passed,
# so no debug info is requested. No optimization level is passed.
RUN gcc -fno-omit-frame-pointer native.c -o native

# ensure it's built without debug info
# -v inverts the match and -q only sets the exit status, so this step fails the build when the
# string IS present. -z makes grep read NUL-terminated records, so the whole `file` output is
# tested as a single record rather than line by line.
RUN file native | grep -zvq "with debug_info"

# Run the application as the container's main process.
CMD ["./native"]
14 changes: 14 additions & 0 deletions tests/containers/native/native.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// This one isn't fibonacci like the others, because it's used in tests for perf smart mode,
// which require consistent stacktraces (and fibonacci is not consistent; these stacks are)
/*
 * Build a fixed-depth call stack and then spin forever.
 *
 * Calls itself with n - 1 until n reaches 0; the innermost call (n == 0) then enters the
 * endless loop below and never returns. Because that call never returns, none of the outer
 * calls ever get past their own recursive call, so the process stays parked with n + 1
 * nested `recursive` frames.
 * NOTE(review): this relies on the compiler not inlining or otherwise flattening the
 * recursion - confirm the build does not enable optimizations.
 */
static void recursive(unsigned int n) {
if (n > 0) {
recursive(n - 1);
}

/* Busy-loop so there is always a sample to take at the bottom of the stack. */
while (1) ;
}

/*
 * Entry point: start the recursion with a depth argument of 10.
 * recursive() never returns (its innermost call loops forever), so the `return 0`
 * below is never reached.
 */
int main(void) {
recursive(10);
return 0;
}
28 changes: 28 additions & 0 deletions tests/test_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# Copyright (c) Granulate. All rights reserved.
# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
#

"""
Tests for the logic from gprofiler/merge.py
"""

from typing import List

import pytest

from gprofiler.merge import get_average_frame_count


@pytest.mark.parametrize(
    "samples,count",
    [
        (["a 1"], 1),
        (["d_[k] 1"], 0),
        (["d_[k];e_[k] 1"], 0),
        (["a;b;c;d_[k] 1"], 3),
        (["a;b;c;d_[k];e_[k] 1"], 3),
        (["a 1", "a;b 1"], 1.5),
        (["d_[k] 1", "a;d_[k] 1"], 0.5),
    ],
)
def test_get_average_frame_count(samples: List[str], count: float) -> None:
    """
    Check get_average_frame_count() against hand-computed averages.

    Each case is a list of collapsed samples ("frame;frame;... <count>"). Frames suffixed with
    "_[k]" are kernel frames and are expected to be left out of the count.
    """
    assert get_average_frame_count(samples) == count
51 changes: 51 additions & 0 deletions tests/test_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Copyright (c) Granulate. All rights reserved.
# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
#

from pathlib import Path
from threading import Event

import pytest

from gprofiler.profilers.perf import DEFAULT_PERF_DWARF_STACK_SIZE, SystemProfiler
from tests.utils import assert_function_in_collapsed, is_function_in_collapsed, snapshot_pid_collapsed


@pytest.mark.parametrize("runtime", ["native_fp", "native_dwarf"])
@pytest.mark.parametrize("perf_mode", ["fp", "dwarf", "smart"])
@pytest.mark.parametrize("in_container", [True]) # native app is built only for container
def test_perf(
tmp_path: Path,
application_pid: int,
runtime: str,
perf_mode: str,
) -> None:
"""
Profile the native test application with SystemProfiler and check which stacks are collected.

Runs for every combination of application build ("native_fp", "native_dwarf") and perf mode
("fp", "dwarf", "smart"). It takes one snapshot of the application's collapsed stacks and
asserts on the presence or absence of the `recursive` call chain and of the libc startup
frames, depending on the combination.
"""
# NOTE(review): the positional arguments are presumably the sampling frequency (99), the
# duration (3), a stop event, the storage directory and a boolean flag - confirm against
# SystemProfiler's signature.
with SystemProfiler(
99,
3,
Event(),
str(tmp_path),
False,
perf_mode=perf_mode,
perf_inject=False,
perf_dwarf_stack_size=DEFAULT_PERF_DWARF_STACK_SIZE,
) as profiler:
process_collapsed = snapshot_pid_collapsed(profiler, application_pid)

if runtime == "native_dwarf":
# app is built with DWARF info and without FP, so we expect to see a callstack only in DWARF or smart modes.
# `found ^ (mode not in (...))` holds exactly when `found == (mode in (...))`: the stack must
# be present in dwarf/smart modes and absent in fp mode.
assert is_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed) ^ bool(
perf_mode not in ("dwarf", "smart")
)
else:
# app is built with FP and without DWARF info, but DWARF mode is able to do FP unwinding,
# so it should always succeed.
assert runtime == "native_fp"
assert_function_in_collapsed(";recursive;recursive;recursive;recursive;", process_collapsed)

# expect to see libc stacks only when collecting with DWARF or smart.
# Same XOR form as above: the libc startup chain must be found in dwarf/smart modes and must
# not be found in fp mode.
assert is_function_in_collapsed(";_start;__libc_start_main;main;", process_collapsed) ^ bool(
perf_mode not in ("dwarf", "smart")
)
8 changes: 5 additions & 3 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,13 @@ def chmod_path_parts(path: Path, add_mode: int) -> None:
os.chmod(subpath, os.stat(subpath).st_mode | add_mode)


def is_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> bool:
    """
    Tell whether `function_name` occurs as a substring of any stack in `collapsed`.

    :param function_name: text to look for; matched with a plain substring test, so it may
        span several frames (e.g. ";main;recursive;").
    :param collapsed: mapping whose keys are collapsed stacks.
    :return: True if at least one key contains `function_name`, False otherwise.
    """
    for stack in collapsed.keys():
        if function_name in stack:
            return True
    return False


def assert_function_in_collapsed(function_name: str, collapsed: StackToSampleCount) -> None:
    """
    Assert that `function_name` occurs in at least one stack of `collapsed`.

    :param function_name: text to look for; matched as a substring of each collapsed stack.
    :param collapsed: mapping whose keys are collapsed stacks.
    :raises AssertionError: if no stack contains `function_name`.
    """
    # Print the collapsed data before asserting - presumably so it is available in the test
    # output when the assertion fails.
    print(f"collapsed: {collapsed}")
    # Single check through the shared helper, so the assert and is_function_in_collapsed()
    # cannot drift apart.
    assert is_function_in_collapsed(function_name, collapsed), f"function {function_name!r} missing in collapsed data!"


def snapshot_one_profile(profiler: ProfilerInterface) -> ProfileData:
Expand Down