Skip to content
41 changes: 28 additions & 13 deletions gprofiler/profilers/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import re
import shutil
import signal
from collections import Counter
from enum import Enum
from pathlib import Path
from threading import Event
Expand All @@ -30,7 +31,7 @@
from psutil import Process

from gprofiler.exceptions import CalledProcessError
from gprofiler.gprofiler_types import StackToSampleCount
from gprofiler.gprofiler_types import ProcessToStackSampleCounters, StackToSampleCount
from gprofiler.kernel_messages import get_kernel_messages_provider
from gprofiler.log import get_logger_adapter
from gprofiler.merge import parse_one_collapsed
Expand Down Expand Up @@ -515,6 +516,8 @@ def __init__(
self._ap_safemode = java_async_profiler_safemode
self._init_java_safemode(java_safemode)
self._should_profile = True
# if set, profiling is disabled due to this safemode reason.
self._safemode_disable_reason: Optional[str] = None
self._profiled_pids: Set[int] = set()
self._pids_to_remove: Set[int] = set()
self._kernel_messages_provider = get_kernel_messages_provider()
Expand Down Expand Up @@ -545,9 +548,15 @@ def _init_java_safemode(self, java_safemode: str) -> None:
), f"async-profiler safemode must be set to 127 in --java-safemode={JAVA_SAFEMODE_ALL} (or --java-safemode)"

def _disable_profiling(self, cause: str):
if self._should_profile and cause in self._java_safemode:
if self._safemode_disable_reason is None and cause in self._java_safemode:
logger.warning("Java profiling has been disabled, will avoid profiling any new java processes", cause=cause)
self._should_profile = False
self._safemode_disable_reason = cause

def _profiling_skipped_stack(self, reason: str, comm: str) -> StackToSampleCount:
# return 1 sample, it will be scaled later in merge_profiles().
# if --perf-mode=none mode is used, it will not, but we don't have anything logical to
# do here in that case :/
return Counter({f"{comm};[Profiling skipped: {reason}]": 1})

def _is_jvm_type_supported(self, java_version_cmd_output: str) -> bool:
return all(exclusion not in java_version_cmd_output for exclusion in self.JDK_EXCLUSIONS)
Expand Down Expand Up @@ -622,7 +631,7 @@ def _check_jvm_type_supported(self, process: Process, java_version_output: str)

return True

def _is_profiling_supported(self, process: Process) -> bool:
def _is_jvm_profiling_supported(self, process: Process) -> bool:
process_basename = os.path.basename(process.exe())
if JavaSafemodeOptions.JAVA_EXTENDED_VERSION_CHECKS in self._java_safemode:
# TODO we can get the "java" binary by extracting the java home from the libjvm path,
Expand Down Expand Up @@ -676,11 +685,16 @@ def _check_async_profiler_loaded(self, process: Process) -> bool:
return False

def _profile_process(self, process: Process) -> Optional[StackToSampleCount]:
if not self._is_profiling_supported(process):
return None
comm = process_comm(process)

if self._safemode_disable_reason is not None:
return self._profiling_skipped_stack(f"disabled due to {self._safemode_disable_reason}", comm)

if not self._is_jvm_profiling_supported(process):
return self._profiling_skipped_stack("profiling this JVM is not supported", comm)

if self._check_async_profiler_loaded(process):
return None
return self._profiling_skipped_stack("async-profiler is already loaded", comm)

# track profiled PIDs only if proc_events are in use, otherwise there is no use in them.
# TODO: it is possible to run in contexts where we're unable to use proc_events but are able to listen
Expand All @@ -691,9 +705,9 @@ def _profile_process(self, process: Process) -> Optional[StackToSampleCount]:

logger.info(f"Profiling process {process.pid} with async-profiler")
with AsyncProfiledProcess(process, self._storage_dir, self._buildids, self._mode, self._ap_safemode) as ap_proc:
return self._profile_ap_process(ap_proc)
return self._profile_ap_process(ap_proc, comm)

def _profile_ap_process(self, ap_proc: AsyncProfiledProcess) -> Optional[StackToSampleCount]:
def _profile_ap_process(self, ap_proc: AsyncProfiledProcess, comm: str) -> Optional[StackToSampleCount]:
started = ap_proc.start_async_profiler(self._interval)
if not started:
logger.info(f"Found async-profiler already started on {ap_proc.process.pid}, trying to stop it...")
Expand Down Expand Up @@ -730,7 +744,7 @@ def _profile_ap_process(self, ap_proc: AsyncProfiledProcess) -> Optional[StackTo
return None
else:
logger.info(f"Finished profiling process {ap_proc.process.pid}")
return parse_one_collapsed(output, process_comm(ap_proc.process))
return parse_one_collapsed(output, comm)

def _check_hotspot_error(self, ap_proc):
pid = ap_proc.process.pid
Expand Down Expand Up @@ -759,9 +773,10 @@ def _check_hotspot_error(self, ap_proc):
self._disable_profiling(JavaSafemodeOptions.HSERR)

def _select_processes_to_profile(self) -> List[Process]:
if not self._should_profile:
if self._safemode_disable_reason is not None:
logger.debug("Java profiling has been disabled, skipping profiling of all java processes")
return []
# continue - _profile_process will return an appropriate error for each process selected for
# profiling.
return pgrep_maps(r"^.+/libjvm\.so$")

def start(self) -> None:
Expand Down Expand Up @@ -841,7 +856,7 @@ def _handle_new_kernel_messages(self):
else:
self._handle_kernel_messages(messages)

def snapshot(self):
def snapshot(self) -> ProcessToStackSampleCounters:
try:
return super().snapshot()
finally:
Expand Down
42 changes: 21 additions & 21 deletions tests/test_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
#
import logging
import signal
from collections import Counter
from pathlib import Path
from threading import Event

import pytest
from packaging.version import Version

from gprofiler.profilers.java import JAVA_SAFEMODE_ALL, AsyncProfiledProcess, JavaProfiler, parse_jvm_version
from tests.utils import assert_function_in_collapsed
from tests.utils import assert_function_in_collapsed, snapshot_one_collaped


# adds the "status" command to AsyncProfiledProcess from gProfiler.
Expand Down Expand Up @@ -61,9 +62,7 @@ def test_async_profiler_already_running(application_pid, assert_collapsed, tmp_p
assert "Profiling is running for " in ap_proc.read_output()

# then start again
result = profiler.snapshot()
assert len(result) == 1
collapsed = result[next(iter(result.keys()))]
collapsed = snapshot_one_collaped(profiler)
assert "Found async-profiler already started" in caplog.text
assert "Finished profiling process" in caplog.text
assert_collapsed(collapsed)
Expand All @@ -90,11 +89,9 @@ def test_java_async_profiler_cpu_mode(
java_safemode="",
java_mode="ap",
) as profiler:
result = profiler.snapshot()
assert len(result) == 1
process_collapsed = result[next(iter(result.keys()))]
assert_collapsed(process_collapsed, check_comm=True)
assert_function_in_collapsed("do_syscall_64_[k]", process_collapsed, True) # ensure kernels stacks exist
collapsed = snapshot_one_collaped(profiler)
assert_collapsed(collapsed, check_comm=True)
assert_function_in_collapsed("do_syscall_64_[k]", collapsed, True) # ensure kernels stacks exist


@pytest.mark.parametrize("in_container", [True])
Expand All @@ -120,11 +117,9 @@ def test_java_async_profiler_musl_and_cpu(
java_safemode="",
java_mode="ap",
) as profiler:
result = profiler.snapshot()
assert len(result) == 1
process_collapsed = result[next(iter(result.keys()))]
assert_collapsed(process_collapsed, check_comm=True)
assert_function_in_collapsed("do_syscall_64_[k]", process_collapsed, True) # ensure kernels stacks exist
collapsed = snapshot_one_collaped(profiler)
assert_collapsed(collapsed, check_comm=True)
assert_function_in_collapsed("do_syscall_64_[k]", collapsed, True) # ensure kernels stacks exist


def test_java_safemode_parameters(tmp_path) -> None:
Expand Down Expand Up @@ -178,7 +173,8 @@ def test_java_safemode_version_check(
) as profiler:
process = profiler._select_processes_to_profile()[0]
jvm_version = parse_jvm_version(profiler._get_java_version(process))
profiler.snapshot()
collapsed = snapshot_one_collaped(profiler)
assert collapsed == Counter({"java;[Profiling skipped: profiling this JVM is not supported]": 1})

assert next(filter(lambda r: r.message == "Unsupported JVM version", caplog.records)).gprofiler_adapter_extra[
"jvm_version"
Expand All @@ -203,7 +199,8 @@ def test_java_safemode_build_number_check(
process = profiler._select_processes_to_profile()[0]
jvm_version = parse_jvm_version(profiler._get_java_version(process))
monkeypatch.setitem(JavaProfiler.MINIMAL_SUPPORTED_VERSIONS, 8, (jvm_version.version, 999))
profiler.snapshot()
collapsed = snapshot_one_collaped(profiler)
assert collapsed == Counter({"java;[Profiling skipped: profiling this JVM is not supported]": 1})

assert next(filter(lambda r: r.message == "Unsupported JVM version", caplog.records)).gprofiler_adapter_extra[
"jvm_version"
Expand Down Expand Up @@ -250,21 +247,23 @@ def sap_and_crash(self, *args, **kwargs):
assert "libpthread.so" in caplog.text
assert "memory_usage_in_bytes:" in caplog.text
assert "Java profiling has been disabled, will avoid profiling any new java process" in caplog.text
assert not profiler._should_profile
assert profiler._safemode_disable_reason is not None


def test_disable_java_profiling(application_pid, tmp_path, monkeypatch, caplog):
caplog.set_level(logging.DEBUG)

profiler = JavaProfiler(1, 5, Event(), str(tmp_path), False, False, "cpu", 0, False, "ap")
monkeypatch.setattr(profiler, "_should_profile", False)
dummy_reason = "dummy reason"
monkeypatch.setattr(profiler, "_safemode_disable_reason", dummy_reason)
with profiler:
assert len(profiler.snapshot()) == 0
collapsed = snapshot_one_collaped(profiler)
assert collapsed == Counter({f"java;[Profiling skipped: disabled due to {dummy_reason}]": 1})

assert "Java profiling has been disabled, skipping profiling of all java process" in caplog.text


def test_already_loaded_ap_profiling_failure(tmp_path, monkeypatch, caplog, application_pid) -> None:
def test_already_loaded_async_profiler_profiling_failure(tmp_path, monkeypatch, caplog, application_pid) -> None:
with monkeypatch.context() as m:
m.setattr("gprofiler.profilers.java.TEMPORARY_STORAGE_PATH", "/tmp/fake_gprofiler_tmp")
with JavaProfiler(
Expand Down Expand Up @@ -295,5 +294,6 @@ def test_already_loaded_ap_profiling_failure(tmp_path, monkeypatch, caplog, appl
) as profiler:
process = profiler._select_processes_to_profile()[0]
assert any("/tmp/fake_gprofiler_tmp" in mmap.path for mmap in process.memory_maps())
profiler.snapshot()
collapsed = snapshot_one_collaped(profiler)
assert collapsed == Counter({"java;[Profiling skipped: async-profiler is already loaded]": 1})
assert "Non-gProfiler async-profiler is already loaded to the target process" in caplog.text
14 changes: 9 additions & 5 deletions tests/test_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
from gprofiler.profilers.python import PySpyProfiler, PythonEbpfProfiler
from gprofiler.profilers.ruby import RbSpyProfiler
from tests import PHPSPY_DURATION
from tests.utils import RUNTIME_PROFILERS, assert_function_in_collapsed, run_gprofiler_in_container
from tests.utils import (
RUNTIME_PROFILERS,
assert_function_in_collapsed,
run_gprofiler_in_container,
snapshot_one_collaped,
)


@pytest.mark.parametrize("runtime", ["java"])
Expand All @@ -40,9 +45,7 @@ def test_java_from_host(
java_mode="ap",
) as profiler:
_ = assert_application_name # Required for mypy unused argument warning
result = profiler.snapshot()
assert len(result) == 1
process_collapsed = result[next(iter(result.keys()))]
process_collapsed = snapshot_one_collaped(profiler)
assert_collapsed(process_collapsed, check_comm=True)


Expand All @@ -56,6 +59,7 @@ def test_pyspy(
) -> None:
_ = assert_application_name # Required for mypy unused argument warning
with PySpyProfiler(1000, 3, Event(), str(tmp_path), add_versions=True) as profiler:
# not using snapshot_one_collaped because there are multiple Python processes running usually.
process_collapsed = profiler.snapshot().get(application_pid)
assert_collapsed(process_collapsed, check_comm=True)
assert_function_in_collapsed("PyYAML==6.0", process_collapsed) # Ensure package info is presented
Expand Down Expand Up @@ -85,7 +89,7 @@ def test_rbspy(
gprofiler_docker_image: Image,
) -> None:
with RbSpyProfiler(1000, 3, Event(), str(tmp_path), "rbspy") as profiler:
process_collapsed = profiler.snapshot().get(application_pid)
process_collapsed = snapshot_one_collaped(profiler)
assert_collapsed(process_collapsed, check_comm=True)


Expand Down
9 changes: 9 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from docker.models.containers import Container
from docker.models.images import Image

from gprofiler.gprofiler_types import StackToSampleCount
from gprofiler.profilers.profiler_base import ProfilerInterface

RUNTIME_PROFILERS = [
("java", "ap"),
("python", "py-spy"),
Expand Down Expand Up @@ -98,3 +101,9 @@ def assert_function_in_collapsed(function_name: str, collapsed: Mapping[str, int
assert any(
(function_name in record) for record in collapsed.keys()
), f"function {function_name!r} missing in collapsed data!"


def snapshot_one_collaped(profiler: ProfilerInterface) -> StackToSampleCount:
result = profiler.snapshot()
assert len(result) == 1
return next(iter(result.values()))