Skip to content

Commit

Permalink
An option to accumulate JFR events in memory instead of flushing to a…
Browse files Browse the repository at this point in the history
… file (#925)
  • Loading branch information
apangin committed May 7, 2024
1 parent 07e6015 commit 9733a08
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 8 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,11 @@ $ asprof -d 30 -f /tmp/flamegraph.html 8983
only events between the safepoint request and the start of the VM operation
will be recorded.

* `--jfropts OPTIONS` - comma-separated list of JFR recording options.
Currently, the only available option is `mem`, which is supported on Linux 3.17+.
`mem` enables accumulating events in memory instead of flushing them
synchronously to a file.

* `--jfrsync CONFIG` - start Java Flight Recording with the given configuration
synchronously with the profiler. The output .jfr file will include all regular
JFR events, except that execution samples will be obtained from async-profiler.
Expand Down
14 changes: 11 additions & 3 deletions src/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ static const Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'
// flamegraph - produce Flame Graph in HTML format
// tree - produce call tree in HTML format
// jfr - dump events in Java Flight Recorder format
// jfrsync[=CONFIG] - start Java Flight Recording with the given config along with the profiler
// jfropts=OPTIONS - JFR recording options: numeric bitmask or 'mem'
// jfrsync[=CONFIG] - start Java Flight Recording with the given config along with the profiler
// traces[=N] - dump top N call traces
// flat[=N] - dump top N methods (aka flat profile)
// samples - count the number of samples (default)
Expand Down Expand Up @@ -167,13 +168,20 @@ Error Arguments::parse(const char* args) {

CASE("jfr")
_output = OUTPUT_JFR;
if (value != NULL) {

CASE("jfropts")
_output = OUTPUT_JFR;
if (value == NULL) {
msg = "Invalid jfropts";
} else if (value[0] >= '0' && value[0] <= '9') {
_jfr_options = (int)strtol(value, NULL, 0);
} else if (strstr(value, "mem")) {
_jfr_options |= IN_MEMORY;
}

CASE("jfrsync")
_output = OUTPUT_JFR;
_jfr_options = JFR_SYNC_OPTS;
_jfr_options |= JFR_SYNC_OPTS;
_jfr_sync = value == NULL ? "default" : value;

CASE("traces")
Expand Down
2 changes: 2 additions & 0 deletions src/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ enum JfrOption {
NO_CPU_LOAD = 0x8,
NO_HEAP_SUMMARY = 0x10,

IN_MEMORY = 0x100,

JFR_SYNC_OPTS = NO_SYSTEM_INFO | NO_SYSTEM_PROPS | NO_NATIVE_LIBS | NO_CPU_LOAD | NO_HEAP_SUMMARY
};

Expand Down
26 changes: 24 additions & 2 deletions src/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ class Recording {

RecordingBuffer _buf[CONCURRENCY_LEVEL];
int _fd;
int _memfd;
char* _master_recording_file;
off_t _chunk_start;
ThreadFilter _thread_set;
Expand All @@ -442,6 +443,7 @@ class Recording {
int _available_processors;
int _recorded_lib_count;

bool _in_memory;
bool _cpu_monitor_enabled;
bool _heap_monitor_enabled;
u32 _last_gc_id;
Expand All @@ -460,6 +462,7 @@ class Recording {
_start_ticks = TSC::ticks();
_base_id = 0;
_bytes_written = 0;
_in_memory = false;

_chunk_size = args._chunk_size <= 0 ? MAX_JLONG : (args._chunk_size < 262144 ? 262144 : args._chunk_size);
_chunk_time = args._chunk_time <= 0 ? MAX_JLONG : (args._chunk_time < 5 ? 5 : args._chunk_time) * 1000000ULL;
Expand All @@ -485,6 +488,10 @@ class Recording {
}
flush(_buf);

if (args.hasOption(IN_MEMORY) && (_memfd = OS::createMemoryFile("async-profiler-recording")) >= 0) {
_in_memory = true;
}

_cpu_monitor_enabled = !args.hasOption(NO_CPU_LOAD);
if (_cpu_monitor_enabled) {
_last_times.proc.real = OS::getProcessCpuTime(&_last_times.proc.user, &_last_times.proc.system);
Expand All @@ -498,6 +505,10 @@ class Recording {
~Recording() {
off_t chunk_end = finishChunk();

if (_memfd >= 0) {
close(_memfd);
}

if (_master_recording_file != NULL) {
appendRecording(_master_recording_file, chunk_end);
free(_master_recording_file);
Expand All @@ -518,6 +529,11 @@ class Recording {
_stop_time = OS::micros();
_stop_ticks = TSC::ticks();

if (_memfd >= 0) {
OS::copyFile(_memfd, _fd, 0, lseek(_memfd, 0, SEEK_CUR));
_in_memory = false;
}

off_t cpool_offset = lseek(_fd, 0, SEEK_CUR);
writeCpool(_buf);
flush(_buf);
Expand Down Expand Up @@ -563,14 +579,20 @@ class Recording {
writeMetadata(_buf);
writeRecordingInfo(_buf);
flush(_buf);

if (_memfd >= 0) {
while (ftruncate(_memfd, 0) < 0 && errno == EINTR); // restart if interrupted
_in_memory = true;
}
}

// Tells whether the current chunk should be switched: either the number of bytes
// written has reached _chunk_size, or the time elapsed since _start_time
// (relative to the given wall_time) has reached _chunk_time.
bool needSwitchChunk(u64 wall_time) {
    // Size limit is checked first, with an acquire load of the byte counter.
    if (loadAcquire(_bytes_written) >= _chunk_size) {
        return true;
    }
    return wall_time - _start_time >= _chunk_time;
}

// Reports the memory attributed to this recording: the method map plus the thread set.
// NOTE(review): this hunk appears to contain both the pre-change and the post-change
// return statement of a diff; only one of them is expected in the real file — confirm
// against the repository before relying on either.
size_t usedMemory() {
return _method_map.usedMemory() + _thread_set.usedMemory();
// Variant that additionally counts the current offset of the memory file
// when one exists (_memfd >= 0); otherwise it contributes 0.
return _method_map.usedMemory() + _thread_set.usedMemory() +
(_memfd >= 0 ? lseek(_memfd, 0, SEEK_CUR) : 0);
}

void cpuMonitorCycle() {
Expand Down Expand Up @@ -695,7 +717,7 @@ class Recording {
}

void flush(Buffer* buf) {
ssize_t result = write(_fd, buf->data(), buf->offset());
ssize_t result = write(_in_memory ? _memfd : _fd, buf->data(), buf->offset());
if (result > 0) {
atomicInc(_bytes_written, result);
}
Expand Down
1 change: 1 addition & 0 deletions src/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ static const char USAGE_STRING[] =
" --begin function begin profiling when function is executed\n"
" --end function end profiling when function is executed\n"
" --ttsp time-to-safepoint profiling\n"
" --jfropts opts JFR recording options: mem\n"
" --jfrsync config synchronize profiler with JFR recording\n"
" --fdtransfer use fdtransfer to serve perf requests\n"
" from the non-privileged target\n"
Expand Down
1 change: 1 addition & 0 deletions src/os.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class OS {
static u64 getProcessCpuTime(u64* utime, u64* stime);
static u64 getTotalCpuTime(u64* utime, u64* stime);

static int createMemoryFile(const char* name);
static void copyFile(int src_fd, int dst_fd, off_t offset, size_t size);
static void freePageCache(int fd, off_t start_offset);
};
Expand Down
4 changes: 4 additions & 0 deletions src/os_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,10 @@ u64 OS::getTotalCpuTime(u64* utime, u64* stime) {
return real;
}

// Creates a memory file with the given name by invoking the memfd_create
// system call directly (no flags). Returns the raw syscall result narrowed to int;
// callers treat a negative value as failure.
int OS::createMemoryFile(const char* name) {
    const int no_flags = 0;
    int memfd = (int)syscall(__NR_memfd_create, name, no_flags);
    return memfd;
}

void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) {
// copy_file_range() is probably better, but not supported on all kernels
while (size > 0) {
Expand Down
5 changes: 5 additions & 0 deletions src/os_macos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ u64 OS::getTotalCpuTime(u64* utime, u64* stime) {
return user + system + idle;
}

// macOS stub for creating a memory file: always reports failure by returning -1.
// The `name` argument is not used.
int OS::createMemoryFile(const char* name) {
// Not supported on macOS
return -1;
}

void OS::copyFile(int src_fd, int dst_fd, off_t offset, size_t size) {
char* buf = (char*)mmap(NULL, size + offset, PROT_READ, MAP_PRIVATE, src_fd, 0);
if (buf == NULL) {
Expand Down
8 changes: 5 additions & 3 deletions src/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,8 @@ Error Profiler::dump(Writer& out, Arguments& args) {

void Profiler::printUsedMemory(Writer& out) {
size_t call_trace_storage = _call_trace_storage.usedMemory();
size_t dictionaries = _class_map.usedMemory() + _symbol_map.usedMemory() + _thread_filter.usedMemory() + _jfr.usedMemory();
size_t flight_recording = _jfr.usedMemory();
size_t dictionaries = _class_map.usedMemory() + _symbol_map.usedMemory() + _thread_filter.usedMemory();

size_t code_cache = _runtime_stubs.usedMemory();
int native_lib_count = _native_libs.count();
Expand All @@ -1350,12 +1351,13 @@ void Profiler::printUsedMemory(Writer& out) {
const size_t KB = 1024;
snprintf(buf, sizeof(buf) - 1,
"Call trace storage: %7zu KB\n"
" Flight recording: %7zu KB\n"
" Dictionaries: %7zu KB\n"
" Code cache: %7zu KB\n"
"------------------------------\n"
" Total: %7zu KB\n",
call_trace_storage / KB, dictionaries / KB, code_cache / KB,
(call_trace_storage + dictionaries + code_cache) / KB);
call_trace_storage / KB, flight_recording / KB, dictionaries / KB, code_cache / KB,
(call_trace_storage + flight_recording + dictionaries + code_cache) / KB);
out << buf;
}

Expand Down

0 comments on commit 9733a08

Please sign in to comment.