/*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
// no precompiled headers
#include "jvm.h"
#include "classfile/classLoader.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/interpreter.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/filemap.hpp"
#include "oops/oop.inline.hpp"
#include "os_linux.inline.hpp"
#include "os_share_linux.hpp"
#include "osContainer_linux.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
#include "runtime/atomic.hpp"
#include "runtime/extendedPC.hpp"
#include "runtime/globals.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/java.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/osThread.hpp"
#include "runtime/perfMemory.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/statSampler.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/threadSMR.hpp"
#include "runtime/timer.hpp"
#include "runtime/vm_version.hpp"
#include "semaphore_posix.hpp"
#include "services/attachListener.hpp"
#include "services/memTracker.hpp"
#include "services/runtimeService.hpp"
#include "utilities/align.hpp"
#include "utilities/decoder.hpp"
#include "utilities/defaultStream.hpp"
#include "utilities/events.hpp"
#include "utilities/elfFile.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
// put OS-includes here
# include <sys/types.h>
# include <sys/mman.h>
# include <sys/stat.h>
# include <sys/select.h>
# include <pthread.h>
# include <signal.h>
# include <errno.h>
# include <dlfcn.h>
# include <stdio.h>
# include <unistd.h>
# include <sys/resource.h>
# include <sys/time.h>
# include <sys/times.h>
# include <sys/utsname.h>
# include <sys/socket.h>
# include <sys/wait.h>
# include <pwd.h>
# include <poll.h>
# include <fcntl.h>
# include <string.h>
# include <syscall.h>
# include <sys/sysinfo.h>
# include <gnu/libc-version.h>
# include <sys/ipc.h>
# include <sys/shm.h>
# include <link.h>
# include <stdint.h>
# include <inttypes.h>
# include <sys/ioctl.h>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#include <sched.h>
#undef _GNU_SOURCE
#else
#include <sched.h>
#endif
// If RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling
// getrusage() is prepared to handle the associated failure.
#ifndef RUSAGE_THREAD
#define RUSAGE_THREAD (1) /* only the calling thread */
#endif
#define MAX_PATH (2 * K)
#define MAX_SECS 100000000
// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
enum CoredumpFilterBit {
FILE_BACKED_PVT_BIT = 1 << 2,
FILE_BACKED_SHARED_BIT = 1 << 3,
LARGEPAGES_BIT = 1 << 6,
DAX_SHARED_BIT = 1 << 8
};
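// Illustration (hypothetical helper, not part of the original file): core(5)
// documents the filter as a hex mask in /proc/<pid>/coredump_filter that is
// updated with a read-modify-write. A minimal sketch of enabling one of the
// bits above:
static bool example_enable_coredump_filter_bit(CoredumpFilterBit bit) {
  FILE* f = ::fopen("/proc/self/coredump_filter", "r+");
  if (f == NULL) {
    return false;
  }
  unsigned long cdm = 0;
  bool done = ::fscanf(f, "%lx", &cdm) == 1;
  if (done) {
    ::rewind(f);
    // Write the mask back with the requested bit set.
    done = ::fprintf(f, "%#lx", cdm | bit) > 0;
  }
  ::fclose(f);
  return done;
}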
////////////////////////////////////////////////////////////////////////////////
// global variables
julong os::Linux::_physical_memory = 0;
address os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size = 0;
int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
Mutex* os::Linux::_createThread_lock = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;
bool os::Linux::_supports_fast_thread_cpu_time = false;
uint32_t os::Linux::_os_version = 0;
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
static jlong initial_time_count=0;
static int clock_tics_per_sec = 100;
// If the VM might have been created on the primordial thread, we need to resolve the
// primordial thread stack bounds and check if the current thread might be the
// primordial thread in places. If we know that the primordial thread is never used,
// such as when the VM was created by one of the standard java launchers, we can
// avoid this work.
static bool suppress_primordial_thread_resolution = false;
// For diagnostics: print a message once. See run_periodic_checks.
static sigset_t check_signal_done;
static bool check_signals = true;
// Signal number used to suspend/resume a thread
// do not use any signal number less than SIGSEGV, see 4355769
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;
// utility functions
static int SR_initialize();
julong os::available_memory() {
return Linux::available_memory();
}
julong os::Linux::available_memory() {
// values in struct sysinfo are "unsigned long"
struct sysinfo si;
julong avail_mem;
if (OSContainer::is_containerized()) {
jlong mem_limit, mem_usage;
if ((mem_limit = OSContainer::memory_limit_in_bytes()) < 1) {
log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value",
mem_limit == OSCONTAINER_ERROR ? "failed" : "unlimited", mem_limit);
}
if (mem_limit > 0 && (mem_usage = OSContainer::memory_usage_in_bytes()) < 1) {
log_debug(os, container)("container memory usage failed: " JLONG_FORMAT ", using host value", mem_usage);
}
if (mem_limit > 0 && mem_usage > 0 ) {
avail_mem = mem_limit > mem_usage ? (julong)mem_limit - (julong)mem_usage : 0;
log_trace(os)("available container memory: " JULONG_FORMAT, avail_mem);
return avail_mem;
}
}
sysinfo(&si);
avail_mem = (julong)si.freeram * si.mem_unit;
log_trace(os)("available memory: " JULONG_FORMAT, avail_mem);
return avail_mem;
}
julong os::physical_memory() {
jlong phys_mem = 0;
if (OSContainer::is_containerized()) {
jlong mem_limit;
if ((mem_limit = OSContainer::memory_limit_in_bytes()) > 0) {
log_trace(os)("total container memory: " JLONG_FORMAT, mem_limit);
return mem_limit;
}
log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value",
mem_limit == OSCONTAINER_ERROR ? "failed" : "unlimited", mem_limit);
}
phys_mem = Linux::physical_memory();
log_trace(os)("total system memory: " JLONG_FORMAT, phys_mem);
return phys_mem;
}
static uint64_t initial_total_ticks = 0;
static uint64_t initial_steal_ticks = 0;
static bool has_initial_tick_info = false;
static void next_line(FILE *f) {
int c;
do {
c = fgetc(f);
} while (c != '\n' && c != EOF);
}
bool os::Linux::get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu) {
FILE* fh;
uint64_t userTicks, niceTicks, systemTicks, idleTicks;
// Since at least kernel 2.6: iowait: time waiting for I/O to complete;
// irq: time servicing interrupts; softirq: time servicing softirqs
uint64_t iowTicks = 0, irqTicks = 0, sirqTicks = 0;
// steal (since kernel 2.6.11): time spent in other OS when running in a virtualized environment
uint64_t stealTicks = 0;
// guest (since kernel 2.6.24): time spent running a virtual CPU for guest OS under the
// control of the Linux kernel
uint64_t guestNiceTicks = 0;
int logical_cpu = -1;
const int required_tickinfo_count = (which_logical_cpu == -1) ? 4 : 5;
int n;
memset(pticks, 0, sizeof(CPUPerfTicks));
if ((fh = fopen("/proc/stat", "r")) == NULL) {
return false;
}
if (which_logical_cpu == -1) {
n = fscanf(fh, "cpu " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
UINT64_FORMAT " " UINT64_FORMAT " ",
&userTicks, &niceTicks, &systemTicks, &idleTicks,
&iowTicks, &irqTicks, &sirqTicks,
&stealTicks, &guestNiceTicks);
} else {
// Move to next line
next_line(fh);
// Find the line for the requested cpu. Faster to just iterate linefeeds?
for (int i = 0; i < which_logical_cpu; i++) {
next_line(fh);
}
n = fscanf(fh, "cpu%u " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " " UINT64_FORMAT " "
UINT64_FORMAT " " UINT64_FORMAT " ",
&logical_cpu, &userTicks, &niceTicks,
&systemTicks, &idleTicks, &iowTicks, &irqTicks, &sirqTicks,
&stealTicks, &guestNiceTicks);
}
fclose(fh);
if (n < required_tickinfo_count || logical_cpu != which_logical_cpu) {
return false;
}
pticks->used = userTicks + niceTicks;
pticks->usedKernel = systemTicks + irqTicks + sirqTicks;
pticks->total = userTicks + niceTicks + systemTicks + idleTicks +
iowTicks + irqTicks + sirqTicks + stealTicks + guestNiceTicks;
if (n > required_tickinfo_count + 3) {
pticks->steal = stealTicks;
pticks->has_steal_ticks = true;
} else {
pticks->steal = 0;
pticks->has_steal_ticks = false;
}
return true;
}
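// Illustrative usage (hypothetical helper, not part of this file): a CPU
// load figure is typically derived from two CPUPerfTicks snapshots taken an
// interval apart, as the ratio of consumed ticks to total ticks.
static double example_cpu_load_between(const os::Linux::CPUPerfTicks& t0,
                                       const os::Linux::CPUPerfTicks& t1) {
  uint64_t total_delta = t1.total - t0.total;
  if (total_delta == 0) {
    return 0.0;
  }
  uint64_t used_delta = (t1.used + t1.usedKernel) - (t0.used + t0.usedKernel);
  return (double)used_delta / (double)total_delta;
}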
// Return true if the process is running with special privileges, e.g. setuid/setgid
// (the effective id differs from the real id).
bool os::have_special_privileges() {
static bool init = false;
static bool privileges = false;
if (!init) {
privileges = (getuid() != geteuid()) || (getgid() != getegid());
init = true;
}
return privileges;
}
#ifndef SYS_gettid
// i386: 224, ia64: 1105, amd64: 186, sparc: 143
#ifdef __ia64__
#define SYS_gettid 1105
#else
#ifdef __i386__
#define SYS_gettid 224
#else
#ifdef __amd64__
#define SYS_gettid 186
#else
#ifdef __sparc__
#define SYS_gettid 143
#else
#error define gettid for the arch
#endif
#endif
#endif
#endif
#endif
// pid_t gettid()
//
// Returns the kernel thread id of the currently running thread. Kernel
// thread id is used to access /proc.
pid_t os::Linux::gettid() {
int rslt = syscall(SYS_gettid);
assert(rslt != -1, "must be."); // old linuxthreads implementation?
return (pid_t)rslt;
}
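// Illustration (hypothetical helper, not part of this file): the kernel
// thread id returned above names this thread's entries under /proc, e.g.:
static int example_this_thread_stat_path(char* buf, size_t buflen) {
  pid_t tid = os::Linux::gettid();
  return jio_snprintf(buf, buflen, "/proc/self/task/%d/stat", (int)tid);
}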
// Most versions of Linux have a bug where the number of processors is
// determined by looking at the /proc file system. In a chroot environment,
// the system call returns 1. This causes the VM to act as if it is
// a single processor and elide locking (see is_MP() call).
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
"Java may be unstable running multithreaded in a chroot "
"environment on Linux when /proc filesystem is not mounted.";
void os::Linux::initialize_system_info() {
set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
if (processor_count() == 1) {
pid_t pid = os::Linux::gettid();
char fname[32];
jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
FILE *fp = fopen(fname, "r");
if (fp == NULL) {
unsafe_chroot_detected = true;
} else {
fclose(fp);
}
}
_physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE);
assert(processor_count() > 0, "linux error");
}
void os::init_system_properties_values() {
// The next steps are taken in the product version:
//
// Obtain the JAVA_HOME value from the location of libjvm.so.
// This library should be located at:
// <JAVA_HOME>/lib/{client|server}/libjvm.so.
//
// If "/jre/lib/" appears at the right place in the path, then we
// assume libjvm.so is installed in a JDK and we use this path.
//
// Otherwise exit with message: "Could not create the Java virtual machine."
//
// The following extra steps are taken in the debugging version:
//
// If "/jre/lib/" does NOT appear at the right place in the path
// instead of exit check for $JAVA_HOME environment variable.
//
// If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>,
// then we append a fake suffix "hotspot/libjvm.so" to this path so
// it looks like libjvm.so is installed there
// <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so.
//
// Otherwise exit.
//
// Important note: if the location of libjvm.so changes this
// code needs to be changed accordingly.
// See ld(1):
// The linker uses the following search paths to locate required
// shared libraries:
// 1: ...
// ...
// 7: The default directories, normally /lib and /usr/lib.
#if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390)
#define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
#else
#define DEFAULT_LIBPATH "/lib:/usr/lib"
#endif
// Base path of extensions installed on the system.
#define SYS_EXT_DIR "/usr/java/packages"
#define EXTENSIONS_DIR "/lib/ext"
// Buffer that fits several sprintfs.
// Note that the space for the colon and the trailing null are provided
// by the nulls included by the sizeof operator.
const size_t bufsize =
MAX2((size_t)MAXPATHLEN, // For dll_dir & friends.
(size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR)); // extensions dir
char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
// sysclasspath, java_home, dll_dir
{
char *pslash;
os::jvm_path(buf, bufsize);
// Found the full path to libjvm.so.
// Now cut the path to <java_home>/jre if we can.
pslash = strrchr(buf, '/');
if (pslash != NULL) {
*pslash = '\0'; // Get rid of /libjvm.so.
}
pslash = strrchr(buf, '/');
if (pslash != NULL) {
*pslash = '\0'; // Get rid of /{client|server|hotspot}.
}
Arguments::set_dll_dir(buf);
if (pslash != NULL) {
pslash = strrchr(buf, '/');
if (pslash != NULL) {
*pslash = '\0'; // Get rid of /lib.
}
}
Arguments::set_java_home(buf);
set_boot_path('/', ':');
}
// Where to look for native libraries.
//
// Note: Due to a legacy implementation, most of the library path
// is set in the launcher. This was to accommodate linking restrictions
// on legacy Linux implementations (which are no longer supported).
// Eventually, all the library path setting will be done here.
//
// However, to prevent the proliferation of improperly built native
// libraries, the new path component /usr/java/packages is added here.
// Eventually, all the library path setting will be done in this function.
{
// Get the user setting of LD_LIBRARY_PATH, and prepend it. It
// should always exist (until the legacy problem cited above is
// addressed).
const char *v = ::getenv("LD_LIBRARY_PATH");
const char *v_colon = ":";
if (v == NULL) { v = ""; v_colon = ""; }
// That's +1 for the colon and +1 for the trailing '\0'.
char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char,
strlen(v) + 1 +
sizeof(SYS_EXT_DIR) + sizeof("/lib/") + sizeof(DEFAULT_LIBPATH) + 1,
mtInternal);
sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib:" DEFAULT_LIBPATH, v, v_colon);
Arguments::set_library_path(ld_library_path);
FREE_C_HEAP_ARRAY(char, ld_library_path);
}
// Extensions directories.
sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home());
Arguments::set_ext_dirs(buf);
FREE_C_HEAP_ARRAY(char, buf);
#undef DEFAULT_LIBPATH
#undef SYS_EXT_DIR
#undef EXTENSIONS_DIR
}
////////////////////////////////////////////////////////////////////////////////
// breakpoint support
void os::breakpoint() {
BREAKPOINT;
}
extern "C" void breakpoint() {
// use debugger to set breakpoint here
}
////////////////////////////////////////////////////////////////////////////////
// signal support
debug_only(static bool signal_sets_initialized = false);
static sigset_t unblocked_sigs, vm_sigs;
void os::Linux::signal_sets_init() {
// Should also have an assertion stating we are still single-threaded.
assert(!signal_sets_initialized, "Already initialized");
// Fill in signals that are necessarily unblocked for all threads in
// the VM. Currently, we unblock the following signals:
// SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless overridden
// by -Xrs (=ReduceSignalUsage));
// BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all
// other threads. The "ReduceSignalUsage" boolean tells us not to alter
// the dispositions or masks wrt these signals.
// Programs embedding the VM that want to use the above signals for their
// own purposes must, at this time, use the "-Xrs" option to prevent
// interference with shutdown hooks and BREAK_SIGNAL thread dumping.
// (See bug 4345157, and other related bugs).
// In reality, though, unblocking these signals is really a nop, since
// these signals are not blocked by default.
sigemptyset(&unblocked_sigs);
sigaddset(&unblocked_sigs, SIGILL);
sigaddset(&unblocked_sigs, SIGSEGV);
sigaddset(&unblocked_sigs, SIGBUS);
sigaddset(&unblocked_sigs, SIGFPE);
#if defined(PPC64)
sigaddset(&unblocked_sigs, SIGTRAP);
#endif
sigaddset(&unblocked_sigs, SR_signum);
if (!ReduceSignalUsage) {
if (!os::Posix::is_sig_ignored(SHUTDOWN1_SIGNAL)) {
sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL);
}
if (!os::Posix::is_sig_ignored(SHUTDOWN2_SIGNAL)) {
sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL);
}
if (!os::Posix::is_sig_ignored(SHUTDOWN3_SIGNAL)) {
sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL);
}
}
// Fill in signals that are blocked by all but the VM thread.
sigemptyset(&vm_sigs);
if (!ReduceSignalUsage) {
sigaddset(&vm_sigs, BREAK_SIGNAL);
}
debug_only(signal_sets_initialized = true);
}
// These are signals that are unblocked while a thread is running Java.
// (For some reason, they get blocked by default.)
sigset_t* os::Linux::unblocked_signals() {
assert(signal_sets_initialized, "Not initialized");
return &unblocked_sigs;
}
// These are the signals that are blocked while a (non-VM) thread is
// running Java. Only the VM thread handles these signals.
sigset_t* os::Linux::vm_signals() {
assert(signal_sets_initialized, "Not initialized");
return &vm_sigs;
}
void os::Linux::hotspot_sigmask(Thread* thread) {
// Save caller's signal mask before setting VM signal mask.
sigset_t caller_sigmask;
pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask);
OSThread* osthread = thread->osthread();
osthread->set_caller_sigmask(caller_sigmask);
pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);
if (!ReduceSignalUsage) {
if (thread->is_VM_thread()) {
// Only the VM thread handles BREAK_SIGNAL ...
pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
} else {
// ... all other threads block BREAK_SIGNAL
pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
}
}
}
//////////////////////////////////////////////////////////////////////////////
// detecting pthread library
void os::Linux::libpthread_init() {
// Save glibc and pthread version strings.
#if !defined(_CS_GNU_LIBC_VERSION) || \
!defined(_CS_GNU_LIBPTHREAD_VERSION)
#error "glibc too old (< 2.3.2)"
#endif
size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
assert(n > 0, "cannot retrieve glibc version");
char *str = (char *)malloc(n, mtInternal);
confstr(_CS_GNU_LIBC_VERSION, str, n);
os::Linux::set_glibc_version(str);
n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
assert(n > 0, "cannot retrieve pthread version");
str = (char *)malloc(n, mtInternal);
confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
os::Linux::set_libpthread_version(str);
}
/////////////////////////////////////////////////////////////////////////////
// thread stack expansion
// os::Linux::manually_expand_stack() takes care of expanding the thread
// stack. Note that this is normally not needed: pthread allocates thread
// stacks using mmap() without MAP_NORESERVE, so the stack is already
// committed. Therefore it is not necessary to expand the stack manually.
//
// Manually expanding the stack was historically needed on LinuxThreads
// thread stacks, which were allocated with mmap(MAP_GROWSDOWN). Nowadays
// it is kept to deal with very rare corner cases:
//
// For one, the user may run the VM on their own implementation of threads
// whose stacks are - like the old LinuxThreads - implemented using
// mmap(MAP_GROWSDOWN).
//
// Also, this code may be needed if the VM is running on the primordial
// thread. Normally we avoid running on the primordial thread; however,
// the user may still invoke the VM on the primordial thread.
//
// The following historical comment describes the details about running
// on a thread stack allocated with mmap(MAP_GROWSDOWN):
// Force Linux kernel to expand current thread stack. If "bottom" is close
// to the stack guard, caller should block all signals.
//
// MAP_GROWSDOWN:
// A special mmap() flag that is used to implement thread stacks. It tells
// kernel that the memory region should extend downwards when needed. This
// allows early versions of LinuxThreads to only mmap the first few pages
// when creating a new thread. Linux kernel will automatically expand thread
// stack as needed (on page faults).
//
// However, because the memory region of a MAP_GROWSDOWN stack can grow on
// demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
// region, it's hard to tell if the fault is due to a legitimate stack
// access or because of reading/writing non-existent memory (e.g. buffer
// overrun). As a rule, if the fault happens below the current stack pointer,
// the Linux kernel does not expand the stack; instead a SIGSEGV is sent to the
// application (see Linux kernel fault.c).
//
// This Linux feature can cause SIGSEGV when VM bangs thread stack for
// stack overflow detection.
//
// Newer versions of LinuxThreads (since glibc-2.2, or RH-7.x) and NPTL do
// not use MAP_GROWSDOWN.
//
// To get around the problem and allow stack banging on Linux, we need to
// manually expand thread stack after receiving the SIGSEGV.
//
// There are two ways to expand the thread stack to address "bottom"; we used
// both of them in the JVM before 1.5:
// 1. adjust stack pointer first so that it is below "bottom", and then
// touch "bottom"
// 2. mmap() the page in question
//
// Now that the alternate signal stack is gone, it's harder to use 2. For instance,
// if current sp is already near the lower end of page 101, and we need to
// call mmap() to map page 100, it is possible that part of the mmap() frame
// will be placed in page 100. When page 100 is mapped, it is zero-filled.
// That will destroy the mmap() frame and cause VM to crash.
//
// The following code works by adjusting sp first, then accessing the "bottom"
// page to force a page fault. Linux kernel will then automatically expand the
// stack mapping.
//
// _expand_stack_to() assumes its frame size is less than page size, which
// should always be true if the function is not inlined.
static void NOINLINE _expand_stack_to(address bottom) {
address sp;
size_t size;
volatile char *p;
// Adjust bottom to point to the largest address within the same page; this
// gives us a one-page buffer if alloca() allocates slightly more memory.
bottom = (address)align_down((uintptr_t)bottom, os::Linux::page_size());
bottom += os::Linux::page_size() - 1;
// sp might be slightly above current stack pointer; if that's the case, we
// will alloca() a little more space than necessary, which is OK. Don't use
// os::current_stack_pointer(), as its result can be slightly below current
// stack pointer, causing us to not alloca enough to reach "bottom".
sp = (address)&sp;
if (sp > bottom) {
size = sp - bottom;
p = (volatile char *)alloca(size);
assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?");
p[0] = '\0';
}
}
void os::Linux::expand_stack_to(address bottom) {
_expand_stack_to(bottom);
}
bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
assert(t!=NULL, "just checking");
assert(t->osthread()->expanding_stack(), "expand should be set");
assert(t->stack_base() != NULL, "stack_base was not initialized");
if (addr < t->stack_base() && addr >= t->stack_reserved_zone_base()) {
sigset_t mask_all, old_sigset;
sigfillset(&mask_all);
pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
_expand_stack_to(addr);
pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
return true;
}
return false;
}
//////////////////////////////////////////////////////////////////////////////
// create new thread
// Thread start routine for all newly created threads
static void *thread_native_entry(Thread *thread) {
thread->record_stack_base_and_size();
// Try to randomize the cache line index of hot stack frames.
// This helps when threads with the same stack traces evict each other's
// cache lines. The threads can be either from the same JVM instance, or
// from different JVM instances. The benefit is especially true for
// processors with hyperthreading technology.
static int counter = 0;
int pid = os::current_process_id();
alloca(((pid ^ counter++) & 7) * 128);
thread->initialize_thread_current();
OSThread* osthread = thread->osthread();
Monitor* sync = osthread->startThread_lock();
osthread->set_thread_id(os::current_thread_id());
log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) pthread_self());
if (UseNUMA) {
int lgrp_id = os::numa_get_group_id();
if (lgrp_id != -1) {
thread->set_lgrp_id(lgrp_id);
}
}
// initialize signal mask for this thread
os::Linux::hotspot_sigmask(thread);
// initialize floating point control register
os::Linux::init_thread_fpu_state();
// handshaking with parent thread
{
MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
// notify parent thread
osthread->set_state(INITIALIZED);
sync->notify_all();
// wait until os::start_thread()
while (osthread->get_state() == INITIALIZED) {
sync->wait(Mutex::_no_safepoint_check_flag);
}
}
// call one more level start routine
thread->call_run();
// Note: at this point the thread object may already have deleted itself.
// Prevent dereferencing it from here on out.
thread = NULL;
log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) pthread_self());
return 0;
}
bool os::create_thread(Thread* thread, ThreadType thr_type,
size_t req_stack_size) {
assert(thread->osthread() == NULL, "caller responsible");
// Allocate the OSThread object
OSThread* osthread = new OSThread(NULL, NULL);
if (osthread == NULL) {
return false;
}
// set the correct thread state
osthread->set_thread_type(thr_type);
// Initial state is ALLOCATED but not INITIALIZED
osthread->set_state(ALLOCATED);
thread->set_osthread(osthread);
// init thread attributes
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
// In the Linux NPTL pthread implementation the guard size mechanism
// is not implemented properly. The POSIX standard requires adding
// the size of the guard pages to the stack size, whereas Linux
// takes the space out of 'stacksize'. Thus we adapt the requested
// stack_size by the size of the guard pages to mimic proper
// behaviour. However, be careful not to end up with a size
// of zero due to overflow. Don't add the guard page in that case.
size_t guard_size = os::Linux::default_guard_size(thr_type);
if (stack_size <= SIZE_MAX - guard_size) {
stack_size += guard_size;
}
assert(is_aligned(stack_size, os::vm_page_size()), "stack_size not aligned");
int status = pthread_attr_setstacksize(&attr, stack_size);
assert_status(status == 0, status, "pthread_attr_setstacksize");
// Configure glibc guard page.
pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
ThreadState state;
{
pthread_t tid;
int ret = pthread_create(&tid, &attr, (void* (*)(void*)) thread_native_entry, thread);
char buf[64];
if (ret == 0) {
log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ",
(uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
} else {
log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.",
os::errno_name(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr));
// Log some OS information which might explain why creating the thread failed.
log_info(os, thread)("Number of threads approx. running in the VM: %d", Threads::number_of_threads());
LogStream st(Log(os, thread)::info());
os::Posix::print_rlimit_info(&st);
os::print_memory_info(&st);
os::Linux::print_proc_sys_info(&st);
os::Linux::print_container_info(&st);
}
pthread_attr_destroy(&attr);
if (ret != 0) {
// Need to clean up stuff we've allocated so far
thread->set_osthread(NULL);
delete osthread;
return false;
}
// Store pthread info into the OSThread
osthread->set_pthread_id(tid);
// Wait until child thread is either initialized or aborted
{
Monitor* sync_with_child = osthread->startThread_lock();
MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
while ((state = osthread->get_state()) == ALLOCATED) {
sync_with_child->wait(Mutex::_no_safepoint_check_flag);
}
}
}
// Aborted due to thread limit being reached
if (state == ZOMBIE) {
thread->set_osthread(NULL);
delete osthread;
return false;
}
// The thread is returned suspended (in state INITIALIZED),
// and is started higher up in the call chain
assert(state == INITIALIZED, "race condition");
return true;
}
/////////////////////////////////////////////////////////////////////////////
// attach existing thread
// bootstrap the main thread
bool os::create_main_thread(JavaThread* thread) {
assert(os::Linux::_main_thread == pthread_self(), "should be called inside main thread");
return create_attached_thread(thread);
}
bool os::create_attached_thread(JavaThread* thread) {
#ifdef ASSERT
thread->verify_not_published();
#endif
// Allocate the OSThread object
OSThread* osthread = new OSThread(NULL, NULL);
if (osthread == NULL) {
return false;
}
// Store pthread info into the OSThread
osthread->set_thread_id(os::Linux::gettid());
osthread->set_pthread_id(::pthread_self());
// initialize floating point control register
os::Linux::init_thread_fpu_state();
// Initial thread state is RUNNABLE
osthread->set_state(RUNNABLE);
thread->set_osthread(osthread);
if (UseNUMA) {
int lgrp_id = os::numa_get_group_id();
if (lgrp_id != -1) {
thread->set_lgrp_id(lgrp_id);
}
}
if (os::is_primordial_thread()) {
// If current thread is primordial thread, its stack is mapped on demand,
// see notes about MAP_GROWSDOWN. Here we try to force kernel to map
// the entire stack region to avoid SEGV in stack banging.
// It is also useful to get around the heap-stack-gap problem on SuSE
// kernel (see 4821821 for details). We first expand stack to the top
// of yellow zone, then enable stack yellow zone (order is significant,
// enabling yellow zone first will crash JVM on SuSE Linux), so there
// is no gap between the last two virtual memory regions.
JavaThread *jt = (JavaThread *)thread;
address addr = jt->stack_reserved_zone_base();
assert(addr != NULL, "initialization problem?");
assert(jt->stack_available(addr) > 0, "stack guard should not be enabled");
osthread->set_expanding_stack();
os::Linux::manually_expand_stack(jt, addr);
osthread->clear_expanding_stack();
}
// initialize signal mask for this thread
// and save the caller's signal mask
os::Linux::hotspot_sigmask(thread);
log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) pthread_self());
return true;
}
void os::pd_start_thread(Thread* thread) {
OSThread * osthread = thread->osthread();
assert(osthread->get_state() != INITIALIZED, "just checking");
Monitor* sync_with_child = osthread->startThread_lock();
MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
sync_with_child->notify();
}
// Free Linux resources related to the OSThread
void os::free_thread(OSThread* osthread) {
assert(osthread != NULL, "osthread not set");
// We are told to free resources of the argument thread,
// but we can only really operate on the current thread.
assert(Thread::current()->osthread() == osthread,
"os::free_thread but not current thread");
#ifdef ASSERT
sigset_t current;
sigemptyset(&current);
pthread_sigmask(SIG_SETMASK, NULL, &current);
assert(!sigismember(&current, SR_signum), "SR signal should not be blocked!");
#endif
// Restore caller's signal mask
sigset_t sigmask = osthread->caller_sigmask();
pthread_sigmask(SIG_SETMASK, &sigmask, NULL);
delete osthread;
}
//////////////////////////////////////////////////////////////////////////////
// primordial thread
// Check if current thread is the primordial thread, similar to Solaris thr_main.
bool os::is_primordial_thread(void) {
if (suppress_primordial_thread_resolution) {
return false;
}
char dummy;
// If called before init complete, thread stack bottom will be null.
// Can be called if a fatal error occurs before initialization.
if (os::Linux::initial_thread_stack_bottom() == NULL) return false;
assert(os::Linux::initial_thread_stack_bottom() != NULL &&
os::Linux::initial_thread_stack_size() != 0,
"os::init did not locate primordial thread's stack region");
if ((address)&dummy >= os::Linux::initial_thread_stack_bottom() &&
(address)&dummy < os::Linux::initial_thread_stack_bottom() +
os::Linux::initial_thread_stack_size()) {
return true;
} else {
return false;
}
}
// Find the virtual memory area that contains addr
static bool find_vma(address addr, address* vma_low, address* vma_high) {
FILE *fp = fopen("/proc/self/maps", "r");
if (fp) {
address low, high;
while (!feof(fp)) {
if (fscanf(fp, "%p-%p", &low, &high) == 2) {
if (low <= addr && addr < high) {
if (vma_low) *vma_low = low;
if (vma_high) *vma_high = high;
fclose(fp);
return true;
}
}
for (;;) {
int ch = fgetc(fp);
if (ch == EOF || ch == (int)'\n') break;
}
}
fclose(fp);
}
return false;
}
// Locate primordial thread stack. This special handling of primordial thread stack
// is needed because pthread_getattr_np() on most (all?) Linux distros returns
// a bogus value for the primordial process thread. While the launcher has created
// the VM in a new thread since JDK 6, we still have to allow for the use of the
// JNI invocation API from a primordial thread.
void os::Linux::capture_initial_stack(size_t max_size) {
// max_size is either 0 (which means accept OS default for thread stacks) or
// a user-specified value known to be at least the minimum needed. If we
// are actually on the primordial thread we can make it appear that we have a
// smaller max_size stack by inserting the guard pages at that location. But we
// cannot do anything to emulate a larger stack than what has been provided by
// the OS or threading library. In fact if we try to use a stack greater than
// what is set by rlimit then we will crash the hosting process.
// Maximum stack size is the easy part, get it from RLIMIT_STACK.
// If this is "unlimited" then it will be a huge value.
struct rlimit rlim;
getrlimit(RLIMIT_STACK, &rlim);
size_t stack_size = rlim.rlim_cur;
// 6308388: a bug in ld.so will relocate its own .data section to the
// lower end of primordial stack; reduce ulimit -s value a little bit
// so we won't install guard page on ld.so's data section.
// But ensure we don't underflow the stack size - allow 1 page spare
if (stack_size >= (size_t)(3 * page_size())) {
stack_size -= 2 * page_size();
}
// Try to figure out where the stack base (top) is. This is harder.
//
// When an application is started, glibc saves the initial stack pointer in
// a global variable "__libc_stack_end", which is then used by system
// libraries. __libc_stack_end should be pretty close to stack top. The
// variable is available since the very early days. However, because it is
// a private interface, it could disappear in the future.
//
// Linux kernel saves start_stack information in /proc/<pid>/stat. Similar
// to __libc_stack_end, it is very close to stack top, but isn't the real
// stack top. Note that /proc may not exist if VM is running as a chroot
// program, so reading /proc/<pid>/stat could fail. Also the contents of
// /proc/<pid>/stat could change in the future (though unlikely).
//
// We try __libc_stack_end first. If that doesn't work, look for
// /proc/<pid>/stat. If neither of them works, we use current stack pointer
// as a hint, which should work well in most cases.
uintptr_t stack_start;
// try __libc_stack_end first
uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end");
if (p && *p) {
stack_start = *p;
} else {
// see if we can get the start_stack field from /proc/self/stat
FILE *fp;
int pid;
char state;
int ppid;
int pgrp;
int session;
int nr;
int tpgrp;
unsigned long flags;
unsigned long minflt;
unsigned long cminflt;
unsigned long majflt;
unsigned long cmajflt;
unsigned long utime;
unsigned long stime;
long cutime;
long cstime;
long prio;
long nice;
long junk;
long it_real;
uintptr_t start;
uintptr_t vsize;
intptr_t rss;
uintptr_t rsslim;
uintptr_t scodes;
uintptr_t ecode;
int i;
// Figure out what the primordial thread stack base is. Code is inspired
// by email from Hans Boehm. /proc/self/stat begins with current pid,
// followed by command name surrounded by parentheses, state, etc.
char stat[2048];
int statlen;
fp = fopen("/proc/self/stat", "r");
if (fp) {
statlen = fread(stat, 1, 2047, fp);
stat[statlen] = '\0';
fclose(fp);
// Skip pid and the command string. Note that we could be dealing with
// weird command names, e.g. the user could decide to rename the java launcher
// to "java 1.4.2 :)", then the stat file would look like
// 1234 (java 1.4.2 :)) R ... ...
// We don't really need to know the command string, just find the last
// occurrence of ")" and then start parsing from there. See bug 4726580.
char * s = strrchr(stat, ')');
i = 0;
if (s) {
// Skip blank chars
do { s++; } while (s && isspace(*s));
#define _UFM UINTX_FORMAT
#define _DFM INTX_FORMAT
// 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
// 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8
i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld " _UFM _UFM _DFM _UFM _UFM _UFM _UFM,
&state, // 3 %c
&ppid, // 4 %d
&pgrp, // 5 %d
&session, // 6 %d
&nr, // 7 %d
&tpgrp, // 8 %d
&flags, // 9 %lu
&minflt, // 10 %lu
&cminflt, // 11 %lu
&majflt, // 12 %lu
&cmajflt, // 13 %lu
&utime, // 14 %lu
&stime, // 15 %lu
&cutime, // 16 %ld
&cstime, // 17 %ld
&prio, // 18 %ld
&nice, // 19 %ld
&junk, // 20 %ld
&it_real, // 21 %ld
&start, // 22 UINTX_FORMAT
&vsize, // 23 UINTX_FORMAT
&rss, // 24 INTX_FORMAT
&rsslim, // 25 UINTX_FORMAT
&scodes, // 26 UINTX_FORMAT
&ecode, // 27 UINTX_FORMAT
&stack_start); // 28 UINTX_FORMAT
}
#undef _UFM
#undef _DFM
if (i != 28 - 2) {
assert(false, "Bad conversion from /proc/self/stat");
// product mode - assume we are the primordial thread, good luck in the
// embedded case.
warning("Can't detect primordial thread stack location - bad conversion");
stack_start = (uintptr_t) &rlim;
}
} else {
// For some reason we can't open /proc/self/stat (for example, running on
// FreeBSD with a Linux emulator, or inside chroot), this should work for
// most cases, so don't abort:
warning("Can't detect primordial thread stack location - no /proc/self/stat");
stack_start = (uintptr_t) &rlim;
}
}
// Now we have a pointer (stack_start) very close to the stack top; the
// next thing to do is to figure out the exact location of the stack top. We
// can find the virtual memory area that contains stack_start by
// reading /proc/self/maps; it should be the last vma in /proc/self/maps,
// and its upper limit is the real stack top. (Again, this would fail if
// running inside chroot, because /proc may not exist.)
uintptr_t stack_top;
address low, high;
if (find_vma((address)stack_start, &low, &high)) {
// success, "high" is the true stack top. (ignore "low", because initial
// thread stack grows on demand, its real bottom is high - RLIMIT_STACK.)
stack_top = (uintptr_t)high;
} else {
// failed, likely because /proc/self/maps does not exist
warning("Can't detect primordial thread stack location - find_vma failed");
// best effort: stack_start is normally within a few pages below the real
// stack top, use it as stack top, and reduce stack size so we won't put
// guard page outside stack.
stack_top = stack_start;
stack_size -= 16 * page_size();
}
// stack_top could be partially down the page so align it
stack_top = align_up(stack_top, page_size());
// Allowed stack value is minimum of max_size and what we derived from rlimit
if (max_size > 0) {
_initial_thread_stack_size = MIN2(max_size, stack_size);
} else {
// Accept the rlimit max, but if stack is unlimited then it will be huge, so
// clamp it at 8MB as we do on Solaris
_initial_thread_stack_size = MIN2(stack_size, 8*M);
}
_initial_thread_stack_size = align_down(_initial_thread_stack_size, page_size());
_initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size;
assert(_initial_thread_stack_bottom < (address)stack_top, "overflow!");
if (log_is_enabled(Info, os, thread)) {
// See if we seem to be on primordial process thread
bool primordial = uintptr_t(&rlim) > uintptr_t(_initial_thread_stack_bottom) &&
uintptr_t(&rlim) < stack_top;
log_info(os, thread)("Capturing initial stack in %s thread: req. size: " SIZE_FORMAT "K, actual size: "
SIZE_FORMAT "K, top=" INTPTR_FORMAT ", bottom=" INTPTR_FORMAT,
primordial ? "primordial" : "user", max_size / K, _initial_thread_stack_size / K,
stack_top, intptr_t(_initial_thread_stack_bottom));
}
}
////////////////////////////////////////////////////////////////////////////////
// time support
// Time since start-up in seconds to a fine granularity.
// Used by VMSelfDestructTimer and the MemProfiler.
double os::elapsedTime() {
return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution
}
jlong os::elapsed_counter() {
return javaTimeNanos() - initial_time_count;
}
jlong os::elapsed_frequency() {
return NANOSECS_PER_SEC; // nanosecond resolution
}
bool os::supports_vtime() { return true; }
bool os::enable_vtime() { return false; }
bool os::vtime_enabled() { return false; }
double os::elapsedVTime() {
struct rusage usage;
int retval = getrusage(RUSAGE_THREAD, &usage);
if (retval == 0) {
return (double) (usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) + (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000);
} else {
// better than nothing, but not much
return elapsedTime();
}
}
jlong os::javaTimeMillis() {
timeval time;
int status = gettimeofday(&time, NULL);
assert(status != -1, "linux error");
return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
}
void os::javaTimeSystemUTC(jlong &seconds, jlong &nanos) {
timeval time;
int status = gettimeofday(&time, NULL);
assert(status != -1, "linux error");
seconds = jlong(time.tv_sec);
nanos = jlong(time.tv_usec) * 1000;
}
#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC (1)
#endif
void os::Linux::clock_init() {
// We do dlopen's in this particular order due to a bug in the Linux
// dynamic loader (see 6348968) leading to a crash on exit.
void* handle = dlopen("librt.so.1", RTLD_LAZY);
if (handle == NULL) {
handle = dlopen("librt.so", RTLD_LAZY);
}
if (handle) {
int (*clock_getres_func)(clockid_t, struct timespec*) =
(int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres");
int (*clock_gettime_func)(clockid_t, struct timespec*) =
(int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime");
if (clock_getres_func && clock_gettime_func) {
// See if monotonic clock is supported by the kernel. Note that some
// early implementations simply return kernel jiffies (updated every
// 1/100 or 1/1000 second). It would be bad to use such a low res clock
// for nano time (though the monotonic property is still nice to have).
// It's fixed in newer kernels, however clock_getres() still returns
// 1/HZ. We check if clock_getres() works, but will ignore its reported
// resolution for now. Hopefully as people move to new kernels, this
// won't be a problem.
struct timespec res;
struct timespec tp;
if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 &&
clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) {
// yes, monotonic clock is supported
_clock_gettime = clock_gettime_func;
return;
} else {
// close librt if there is no monotonic clock
dlclose(handle);
}
}
}
warning("No monotonic clock was available - timed services may " \
"be adversely affected if the time-of-day clock changes");
}
#ifndef SYS_clock_getres
#if defined(X86) || defined(PPC64) || defined(S390)
#define SYS_clock_getres AMD64_ONLY(229) IA32_ONLY(266) PPC64_ONLY(247) S390_ONLY(261)
#define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y)
#else
#warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time"
#define sys_clock_getres(x,y) -1
#endif
#else
#define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y)
#endif
void os::Linux::fast_thread_clock_init() {
if (!UseLinuxPosixThreadCPUClocks) {
return;
}
clockid_t clockid;
struct timespec tp;
int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) =
(int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid");
// Switch to using fast clocks for thread cpu time if
// sys_clock_getres() returns a 0 error code.
// Note that some kernels may support the current thread
// clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks
// returned by pthread_getcpuclockid().
// If the fast POSIX clocks are supported then sys_clock_getres()
// must return at least tp.tv_sec == 0, which means a resolution
// better than 1 sec. This is an extra check for reliability.
if (pthread_getcpuclockid_func &&
pthread_getcpuclockid_func(_main_thread, &clockid) == 0 &&
sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) {
_supports_fast_thread_cpu_time = true;
_pthread_getcpuclockid = pthread_getcpuclockid_func;
}
}
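// Illustration (hypothetical helper, not part of this file): once the fast
// path is enabled, a thread's CPU time can be read by resolving its clockid
// and querying the per-thread CPU-time clock. This assumes the
// os::Linux::pthread_getcpuclockid and os::Linux::clock_gettime accessors
// declared in os_linux.hpp.
static jlong example_fast_thread_cpu_time_ns(pthread_t pt) {
  clockid_t clockid;
  struct timespec tp;
  if (os::Linux::pthread_getcpuclockid(pt, &clockid) != 0 ||
      os::Linux::clock_gettime(clockid, &tp) != 0) {
    return -1; // caller falls back to a slower mechanism
  }
  return jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
}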
jlong os::javaTimeNanos() {
if (os::supports_monotonic_clock()) {
struct timespec tp;
int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp);
assert(status == 0, "gettime error");
jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
return result;
} else {
timeval time;
int status = gettimeofday(&time, NULL);
assert(status != -1, "linux error");
jlong usecs = jlong(time.tv_sec) * (1000 * 1000) + jlong(time.tv_usec);
return 1000 * usecs;
}
}
void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
if (os::supports_monotonic_clock()) {
info_ptr->max_value = ALL_64_BITS;
// CLOCK_MONOTONIC - amount of time since some arbitrary point in the past
info_ptr->may_skip_backward = false; // not subject to resetting or drifting
info_ptr->may_skip_forward = false; // not subject to resetting or drifting
} else {
// gettimeofday is based on time in seconds since the Epoch and thus does not wrap
info_ptr->max_value = ALL_64_BITS;
// gettimeofday is a real time clock so it skips
info_ptr->may_skip_backward = true;
info_ptr->may_skip_forward = true;
}
info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time
}
// Return the real, user, and system times in seconds from an
// arbitrary fixed point in the past.
bool os::getTimesSecs(double* process_real_time,
double* process_user_time,
double* process_system_time) {
struct tms ticks;
clock_t real_ticks = times(&ticks);
if (real_ticks == (clock_t) (-1)) {
return false;
} else {
double ticks_per_second = (double) clock_tics_per_sec;
*process_user_time = ((double) ticks.tms_utime) / ticks_per_second;
*process_system_time = ((double) ticks.tms_stime) / ticks_per_second;
*process_real_time = ((double) real_ticks) / ticks_per_second;
return true;
}
}
char * os::local_time_string(char *buf, size_t buflen) {
struct tm t;
time_t long_time;
time(&long_time);
localtime_r(&long_time, &t);
jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d",
t.tm_year + 1900, t.tm_mon + 1, t.tm_mday,
t.tm_hour, t.tm_min, t.tm_sec);
return buf;
}
struct tm* os::localtime_pd(const time_t* clock, struct tm* res) {
return localtime_r(clock, res);
}
////////////////////////////////////////////////////////////////////////////////
// runtime exit support
// Note: os::shutdown() might be called very early during initialization, or
// called from signal handler. Before adding something to os::shutdown(), make
// sure it is async-safe and can handle partially initialized VM.
void os::shutdown() {
// allow PerfMemory to attempt cleanup of any persistent resources
perfMemory_exit();
// needs to remove object in file system
AttachListener::abort();
// flush buffered output, finish log files
ostream_abort();
// Check for abort hook
abort_hook_t abort_hook = Arguments::abort_hook();
if (abort_hook != NULL) {
abort_hook();
}
}
// Note: os::abort() might be called very early during initialization, or
// called from signal handler. Before adding something to os::abort(), make
// sure it is async-safe and can handle partially initialized VM.
void os::abort(bool dump_core, void* siginfo, const void* context) {
os::shutdown();
if (dump_core) {
if (DumpPrivateMappingsInCore) {
ClassLoader::close_jrt_image();
}
#ifndef PRODUCT
fdStream out(defaultStream::output_fd());
out.print_raw("Current thread is ");
char buf[16];
jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
out.print_raw_cr(buf);
out.print_raw_cr("Dumping core ...");
#endif
::abort(); // dump core
}
::exit(1);
}
// Die immediately, no exit hook, no abort hook, no cleanup.
void os::die() {
::abort();
}
// This method is a copy of JDK's sysGetLastErrorString
// from src/solaris/hpi/src/system_md.c
size_t os::lasterror(char *buf, size_t len) {
if (errno == 0) return 0;
const char *s = os::strerror(errno);
size_t n = ::strlen(s);
if (n >= len) {
n = len - 1;
}
::strncpy(buf, s, n);
buf[n] = '\0';
return n;
}
// thread_id is kernel thread id (similar to Solaris LWP id)
intx os::current_thread_id() { return os::Linux::gettid(); }
int os::current_process_id() {
return ::getpid();
}
// DLL functions
const char* os::dll_file_extension() { return ".so"; }
// This must be hard-coded because it's the system's temporary
// directory, not the java application's temp directory, a la java.io.tmpdir.
const char* os::get_temp_directory() { return "/tmp"; }
static bool file_exists(const char* filename) {
struct stat statbuf;
if (filename == NULL || strlen(filename) == 0) {
return false;
}
return os::stat(filename, &statbuf) == 0;
}
// check if addr is inside libjvm.so
bool os::address_is_in_vm(address addr) {
static address libjvm_base_addr;
Dl_info dlinfo;
if (libjvm_base_addr == NULL) {
if (dladdr(CAST_FROM_FN_PTR(void *, os::address_is_in_vm), &dlinfo) != 0) {
libjvm_base_addr = (address)dlinfo.dli_fbase;
}
assert(libjvm_base_addr != NULL, "Cannot obtain base address for libjvm");
}
if (dladdr((void *)addr, &dlinfo) != 0) {
if (libjvm_base_addr == (address)dlinfo.dli_fbase) return true;
}
return false;
}
bool os::dll_address_to_function_name(address addr, char *buf,
int buflen, int *offset,
bool demangle) {
// buf is not optional, but offset is optional
assert(buf != NULL, "sanity check");
Dl_info dlinfo;
if (dladdr((void*)addr, &dlinfo) != 0) {
// see if we have a matching symbol
if (dlinfo.dli_saddr != NULL && dlinfo.dli_sname != NULL) {
if (!(demangle && Decoder::demangle(dlinfo.dli_sname, buf, buflen))) {
jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname);
}
if (offset != NULL) *offset = addr - (address)dlinfo.dli_saddr;
return true;
}
// no matching symbol so try for just file info
if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != NULL) {
if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
buf, buflen, offset, dlinfo.dli_fname, demangle)) {
return true;
}
}
}
buf[0] = '\0';
if (offset != NULL) *offset = -1;
return false;
}
struct _address_to_library_name {
address addr; // input : memory address
size_t buflen; // size of fname
char* fname; // output: library name
address base; // library base addr
};
static int address_to_library_name_callback(struct dl_phdr_info *info,
size_t size, void *data) {
int i;
bool found = false;
address libbase = NULL;
struct _address_to_library_name * d = (struct _address_to_library_name *)data;
// iterate through all loadable segments
for (i = 0; i < info->dlpi_phnum; i++) {
address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
if (info->dlpi_phdr[i].p_type == PT_LOAD) {
// base address of a library is the lowest address of its loaded
// segments.
if (libbase == NULL || libbase > segbase) {
libbase = segbase;
}
// see if 'addr' is within current segment
if (segbase <= d->addr &&
d->addr < segbase + info->dlpi_phdr[i].p_memsz) {
found = true;
}
}
}
// dlpi_name is NULL or empty if the ELF file is the executable; return 0
// so dll_address_to_library_name() can fall through to use dladdr(), which
// can figure out the executable name from argv[0].
if (found && info->dlpi_name && info->dlpi_name[0]) {
d->base = libbase;
if (d->fname) {
jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name);
}
return 1;
}
return 0;
}
bool os::dll_address_to_library_name(address addr, char* buf,
int buflen, int* offset) {
// buf is not optional, but offset is optional
assert(buf != NULL, "sanity check");
Dl_info dlinfo;
struct _address_to_library_name data;
// There is a bug in the old glibc dladdr() implementation where it could resolve
// to the wrong library name if the .so file has a base address != NULL. Here
// we iterate through the program headers of all loaded libraries to find
// out which library 'addr' really belongs to. This workaround can be
// removed once the minimum requirement for glibc is moved to 2.3.x.
data.addr = addr;
data.fname = buf;
data.buflen = buflen;
data.base = NULL;
int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data);
if (rslt) {
// buf already contains library name
if (offset) *offset = addr - data.base;
return true;
}
if (dladdr((void*)addr, &dlinfo) != 0) {
if (dlinfo.dli_fname != NULL) {
jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname);
}
if (dlinfo.dli_fbase != NULL && offset != NULL) {
*offset = addr - (address)dlinfo.dli_fbase;
}
return true;
}
buf[0] = '\0';
if (offset) *offset = -1;
return false;
}
// Loads a .dll/.so and, in case of error, checks whether the .dll/.so was
// built for the same architecture as HotSpot is running on.
// Remember the stack's state. The Linux dynamic linker will change
// the stack to 'executable' at most once, so we must safepoint only once.
bool os::Linux::_stack_is_executable = false;
// VM operation that loads a library. This is necessary if stack protection
// of the Java stacks can be lost while loading the library. If we
// do not stop the Java threads, they can stack overflow before the stacks
// are protected again.
class VM_LinuxDllLoad: public VM_Operation {
private:
const char *_filename;
char *_ebuf;
int _ebuflen;
void *_lib;
public:
VM_LinuxDllLoad(const char *fn, char *ebuf, int ebuflen) :
_filename(fn), _ebuf(ebuf), _ebuflen(ebuflen), _lib(NULL) {}
VMOp_Type type() const { return VMOp_LinuxDllLoad; }
void doit() {
_lib = os::Linux::dll_load_in_vmthread(_filename, _ebuf, _ebuflen);
os::Linux::_stack_is_executable = true;
}
void* loaded_library() { return _lib; }
};
void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
void * result = NULL;
bool load_attempted = false;
log_info(os)("attempting shared library load of %s", filename);
// Check whether the library to load might change execution rights
// of the stack. If they are changed, the protection of the stack
// guard pages will be lost. We need a safepoint to fix this.
//
// See Linux man page execstack(8) for more info.
if (os::uses_stack_guard_pages() && !os::Linux::_stack_is_executable) {
if (!ElfFile::specifies_noexecstack(filename)) {
if (!is_init_completed()) {
os::Linux::_stack_is_executable = true;
// This is OK - No Java threads have been created yet, and hence no
// stack guard pages to fix.
//
// Dynamic loader will make all stacks executable after
// this function returns, and will not do that again.
assert(Threads::number_of_threads() == 0, "no Java threads should exist yet.");
} else {
warning("You have loaded library %s which might have disabled stack guard. "
"The VM will try to fix the stack guard now.\n"
"It's highly recommended that you fix the library with "
"'execstack -c <libfile>', or link it with '-z noexecstack'.",
filename);
assert(Thread::current()->is_Java_thread(), "must be Java thread");
JavaThread *jt = JavaThread::current();
if (jt->thread_state() != _thread_in_native) {
// This happens when a compiler thread tries to load a hsdis-<arch>.so file
// that requires ExecStack. Cannot enter safe point. Let's give up.
warning("Unable to fix stack guard. Giving up.");
} else {
if (!LoadExecStackDllInVMThread) {
// This is for the case where the DLL has a static
// constructor function that executes JNI code. We cannot
// load such DLLs in the VMThread.
result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
}
ThreadInVMfromNative tiv(jt);
debug_only(VMNativeEntryWrapper vew;)
VM_LinuxDllLoad op(filename, ebuf, ebuflen);
VMThread::execute(&op);
if (LoadExecStackDllInVMThread) {
result = op.loaded_library();
}
load_attempted = true;
}
}
}
}
if (!load_attempted) {
result = os::Linux::dlopen_helper(filename, ebuf, ebuflen);
}
if (result != NULL) {
// Successful loading
return result;
}
Elf32_Ehdr elf_head;
int diag_msg_max_length = ebuflen - strlen(ebuf);
char* diag_msg_buf = ebuf + strlen(ebuf);
if (diag_msg_max_length == 0) {
// No more space in ebuf for additional diagnostics message
return NULL;
}
int file_descriptor = ::open(filename, O_RDONLY | O_NONBLOCK);
if (file_descriptor < 0) {
// Can't open library, report dlerror() message
return NULL;
}
bool failed_to_read_elf_head =
(sizeof(elf_head) !=
(::read(file_descriptor, &elf_head, sizeof(elf_head))));
::close(file_descriptor);
if (failed_to_read_elf_head) {
// file i/o error - report dlerror() msg
return NULL;
}
typedef struct {
Elf32_Half code; // Actual value as defined in elf.h
Elf32_Half compat_class; // Compatibility class of the arch from the VM's point of view
unsigned char elf_class; // 32 or 64 bit
unsigned char endianness; // MSB or LSB
char* name; // String representation
} arch_t;
#ifndef EM_486
#define EM_486 6 /* Intel 80486 */
#endif
#ifndef EM_AARCH64
#define EM_AARCH64 183 /* ARM AARCH64 */
#endif
static const arch_t arch_array[] = {
{EM_386, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
{EM_486, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"},
{EM_IA_64, EM_IA_64, ELFCLASS64, ELFDATA2LSB, (char*)"IA 64"},
{EM_X86_64, EM_X86_64, ELFCLASS64, ELFDATA2LSB, (char*)"AMD 64"},
{EM_SPARC, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
{EM_SPARC32PLUS, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"},
{EM_SPARCV9, EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
{EM_PPC, EM_PPC, ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
#if defined(VM_LITTLE_ENDIAN)
{EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64 LE"},
{EM_SH, EM_SH, ELFCLASS32, ELFDATA2LSB, (char*)"SuperH"},
#else
{EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
{EM_SH, EM_SH, ELFCLASS32, ELFDATA2MSB, (char*)"SuperH BE"},
#endif
{EM_ARM, EM_ARM, ELFCLASS32, ELFDATA2LSB, (char*)"ARM"},
{EM_S390, EM_S390, ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
{EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"},
{EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"},
{EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"},
{EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
{EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
{EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"},
};
#if (defined IA32)
static Elf32_Half running_arch_code=EM_386;
#elif (defined AMD64)
static Elf32_Half running_arch_code=EM_X86_64;
#elif (defined IA64)
static Elf32_Half running_arch_code=EM_IA_64;
#elif (defined __sparc) && (defined _LP64)
static Elf32_Half running_arch_code=EM_SPARCV9;
#elif (defined __sparc) && (!defined _LP64)
static Elf32_Half running_arch_code=EM_SPARC;
#elif (defined __powerpc64__)
static Elf32_Half running_arch_code=EM_PPC64;
#elif (defined __powerpc__)
static Elf32_Half running_arch_code=EM_PPC;
#elif (defined AARCH64)
static Elf32_Half running_arch_code=EM_AARCH64;
#elif (defined ARM)
static Elf32_Half running_arch_code=EM_ARM;
#elif (defined S390)
static Elf32_Half running_arch_code=EM_S390;
#elif (defined ALPHA)
static Elf32_Half running_arch_code=EM_ALPHA;
#elif (defined MIPSEL)
static Elf32_Half running_arch_code=EM_MIPS_RS3_LE;
#elif (defined PARISC)
static Elf32_Half running_arch_code=EM_PARISC;
#elif (defined MIPS)
static Elf32_Half running_arch_code=EM_MIPS;
#elif (defined M68K)
static Elf32_Half running_arch_code=EM_68K;
#elif (defined SH)
static Elf32_Half running_arch_code=EM_SH;
#else
#error Method os::dll_load requires that one of following is defined:\
AARCH64, ALPHA, ARM, AMD64, IA32, IA64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, S390, SH, __sparc
#endif
// Identify the compatibility class of the VM's architecture and the
// library's architecture, and obtain string descriptions for both.
arch_t lib_arch = {elf_head.e_machine, 0, elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL};
int running_arch_index = -1;
for (unsigned int i = 0; i < ARRAY_SIZE(arch_array); i++) {
if (running_arch_code == arch_array[i].code) {
running_arch_index = i;
}
if (lib_arch.code == arch_array[i].code) {
lib_arch.compat_class = arch_array[i].compat_class;
lib_arch.name = arch_array[i].name;
}
}
assert(running_arch_index != -1,
"Didn't find running architecture code (running_arch_code) in arch_array");
if (running_arch_index == -1) {
// Even though detection of the running architecture failed,
// we can still continue and report the dlerror() message.
return NULL;
}
if (lib_arch.endianness != arch_array[running_arch_index].endianness) {
::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)");
return NULL;
}
#ifndef S390
if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) {
::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)");
return NULL;
}
#endif // !S390
if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) {
if (lib_arch.name!=NULL) {
::snprintf(diag_msg_buf, diag_msg_max_length-1,
" (Possible cause: can't load %s-bit .so on a %s-bit platform)",
lib_arch.name, arch_array[running_arch_index].name);
} else {
::snprintf(diag_msg_buf, diag_msg_max_length-1,
" (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)",
lib_arch.code,
arch_array[running_arch_index].name);
}
}
return NULL;
}
void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
int ebuflen) {
void * result = ::dlopen(filename, RTLD_LAZY);
if (result == NULL) {
const char* error_report = ::dlerror();
if (error_report == NULL) {
error_report = "dlerror returned no error description";
}
if (ebuf != NULL && ebuflen > 0) {
::strncpy(ebuf, error_report, ebuflen-1);
ebuf[ebuflen-1]='\0';
}
Events::log(NULL, "Loading shared library %s failed, %s", filename, error_report);
log_info(os)("shared library load of %s failed, %s", filename, error_report);
} else {
Events::log(NULL, "Loaded shared library %s", filename);
log_info(os)("shared library load of %s was successful", filename);
}
return result;
}
void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf,
int ebuflen) {
void * result = NULL;
if (LoadExecStackDllInVMThread) {
result = dlopen_helper(filename, ebuf, ebuflen);
}
// Since 7019808, libjvm.so is linked with -noexecstack. If the VM loads a
// library that requires an executable stack, or which does not have this
// stack attribute set, dlopen changes the stack attribute to executable. The
// read protection of the guard pages gets lost.
//
// Need to check _stack_is_executable again as multiple VM_LinuxDllLoad
// may have been queued at the same time.
if (!_stack_is_executable) {
for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) {
if (!jt->stack_guard_zone_unused() && // Stack not yet fully initialized
jt->stack_guards_enabled()) { // No pending stack overflow exceptions
if (!os::guard_memory((char *)jt->stack_end(), jt->stack_guard_zone_size())) {
warning("Attempt to reguard stack yellow zone failed.");
}
}
}
}
return result;
}
void* os::dll_lookup(void* handle, const char* name) {
void* res = dlsym(handle, name);
return res;
}
void* os::get_default_process_handle() {
return (void*)::dlopen(NULL, RTLD_LAZY);
}
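// Per POSIX, dlopen(NULL, ...) returns a handle for the main program, so a
// subsequent lookup searches the global symbol scope. Illustrative use
// (hypothetical symbol name):
//
//   void* h = os::get_default_process_handle();
//   void* f = os::dll_lookup(h, "some_exported_symbol");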
static bool _print_ascii_file(const char* filename, outputStream* st, const char* hdr = NULL) {
int fd = ::open(filename, O_RDONLY);
if (fd == -1) {
return false;
}
if (hdr != NULL) {
st->print_cr("%s", hdr);
}
char buf[33];
int bytes;
buf[32] = '\0';
while ((bytes = ::read(fd, buf, sizeof(buf)-1)) > 0) {
st->print_raw(buf, bytes);
}
::close(fd);
return true;
}
void os::print_dll_info(outputStream *st) {
st->print_cr("Dynamic libraries:");
char fname[32];
pid_t pid = os::Linux::gettid();
jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid);
if (!_print_ascii_file(fname, st)) {
st->print("Can not get library information for pid = %d\n", pid);
}
}
int os::get_loaded_modules_info(os::LoadedModulesCallbackFunc callback, void *param) {
FILE *procmapsFile = NULL;
// Open the procfs maps file for the current process
if ((procmapsFile = fopen("/proc/self/maps", "r")) != NULL) {
// Allocate PATH_MAX for file name plus a reasonable size for other fields.
char line[PATH_MAX + 100];
// Read line by line from 'file'
while (fgets(line, sizeof(line), procmapsFile) != NULL) {
u8 base, top, offset, inode;
char permissions[5];
char device[6];
char name[sizeof(line)];
// Parse fields from line
int matches = sscanf(line, UINT64_FORMAT_X "-" UINT64_FORMAT_X " %4s " UINT64_FORMAT_X " %5s " INT64_FORMAT " %s",
&base, &top, permissions, &offset, device, &inode, name);
// the last entry 'name' is empty for some entries, so we might have 6 matches instead of 7 for some lines
if (matches < 6) continue;
if (matches == 6) name[0] = '\0';
// Filter by device id '00:00' so that we only get file system mapped files.
if (strcmp(device, "00:00") != 0) {
// Call callback with the fields of interest
if (callback(name, (address)base, (address)top, param)) {
// Callback requested abort
fclose(procmapsFile);
return 1;
}
}
}
fclose(procmapsFile);
}
return 0;
}
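// For reference, a typical /proc/self/maps line that the sscanf above parses
// (illustrative values):
//
//   7f0123456000-7f0123478000 r-xp 00000000 08:01 1048603   /usr/lib/libfoo.so
//   base         top          perm offset   dev   inode     name (optional)
//
// Entries whose device field is "00:00" are anonymous mappings and are
// filtered out above.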
void os::print_os_info_brief(outputStream* st) {
os::Linux::print_distro_info(st);
os::Posix::print_uname_info(st);
os::Linux::print_libversion_info(st);
}
void os::print_os_info(outputStream* st) {
st->print("OS:");
os::Linux::print_distro_info(st);
os::Posix::print_uname_info(st);
os::Linux::print_uptime_info(st);
// Print warning if unsafe chroot environment detected
if (unsafe_chroot_detected) {
st->print("WARNING!! ");
st->print_cr("%s", unstable_chroot_error);
}
os::Linux::print_libversion_info(st);
os::Posix::print_rlimit_info(st);
os::Posix::print_load_average(st);
os::Linux::print_full_memory_info(st);
os::Linux::print_proc_sys_info(st);
os::Linux::print_ld_preload_file(st);
os::Linux::print_container_info(st);
VM_Version::print_platform_virtualization_info(st);
os::Linux::print_steal_info(st);
}
// Try to identify popular distros.
// Most Linux distributions have a /etc/XXX-release file, which contains
// the OS version string. Newer Linux distributions have a /etc/lsb-release
// file that also contains the OS version string. Some have more than one
// /etc/XXX-release file (e.g. Mandrake has both /etc/mandrake-release and
// /etc/redhat-release), so the order is important.
// Any Linux that is based on Red Hat (e.g. Oracle, Mandrake, Sun JDS...) has
// its own specific XXX-release file as well as a redhat-release file.
// Because of this the XXX-release file needs to be searched for before the
// redhat-release file.
// Since Red Hat and SuSE have an lsb-release file that is not very descriptive,
// the search for redhat-release / SuSE-release needs to come before lsb-release.
// Since the lsb-release file is the newer standard, it needs to be searched
// before the older style release files.
// Searching system-release (Red Hat) and os-release (other Linuxes) is the
// next-to-last resort. The os-release file is a new standard that contains
// distribution information, while the system-release file seems to be an old
// standard that has been replaced by the lsb-release and os-release files.
// Searching for the debian_version file is the last resort. It contains
// an informative string like "6.0.6" or "wheezy/sid". Because of this,
// "Debian " is printed before the contents of the debian_version file.
const char* distro_files[] = {
"/etc/oracle-release",
"/etc/mandriva-release",
"/etc/mandrake-release",
"/etc/sun-release",
"/etc/redhat-release",
"/etc/SuSE-release",
"/etc/lsb-release",
"/etc/turbolinux-release",
"/etc/gentoo-release",
"/etc/ltib-release",
"/etc/angstrom-version",
"/etc/system-release",
"/etc/os-release",
NULL };
void os::Linux::print_distro_info(outputStream* st) {
for (int i = 0;; i++) {
const char* file = distro_files[i];
if (file == NULL) {
break; // done
}
// If file prints, we found it.
if (_print_ascii_file(file, st)) {
return;
}
}
if (file_exists("/etc/debian_version")) {
st->print("Debian ");
_print_ascii_file("/etc/debian_version", st);
} else {
st->print("Linux");
}
st->cr();
}
static void parse_os_info_helper(FILE* fp, char* distro, size_t length, bool get_first_line) {
char buf[256] = {0}; // zero-initialize so the fallback below is safe even for an empty file
while (fgets(buf, sizeof(buf), fp)) {
// Look for the expected distro-description keys and strip the surrounding decoration
if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL || strstr(buf, "PRETTY_NAME=") != NULL) {
char* ptr = strstr(buf, "\""); // the name is in quotes
if (ptr != NULL) {
ptr++; // go beyond first quote
char* nl = strchr(ptr, '\"');
if (nl != NULL) *nl = '\0';
strncpy(distro, ptr, length);
} else {
ptr = strstr(buf, "=");
ptr++; // go beyond equals then
char* nl = strchr(ptr, '\n');
if (nl != NULL) *nl = '\0';
strncpy(distro, ptr, length);
}
return;
} else if (get_first_line) {
char* nl = strchr(buf, '\n');
if (nl != NULL) *nl = '\0';
strncpy(distro, buf, length);
return;
}
}
// No recognized key was found (and get_first_line was false):
// fall back to the last line read.
char* nl = strchr(buf, '\n');
if (nl != NULL) *nl = '\0';
strncpy(distro, buf, length);
}
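// Example inputs the helper above recognizes (illustrative values):
//
//   DISTRIB_DESCRIPTION="Ubuntu 18.04 LTS"      (/etc/lsb-release style)
//   PRETTY_NAME="Fedora 30 (Workstation)"       (/etc/os-release style)
//
// The quoted name is copied into 'distro'; for unquoted values everything
// after '=' up to the newline is used instead.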
static void parse_os_info(char* distro, size_t length, const char* file) {
FILE* fp = fopen(file, "r");
if (fp != NULL) {
// if suse format, print out first line
bool get_first_line = (strcmp(file, "/etc/SuSE-release") == 0);
parse_os_info_helper(fp, distro, length, get_first_line);
fclose(fp);
}
}
void os::get_summary_os_info(char* buf, size_t buflen) {
for (int i = 0;; i++) {
const char* file = distro_files[i];
if (file == NULL) {
break; // ran out of distro_files
}
if (file_exists(file)) {
parse_os_info(buf, buflen, file);
return;
}
}
// special case for debian
if (file_exists("/etc/debian_version")) {
strncpy(buf, "Debian ", buflen);
if (buflen > 7) {
parse_os_info(&buf[7], buflen-7, "/etc/debian_version");
}
} else {
strncpy(buf, "Linux", buflen);
}
}
void os::Linux::print_libversion_info(outputStream* st) {
// libc, pthread
st->print("libc:");
st->print("%s ", os::Linux::glibc_version());
st->print("%s ", os::Linux::libpthread_version());
st->cr();
}
void os::Linux::print_proc_sys_info(outputStream* st) {
st->cr();
st->print_cr("/proc/sys/kernel/threads-max (system-wide limit on the number of threads):");
_print_ascii_file("/proc/sys/kernel/threads-max", st);
st->cr();
st->cr();
st->print_cr("/proc/sys/vm/max_map_count (maximum number of memory map areas a process may have):");
_print_ascii_file("/proc/sys/vm/max_map_count", st);
st->cr();
st->cr();
st->print_cr("/proc/sys/kernel/pid_max (system-wide limit on number of process identifiers):");
_print_ascii_file("/proc/sys/kernel/pid_max", st);
st->cr();
st->cr();
}
void os::Linux::print_full_memory_info(outputStream* st) {
st->print("\n/proc/meminfo:\n");
_print_ascii_file("/proc/meminfo", st);
st->cr();
}
void os::Linux::print_ld_preload_file(outputStream* st) {
_print_ascii_file("/etc/ld.so.preload", st, "\n/etc/ld.so.preload:");
st->cr();
}
void os::Linux::print_uptime_info(outputStream* st) {
struct sysinfo sinfo;
int ret = sysinfo(&sinfo);
if (ret == 0) {
os::print_dhm(st, "OS uptime:", (long) sinfo.uptime);
}
}
void os::Linux::print_container_info(outputStream* st) {
if (!OSContainer::is_containerized()) {
return;
}
st->print("container (cgroup) information:\n");
const char *p_ct = OSContainer::container_type();
st->print("container_type: %s\n", p_ct != NULL ? p_ct : "failed");
char *p = OSContainer::cpu_cpuset_cpus();
st->print("cpu_cpuset_cpus: %s\n", p != NULL ? p : "failed");
free(p);
p = OSContainer::cpu_cpuset_memory_nodes();
st->print("cpu_memory_nodes: %s\n", p != NULL ? p : "failed");
free(p);
int i = OSContainer::active_processor_count();
if (i > 0) {
st->print("active_processor_count: %d\n", i);
} else {
st->print("active_processor_count: failed\n");
}
i = OSContainer::cpu_quota();
st->print("cpu_quota: %d\n", i);
i = OSContainer::cpu_period();
st->print("cpu_period: %d\n", i);
i = OSContainer::cpu_shares();
st->print("cpu_shares: %d\n", i);
jlong j = OSContainer::memory_limit_in_bytes();
st->print("memory_limit_in_bytes: " JLONG_FORMAT "\n", j);
j = OSContainer::memory_and_swap_limit_in_bytes();
st->print("memory_and_swap_limit_in_bytes: " JLONG_FORMAT "\n", j);
j = OSContainer::memory_soft_limit_in_bytes();
st->print("memory_soft_limit_in_bytes: " JLONG_FORMAT "\n", j);
j = OSContainer::memory_usage_in_bytes();
st->print("memory_usage_in_bytes: " JLONG_FORMAT "\n", j);
j = OSContainer::memory_max_usage_in_bytes();
st->print("memory_max_usage_in_bytes: " JLONG_FORMAT "\n", j);
st->cr();
}
void os::Linux::print_steal_info(outputStream* st) {
if (has_initial_tick_info) {
CPUPerfTicks pticks;
bool res = os::Linux::get_tick_information(&pticks, -1);
if (res && pticks.has_steal_ticks) {
uint64_t steal_ticks_difference = pticks.steal - initial_steal_ticks;
uint64_t total_ticks_difference = pticks.total - initial_total_ticks;
double steal_ticks_perc = 0.0;
if (total_ticks_difference != 0) {
steal_ticks_perc = (double) steal_ticks_difference / total_ticks_difference;
}
st->print_cr("Steal ticks since vm start: " UINT64_FORMAT, steal_ticks_difference);
st->print_cr("Steal ticks percentage since vm start:%7.3f", steal_ticks_perc);
}
}
}
void os::print_memory_info(outputStream* st) {
st->print("Memory:");
st->print(" %dk page", os::vm_page_size()>>10);
// values in struct sysinfo are "unsigned long"
struct sysinfo si;
sysinfo(&si);
st->print(", physical " UINT64_FORMAT "k",
os::physical_memory() >> 10);
st->print("(" UINT64_FORMAT "k free)",
os::available_memory() >> 10);
st->print(", swap " UINT64_FORMAT "k",
((jlong)si.totalswap * si.mem_unit) >> 10);
st->print("(" UINT64_FORMAT "k free)",
((jlong)si.freeswap * si.mem_unit) >> 10);
st->cr();
}
// Print the first "model name" line and the first "flags" line
// that we find and nothing more. We assume "model name" comes
// before "flags" so if we find a second "model name", then the
// "flags" field is considered missing.
static bool print_model_name_and_flags(outputStream* st, char* buf, size_t buflen) {
#if defined(IA32) || defined(AMD64)
// Other platforms have less repetitive cpuinfo files
FILE *fp = fopen("/proc/cpuinfo", "r");
if (fp) {
// Assume model name comes before flags, and track whether we have printed
// it, so that a second "model name" line (meaning flags were missed)
// ends the scan.
bool model_name_printed = false;
while (!feof(fp)) {
if (fgets(buf, buflen, fp)) {
if (strstr(buf, "model name") != NULL) {
if (!model_name_printed) {
st->print_raw("CPU Model and flags from /proc/cpuinfo:\n");
st->print_raw(buf);
model_name_printed = true;
} else {
// model name printed but not flags? Odd, just return
fclose(fp);
return true;
}
}
// print the flags line too
if (strstr(buf, "flags") != NULL) {
st->print_raw(buf);
fclose(fp);
return true;
}
}
}
fclose(fp);
}
#endif // x86 platforms
return false;
}
void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) {
// Only print the model name if the platform provides this as a summary
if (!print_model_name_and_flags(st, buf, buflen)) {
st->print("\n/proc/cpuinfo:\n");
if (!_print_ascii_file("/proc/cpuinfo", st)) {
st->print_cr(" <Not Available>");
}
}
}
#if defined(AMD64) || defined(IA32) || defined(X32)
const char* search_string = "model name";
#elif defined(M68K)
const char* search_string = "CPU";
#elif defined(PPC64)
const char* search_string = "cpu";
#elif defined(S390)
const char* search_string = "machine =";
#elif defined(SPARC)
const char* search_string = "cpu";
#else
const char* search_string = "Processor";
#endif
// Parses the cpuinfo file for the string representing the model name.
void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
FILE* fp = fopen("/proc/cpuinfo", "r");
if (fp != NULL) {
while (!feof(fp)) {
char buf[256];
if (fgets(buf, sizeof(buf), fp)) {
char* start = strstr(buf, search_string);
if (start != NULL) {
char *ptr = start + strlen(search_string);
char *end = buf + strlen(buf);
while (ptr != end) {
// skip whitespace and the colon separator that precede the value.
if (*ptr != ' ' && *ptr != '\t' && *ptr != ':') {
break;
}
ptr++;
}
if (ptr != end) {
// reasonable string, get rid of newline and keep the rest
char* nl = strchr(buf, '\n');
if (nl != NULL) *nl = '\0';
strncpy(cpuinfo, ptr, length);
fclose(fp);
return;
}
}
}
}
fclose(fp);
}
// cpuinfo not found or parsing failed, just print generic string. The entire
// /proc/cpuinfo file will be printed later in the file (or enough of it for x86)
#if defined(AARCH64)
strncpy(cpuinfo, "AArch64", length);
#elif defined(AMD64)
strncpy(cpuinfo, "x86_64", length);
#elif defined(ARM) // Order wrt. AARCH64 is relevant!
strncpy(cpuinfo, "ARM", length);
#elif defined(IA32)
strncpy(cpuinfo, "x86_32", length);
#elif defined(IA64)
strncpy(cpuinfo, "IA64", length);
#elif defined(PPC)
strncpy(cpuinfo, "PPC64", length);
#elif defined(S390)
strncpy(cpuinfo, "S390", length);
#elif defined(SPARC)
strncpy(cpuinfo, "sparcv9", length);
#elif defined(ZERO_LIBARCH)
strncpy(cpuinfo, ZERO_LIBARCH, length);
#else
strncpy(cpuinfo, "unknown", length);
#endif
}
static void print_signal_handler(outputStream* st, int sig,
char* buf, size_t buflen);
void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) {
st->print_cr("Signal Handlers:");
print_signal_handler(st, SIGSEGV, buf, buflen);
print_signal_handler(st, SIGBUS , buf, buflen);
print_signal_handler(st, SIGFPE , buf, buflen);
print_signal_handler(st, SIGPIPE, buf, buflen);
print_signal_handler(st, SIGXFSZ, buf, buflen);
print_signal_handler(st, SIGILL , buf, buflen);
print_signal_handler(st, SR_signum, buf, buflen);
print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen);
print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen);
print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen);
print_signal_handler(st, BREAK_SIGNAL, buf, buflen);
#if defined(PPC64)
print_signal_handler(st, SIGTRAP, buf, buflen);
#endif
}
static char saved_jvm_path[MAXPATHLEN] = {0};
// Find the full path to the current module, libjvm.so
void os::jvm_path(char *buf, jint buflen) {
// Error checking.
if (buflen < MAXPATHLEN) {
assert(false, "must use a large-enough buffer");
buf[0] = '\0';
return;
}
// Lazy resolve the path to current module.
if (saved_jvm_path[0] != 0) {
strcpy(buf, saved_jvm_path);
return;
}
char dli_fname[MAXPATHLEN];
bool ret = dll_address_to_library_name(
CAST_FROM_FN_PTR(address, os::jvm_path),
dli_fname, sizeof(dli_fname), NULL);
assert(ret, "cannot locate libjvm");
char *rp = NULL;
if (ret && dli_fname[0] != '\0') {
rp = os::Posix::realpath(dli_fname, buf, buflen);
}
if (rp == NULL) {
return;
}
if (Arguments::sun_java_launcher_is_altjvm()) {
// Support for the java launcher's '-XXaltjvm=<path>' option. Typical
// value for buf is "<JAVA_HOME>/jre/lib/<vmtype>/libjvm.so".
// If "/jre/lib/" appears at the right place in the string, then
// assume we are installed in a JDK and we're done. Otherwise, check
// for a JAVA_HOME environment variable and fix up the path so it
// looks like libjvm.so is installed there (append a fake suffix
// hotspot/libjvm.so).
const char *p = buf + strlen(buf) - 1;
for (int count = 0; p > buf && count < 5; ++count) {
for (--p; p > buf && *p != '/'; --p)
/* empty */ ;
}
if (strncmp(p, "/jre/lib/", 9) != 0) {
// Look for JAVA_HOME in the environment.
char* java_home_var = ::getenv("JAVA_HOME");
if (java_home_var != NULL && java_home_var[0] != 0) {
char* jrelib_p;
int len;
// Check the current module name "libjvm.so".
p = strrchr(buf, '/');
if (p == NULL) {
return;
}
assert(strstr(p, "/libjvm") == p, "invalid library name");
rp = os::Posix::realpath(java_home_var, buf, buflen);
if (rp == NULL) {
return;
}
// determine if this is a legacy image or modules image
// modules image doesn't have "jre" subdirectory
len = strlen(buf);
assert(len < buflen, "Ran out of buffer room");
jrelib_p = buf + len;
snprintf(jrelib_p, buflen-len, "/jre/lib");
if (0 != access(buf, F_OK)) {
snprintf(jrelib_p, buflen-len, "/lib");
}
if (0 == access(buf, F_OK)) {
// Use current module name "libjvm.so"
len = strlen(buf);
snprintf(buf + len, buflen-len, "/hotspot/libjvm.so");
} else {
// Go back to path of .so
rp = os::Posix::realpath(dli_fname, buf, buflen);
if (rp == NULL) {
return;
}
}
}
}
}
strncpy(saved_jvm_path, buf, MAXPATHLEN);
saved_jvm_path[MAXPATHLEN - 1] = '\0';
}
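// Illustrative altjvm fixup (hypothetical paths): with
//   buf       = "/opt/jdk/lib/server/libjvm.so"  (no "/jre/lib/" component)
//   JAVA_HOME = "/opt/jdk"
// the code above probes "/opt/jdk/jre/lib" and then "/opt/jdk/lib"; on
// success buf becomes "/opt/jdk/lib/hotspot/libjvm.so".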
void os::print_jni_name_prefix_on(outputStream* st, int args_size) {
// no prefix required, not even "_"
}
void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
// no suffix required
}
////////////////////////////////////////////////////////////////////////////////
// sun.misc.Signal support
static volatile jint sigint_count = 0;
static void UserHandler(int sig, void *siginfo, void *context) {
// 4511530 - sem_post is serialized and handled by the manager thread. When
// the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We
// don't want to flood the manager thread with sem_post requests.
if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1) {
return;
}
// Ctrl-C is pressed during error reporting, likely because the error
// handler fails to abort. Let VM die immediately.
if (sig == SIGINT && VMError::is_error_reported()) {
os::die();
}
os::signal_notify(sig);
}
void* os::user_handler() {
return CAST_FROM_FN_PTR(void*, UserHandler);
}
static struct timespec create_semaphore_timespec(unsigned int sec, int nsec) {
struct timespec ts;
// Semaphores are always associated with CLOCK_REALTIME
os::Linux::clock_gettime(CLOCK_REALTIME, &ts);
// see os_posix.cpp for discussion on overflow checking
if (sec >= MAX_SECS) {
ts.tv_sec += MAX_SECS;
ts.tv_nsec = 0;
} else {
ts.tv_sec += sec;
ts.tv_nsec += nsec;
if (ts.tv_nsec >= NANOSECS_PER_SEC) {
ts.tv_nsec -= NANOSECS_PER_SEC;
++ts.tv_sec; // note: this must be <= MAX_SECS
}
}
return ts;
}
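// Worked example (illustrative): with sec = 1 and nsec = 800000000, a current
// tv_nsec of 300000000 sums to 1100000000 >= NANOSECS_PER_SEC, so one second
// is carried over: the net effect is tv_sec += 2 and tv_nsec = 100000000.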
extern "C" {
typedef void (*sa_handler_t)(int);
typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
}
void* os::signal(int signal_number, void* handler) {
struct sigaction sigAct, oldSigAct;
sigfillset(&(sigAct.sa_mask));
sigAct.sa_flags = SA_RESTART|SA_SIGINFO;
sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);
if (sigaction(signal_number, &sigAct, &oldSigAct)) {
// -1 means registration failed
return (void *)-1;
}
return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler);
}
void os::signal_raise(int signal_number) {
::raise(signal_number);
}
// The following code was moved from os.cpp to make it platform specific,
// which it is by its very nature.
// Will be modified when the maximum signal number is made dynamic.
int os::sigexitnum_pd() {
return NSIG;
}
// a counter for each possible signal value
static volatile jint pending_signals[NSIG+1] = { 0 };
// Linux(POSIX) specific hand shaking semaphore.
static Semaphore* sig_sem = NULL;
static PosixSemaphore sr_semaphore;
static void jdk_misc_signal_init() {
// Initialize signal structures
::memset((void*)pending_signals, 0, sizeof(pending_signals));
// Initialize signal semaphore
sig_sem = new Semaphore();
}
void os::signal_notify(int sig) {
if (sig_sem != NULL) {
Atomic::inc(&pending_signals[sig]);
sig_sem->signal();
} else {
// With ReduceSignalUsage, the signal thread is not created and
// jdk_misc_signal_init() is never called.
assert(ReduceSignalUsage, "signal semaphore should be created");
}
}
static int check_pending_signals() {
Atomic::store(0, &sigint_count);
for (;;) {
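// Scan all signal slots and atomically claim one pending signal: the
// cmpxchg only succeeds if the counter still holds the value we read, so
// concurrent posters and claimants never lose or double-count a signal.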
for (int i = 0; i < NSIG + 1; i++) {
jint n = pending_signals[i];
if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) {
return i;
}
}
JavaThread *thread = JavaThread::current();
ThreadBlockInVM tbivm(thread);
bool threadIsSuspended;
do {
thread->set_suspend_equivalent();
// cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
sig_sem->wait();
// were we externally suspended while we were waiting?
threadIsSuspended = thread->handle_special_suspend_equivalent_condition();
if (threadIsSuspended) {
// The semaphore has been incremented, but while we were waiting
// another thread suspended us. We don't want to continue running
// while suspended because that would surprise the thread that
// suspended us.
sig_sem->signal();
thread->java_suspend_self();
}
} while (threadIsSuspended);
}
}
int os::signal_wait() {
return check_pending_signals();
}
////////////////////////////////////////////////////////////////////////////////
// Virtual Memory
int os::vm_page_size() {
// Seems redundant as all get out
assert(os::Linux::page_size() != -1, "must call os::init");
return os::Linux::page_size();
}
// Linux allocates memory by pages.
int os::vm_allocation_granularity() {
assert(os::Linux::page_size() != -1, "must call os::init");
return os::Linux::page_size();
}
// Rationale behind this function:
// Current (as of Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without
// an executable mapping for the address (see lookup_dcookie() in the kernel
// module), so we cannot get samples for JIT-compiled code. Here we create a
// private executable mapping over the code cache, and can then use the
// standard (well, almost, as the mapping can change) way to provide info for
// the reporting script by storing the timestamp and location of the symbol.
void linux_wrap_code(char* base, size_t size) {
static volatile jint cnt = 0;
if (!UseOprofile) {
return;
}
char buf[PATH_MAX+1];
int num = Atomic::add(1, &cnt);
snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d",
os::get_temp_directory(), os::current_process_id(), num);
unlink(buf);
int fd = ::open(buf, O_CREAT | O_RDWR, S_IRWXU);
if (fd != -1) {
off_t rv = ::lseek(fd, size-2, SEEK_SET);
if (rv != (off_t)-1) {
if (::write(fd, "", 1) == 1) {
mmap(base, size,
PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0);
}
}
::close(fd);
unlink(buf);
}
}
static bool recoverable_mmap_error(int err) {
// See if the error is one we can let the caller handle. This
// list of errno values comes from JBS-6843484. I can't find a
// Linux man page that documents this specific set of errno
// values so while this list currently matches Solaris, it may
// change as we gain experience with this failure mode.
switch (err) {
case EBADF:
case EINVAL:
case ENOTSUP:
// let the caller deal with these errors
return true;
default:
// Any remaining errors on this OS can cause our reserved mapping
// to be lost. That can cause confusion where different data
// structures think they have the same memory mapped. The worst
// scenario is if both the VM and a library think they have the
// same memory mapped.
return false;
}
}
static void warn_fail_commit_memory(char* addr, size_t size, bool exec,
int err) {
warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
", %d) failed; error='%s' (errno=%d)", p2i(addr), size, exec,
os::strerror(err), err);
}
static void warn_fail_commit_memory(char* addr, size_t size,
size_t alignment_hint, bool exec,
int err) {
warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT
", " SIZE_FORMAT ", %d) failed; error='%s' (errno=%d)", p2i(addr), size,
alignment_hint, exec, os::strerror(err), err);
}
// NOTE: The Linux kernel does not really reserve the pages for us.
// All it does is check whether there are enough free pages
// left at the time of mmap(). This could be a potential
// problem.
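// (With the default /proc/sys/vm/overcommit_memory policy the kernel may
// grant more virtual memory than it can back with physical pages, so even a
// "successful" commit can fail later on first touch; unrecoverable mmap
// errors are treated as fatal below.)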
int os::Linux::commit_memory_impl(char* addr, size_t size, bool exec) {
int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
uintptr_t res = (uintptr_t) ::mmap(addr, size, prot,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
if (res != (uintptr_t) MAP_FAILED) {
if (UseNUMAInterleaving) {
numa_make_global(addr, size);
}
return 0;
}
int err = errno; // save errno from mmap() call above
if (!recoverable_mmap_error(err)) {
warn_fail_commit_memory(addr, size, exec, err);
vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory.");
}
return err;
}
bool os::pd_commit_memory(char* addr, size_t size, bool exec) {
return os::Linux::commit_memory_impl(addr, size, exec) == 0;
}
void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec,
const char* mesg) {
assert(mesg != NULL, "mesg must be specified");
int err = os::Linux::commit_memory_impl(addr, size, exec);
if (err != 0) {
// the caller wants all commit errors to exit with the specified mesg:
warn_fail_commit_memory(addr, size, exec, err);
vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "%s", mesg);
}
}
// Define MAP_HUGETLB here so we can build HotSpot on old systems.
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000
#endif
// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE 14
#endif
int os::Linux::commit_memory_impl(char* addr, size_t size,
size_t alignment_hint, bool exec) {
int err = os::Linux::commit_memory_impl(addr, size, exec);
if (err == 0) {
realign_memory(addr, size, alignment_hint);
}
return err;
}
bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint,
bool exec) {
return os::Linux::commit_memory_impl(addr, size, alignment_hint, exec) == 0;
}
void os::pd_commit_memory_or_exit(char* addr, size_t size,
size_t alignment_hint, bool exec,
const char* mesg) {
assert(mesg != NULL, "mesg must be specified");
int err = os::Linux::commit_memory_impl(addr, size, alignment_hint, exec);
if (err != 0) {
// the caller wants all commit errors to exit with the specified mesg:
warn_fail_commit_memory(addr, size, alignment_hint, exec, err);
vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "%s", mesg);
}
}
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) {
// We don't check the return value: madvise(MADV_HUGEPAGE) may not
// be supported or the memory may already be backed by huge pages.
::madvise(addr, bytes, MADV_HUGEPAGE);
}
}
void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
// This method works by doing an mmap over an existing mapping and effectively discarding
// the existing pages. However it won't work for SHM-based large pages that cannot be
// uncommitted at all. We don't do anything in this case to avoid creating a segment with
// small pages on top of the SHM segment. This method always works for small pages, so we
// allow that in any case.
if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) {
commit_memory(addr, bytes, alignment_hint, !ExecMem);
}
}
void os::numa_make_global(char *addr, size_t bytes) {
Linux::numa_interleave_memory(addr, bytes);
}
// Define for numa_set_bind_policy(int). Setting the argument to 0 will set the
// bind policy to MPOL_PREFERRED for the current thread.
#define USE_MPOL_PREFERRED 0
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
// To make NUMA and large pages more robust when both enabled, we need to ease
// the requirements on where the memory should be allocated. MPOL_BIND is the
// default policy and it will force memory to be allocated on the specified
// node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on
// the specified node, but will not force it. Using this policy will prevent
// getting SIGBUS when trying to allocate large pages on NUMA nodes with no
// free large pages.
Linux::numa_set_bind_policy(USE_MPOL_PREFERRED);
Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
}
bool os::numa_topology_changed() { return false; }
size_t os::numa_get_groups_num() {
// Return just the number of nodes in which it's possible to allocate memory
// (in numa terminology, configured nodes).
return Linux::numa_num_configured_nodes();
}
int os::numa_get_group_id() {
int cpu_id = Linux::sched_getcpu();
if (cpu_id != -1) {
int lgrp_id = Linux::get_node_by_cpu(cpu_id);
if (lgrp_id != -1) {
return lgrp_id;
}
}
return 0;
}
int os::Linux::get_existing_num_nodes() {
int node;
int highest_node_number = Linux::numa_max_node();
int num_nodes = 0;
// Get the total number of nodes in the system including nodes without memory.
for (node = 0; node <= highest_node_number; node++) {
if (isnode_in_existing_nodes(node)) {
num_nodes++;
}
}
return num_nodes;
}
size_t os::numa_get_leaf_groups(int *ids, size_t size) {
int highest_node_number = Linux::numa_max_node();
size_t i = 0;
// Map all node ids in which it is possible to allocate memory. Also nodes are
// not always consecutively available, i.e. available from 0 to the highest
// node number. If the nodes have been bound explicitly using numactl membind,
// then allocate memory from those nodes only.
for (int node = 0; node <= highest_node_number; node++) {
if (Linux::isnode_in_bound_nodes((unsigned int)node)) {
ids[i++] = node;
}
}
return i;
}
bool os::get_page_info(char *start, page_info* info) {
return false;
}
char *os::scan_pages(char *start, char* end, page_info* page_expected,
page_info* page_found) {
return end;
}
int os::Linux::sched_getcpu_syscall(void) {
unsigned int cpu = 0;
int retval = -1;
#if defined(IA32)
#ifndef SYS_getcpu
#define SYS_getcpu 318
#endif
retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
#elif defined(AMD64)
// Unfortunately we have to bring all these macros here from vsyscall.h
// to be able to compile on old linuxes.
#define __NR_vgetcpu 2
#define VSYSCALL_START (-10UL << 20)
#define VSYSCALL_SIZE 1024
#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
retval = vgetcpu(&cpu, NULL, NULL);
#endif
return (retval == -1) ? retval : cpu;
}
void os::Linux::sched_getcpu_init() {
// sched_getcpu() should be in libc.
set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
dlsym(RTLD_DEFAULT, "sched_getcpu")));
// If it's not, try a direct syscall.
if (sched_getcpu() == -1) {
set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
(void*)&sched_getcpu_syscall));
}
if (sched_getcpu() == -1) {
vm_exit_during_initialization("getcpu(2) system call not supported by kernel");
}
}
// Something to do with the numa-aware allocator needs these symbols
extern "C" JNIEXPORT void numa_warn(int number, char *where, ...) { }
extern "C" JNIEXPORT void numa_error(char *where) { }
// Handle request to load libnuma symbol version 1.1 (API v1). If it fails
// load symbol from base version instead.
void* os::Linux::libnuma_dlsym(void* handle, const char *name) {
void *f = dlvsym(handle, name, "libnuma_1.1");
if (f == NULL) {
f = dlsym(handle, name);
}
return f;
}
// Handle request to load libnuma symbol version 1.2 (API v2) only.
// Return NULL if the symbol is not defined in this particular version.
void* os::Linux::libnuma_v2_dlsym(void* handle, const char* name) {
return dlvsym(handle, name, "libnuma_1.2");
}
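// The v1/v2 split matters because some entry points changed signature between
// libnuma API versions; e.g. numa_interleave_memory() takes an
// 'unsigned long*' nodemask in v1 but a 'struct bitmask*' in v2, which is why
// libnuma_init() below looks up both variants separately.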
bool os::Linux::libnuma_init() {
if (sched_getcpu() != -1) { // Requires sched_getcpu() support
void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
if (handle != NULL) {
set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
libnuma_dlsym(handle, "numa_node_to_cpus")));
set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
libnuma_dlsym(handle, "numa_max_node")));
set_numa_num_configured_nodes(CAST_TO_FN_PTR(numa_num_configured_nodes_func_t,
libnuma_dlsym(handle, "numa_num_configured_nodes")));
set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
libnuma_dlsym(handle, "numa_available")));
set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
libnuma_dlsym(handle, "numa_tonode_memory")));
set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
libnuma_dlsym(handle, "numa_interleave_memory")));
set_numa_interleave_memory_v2(CAST_TO_FN_PTR(numa_interleave_memory_v2_func_t,
libnuma_v2_dlsym(handle, "numa_interleave_memory")));
set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
libnuma_dlsym(handle, "numa_set_bind_policy")));
set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t,
libnuma_dlsym(handle, "numa_bitmask_isbitset")));
set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
libnuma_dlsym(handle, "numa_distance")));
set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
libnuma_v2_dlsym(handle, "numa_get_membind")));
if (numa_available() != -1) {
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
// Create an index -> node mapping, since nodes are not always consecutive
_nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
rebuild_nindex_to_node_map();
// Create a cpu -> node mapping
_cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
rebuild_cpu_to_node_map();
return true;
}
}
}
return false;
}
size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
// Creating a guard page is very expensive. Java threads have HotSpot
// guard pages, so only enable the glibc guard page for non-Java threads.
// (Remember: a compiler thread is a Java thread, too!)
return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : page_size());
}
void os::Linux::rebuild_nindex_to_node_map() {
int highest_node_number = Linux::numa_max_node();
nindex_to_node()->clear();
for (int node = 0; node <= highest_node_number; node++) {
if (Linux::isnode_in_existing_nodes(node)) {
nindex_to_node()->append(node);
}
}
}
// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
// The table is later used in get_node_by_cpu().
void os::Linux::rebuild_cpu_to_node_map() {
const size_t NCPUS = 32768; // Since the buffer size computation is very obscure
// in libnuma (possible values start at 16 and continue up with every other
// power of 2, but stay below the maximum number of CPUs supported by the
// kernel), and is subject to change (in libnuma version 2 the requirements
// are more reasonable), we just hardcode the number the library uses.
const size_t BitsPerCLong = sizeof(long) * CHAR_BIT;
size_t cpu_num = processor_count();
size_t cpu_map_size = NCPUS / BitsPerCLong;
size_t cpu_map_valid_size =
MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size);
cpu_to_node()->clear();
cpu_to_node()->at_grow(cpu_num - 1);
size_t node_num = get_existing_num_nodes();
int distance = 0;
int closest_distance = INT_MAX;
int closest_node = 0;
unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal);
for (size_t i = 0; i < node_num; i++) {
// Check if node is configured (not a memory-less node). If it is not, find
// the closest configured node. Check also if node is bound, i.e. it's allowed
// to allocate memory from the node. If it's not allowed, map cpus in that node
// to the closest node from which memory allocation is allowed.
if (!isnode_in_configured_nodes(nindex_to_node()->at(i)) ||
!isnode_in_bound_nodes(nindex_to_node()->at(i))) {
closest_distance = INT_MAX;
// Check distance from all remaining nodes in the system. Ignore distance
// from itself, from another non-configured node, and from another non-bound
// node.
for (size_t m = 0; m < node_num; m++) {
if (m != i &&
isnode_in_configured_nodes(nindex_to_node()->at(m)) &&
isnode_in_bound_nodes(nindex_to_node()->at(m))) {
distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m));
// If a closest node is found, update. There is always at least one
// configured and bound node in the system so there is always at least
// one node close.
if (distance != 0 && distance < closest_distance) {
closest_distance = distance;
closest_node = nindex_to_node()->at(m);
}
}
}
} else {
// Current node is already a configured node.
closest_node = nindex_to_node()->at(i);
}
// Get cpus from the original node and map them to the closest node. If node
// is a configured node (not a memory-less node), then original node and
// closest node are the same.
if (numa_node_to_cpus(nindex_to_node()->at(i), cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
for (size_t j = 0; j < cpu_map_valid_size; j++) {
if (cpu_map[j] != 0) {
for (size_t k = 0; k < BitsPerCLong; k++) {
if (cpu_map[j] & (1UL << k)) {
cpu_to_node()->at_put(j * BitsPerCLong + k, closest_node);
}
}
}
}
}
}
FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
}
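// Worked example of the bitmap decode above (illustrative, 64-bit longs, so
// BitsPerCLong == 64): cpu 70 is bit k = 6 of word j = 1, since
// 70 == 1 * 64 + 6; if that bit is set in cpu_map, slot 70 of the
// cpu -> node table receives closest_node.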
int os::Linux::get_node_by_cpu(int cpu_id) {
if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
return cpu_to_node()->at(cpu_id);
}
return -1;
}
GrowableArray<int>* os::Linux::_cpu_to_node;
GrowableArray<int>* os::Linux::_nindex_to_node;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
os::Linux::numa_num_configured_nodes_func_t os::Linux::_numa_num_configured_nodes;
os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
unsigned long* os::Linux::_numa_all_nodes;
struct bitmask* os::Linux::_numa_all_nodes_ptr;
struct bitmask* os::Linux::_numa_nodes_ptr;
bool os::pd_uncommit_memory(char* addr, size_t size) {
uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0);
return res != (uintptr_t) MAP_FAILED;
}
static address get_stack_committed_bottom(address bottom, size_t size) {
address nbot = bottom;
address ntop = bottom + size;
size_t page_sz = os::vm_page_size();
unsigned pages = size / page_sz;
unsigned char vec[1];
unsigned imin = 1, imax = pages + 1, imid;
int mincore_return_value = 0;
assert(imin <= imax, "Unexpected page size");
while (imin < imax) {
imid = (imax + imin) / 2;
nbot = ntop - (imid * page_sz);
// Use a trick with mincore to check whether the page is mapped or not.
// mincore sets vec to 1 if the page resides in memory and to 0 if it is
// swapped out; but if the page we are asking for is unmapped, it fails
// with -1 and errno == ENOMEM.
mincore_return_value = mincore(nbot, page_sz, vec);
if (mincore_return_value == -1) {
// Page is not mapped; go up
// to find the first mapped page
if (errno != EAGAIN) {
assert(errno == ENOMEM, "Unexpected mincore errno");
imax = imid;
}
} else {
// Page is mapped; go down
// to find the first unmapped page
imin = imid + 1;
}
}
nbot = nbot + page_sz;
// Adjust stack bottom one page up if last checked page is not mapped
if (mincore_return_value == -1) {
nbot = nbot + page_sz;
}
return nbot;
}
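// The loop above is a binary search over page counts in [imin, imax): if
// mincore() fails with ENOMEM the probed page is unmapped, so fewer than
// imid pages are committed (imax = imid); a mapped probe means at least imid
// pages are committed (imin = imid + 1). E.g. an 8 MB range with 4 KB pages
// (2048 pages) converges in at most ~11 probes.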
bool os::committed_in_range(address start, size_t size, address& committed_start, size_t& committed_size) {
int mincore_return_value;
const size_t stripe = 1024; // query this many pages each time
unsigned char vec[stripe + 1];
// set a guard
vec[stripe] = 'X';
const size_t page_sz = os::vm_page_size();
size_t pages = size / page_sz;
assert(is_aligned(start, page_sz), "Start address must be page aligned");
assert(is_aligned(size, page_sz), "Size must be page aligned");
committed_start = NULL;
int loops = (pages + stripe - 1) / stripe;
int committed_pages = 0;
address loop_base = start;
bool found_range = false;
for (int index = 0; index < loops && !found_range; index ++) {
assert(pages > 0, "Nothing to do");
int pages_to_query = (pages >= stripe) ? stripe : pages;
pages -= pages_to_query;
// Get stable read
while ((mincore_return_value = mincore(loop_base, pages_to_query * page_sz, vec)) == -1 && errno == EAGAIN);
// During shutdown, some memory goes away without properly notifying NMT,
// e.g. ConcurrentGCThread/WatcherThread can exit without deleting the thread
// object. Bail out and report the range as not committed for now.
if (mincore_return_value == -1 && errno == ENOMEM) {
return false;
}
assert(vec[stripe] == 'X', "overflow guard");
assert(mincore_return_value == 0, "Range must be valid");
// Process this stripe
for (int vecIdx = 0; vecIdx < pages_to_query; vecIdx ++) {
if ((vec[vecIdx] & 0x01) == 0) { // not committed
// End of current contiguous region
if (committed_start != NULL) {
found_range = true;
break;
}
} else { // committed
// Start of region
if (committed_start == NULL) {
committed_start = loop_base + page_sz * vecIdx;
}
committed_pages ++;
}
}
loop_base += pages_to_query * page_sz;
}
if (committed_start != NULL) {
assert(committed_pages > 0, "Must have committed region");
assert(committed_pages <= int(size / page_sz), "Can not commit more than it has");
assert(committed_start >= start && committed_start < start + size, "Out of range");
committed_size = page_sz * committed_pages;
return true;
} else {
assert(committed_pages == 0, "Should not have committed region");
return false;
}
}
// Linux uses a growable mapping for the stack, and if the mapping for
// the stack guard pages is not removed when we detach a thread the
// stack cannot grow beyond the pages where the stack guard was
// mapped. If at some point later in the process the stack expands to
// that point, the Linux kernel cannot expand the stack any further
// because the guard pages are in the way, and a segfault occurs.
//
// However, it's essential not to split the stack region by unmapping
// a region (leaving a hole) that's already part of the stack mapping,
// so if the stack mapping has already grown beyond the guard pages at
// the time we create them, we have to truncate the stack mapping.
// So, we need to know the extent of the stack mapping when
// create_stack_guard_pages() is called.
// We only need this for stacks that are growable: at the time of
// writing thread stacks don't use growable mappings (i.e. those
// created with MAP_GROWSDOWN), and aren't marked "[stack]", so this
// only applies to the main thread.
// If the (growable) stack mapping already extends beyond the point
// where we're going to put our guard pages, truncate the mapping at
// that point by munmap()ping it. This ensures that when we later
// munmap() the guard pages we don't leave a hole in the stack
// mapping. This only affects the main/primordial thread
bool os::pd_create_stack_guard_pages(char* addr, size_t size) {
if (os::is_primordial_thread()) {
// As we manually grow stack up to bottom inside create_attached_thread(),
// it's likely that os::Linux::initial_thread_stack_bottom is mapped and
// we don't need to do anything special.
// Check it first, before calling heavy function.
uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom();
unsigned char vec[1];
if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) {
// Fallback to slow path on all errors, including EAGAIN
stack_extent = (uintptr_t) get_stack_committed_bottom(
os::Linux::initial_thread_stack_bottom(),
(size_t)addr - stack_extent);
}
if (stack_extent < (uintptr_t)addr) {
::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent));
}
}
return os::commit_memory(addr, size, !ExecMem);
}
// If this is a growable mapping, remove the guard pages entirely by
// munmap()ping them. If not, just call uncommit_memory(). This only
// affects the main/primordial thread, but guard against future OS changes.
// It's safe to always unmap guard pages for the primordial thread because we
// always place them right after the end of the mapped region.
bool os::remove_stack_guard_pages(char* addr, size_t size) {
uintptr_t stack_extent, stack_base;
if (os::is_primordial_thread()) {
return ::munmap(addr, size) == 0;
}
return os::uncommit_memory(addr, size);
}
// If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
// at 'requested_addr'. If there are existing memory mappings at the same
// location, however, they will be overwritten. If 'fixed' is false,
// 'requested_addr' is only treated as a hint, the return value may or
// may not start from the requested address. Unlike Linux mmap(), this
// function returns NULL to indicate failure.
static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
char * addr;
int flags;
flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
if (fixed) {
assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
flags |= MAP_FIXED;
}
// Map reserved/uncommitted pages PROT_NONE so we fail early if we
// touch an uncommitted page.