Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay lock-related operations in single thread mode #14583

Merged
merged 1 commit into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ private static RestoreException throwSetEnvException(Throwable cause) {
* @throws RestoreException if an error occurred during or after
* restore
*/
public void checkpointJVM() {
public synchronized void checkpointJVM() {
/* Add env variables restore hook */
registerRestoreEnvVariables();

Expand Down
16 changes: 16 additions & 0 deletions runtime/criusupport/criusupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
IDATA systemReturnCode = 0;
PORT_ACCESS_FROM_VMC(currentThread);

vm->checkpointState.checkpointThread = currentThread;

Trc_CRIU_checkpointJVMImpl_Entry(currentThread);
if (vmFuncs->isCheckpointAllowed(currentThread)) {
#if defined(LINUX)
Expand Down Expand Up @@ -371,6 +373,8 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,

toggleSuspendOnJavaThreads(currentThread, TRUE);

vm->extendedRuntimeFlags2 |= J9_EXTENDED_RUNTIME2_CRIU_SINGLE_THREAD_MODE;
DanHeidinga marked this conversation as resolved.
Show resolved Hide resolved

vmFuncs->releaseExclusiveVMAccess(currentThread);

if (FALSE == vmFuncs->jvmCheckpointHooks(currentThread)) {
Expand Down Expand Up @@ -415,10 +419,20 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
goto wakeJavaThreads;
}

if (FALSE == vmFuncs->runDelayedLockRelatedOperations(currentThread)) {
currentExceptionClass = vm->criuRestoreExceptionClass;
systemReturnCode = 0;
nlsMsgFormat = j9nls_lookup_message(J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE,
J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS, NULL);
}

wakeJavaThreads:
vmFuncs->acquireExclusiveVMAccess(currentThread);

wakeJavaThreadsWithExclusiveVMAccess:

vm->extendedRuntimeFlags2 &= ~J9_EXTENDED_RUNTIME2_CRIU_SINGLE_THREAD_MODE;

toggleSuspendOnJavaThreads(currentThread, FALSE);

vmFuncs->releaseExclusiveVMAccess(currentThread);
Expand Down Expand Up @@ -490,6 +504,8 @@ Java_org_eclipse_openj9_criu_CRIUSupport_checkpointJVMImpl(JNIEnv *env,
}
}

vm->checkpointState.checkpointThread = NULL;

Trc_CRIU_checkpointJVMImpl_Exit(currentThread);
}

Expand Down
8 changes: 8 additions & 0 deletions runtime/nls/j9cl/j9jcl.nls
Original file line number Diff line number Diff line change
Expand Up @@ -515,3 +515,11 @@ J9NLS_JCL_CRIU_FAILED_TO_RUN_INTERNAL_RESTORE_HOOKS.explanation=An error occured
J9NLS_JCL_CRIU_FAILED_TO_RUN_INTERNAL_RESTORE_HOOKS.system_action=The JVM will throw a RestoreException.
J9NLS_JCL_CRIU_FAILED_TO_RUN_INTERNAL_RESTORE_HOOKS.user_response=View CRIU documentation to determine how to resolve the error.
# END NON-TRANSLATABLE

J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS=Could not run delayed lock-related operations succesfully, errno=%li
# START NON-TRANSLATABLE
J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS.sample_input_1=1
J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS.explanation=An error occured when the JVM attempted to run delayed identity operations.
J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS.system_action=The JVM will throw a RestoreException.
J9NLS_JCL_CRIU_FAILED_DELAY_LOCK_RELATED_OPS.user_response=View CRIU documentation to determine how to resolve the error.
# END NON-TRANSLATABLE
4 changes: 3 additions & 1 deletion runtime/oti/j9.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2021 IBM Corp. and others
* Copyright (c) 1991, 2022 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -352,6 +352,8 @@ static const struct { \

#define J9_IS_STRING_DESCRIPTOR(str, strLen) (((strLen) > 2) && (IS_REF_OR_VAL_SIGNATURE(*(str))) && (';' == *((str) + (strLen) - 1)))

#define J9_IS_SINGLE_THREAD_MODE(vm) (J9_ARE_ALL_BITS_SET((vm)->extendedRuntimeFlags2, J9_EXTENDED_RUNTIME2_CRIU_SINGLE_THREAD_MODE))

#if defined(OPENJ9_BUILD)
#define J9_SHARED_CACHE_DEFAULT_BOOT_SHARING(vm) TRUE
#else /* defined(OPENJ9_BUILD) */
Expand Down
1 change: 1 addition & 0 deletions runtime/oti/j9consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ extern "C" {
#define J9_EXTENDED_RUNTIME2_TUNE_THROUGHPUT 0x20000
#define J9_EXTENDED_RUNTIME2_TUNE_QUICKSTART 0x40000
#define J9_EXTENDED_RUNTIME2_DISABLE_FINALIZATION 0x80000
#define J9_EXTENDED_RUNTIME2_CRIU_SINGLE_THREAD_MODE 0x100000

#define J9_OBJECT_HEADER_AGE_DEFAULT 0xA /* OBJECT_HEADER_AGE_DEFAULT */
#define J9_OBJECT_HEADER_SHAPE_MASK 0xE /* OBJECT_HEADER_SHAPE_MASK */
Expand Down
14 changes: 14 additions & 0 deletions runtime/oti/j9nonbuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -4070,11 +4070,24 @@ typedef struct J9InternalHookRecord {
struct J9Pool *instanceObjects;
} J9InternalHookRecord;

typedef struct J9DelayedLockingOpertionsRecord {
jobject globalObjectRef;
UDATA operation;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does pool iteration guarantee the same order FIFO iteration order? i.e. the first notify we delay should be the first we do after exiting single threaded mode. Similarly the 2nd should be the 2nd, etc.

If the pool iteration order isn't guaranteed (and documented!) I think these should have a J9DelayedSingleThreadModeOpRecord *next field as well and they should be stored in a linked list. The pool is fine for backing the allocation but we should iterate through the list to do the notifies.

And likely need two pointers - one for the head of the list and one for the tail so we can insert quickly at the tail and iterate from the head

struct J9DelayedLockingOpertionsRecord *linkNext;
struct J9DelayedLockingOpertionsRecord *linkPrevious;
} J9DelayedLockingOpertionsRecord;

#define J9_SINGLE_THREAD_MODE_OP_NOTIFY 0x1
#define J9_SINGLE_THREAD_MODE_OP_NOTIFY_ALL 0x2

typedef struct J9CRIUCheckpointState {
BOOLEAN isCheckPointEnabled;
BOOLEAN isCheckPointAllowed;
BOOLEAN isNonPortableRestoreMode;
struct J9DelayedLockingOpertionsRecord *delayedLockingOperationsRoot;
struct J9Pool *hookRecords;
struct J9Pool *delayedLockingOperationsRecords;
struct J9VMThread *checkpointThread;
} J9CRIUCheckpointState;
#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */

Expand Down Expand Up @@ -4841,6 +4854,7 @@ typedef struct J9InternalVMFunctions {
BOOLEAN (*isCheckpointAllowed)(struct J9VMThread *currentThread);
BOOLEAN (*runInternalJVMCheckpointHooks)(struct J9VMThread *currentThread);
BOOLEAN (*runInternalJVMRestoreHooks)(struct J9VMThread *currentThread);
BOOLEAN (*runDelayedLockRelatedOperations)(struct J9VMThread *currentThread);
#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */
j9object_t (*getClassNameString)(struct J9VMThread *currentThread, j9object_t classObject, jboolean internAndAssign);
} J9InternalVMFunctions;
Expand Down
11 changes: 10 additions & 1 deletion runtime/oti/vm_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ internalCreateRAMClassFromROMClass(J9VMThread *vmThread, J9ClassLoader *classLoa

#if defined(J9VM_OPT_CRIU_SUPPORT)

/* ---------------- criuhelpers.cpp ---------------- */
/* ---------------- CRIUHelpers.cpp ---------------- */
/**
* @brief Queries if CRIU support is enabled. By default support
* is not enabled, it can be enabled with `-XX:+EnableCRIUSupport`
Expand Down Expand Up @@ -556,6 +556,15 @@ runInternalJVMCheckpointHooks(J9VMThread *currentThread);
BOOLEAN
runInternalJVMRestoreHooks(J9VMThread *currentThread);

/**
* @brief This function runs the identity operations that were delayed
* during the Java checkpoint and restore hooks.
*
* @param currentThread thread token
* @return BOOLEAN TRUE if no error, otherwise FALSE
*/
BOOLEAN
runDelayedLockRelatedOperations(J9VMThread *currentThread);
#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */

/* ---------------- classloadersearch.c ---------------- */
Expand Down
29 changes: 23 additions & 6 deletions runtime/vm/BytecodeInterpreter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
#include "ArrayCopyHelpers.hpp"
#include "AtomicSupport.hpp"
#include "BytecodeAction.hpp"
#if defined(J9VM_OPT_CRIU_SUPPORT)
#include "CRIUHelpers.hpp"
#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */
#if defined(J9VM_OPT_METHOD_HANDLE)
#include "MHInterpreter.hpp"
#endif /* defined(J9VM_OPT_METHOD_HANDLE) */
Expand Down Expand Up @@ -2681,16 +2684,30 @@ class INTERPRETER_CLASS
j9object_t receiver = ((j9object_t*)_sp)[0];
omrthread_monitor_t monitorPtr = NULL;

if (VM_ObjectMonitor::getMonitorForNotify(_currentThread, receiver, &monitorPtr, true)) {
if (0 != notifyFunction(monitorPtr)) {
#if defined(J9VM_OPT_CRIU_SUPPORT)
if (VM_CRIUHelpers::isJVMInSingleThreadMode(_vm)) {
UDATA operation = J9_SINGLE_THREAD_MODE_OP_NOTIFY;
if (omrthread_monitor_notify_all == notifyFunction) {
operation = J9_SINGLE_THREAD_MODE_OP_NOTIFY_ALL;
}
if (!VM_CRIUHelpers::delayedLockingOperation(_currentThread, receiver, operation)) {
rc = GOTO_THROW_CURRENT_EXCEPTION;
goto done;
}
} else
#endif /* defined(J9VM_OPT_CRIU_SUPPORT) */
{
if (VM_ObjectMonitor::getMonitorForNotify(_currentThread, receiver, &monitorPtr, true)) {
if (0 != notifyFunction(monitorPtr)) {
buildInternalNativeStackFrame(REGISTER_ARGS);
rc = THROW_ILLEGAL_MONITOR_STATE;
goto done;
}
} else if (NULL != monitorPtr) {
buildInternalNativeStackFrame(REGISTER_ARGS);
rc = THROW_ILLEGAL_MONITOR_STATE;
goto done;
}
} else if (NULL != monitorPtr) {
buildInternalNativeStackFrame(REGISTER_ARGS);
rc = THROW_ILLEGAL_MONITOR_STATE;
goto done;
}

returnVoidFromINL(REGISTER_ARGS, 1);
Expand Down
2 changes: 1 addition & 1 deletion runtime/vm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ set(main_sources

if(J9VM_OPT_CRIU_SUPPORT)
list(APPEND main_sources
criuhelpers.cpp
CRIUHelpers.cpp
)
endif()

Expand Down
93 changes: 90 additions & 3 deletions runtime/vm/criuhelpers.cpp → runtime/vm/CRIUHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "ut_j9vm.h"
#include "vm_api.h"

#include "CRIUHelpers.hpp"
#include "HeapIteratorAPI.h"
#include "ObjectAccessBarrierAPI.hpp"
#include "VMHelpers.hpp"
Expand Down Expand Up @@ -261,17 +262,29 @@ initializeCriuHooks(J9VMThread *currentThread)
vm->checkpointState.hookRecords = pool_new(sizeof(J9InternalHookRecord), 0, 0, 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(vm->portLibrary));
if (NULL == vm->checkpointState.hookRecords) {
setNativeOutOfMemoryError(currentThread, 0, 0);
goto done;
}
}
if (NULL != vm->checkpointState.hookRecords) {
/* Add restore hook to re-seed java.uti.Random.seed.value. */

if (NULL == vm->checkpointState.delayedLockingOperationsRecords) {
vm->checkpointState.delayedLockingOperationsRecords = pool_new(sizeof(J9DelayedLockingOpertionsRecord), 0, 0, 0, J9_GET_CALLSITE(), OMRMEM_CATEGORY_VM, POOL_FOR_PORT(vm->portLibrary));
if (NULL == vm->checkpointState.delayedLockingOperationsRecords) {
setNativeOutOfMemoryError(currentThread, 0, 0);
goto done;
}
}

{
/* Add restore hook to re-seed java.uti.Random.seed.value */
#define JAVA_UTIL_RANDOM "java/util/Random"
J9Class *juRandomClass = hashClassTableAt(vm->systemClassLoader, (U_8 *)JAVA_UTIL_RANDOM, LITERAL_STRLEN(JAVA_UTIL_RANDOM));
J9Class *juRandomClass = peekClassHashTable(currentThread, vm->systemClassLoader, (U_8 *)JAVA_UTIL_RANDOM, LITERAL_STRLEN(JAVA_UTIL_RANDOM));
#undef JAVA_UTIL_RANDOM
if (NULL != juRandomClass) {
addInternalJVMCheckpointHook(currentThread, TRUE, juRandomClass, FALSE, juRandomReseed);
}
}

done:
Trc_VM_criu_initHooks_Exit(currentThread, vm->checkpointState.hookRecords);
}

Expand Down Expand Up @@ -389,4 +402,78 @@ runInternalJVMRestoreHooks(J9VMThread *currentThread)
return result;
}

BOOLEAN
runDelayedLockRelatedOperations(J9VMThread *currentThread)
{
J9JavaVM *vm = currentThread->javaVM;
J9InternalVMFunctions* vmFuncs = vm->internalVMFunctions;
J9DelayedLockingOpertionsRecord *delayedLockingOperation = static_cast<J9DelayedLockingOpertionsRecord*>(J9_LINKED_LIST_START_DO(vm->checkpointState.delayedLockingOperationsRoot));
BOOLEAN rc = TRUE;

Assert_VM_true(vm->checkpointState.checkpointThread == currentThread);

while (NULL != delayedLockingOperation) {
omrthread_monitor_t monitorPtr = NULL;
j9object_t instance = J9_JNI_UNWRAP_REFERENCE(delayedLockingOperation->globalObjectRef);
if (!VM_ObjectMonitor::inlineFastObjectMonitorEnter(currentThread, instance)) {
rc = objectMonitorEnterNonBlocking(currentThread, instance);
if (J9_OBJECT_MONITOR_BLOCKING == rc) {
/* owned by another thread */
Trc_VM_criu_runDelayedLockRelatedOperations_contendedMonitorEnter(currentThread, instance);
rc = FALSE;
goto done;
DanHeidinga marked this conversation as resolved.
Show resolved Hide resolved
} else if (J9_OBJECT_MONITOR_ENTER_FAILED(rc)) {
/* not possible if the the application was able to call notify earlier */
Assert_VM_unreachable();
}
}
if (!VM_ObjectMonitor::getMonitorForNotify(currentThread, instance, &monitorPtr, true)) {
if (NULL != monitorPtr) {
/* another thread owns the lock, shouldn't be possible */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tracepoint

Trc_VM_criu_runDelayedLockRelatedOperations_contendedMonitorEnter2(currentThread, monitorPtr);
rc = FALSE;
goto done;
} else {
/* no waiters */
goto next;
}
}

Trc_VM_criu_runDelayedLockRelatedOperations_runDelayedOperation(currentThread, delayedLockingOperation->operation, instance, monitorPtr);

switch(delayedLockingOperation->operation) {
case J9_SINGLE_THREAD_MODE_OP_NOTIFY:
rc = 0 == omrthread_monitor_notify(monitorPtr);
break;
case J9_SINGLE_THREAD_MODE_OP_NOTIFY_ALL:
rc = 0 == omrthread_monitor_notify_all(monitorPtr);
break;
default:
Assert_VM_unreachable();
break;
}

if (!rc) {
goto done;
}

next:
if (!VM_ObjectMonitor::inlineFastObjectMonitorExit(currentThread, instance)) {
if (0 != objectMonitorExit(currentThread, instance)) {
Assert_VM_unreachable();
}
}

vmFuncs->j9jni_deleteGlobalRef((JNIEnv*) currentThread, delayedLockingOperation->globalObjectRef, JNI_FALSE);
J9DelayedLockingOpertionsRecord *lastOperation = delayedLockingOperation;
delayedLockingOperation = J9_LINKED_LIST_NEXT_DO(vm->checkpointState.delayedLockingOperationsRoot, delayedLockingOperation);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a use after free issue as you've removed the element from the pool (free) and then dereferenced it.

pool_removeElement(vm->checkpointState.delayedLockingOperationsRecords, lastOperation);
}

done:
vm->checkpointState.delayedLockingOperationsRoot = NULL;

return rc;
}

} /* extern "C" */
Loading