Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CRIU adds concurrent mode preCheckpoint and postRestore hooks #18107

Merged
merged 1 commit into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,23 @@ private static native void checkpointJVMImpl(String imageDir,
});
}

/**
 * A hook can be registered to run in one of the following modes:
 *
 * SINGLE_THREAD_MODE - a mode in which only the Java thread that requested a
 * checkpoint is permitted to run. This means that global state changes are
 * permitted without the risk of race conditions. It also implies that if one
 * attempts to acquire a resource held by another Java thread, a deadlock might
 * occur, or a JVMCheckpointException or JVMRestoreException might be thrown if
 * such an attempt is detected by the JVM.
 *
 * CONCURRENT_MODE - a hook running when the SINGLE_THREAD_MODE is NOT enabled
 */
public static enum HookMode {
	SINGLE_THREAD_MODE,
	CONCURRENT_MODE
}

private static boolean loadNativeLibrary() {
if (!nativeLoaded) {
try {
Expand Down Expand Up @@ -222,15 +239,21 @@ public static String getErrorMessage() {
/* Higher priority hooks are run last in pre-checkpoint hooks, and are run
* first in post restore hooks.
*/
private static final int RESTORE_CLEAR_INETADDRESS_CACHE_PRIORITY = 100;
private static final int RESTORE_ENVIRONMENT_VARIABLES_PRIORITY = 100;
private static final int RESTORE_SYSTEM_PROPERTIES_PRIORITY = 100;
private static final int USER_HOOKS_PRIORITY = 1;
// The lowest priority of a user hook. Declared final so that the range checked
// when a user hook is registered cannot be reassigned by other code.
public static final int LOWEST_USER_HOOK_PRIORITY = 0;
// The highest priority of a user hook. Final for the same reason as above.
public static final int HIGHEST_USER_HOOK_PRIORITY = 99;
// the default SINGLE_THREAD_MODE hook priority
private static final int DEFAULT_SINGLE_THREAD_MODE_HOOK_PRIORITY = LOWEST_USER_HOOK_PRIORITY;
// Priorities of JVM-registered SINGLE_THREAD_MODE hooks: one above the highest
// value a user hook may request.
private static final int RESTORE_CLEAR_INETADDRESS_CACHE_PRIORITY = HIGHEST_USER_HOOK_PRIORITY + 1;
private static final int RESTORE_ENVIRONMENT_VARIABLES_PRIORITY = HIGHEST_USER_HOOK_PRIORITY + 1;
private static final int RESTORE_SYSTEM_PROPERTIES_PRIORITY = HIGHEST_USER_HOOK_PRIORITY + 1;
/* RESET_CRIUSEC_PRIORITY and RESTORE_SECURITY_PROVIDERS_PRIORITY need to
* be higher than any other JVM hook that may require security providers.
*/
static final int RESET_CRIUSEC_PRIORITY = 100;
static final int RESTORE_SECURITY_PROVIDERS_PRIORITY = 100;
static final int RESET_CRIUSEC_PRIORITY = HIGHEST_USER_HOOK_PRIORITY + 1;
static final int RESTORE_SECURITY_PROVIDERS_PRIORITY = HIGHEST_USER_HOOK_PRIORITY + 1;

private String imageDir;
private boolean leaveRunning;
Expand Down Expand Up @@ -487,7 +510,9 @@ public CRIUSupport registerRestoreOptionsFile(Path optionsFile) {
}

/**
* User hook that is run after restoring a checkpoint image.
* User hook that is run after restoring a checkpoint image. This is equivalent
* to registerPostRestoreHook(hook, HookMode.SINGLE_THREAD_MODE,
* DEFAULT_SINGLE_THREAD_MODE_HOOK_PRIORITY);
*
* Hooks will be run in single threaded mode, no other application threads
* will be active. Users should avoid synchronization of objects that are not owned
Expand All @@ -500,20 +525,53 @@ public CRIUSupport registerRestoreOptionsFile(Path optionsFile) {
* @return this
*/
public CRIUSupport registerPostRestoreHook(Runnable hook) {
if (hook != null) {
JasonFengJ9 marked this conversation as resolved.
Show resolved Hide resolved
J9InternalCheckpointHookAPI.registerPostRestoreHook(USER_HOOKS_PRIORITY, "User post-restore hook", ()->{ //$NON-NLS-1$
try {
hook.run();
} catch (Throwable t) {
throw new JVMRestoreException("Exception thrown when running user post-restore hook", 0, t); //$NON-NLS-1$
}
});
}
return this;
return registerPostRestoreHook(hook, HookMode.SINGLE_THREAD_MODE, DEFAULT_SINGLE_THREAD_MODE_HOOK_PRIORITY);
}

/**
 * Registers a user hook that is run after a checkpoint image is restored.
 *
 * When SINGLE_THREAD_MODE is requested, no other application threads are
 * active while the hook runs. Such a hook should avoid synchronizing on
 * objects that are not owned by the thread, terminally blocking operations
 * and launching new threads. If the thread attempts to acquire a lock that
 * it doesn't own, an exception will be thrown.
 *
 * When CONCURRENT_MODE is requested, the hook is run alongside all other
 * active Java threads.
 *
 * After restore, high-priority hooks are run first and low-priority hooks are
 * run last. A priority is only meaningful relative to the other hooks of the
 * same mode. CONCURRENT_MODE hooks are implicitly lower priority than
 * SINGLE_THREAD_MODE hooks, i.e. the lowest priority SINGLE_THREAD_MODE hook
 * is a higher priority than the highest priority CONCURRENT_MODE hook. Hooks
 * of the same mode with the same priority are run in random order.
 *
 * @param hook     user hook
 * @param mode     the mode in which the hook is run, either CONCURRENT_MODE
 *                 or SINGLE_THREAD_MODE
 * @param priority the priority of the hook, from LOWEST_USER_HOOK_PRIORITY to
 *                 HIGHEST_USER_HOOK_PRIORITY inclusive
 *
 * @return this
 *
 * @throws UnsupportedOperationException if the hook mode is not
 *                                       SINGLE_THREAD_MODE or CONCURRENT_MODE
 *                                       or the priority is not between
 *                                       LOWEST_USER_HOOK_PRIORITY and
 *                                       HIGHEST_USER_HOOK_PRIORITY.
 */
public CRIUSupport registerPostRestoreHook(Runnable hook, HookMode mode, int priority)
		throws UnsupportedOperationException {
	// false selects the post-restore (not pre-checkpoint) registration path.
	final boolean isPreCheckpoint = false;
	return registerCheckpointHookHelper(hook, mode, priority, isPreCheckpoint);
}

/**
* User hook that is run before checkpointing the JVM. This is equivalent to
* registerPreCheckpointHook(hook, HookMode.SINGLE_THREAD_MODE,
* DEFAULT_SINGLE_THREAD_MODE_HOOK_PRIORITY).
*
* Hooks will be run in single threaded mode, no other application threads
* will be active. Users should avoid synchronization of objects that are not owned
Expand All @@ -527,14 +585,86 @@ public CRIUSupport registerPostRestoreHook(Runnable hook) {
*
*/
public CRIUSupport registerPreCheckpointHook(Runnable hook) {
	// Delegate to the three-argument overload using the default mode and priority.
	final HookMode defaultMode = HookMode.SINGLE_THREAD_MODE;
	final int defaultPriority = DEFAULT_SINGLE_THREAD_MODE_HOOK_PRIORITY;
	return registerPreCheckpointHook(hook, defaultMode, defaultPriority);
}

/**
 * Registers a user hook that is run before the JVM is checkpointed.
 *
 * When SINGLE_THREAD_MODE is requested, no other application threads are
 * active while the hook runs. Such a hook should avoid synchronizing on
 * objects that are not owned by the thread, terminally blocking operations
 * and launching new threads. If the thread attempts to acquire a lock that
 * it doesn't own, an exception will be thrown.
 *
 * When CONCURRENT_MODE is requested, the hook is run alongside all other
 * active Java threads.
 *
 * Before checkpoint, high-priority hooks are run last and low-priority hooks
 * are run first. A priority is only meaningful relative to the other hooks of
 * the same mode. CONCURRENT_MODE hooks are implicitly lower priority than
 * SINGLE_THREAD_MODE hooks, i.e. the lowest priority SINGLE_THREAD_MODE hook
 * is a higher priority than the highest priority CONCURRENT_MODE hook. Hooks
 * of the same mode with the same priority are run in random order.
 *
 * @param hook     user hook
 * @param mode     the mode in which the hook is run, either CONCURRENT_MODE
 *                 or SINGLE_THREAD_MODE
 * @param priority the priority of the hook, from LOWEST_USER_HOOK_PRIORITY to
 *                 HIGHEST_USER_HOOK_PRIORITY inclusive
 *
 * @return this
 *
 * @throws UnsupportedOperationException if the hook mode is not
 *                                       SINGLE_THREAD_MODE or CONCURRENT_MODE
 *                                       or the priority is not between
 *                                       LOWEST_USER_HOOK_PRIORITY and
 *                                       HIGHEST_USER_HOOK_PRIORITY.
 */
public CRIUSupport registerPreCheckpointHook(Runnable hook, HookMode mode, int priority)
		throws UnsupportedOperationException {
	// true selects the pre-checkpoint (not post-restore) registration path.
	final boolean isPreCheckpoint = true;
	return registerCheckpointHookHelper(hook, mode, priority, isPreCheckpoint);
}

/**
 * Validates a user hook request and registers it with
 * J9InternalCheckpointHookAPI as either a pre-checkpoint or a post-restore
 * hook. A null hook is ignored: nothing is validated or registered.
 *
 * The registered runnable wraps the user hook so that anything it throws is
 * rethrown as a JVMCheckpointException (pre-checkpoint) or a
 * JVMRestoreException (post-restore) with the original throwable as cause.
 *
 * @param hook            user hook, may be null
 * @param mode            SINGLE_THREAD_MODE or CONCURRENT_MODE
 * @param priority        the priority of the hook, from
 *                        LOWEST_USER_HOOK_PRIORITY to
 *                        HIGHEST_USER_HOOK_PRIORITY inclusive
 * @param isPreCheckpoint true to register a pre-checkpoint hook, false to
 *                        register a post-restore hook
 *
 * @return this
 *
 * @throws UnsupportedOperationException if hook is non-null and either the
 *                                       priority is out of range or the mode
 *                                       is not one of the two supported modes
 */
private CRIUSupport registerCheckpointHookHelper(Runnable hook, HookMode mode, int priority,
		boolean isPreCheckpoint) throws UnsupportedOperationException {
	if (hook != null) {
		if ((priority < LOWEST_USER_HOOK_PRIORITY) || (priority > HIGHEST_USER_HOOK_PRIORITY)) {
			throw new UnsupportedOperationException("The user hook priority must be between " //$NON-NLS-1$
					+ LOWEST_USER_HOOK_PRIORITY + " and " + HIGHEST_USER_HOOK_PRIORITY + "."); //$NON-NLS-1$ //$NON-NLS-2$
		}
		String threadModeMsg;
		if (HookMode.SINGLE_THREAD_MODE == mode) {
			threadModeMsg = "single-threaded mode"; //$NON-NLS-1$
		} else if (HookMode.CONCURRENT_MODE == mode) {
			threadModeMsg = "concurrent mode"; //$NON-NLS-1$
		} else {
			throw new UnsupportedOperationException("The hook mode must be SINGLE_THREAD_MODE or CONCURRENT_MODE."); //$NON-NLS-1$
		}

		// Build the description from the phase first and then append the mode.
		// The conditional operator binds more loosely than '+', so the phase must
		// be resolved on its own; otherwise the pre-checkpoint description loses
		// its mode and the post-restore description repeats the word "hook".
		String checkpointMsg = isPreCheckpoint ? "pre-checkpoint " : "post-restore "; //$NON-NLS-1$ //$NON-NLS-2$
		String commMsg = checkpointMsg + threadModeMsg + " hook"; //$NON-NLS-1$
		String hookName = "User " + commMsg; //$NON-NLS-1$
		String exceptionMsg = "Exception thrown when running user " + commMsg; //$NON-NLS-1$
		if (isPreCheckpoint) {
			J9InternalCheckpointHookAPI.registerPreCheckpointHook(mode, priority, hookName, () -> {
				try {
					hook.run();
				} catch (Throwable t) {
					throw new JVMCheckpointException(exceptionMsg, 0, t);
				}
			});
		} else {
			J9InternalCheckpointHookAPI.registerPostRestoreHook(mode, priority, hookName, () -> {
				try {
					hook.run();
				} catch (Throwable t) {
					throw new JVMRestoreException(exceptionMsg, 0, t);
				}
			});
		}
	}
	return this;
}
Expand All @@ -545,7 +675,7 @@ private void registerRestoreEnvVariables() {
return;
}

J9InternalCheckpointHookAPI.registerPostRestoreHook(RESTORE_ENVIRONMENT_VARIABLES_PRIORITY,
J9InternalCheckpointHookAPI.registerPostRestoreHook(HookMode.SINGLE_THREAD_MODE, RESTORE_ENVIRONMENT_VARIABLES_PRIORITY,
"Restore environment variables via env file: " + envFile, () -> { //$NON-NLS-1$
if (!Files.exists(this.envFile)) {
throw throwSetEnvException(new IllegalArgumentException(
Expand Down Expand Up @@ -681,20 +811,22 @@ public synchronized void checkpointJVM() {
registerRestoreEnvVariables();
}

J9InternalCheckpointHookAPI.registerPostRestoreHook(RESTORE_CLEAR_INETADDRESS_CACHE_PRIORITY, "Clear InetAddress cache on restore", CRIUSupport::clearInetAddressCache); //$NON-NLS-1$
J9InternalCheckpointHookAPI.registerPostRestoreHook(RESTORE_ENVIRONMENT_VARIABLES_PRIORITY, "Restore system properties", CRIUSupport::setRestoreJavaProperties); //$NON-NLS-1$
J9InternalCheckpointHookAPI.registerPostRestoreHook(HookMode.SINGLE_THREAD_MODE, RESTORE_CLEAR_INETADDRESS_CACHE_PRIORITY, "Clear InetAddress cache on restore", CRIUSupport::clearInetAddressCache); //$NON-NLS-1$
J9InternalCheckpointHookAPI.registerPostRestoreHook(HookMode.SINGLE_THREAD_MODE, RESTORE_ENVIRONMENT_VARIABLES_PRIORITY, "Restore system properties", CRIUSupport::setRestoreJavaProperties); //$NON-NLS-1$

/* Add security provider hooks. */
SecurityProviders.registerResetCRIUState();
SecurityProviders.registerRestoreSecurityProviders();

J9InternalCheckpointHookAPI.runPreCheckpointHooksConcurrentThread();
try {
checkpointJVMImpl(imageDir, leaveRunning, shellJob, extUnixSupport, logLevel, logFile, fileLocks,
workDir, tcpEstablished, autoDedup, trackMemory, unprivileged, optionsFile, envFilePath);
} catch (UnsatisfiedLinkError ule) {
errorMsg = ule.getMessage();
throw new InternalError("There is a problem with libj9criu in the JDK"); //$NON-NLS-1$
}
J9InternalCheckpointHookAPI.runPostRestoreHooksConcurrentThread();
} else {
throw new UnsupportedOperationException(
"Running in non-portable mode (only one checkpoint is allowed), and we have already checkpointed once"); //$NON-NLS-1$
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,14 @@
import java.util.Collections;
import java.util.List;

import org.eclipse.openj9.criu.CRIUSupport.HookMode;

final class J9InternalCheckpointHookAPI {

private static List<J9InternalCheckpointHook> postRestoreHooks = new ArrayList<>();
private static List<J9InternalCheckpointHook> preCheckpointHooks = new ArrayList<>();
private static List<J9InternalCheckpointHook> postRestoreHooksSingleThread = new ArrayList<>();
private static List<J9InternalCheckpointHook> preCheckpointHooksSingleThread = new ArrayList<>();
private static List<J9InternalCheckpointHook> postRestoreHooksConcurrentThread = new ArrayList<>();
private static List<J9InternalCheckpointHook> preCheckpointHooksConcurrentThread = new ArrayList<>();

/**
* This is an internal API
Expand All @@ -44,8 +48,15 @@ final class J9InternalCheckpointHookAPI {
* @param name the name of the hook
* @param hook the runnable hook
*/
static synchronized void registerPostRestoreHook(int priority, String name, Runnable hook) {
postRestoreHooks.add(new J9InternalCheckpointHook(priority, name, hook));
static synchronized void registerPostRestoreHook(HookMode mode, int priority, String name, Runnable hook)
		throws UnsupportedOperationException {
	// The hook mode and priority are expected to have been verified by the
	// caller. The final branch only guards against a null mode, so that a hook
	// is rejected loudly instead of being silently dropped.
	J9InternalCheckpointHook internalHook = new J9InternalCheckpointHook(mode, priority, name, hook);
	if (CRIUSupport.HookMode.SINGLE_THREAD_MODE == mode) {
		postRestoreHooksSingleThread.add(internalHook);
	} else if (CRIUSupport.HookMode.CONCURRENT_MODE == mode) {
		postRestoreHooksConcurrentThread.add(internalHook);
	} else {
		throw new UnsupportedOperationException("The hook mode must be SINGLE_THREAD_MODE or CONCURRENT_MODE."); //$NON-NLS-1$
	}
}

/**
Expand All @@ -60,8 +71,15 @@ static synchronized void registerPostRestoreHook(int priority, String name, Runn
* @param name the name of the hook
* @param hook the runnable hook
*/
static synchronized void registerPreCheckpointHook(int priority, String name, Runnable hook) {
preCheckpointHooks.add(new J9InternalCheckpointHook(priority, name, hook));
static synchronized void registerPreCheckpointHook(HookMode mode, int priority, String name, Runnable hook)
		throws UnsupportedOperationException {
	// The hook mode and priority are expected to have been verified by the
	// caller. The final branch only guards against a null mode, so that a hook
	// is rejected loudly instead of being silently dropped.
	J9InternalCheckpointHook internalHook = new J9InternalCheckpointHook(mode, priority, name, hook);
	if (CRIUSupport.HookMode.SINGLE_THREAD_MODE == mode) {
		preCheckpointHooksSingleThread.add(internalHook);
	} else if (CRIUSupport.HookMode.CONCURRENT_MODE == mode) {
		preCheckpointHooksConcurrentThread.add(internalHook);
	} else {
		throw new UnsupportedOperationException("The hook mode must be SINGLE_THREAD_MODE or CONCURRENT_MODE."); //$NON-NLS-1$
	}
}

private static void runHooks(List<J9InternalCheckpointHook> hooks, boolean reverse) {
Expand All @@ -86,20 +104,29 @@ private static void runHooks(List<J9InternalCheckpointHook> hooks, boolean rever
* Only called by the VM
*/
@SuppressWarnings("unused")
private static void runPreCheckpointHooks() {
runHooks(preCheckpointHooks, true);
private static void runPreCheckpointHooksSingleThread() {
	// Pre-checkpoint hooks are run with the reverse flag of runHooks set.
	final boolean reverse = true;
	runHooks(preCheckpointHooksSingleThread, reverse);
}

/*
* Only called by the VM
*/
@SuppressWarnings("unused")
private static void runPostRestoreHooks() {
runHooks(postRestoreHooks, false);
private static void runPostRestoreHooksSingleThread() {
	// Post-restore hooks are run with the reverse flag of runHooks cleared.
	final boolean reverse = false;
	runHooks(postRestoreHooksSingleThread, reverse);
}

static void runPreCheckpointHooksConcurrentThread() {
	// Runs the CONCURRENT_MODE pre-checkpoint hooks with the reverse flag of
	// runHooks set, matching the single-threaded pre-checkpoint runner.
	final boolean reverse = true;
	runHooks(preCheckpointHooksConcurrentThread, reverse);
}

static void runPostRestoreHooksConcurrentThread() {
	// Runs the CONCURRENT_MODE post-restore hooks with the reverse flag of
	// runHooks cleared, matching the single-threaded post-restore runner.
	final boolean reverse = false;
	runHooks(postRestoreHooksConcurrentThread, reverse);
}

final private static class J9InternalCheckpointHook implements Comparable<J9InternalCheckpointHook> {

private final CRIUSupport.HookMode hookMode;
private final int priority;
private final Runnable hook;
private final String name;
Expand All @@ -117,23 +144,27 @@ void runHook() {
hook.run();
}

J9InternalCheckpointHook(int priority, String name, Runnable hook) {
J9InternalCheckpointHook(CRIUSupport.HookMode hookMode, int priority, String name, Runnable hook) {
	// Plain field capture: the arguments are stored as given, with no
	// validation or copying.
	this.name = name;
	this.hook = hook;
	this.priority = priority;
	this.hookMode = hookMode;
}

@Override
public String toString() {
return "[J9InternalCheckpointHook: [" + name + "], priority:[" + priority + "], runnable:[" + hook + "]]"; //$NON-NLS-1$//$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
String hookModeStr = CRIUSupport.HookMode.SINGLE_THREAD_MODE == hookMode ? "single-threaded" : "concurrent"; //$NON-NLS-1$ //$NON-NLS-2$
return "[J9InternalCheckpointHook(" + hookModeStr + " mode): [" + name + "], priority:[" + priority //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+ "], runnable:[" + hook + "]]"; //$NON-NLS-1$ //$NON-NLS-2$
}

@Override
public boolean equals(Object obj) {
J9InternalCheckpointHook o = (J9InternalCheckpointHook) obj;
if (this == o) {
return true;
} else if ((this.priority == o.priority) && (this.hook.equals(o.hook)) && (this.name.equals(o.name))) {
} else if ((this.hookMode == o.hookMode) && (this.priority == o.priority) && (this.hook.equals(o.hook))
&& (this.name.equals(o.name))) {
return true;
}

Expand All @@ -144,6 +175,5 @@ public boolean equals(Object obj) {
public int hashCode() {
	// equals() treats two hooks as equal when hookMode, priority, hook and name
	// all match, so the hash must be derived from the same fields; the identity
	// hash would let equal hooks report different hash codes.
	return java.util.Objects.hash(hookMode, Integer.valueOf(priority), hook, name);
}

}
}
Loading