Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAPREDUCE-7449: Add add-opens flag to container launch commands on JDK17 nodes #5935

Merged
merged 3 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public void testCommandLine() throws Exception {
MyMRApp app = new MyMRApp(1, 0, true, this.getClass().getName(), true);
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.MAPREDUCE_APP_SUBMISSION_CROSS_PLATFORM, true);
conf.setBoolean(MRJobConfig.MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT, false);
Job job = app.submit(conf);
app.waitForState(job, JobState.SUCCEEDED);
app.verifyCompleted();
Expand Down Expand Up @@ -123,7 +124,7 @@ private void testReduceCommandLine(Configuration conf)
"[" + MRApps.crossPlatformify("JAVA_HOME") + "/bin/java" +
" -Djava.net.preferIPv4Stack=true" +
" -Dhadoop.metrics.log.level=WARN " +
" -Xmx820m -Djava.io.tmpdir=" + MRApps.crossPlatformify("PWD") + "/tmp" +
" -Xmx820m <ADD_OPENS> -Djava.io.tmpdir=" + MRApps.crossPlatformify("PWD") + "/tmp" +
" -Dlog4j.configuration=container-log4j.properties" +
" -Dyarn.app.container.log.dir=<LOG_DIR>" +
" -Dyarn.app.container.log.filesize=0" +
Expand Down Expand Up @@ -165,7 +166,7 @@ public void testCommandLineWithLog4JConifg() throws Exception {
"[" + MRApps.crossPlatformify("JAVA_HOME") + "/bin/java" +
" -Djava.net.preferIPv4Stack=true" +
" -Dhadoop.metrics.log.level=WARN " +
" -Xmx820m -Djava.io.tmpdir=" + MRApps.crossPlatformify("PWD") + "/tmp" +
" -Xmx820m <ADD_OPENS> -Djava.io.tmpdir=" + MRApps.crossPlatformify("PWD") + "/tmp" +
" -Dlog4j.configuration=" + testLogPropertieFile +
" -Dyarn.app.container.log.dir=<LOG_DIR>" +
" -Dyarn.app.container.log.filesize=0" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.hadoop.util.ClassUtil;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -2207,6 +2208,13 @@ public String getTaskJavaOpts(TaskType taskType) {
javaOpts += " " + xmxArg;
}

// JDK17 support: automatically add --add-opens=java.base/java.lang=ALL-UNNAMED
// so the tasks can launch on a JDK17 node.
if (getBoolean(MRJobConfig.MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT,
MRJobConfig.MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT_DEFAULT)) {
javaOpts += " " + ApplicationConstants.JVM_ADD_OPENS_VAR;
}

return javaOpts;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,15 @@ public interface MRJobConfig {
"os.name,os.version,java.home,java.runtime.version,java.vendor," +
"java.version,java.vm.name,java.class.path,java.io.tmpdir,user.dir,user.name";

/*
* Flag to indicate whether JDK17's required add-opens flags should be added to MR AM and
* map/reduce containers regardless of the user specified java opts.
*/
public static final String MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT =
"mapreduce.jvm.add-opens-as-default";

public static final boolean MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT_DEFAULT = true;

public static final String IO_SORT_FACTOR = "mapreduce.task.io.sort.factor";

public static final int DEFAULT_IO_SORT_FACTOR = 10;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1804,6 +1804,18 @@
<description>Comma-delimited list of system properties to log on mapreduce JVM start</description>
</property>

<property>
<name>mapreduce.jvm.add-opens-as-default</name>
<value>true</value>
<description>Since on JDK17 it's no longer possible to use the reflection API to
access non-public fields and methods add-opens flags should be added to MR AM
and map/reduce containers regardless of the user specified java opts. Setting
this to true will add the flags to the container launch commands on nodes with
JDK17 or higher. Defaults to true, but the setting has no effect on nodes using
JDK16 and before.
</description>
</property>

<!-- jobhistory properties -->

<property>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,13 @@ private List<String> setupAMCommand(Configuration jobConf) {
MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.MR_AM_ENV);
vargs.add(mrAppMasterUserOptions);

// JDK17 support: automatically add --add-opens=java.base/java.lang=ALL-UNNAMED
// so the tasks can launch on a JDK17 node.
if (conf.getBoolean(MRJobConfig.MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT,
MRJobConfig.MAPREDUCE_JVM_ADD_OPENS_JAVA_OPT_DEFAULT)) {
vargs.add(ApplicationConstants.JVM_ADD_OPENS_VAR);
}

if (jobConf.getBoolean(MRJobConfig.MR_AM_PROFILE,
MRJobConfig.DEFAULT_MR_AM_PROFILE)) {
final String profileParams = jobConf.get(MRJobConfig.MR_AM_PROFILE_PARAMS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ public interface ApplicationConstants {
String APPLICATION_WEB_PROXY_BASE_ENV =
"APPLICATION_WEB_PROXY_BASE";

/**
* The environmental variable for JDK17's add-opens workaround. This
* should be replaced either a correctly formatted add-opens option if JDK17 is used
* or an empty string if not on container launch.
*/
String JVM_ADD_OPENS_VAR = "<ADD_OPENS>";

/**
* The temporary environmental variable for container log directory. This
* should be replaced by real container log directory on container launch.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,8 @@ public boolean run() throws IOException, YarnException {
vargs.add("\"" + Environment.JAVA_HOME.$$() + "/bin/java\"");
// Set Xmx based on am memory size
vargs.add("-Xmx" + amMemory + "m");
// JDK17 support
vargs.add(ApplicationConstants.JVM_ADD_OPENS_VAR);
// Set class name
vargs.add(appMasterMainClass);
// Set params for Application Master
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ public static String expandEnvironment(String var,
var = var.replace(ApplicationConstants.CLASS_PATH_SEPARATOR,
File.pathSeparator);

if (Shell.isJavaVersionAtLeast(17)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering about this. Wouldn't a node-manager config be easer to reason about?

Implementing like this is a bit strange, currently MR jobs can be configured (default=true) and the distributed shell app sets this regardless of the configuration. During the job submission/config time it's not clear if container will be launched on a Java>=17 node so that's the reason for the placeholder, later ContainerLaunch replaces it to the arg or an empty string. Maybe non MR apps would also require this option to run properly - which they could specify at the job config - but non-homogenous nodes (where nodes have different Java installs) can't be handled easily (maybe with node labels or some other trick).

I think this should be a node-manager config instead. The ContainerLaunch could just simply add the arg when the java version is at least 17 and the config option is set (if we can detect that the container is a java app, not sure about that).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is not trivial to detect when and where to add this argument, I think the current solution is OK as is.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had an offline discussion with @tomicooler, the issue with not using a placeholder is the way commands is contructed: in contrast to it's name and type it's a String list containing one element only, the command that is to be executed. In case of Java apps we must add this parameter before the main class, hence I see two options here: add the placeholder right after -Xmx (so that we know it'll be in the correct place) or I can deconstruct the first element of the commands array and add the add-opens flag to the correct place. I think the former is a cleaner, less error-prone solution.

var = var.replace(ApplicationConstants.JVM_ADD_OPENS_VAR,
"--add-opens=java.base/java.lang=ALL-UNNAMED");
} else {
var = var.replace(ApplicationConstants.JVM_ADD_OPENS_VAR, "");
}

// replace parameter expansion marker. e.g. {{VAR}} on Windows is replaced
// as %VAR% and on Linux replaced as "$VAR"
if (Shell.WINDOWS) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -575,18 +575,21 @@ public void testEnvExpansion() throws IOException {
+ Apps.crossPlatformify("HADOOP_HOME") + "/share/hadoop/common/lib/*"
+ ApplicationConstants.CLASS_PATH_SEPARATOR
+ Apps.crossPlatformify("HADOOP_LOG_HOME")
+ ApplicationConstants.LOG_DIR_EXPANSION_VAR;
+ ApplicationConstants.LOG_DIR_EXPANSION_VAR
+ " " + ApplicationConstants.JVM_ADD_OPENS_VAR;

String res = ContainerLaunch.expandEnvironment(input, logPath);

String expectedAddOpens = Shell.isJavaVersionAtLeast(17) ?
"--add-opens=java.base/java.lang=ALL-UNNAMED" : "";
if (Shell.WINDOWS) {
Assert.assertEquals("%HADOOP_HOME%/share/hadoop/common/*;"
+ "%HADOOP_HOME%/share/hadoop/common/lib/*;"
+ "%HADOOP_LOG_HOME%/nm/container/logs", res);
+ "%HADOOP_LOG_HOME%/nm/container/logs" + " " + expectedAddOpens, res);
} else {
Assert.assertEquals("$HADOOP_HOME/share/hadoop/common/*:"
+ "$HADOOP_HOME/share/hadoop/common/lib/*:"
+ "$HADOOP_LOG_HOME/nm/container/logs", res);
+ "$HADOOP_LOG_HOME/nm/container/logs" + " " + expectedAddOpens, res);
}
System.out.println(res);
}
Expand Down