From 503ea2d126b442b5ef31c5b49a2263fbf24cf21a Mon Sep 17 00:00:00 2001 From: "peng.zhang" Date: Thu, 24 Jul 2014 21:07:21 +0800 Subject: [PATCH 1/3] Support log4j log to yarn container dir --- .../main/scala/org/apache/spark/deploy/yarn/ClientBase.scala | 3 +++ .../org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index c96f731923d22..108ae5bb9b585 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -388,6 +388,9 @@ trait ClientBase extends Logging { .foreach(p => javaOpts += s"-Djava.library.path=$p") } + // For log4j configuration to reference + javaOpts += "-D=spark.yarn.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + val userClass = if (args.userClass != null) { Seq("--class", YarnSparkHadoopUtil.escapeForShell(args.userClass)) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 312d82a649792..9b32ff999119b 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -98,6 +98,9 @@ trait ExecutorRunnableUtil extends Logging { } */ + // For log4j configuration to reference + javaOpts += "-D=spark.yarn.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + val commands = Seq(Environment.JAVA_HOME.$() + "/bin/java", "-server", // Kill if OOM is raised - leverage yarn's failure handling to cause rescheduling. 
From f2b5e2a05ee46a612fdc30c9c930a704a59ac2b7 Mon Sep 17 00:00:00 2001 From: "peng.zhang" Date: Sun, 21 Sep 2014 20:28:10 +0800 Subject: [PATCH 2/3] Change variable's name, and update running-on-yarn.md --- docs/running-on-yarn.md | 2 ++ .../main/scala/org/apache/spark/deploy/yarn/ClientBase.scala | 2 +- .../org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 74bcc2eeb65f6..f067ddb044b83 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -205,6 +205,8 @@ Note that for the first option, both executors and the application master will s log4j configuration, which may cause issues when they run on the same node (e.g. trying to write to the same log file). +For a streaming application to operate 24/7, user can use RollingFileAppender in log4j.properties to avoid disk overflow of single log file. And to locate log files in YARN container log directory, configure RollingFileAppender's file location as "${spark.yarn.app.container.log.dir}/spark.log". So these log files can be viewed on YARN's container page during running. And logs will be copied to HDFS after job finished if log aggregation is turned on. + # Important notes - Before Hadoop 2.2, YARN does not support cores in container resource requests. Thus, when running against an earlier version, the numbers of cores given via command line arguments cannot be passed to YARN. Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured. 
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 108ae5bb9b585..6ae4d496220a5 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -389,7 +389,7 @@ trait ClientBase extends Logging { } // For log4j configuration to reference - javaOpts += "-D=spark.yarn.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + javaOpts += "-Dspark.yarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR val userClass = if (args.userClass != null) { diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 9b32ff999119b..f56f72cafe50e 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -99,7 +99,7 @@ trait ExecutorRunnableUtil extends Logging { */ // For log4j configuration to reference - javaOpts += "-D=spark.yarn.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + javaOpts += "-Dspark.yarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR val commands = Seq(Environment.JAVA_HOME.$() + "/bin/java", "-server", From 16c5cb81ac1c3ef42bc89aabe0546904d31f544e Mon Sep 17 00:00:00 2001 From: "peng.zhang" Date: Tue, 23 Sep 2014 16:47:42 +0800 Subject: [PATCH 3/3] Update doc --- docs/running-on-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index f067ddb044b83..4b3a49eca7007 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -205,7 +205,7 @@ Note that for the first option, both executors and the application master will s log4j configuration, which may cause issues 
when they run on the same node (e.g. trying to write to the same log file). -For a streaming application to operate 24/7, user can use RollingFileAppender in log4j.properties to avoid disk overflow of single log file. And to locate log files in YARN container log directory, configure RollingFileAppender's file location as "${spark.yarn.app.container.log.dir}/spark.log". So these log files can be viewed on YARN's container page during running. And logs will be copied to HDFS after job finished if log aggregation is turned on. +If you need a reference to the proper location to put log files in YARN so that YARN can properly display and aggregate them, use "${spark.yarn.app.container.log.dir}" in your log4j.properties. For example, log4j.appender.file_appender.File=${spark.yarn.app.container.log.dir}/spark.log. For streaming applications, configuring RollingFileAppender and setting its file location to YARN's log directory will avoid disk overflow caused by a large log file, and logs can be accessed using YARN's log utility. # Important notes