From 50d69a718e38c9f0e9f99422c52da75b461ba4cc Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Fri, 5 Jun 2015 18:23:34 -0700 Subject: [PATCH 1/2] [SPARK-8136][YARN] Fix flakiness in YarnClusterSuite. Instead of actually downloading the logs, just verify that the logs link is actually a URL and is in the expected format. --- .../apache/spark/deploy/yarn/YarnClusterSuite.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index bc42e12dfafd7..8e36abb312bc5 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, FileOutputStream, OutputStreamWriter} +import java.net.URL import java.util.Properties import java.util.concurrent.TimeUnit @@ -351,11 +352,13 @@ private object YarnClusterDriver extends Logging with Matchers { assert(driverLogs.size === 2) assert(driverLogs.containsKey("stderr")) assert(driverLogs.containsKey("stdout")) - val stderr = driverLogs("stderr") // YARN puts everything in stderr. - val lines = Source.fromURL(stderr).getLines() - // Look for a line that contains YarnClusterSchedulerBackend, since that is guaranteed in - // cluster mode. 
- assert(lines.exists(_.contains("YarnClusterSchedulerBackend"))) + // Ensure that this is a URL, else this will throw an exception + val urlStr = driverLogs("stderr") + val url = new URL(urlStr) + assert(Utils.localHostName() === url.getHost) + val containerId = YarnSparkHadoopUtil.get.getContainerId + val user = Utils.getCurrentUserName() + assert(urlStr.endsWith(s"/node/containerlogs/$containerId/$user/stderr?start=0")) } } From 3183aebfb20fa2cc33b90d8c54ee469970e44913 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Sat, 6 Jun 2015 13:58:50 -0700 Subject: [PATCH 2/2] Remove check for hostname which can fail on machines with several hostnames. Removed some unused imports. --- .../org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 8e36abb312bc5..93d587d0cb36a 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -24,7 +24,6 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConversions._ import scala.collection.mutable -import scala.io.Source import com.google.common.base.Charsets.UTF_8 import com.google.common.io.ByteStreams @@ -345,17 +344,17 @@ private object YarnClusterDriver extends Logging with Matchers { assert(info.logUrlMap.nonEmpty) } - // If we are running in yarn-cluster mode, verify that driver logs are downloadable. + // If we are running in yarn-cluster mode, verify that driver log links are present and are + // in the expected format. 
if (conf.get("spark.master") == "yarn-cluster") { assert(listener.driverLogs.nonEmpty) val driverLogs = listener.driverLogs.get assert(driverLogs.size === 2) assert(driverLogs.containsKey("stderr")) assert(driverLogs.containsKey("stdout")) - // Ensure that this is a URL, else this will throw an exception val urlStr = driverLogs("stderr") - val url = new URL(urlStr) - assert(Utils.localHostName() === url.getHost) + // Ensure that this is a valid URL, else this will throw an exception + new URL(urlStr) val containerId = YarnSparkHadoopUtil.get.getContainerId val user = Utils.getCurrentUserName() assert(urlStr.endsWith(s"/node/containerlogs/$containerId/$user/stderr?start=0"))