From a4522b1bd23f53819f09d318adf859d9a86be298 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Thu, 17 Jul 2014 15:51:34 -0700 Subject: [PATCH 1/3] Make number of connection retries configurable --- .../scala/org/apache/spark/network/ConnectionManager.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index 8a1cdb812962e..f44661eb16e35 100644 --- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -55,6 +55,8 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, private val selector = SelectorProvider.provider.openSelector() + private val connectionRetries = conf.getInt("spark.core.connection.retries", 10) + // default to 30 second timeout waiting for authentication private val authTimeout = conf.getInt("spark.core.connection.auth.wait.timeout", 30) @@ -198,7 +200,7 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf, handleConnectExecutor.execute(new Runnable { override def run() { - var tries: Int = 10 + var tries: Int = connectionRetries while (tries >= 0) { if (conn.finishConnect(false)) return // Sleep ? From 2b9452fd81403343c3e378754fd30e0cec7d247c Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Thu, 17 Jul 2014 15:54:57 -0700 Subject: [PATCH 2/3] Document new config variable --- docs/configuration.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index a70007c165442..f1d55fd354dba 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -754,6 +754,13 @@ Apart from these, the following properties are also available, and may be useful out and giving up. + + spark.core.connection.retries + 10 + + Number of connection attempts to make before timing out. + + spark.ui.filters None From da95d518b7ff4561fcdacf593715145cf24c132b Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Fri, 25 Jul 2014 11:08:17 -0700 Subject: [PATCH 3/3] Retry connects on Exception --- .../scala/org/apache/spark/network/Connection.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala index 5285ec82c1b64..cc6d017f6158e 100644 --- a/core/src/main/scala/org/apache/spark/network/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/Connection.scala @@ -325,8 +325,14 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, logInfo("Connected to [" + address + "], " + outbox.messages.size + " messages pending") } catch { case e: Exception => { - logWarning("Error finishing connection to " + address, e) - callOnExceptionCallback(e) + if (!force) { + logWarning("finish connect failed [" + address + "], " + outbox.messages.size + + " messages pending", e) + return false + } else { + logWarning("Error finishing connection to " + address, e) + callOnExceptionCallback(e) + } } } true