From 1c2cf26c91c99738175fae18f8d49abfd8e75843 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Tue, 21 Apr 2015 16:36:26 +0800 Subject: [PATCH 1/5] Add docs for rpc configurations --- docs/configuration.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index d9e9e67026cbb..1fd8b59ff8161 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -982,6 +982,34 @@ Apart from these, the following properties are also available, and may be useful This is only relevant for the Spark shell. + + spark.rpc.numRetries + 3 + How many times for an RPC ask operation to retry before giving up. + + + + + spark.rpc.retry.wait + 3s + + How long for an RPC ask operation to wait before starting the next retry. + + + + spark.rpc.askTimeout + 30s + + How long for an RPC ask operation to wait before timing out. + + + + spark.rpc.lookupTimeout + 30s + How long for an RPC remote endpoint lookup operation to wait before timing out. 
+ + + #### Scheduling From 4f07174033fed7b99ae2599bbbbd99ff8ff38c9d Mon Sep 17 00:00:00 2001 From: zsxwing Date: Tue, 21 Apr 2015 16:50:40 +0800 Subject: [PATCH 2/5] Fix unit tests --- core/src/test/scala/org/apache/spark/SparkConfSuite.scala | 2 +- core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index d7d8014a20498..272e6af0514e4 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -227,7 +227,7 @@ class SparkConfSuite extends FunSuite with LocalSparkContext with ResetSystemPro test("akka deprecated configs") { val conf = new SparkConf() - assert(!conf.contains("spark.rpc.num.retries")) + assert(!conf.contains("spark.rpc.numRetries")) assert(!conf.contains("spark.rpc.retry.wait")) assert(!conf.contains("spark.rpc.askTimeout")) assert(!conf.contains("spark.rpc.lookupTimeout")) diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 5fbda37c7cb88..44c88b00c442a 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -156,7 +156,7 @@ abstract class RpcEnvSuite extends FunSuite with BeforeAndAfterAll { val conf = new SparkConf() conf.set("spark.rpc.retry.wait", "0") - conf.set("spark.rpc.num.retries", "1") + conf.set("spark.rpc.numRetries", "1") val anotherEnv = createRpcEnv(conf, "remote", 13345) // Use anotherEnv to find out the RpcEndpointRef val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "ask-timeout") From 55775409d84cbfcdf9f1215ca640fd646d495013 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Wed, 22 Apr 2015 07:18:25 +0800 Subject: [PATCH 3/5] Use spark.network.timeout as the default timeout if it 
presents --- core/src/main/scala/org/apache/spark/util/RpcUtils.scala | 6 ++++-- docs/configuration.md | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala index 5ae793e0e87a3..5ccb0aa7f0736 100644 --- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala @@ -48,11 +48,13 @@ object RpcUtils { /** Returns the default Spark timeout to use for RPC ask operations. */ def askTimeout(conf: SparkConf): FiniteDuration = { - conf.getTimeAsSeconds("spark.rpc.askTimeout", "30s") seconds + conf.getTimeAsSeconds("spark.rpc.askTimeout", + conf.get("spark.network.timeout", "30s")) seconds } /** Returns the default Spark timeout to use for RPC remote endpoint lookup. */ def lookupTimeout(conf: SparkConf): FiniteDuration = { - conf.getTimeAsSeconds("spark.rpc.lookupTimeout", "30s") seconds + conf.getTimeAsSeconds("spark.rpc.lookupTimeout", + conf.get("spark.network.timeout", "30s")) seconds } } diff --git a/docs/configuration.md b/docs/configuration.md index 1fd8b59ff8161..d2878191bc238 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -963,8 +963,9 @@ Apart from these, the following properties are also available, and may be useful Default timeout for all network interactions. This config will be used in place of spark.core.connection.ack.wait.timeout, spark.akka.timeout, - spark.storage.blockManagerSlaveTimeoutMs or - spark.shuffle.io.connectionTimeout, if they are not configured. + spark.storage.blockManagerSlaveTimeoutMs, + spark.shuffle.io.connectionTimeout, spark.rpc.askTimeout or + spark.rpc.lookupTimeout if they are not configured. 
From 6e37c30f5daa028d1d38908821ce348862bb8bf9 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Wed, 22 Apr 2015 07:01:49 +0800 Subject: [PATCH 4/5] Update docs --- docs/configuration.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index d2878191bc238..af08b0727e986 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -986,7 +986,8 @@ Apart from these, the following properties are also available, and may be useful spark.rpc.numRetries 3 - How many times for an RPC ask operation to retry before giving up. + Number of times to retry before an RPC task gives up. + An RPC task will run at most this number of times. @@ -994,20 +995,20 @@ Apart from these, the following properties are also available, and may be useful spark.rpc.retry.wait 3s - How long for an RPC ask operation to wait before starting the next retry. + Duration for an RPC ask operation to wait before retrying. spark.rpc.askTimeout 30s - How long for an RPC ask operation to wait before timing out. + Duration for an RPC ask operation to wait before timing out. spark.rpc.lookupTimeout 30s - How long for an RPC remote endpoint lookup operation to wait before timing out. + Duration for an RPC remote endpoint lookup operation to wait before timing out. 
From 25a6736d545ed4e971cc7ed54fe5d01de2862716 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Wed, 22 Apr 2015 08:27:36 +0800 Subject: [PATCH 5/5] Increase the default timeout to 120s --- core/src/main/scala/org/apache/spark/util/RpcUtils.scala | 4 ++-- docs/configuration.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala index 5ccb0aa7f0736..f16cc8e7e42c6 100644 --- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala @@ -49,12 +49,12 @@ object RpcUtils { /** Returns the default Spark timeout to use for RPC ask operations. */ def askTimeout(conf: SparkConf): FiniteDuration = { conf.getTimeAsSeconds("spark.rpc.askTimeout", - conf.get("spark.network.timeout", "30s")) seconds + conf.get("spark.network.timeout", "120s")) seconds } /** Returns the default Spark timeout to use for RPC remote endpoint lookup. */ def lookupTimeout(conf: SparkConf): FiniteDuration = { conf.getTimeAsSeconds("spark.rpc.lookupTimeout", - conf.get("spark.network.timeout", "30s")) seconds + conf.get("spark.network.timeout", "120s")) seconds } } diff --git a/docs/configuration.md b/docs/configuration.md index af08b0727e986..d587b91124cb8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1000,14 +1000,14 @@ Apart from these, the following properties are also available, and may be useful spark.rpc.askTimeout - 30s + 120s Duration for an RPC ask operation to wait before timing out. spark.rpc.lookupTimeout - 30s + 120s Duration for an RPC remote endpoint lookup operation to wait before timing out.