From d21f7db9bad878cc0a0e99d7e696fc9631b183ab Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Wed, 10 Feb 2016 12:13:16 +0000
Subject: [PATCH] Remove spark.closure.serializer option and use JavaSerializer
 always

---
 core/src/main/scala/org/apache/spark/SparkEnv.scala        | 5 ++---
 .../org/apache/spark/serializer/KryoSerializerSuite.scala  | 3 +--
 docs/configuration.md                                      | 7 -------
 docs/streaming-programming-guide.md                        | 2 --
 4 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index 9461afdc54124..204f7356f7ef8 100644
--- a/core/src/main/scala/org/apache/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -35,7 +35,7 @@ import org.apache.spark.network.netty.NettyBlockTransferService
 import org.apache.spark.rpc.{RpcEndpoint, RpcEndpointRef, RpcEnv}
 import org.apache.spark.scheduler.{LiveListenerBus, OutputCommitCoordinator}
 import org.apache.spark.scheduler.OutputCommitCoordinator.OutputCommitCoordinatorEndpoint
-import org.apache.spark.serializer.Serializer
+import org.apache.spark.serializer.{JavaSerializer, Serializer}
 import org.apache.spark.shuffle.ShuffleManager
 import org.apache.spark.storage._
 import org.apache.spark.util.{RpcUtils, Utils}
@@ -277,8 +277,7 @@ object SparkEnv extends Logging {
       "spark.serializer", "org.apache.spark.serializer.JavaSerializer")
     logDebug(s"Using serializer: ${serializer.getClass}")
 
-    val closureSerializer = instantiateClassFromConf[Serializer](
-      "spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer")
+    val closureSerializer = new JavaSerializer(conf)
 
     def registerOrLookupEndpoint(
         name: String, endpointCreator: => RpcEndpoint):
diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index f869bcd708619..27d063630be9d 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -282,8 +282,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext {
   test("kryo with fold") {
     val control = 1 :: 2 :: Nil
     // zeroValue must not be a ClassWithoutNoArgConstructor instance because it will be
-    // serialized by spark.closure.serializer but spark.closure.serializer only supports
-    // the default Java serializer.
+    // serialized by the Java serializer.
     val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_))
       .fold(null)((t1, t2) => {
         val t1x = if (t1 == null) 0 else t1.x
diff --git a/docs/configuration.md b/docs/configuration.md
index cd9dc1bcfc113..cf3d253e651ae 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -586,13 +586,6 @@ Apart from these, the following properties are also available, and may be useful
     Whether to compress broadcast variables before sending them. Generally a good idea.
   </td>
 </tr>
-<tr>
-  <td><code>spark.closure.serializer</code></td>
-  <td>org.apache.spark.serializer.<br />JavaSerializer</td>
-  <td>
-    Serializer class to use for closures. Currently only the Java serializer is supported.
-  </td>
-</tr>
 <tr>
   <td><code>spark.io.compression.codec</code></td>
   <td>lz4</td>
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 7e681b67cf0c2..677f5ff7bea8b 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -2163,8 +2163,6 @@ If the number of tasks launched per second is high (say, 50 or more per second),
 of sending out tasks to the slaves may be significant and will make it hard to achieve sub-second
 latencies. The overhead can be reduced by the following changes:
 
-* **Task Serialization**: Using Kryo serialization for serializing tasks can reduce the task sizes, and therefore reduce the time taken to send them to the slaves. This is controlled by the ```spark.closure.serializer``` property. However, at this time, Kryo serialization cannot be enabled for closure serialization. This may be resolved in a future release.
-
 * **Execution mode**: Running Spark in Standalone mode or coarse-grained Mesos mode leads to better task launch times than the fine-grained Mesos mode. Please refer to the
   [Running on Mesos guide](running-on-mesos.html) for more details.
 
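For reviewers, here is an illustrative sketch (not part of the patch) of the net behavioral effect: after this change, `SparkEnv` builds the closure serializer directly instead of reading a class name from a config key, so a `spark.closure.serializer` setting is simply ignored rather than used or rejected. The standalone object name below is invented for the example; the `JavaSerializer` constructor and `SerializerInstance` round-trip calls are the public Spark APIs touched by this patch.

```scala
import org.apache.spark.SparkConf
import org.apache.spark.serializer.JavaSerializer

object ClosureSerializerSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      // This key no longer has any effect; SparkEnv never reads it now.
      .set("spark.closure.serializer", "org.apache.spark.serializer.KryoSerializer")

    // Equivalent to the new line in SparkEnv.create:
    val closureSerializer = new JavaSerializer(conf)

    // Round-trip a serializable value to show the Java serializer in use.
    val instance = closureSerializer.newInstance()
    val bytes = instance.serialize(Seq(1, 2, 3))
    val roundTripped = instance.deserialize[Seq[Int]](bytes)
    println(roundTripped) // List(1, 2, 3)
  }
}
```

Hardcoding `JavaSerializer` here loses no working configuration: as the removed streaming-guide bullet itself notes, Kryo could never actually be enabled for closure serialization, so the Java serializer was the only supported value for the option being deleted.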