-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-36705][SHUFFLE] Disable push based shuffle when IO encryption is enabled or serializer is not relocatable #33976
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,7 +70,7 @@ import org.apache.spark.internal.config.UI._ | |
| import org.apache.spark.internal.config.Worker._ | ||
| import org.apache.spark.launcher.SparkLauncher | ||
| import org.apache.spark.network.util.JavaUtils | ||
| import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} | ||
| import org.apache.spark.serializer.{DeserializationStream, SerializationStream, Serializer, SerializerInstance} | ||
| import org.apache.spark.status.api.v1.{StackTrace, ThreadStackTrace} | ||
| import org.apache.spark.util.io.ChunkedByteBufferOutputStream | ||
|
|
||
|
|
@@ -2597,14 +2597,30 @@ private[spark] object Utils extends Logging { | |
| } | ||
|
|
||
| /** | ||
| * Push based shuffle can only be enabled when the application is submitted | ||
| * to run in YARN mode, with external shuffle service enabled | ||
| * Push based shuffle can only be enabled when below conditions are met: | ||
| * - the application is submitted to run in YARN mode | ||
| * - external shuffle service enabled | ||
| * - IO encryption disabled | ||
| * - serializer(such as KryoSerializer) supports relocation of serialized objects | ||
| */ | ||
| def isPushBasedShuffleEnabled(conf: SparkConf): Boolean = { | ||
| conf.get(PUSH_BASED_SHUFFLE_ENABLED) && | ||
| (conf.get(IS_TESTING).getOrElse(false) || | ||
| (conf.get(SHUFFLE_SERVICE_ENABLED) && | ||
| conf.get(SparkLauncher.SPARK_MASTER, null) == "yarn")) | ||
| val serializer = Utils.classForName(conf.get(SERIALIZER)).getConstructor(classOf[SparkConf]) | ||
| .newInstance(conf).asInstanceOf[Serializer] | ||
| val canDoPushBasedShuffle = | ||
| conf.get(PUSH_BASED_SHUFFLE_ENABLED) && | ||
| (conf.get(IS_TESTING).getOrElse(false) || | ||
| (conf.get(SHUFFLE_SERVICE_ENABLED) && | ||
| conf.get(SparkLauncher.SPARK_MASTER, null) == "yarn" && | ||
| // TODO: [SPARK-36744] needs to support IO encryption for push-based shuffle | ||
| !conf.get(IO_ENCRYPTION_ENABLED) && | ||
| serializer.supportsRelocationOfSerializedObjects)) | ||
mridulm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (!canDoPushBasedShuffle) { | ||
| logWarning("Push-based shuffle can only be enabled when the application is submitted" + | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit. We need a space at the end of the string.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This has been fixed in the follow-up PR #33984. |
||
| "to run in YARN mode, with external shuffle service enabled, IO encryption disabled, and" + | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto. We need another space at the end of this line, too.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed in the follow-up PR #33984.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, got it~ :) |
||
| "relocation of serialized objects supported.") | ||
| } | ||
| canDoPushBasedShuffle | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1509,10 +1509,17 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { | |
| assert(Utils.isPushBasedShuffleEnabled(conf) === false) | ||
| conf.set(SHUFFLE_SERVICE_ENABLED, true) | ||
| conf.set(SparkLauncher.SPARK_MASTER, "yarn") | ||
| conf.set("spark.yarn.maxAttempts", "1") | ||
| conf.set("spark.yarn.maxAppAttempts", "1") | ||
| conf.set(SERIALIZER, "org.apache.spark.serializer.KryoSerializer") | ||
| assert(Utils.isPushBasedShuffleEnabled(conf) === true) | ||
| conf.set("spark.yarn.maxAttempts", "2") | ||
| conf.set("spark.yarn.maxAppAttempts", "2") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While this test is no longer required (since we don't disable if multiple attempts are present - like we used to earlier), given it is a minor change, it is fine to include in this PR. |
||
| assert(Utils.isPushBasedShuffleEnabled(conf) === true) | ||
| conf.set(IO_ENCRYPTION_ENABLED, true) | ||
| assert(Utils.isPushBasedShuffleEnabled(conf) === false) | ||
| conf.set(IO_ENCRYPTION_ENABLED, false) | ||
| assert(Utils.isPushBasedShuffleEnabled(conf) === true) | ||
| conf.set(SERIALIZER, "org.apache.spark.serializer.JavaSerializer") | ||
| assert(Utils.isPushBasedShuffleEnabled(conf) === false) | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a question. Do we have another available relocation-supporting serializer besides `KryoSerializer`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. According to Tuning Spark - Data Serialization,
`KryoSerializer` and `JavaSerializer` are the only available options for data serialization; `KryoSerializer` is relocation-supporting while `JavaSerializer` is not. `UnsafeRowSerializer` is relocation-supporting, but it seems to be used only for serializing `UnsafeRow`s during shuffle.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ya, I thought like that. Thanks.