[SPARK-47518][CORE] Skip transferring the last spilled shuffle data #45661

Open · wants to merge 9 commits into base: master · changes shown from 1 commit
First changed file:
@@ -21,6 +21,7 @@
import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Optional;
import java.util.zip.Checksum;

import org.apache.spark.SparkException;
@@ -153,8 +154,10 @@ public long[] getChecksums() {
* @param isFinalFile if true, this indicates that we're writing the final output file and that
* the bytes written should be counted towards shuffle write metrics rather
* than shuffle spill metrics.
* @param finalDataFileDir if present, the directory to write the final output file to. If not
* present, the file will be written to a temporary directory.
*/
private void writeSortedFile(boolean isFinalFile) {
private void writeSortedFile(boolean isFinalFile, Optional<File> finalDataFileDir) {
// Only emit the log if this is an actual spilling.
if (!isFinalFile) {
logger.info(
@@ -199,8 +202,9 @@ private void writeSortedFile(boolean isFinalFile) {
// createTempShuffleBlock here; see SPARK-3426 for more details.
final Tuple2<TempShuffleBlockId, File> spilledFileInfo =
blockManager.diskBlockManager().createTempShuffleBlock();
Contributor:
One idea: if isFinalFile is true, we could call a special version of createTempShuffleBlock that takes the shuffle and map IDs and returns a file path under the same directory as the final shuffle file. Then we wouldn't need to change other places?
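A minimal sketch of this suggestion, assuming the sorter were handed the shuffle and map IDs (today it is not) and that DiskBlockManager grew such a variant; the method name below is hypothetical and does not exist in Spark:

// Hypothetical sketch only: createTempShuffleBlockForFinalOutput stands in for a
// DiskBlockManager variant that places the temp block in the directory of the eventual
// shuffle output for (shuffleId, mapId). The else-branch is the existing behavior.
final Tuple2<TempShuffleBlockId, File> spilledFileInfo = isFinalFile
    ? blockManager.diskBlockManager().createTempShuffleBlockForFinalOutput(shuffleId, mapId)
    : blockManager.diskBlockManager().createTempShuffleBlock();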

Contributor (author):
Do you mean changing the parameter of createTempShuffleBlockInDir from finalDataFileDir to Tuple2<ShuffleId, MapId>?

Contributor:
That would assume the final output has to go to blockResolver.getDataFile(shuffleId, mapId), right @cloud-fan? Currently, at this layer, we do not make that assumption.

I was initially toying with the idea of passing mapId and shuffleId as constructor params and doing something similar, when I realized this would make assumptions that the code currently does not make, which is why the base directory is being passed around instead.

(And then, of course, I thought we could solve it in LocalDiskSingleSpillMapOutputWriter, but I was completely wrong :-( ).

final File file = spilledFileInfo._2();
final TempShuffleBlockId blockId = spilledFileInfo._1();
final File file =
finalDataFileDir.map(d -> new File(d, blockId.name())).orElseGet(spilledFileInfo::_2);
final SpillInfo spillInfo = new SpillInfo(numPartitions, file, blockId);

// Unfortunately, we need a serializer instance in order to construct a DiskBlockObjectWriter.
@@ -292,7 +296,7 @@ public long spill(long size, MemoryConsumer trigger) throws IOException {
return 0L;
}

writeSortedFile(false);
writeSortedFile(false, Optional.empty());
final long spillSize = freeMemory();
inMemSorter.reset();
// Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the
@@ -440,14 +444,16 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p
/**
* Close the sorter, causing any buffered data to be sorted and written out to disk.
*
* @param finalDataFileDir if present, the directory to write the final output file to. If not
* present, the file will be written to a temporary directory.
* @return metadata for the spill files written by this sorter. If no records were ever inserted
* into this sorter, then this will return an empty array.
*/
public SpillInfo[] closeAndGetSpills() throws IOException {
public SpillInfo[] closeAndGetSpills(Optional<File> finalDataFileDir) throws IOException {
if (inMemSorter != null) {
// Here we are spilling the remaining data in the buffer. If there is no spill before, this
// final spill file will be the final shuffle output file.
writeSortedFile(/* isFinalFile = */spills.isEmpty());
writeSortedFile(/* isFinalFile = */spills.isEmpty(), finalDataFileDir);
freeMemory();
inMemSorter.free();
inMemSorter = null;
Second changed file:
@@ -52,12 +52,14 @@
import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
import org.apache.spark.serializer.SerializationStream;
import org.apache.spark.serializer.SerializerInstance;
import org.apache.spark.shuffle.IndexShuffleBlockResolver;
import org.apache.spark.shuffle.ShuffleWriter;
import org.apache.spark.shuffle.api.ShuffleExecutorComponents;
import org.apache.spark.shuffle.api.ShuffleMapOutputWriter;
import org.apache.spark.shuffle.api.ShufflePartitionWriter;
import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter;
import org.apache.spark.shuffle.api.WritableByteChannelWrapper;
import org.apache.spark.shuffle.sort.io.LocalDiskShuffleExecutorComponents;
import org.apache.spark.storage.BlockManager;
import org.apache.spark.storage.TimeTrackingOutputStream;
import org.apache.spark.unsafe.Platform;
@@ -219,7 +221,15 @@ void closeAndWriteOutput() throws IOException {
updatePeakMemoryUsed();
serBuffer = null;
serOutputStream = null;
final SpillInfo[] spills = sorter.closeAndGetSpills();
Optional<File> finalDataFileDir;
if (shuffleExecutorComponents instanceof LocalDiskShuffleExecutorComponents) {
Member:
Hmm, it looks a bit hacky to handle local disk shuffle specially here.

Contributor (author):
Sorry, I'm not familiar with the block storage in KubernetesLocalDiskShuffleExecutorComponents, so I only handled LocalDiskShuffleExecutorComponents here.

Or should I add a new method getDataFile() to the ShuffleExecutorComponents trait?
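For illustration, a rough sketch of that alternative; the method name, signature, and default body below are hypothetical and not part of Spark's ShuffleExecutorComponents API:

// Hypothetical default method on ShuffleExecutorComponents (does not exist in Spark):
// a local-disk implementation would return the directory of the final data file for
// (shuffleId, mapId); other implementations keep the default Optional.empty(), so the
// sorter falls back to writing the last spill into a temporary location.
default Optional<File> finalDataFileDirectory(int shuffleId, long mapId) {
  return Optional.empty();
}

With something like this, UnsafeShuffleWriter could drop the instanceof check and simply ask shuffleExecutorComponents for the directory.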

File dataFile =
new IndexShuffleBlockResolver(sparkConf, blockManager).getDataFile(shuffleId, mapId);
Member:
Is this only used to invoke getParentFile?

Contributor (author):
Yes

finalDataFileDir = Optional.of(dataFile.getParentFile());
} else {
finalDataFileDir = Optional.empty();
}
final SpillInfo[] spills = sorter.closeAndGetSpills(finalDataFileDir);
try {
partitionLengths = mergeSpills(spills);
} finally {
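For context, the benefit of steering the last spill into the final data file's directory appears to be that committing a single-spill output becomes a same-directory rename rather than a cross-directory copy (the transfer the PR title wants to skip). A minimal sketch of that effect, using an illustrative helper that is not part of the PR:

// Illustrative only: when spillFile already lives in finalDataFile's directory, Files.move
// is a cheap rename on the same filesystem; when spillFile sits in a different local dir,
// the same call has to fall back to copying the bytes.
static void commitSingleSpill(java.io.File spillFile, java.io.File finalDataFile)
    throws java.io.IOException {
  java.nio.file.Files.move(spillFile.toPath(), finalDataFile.toPath());
}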