From ccb39ed95fa7cd8742ce323eb7096ddd1a09827b Mon Sep 17 00:00:00 2001 From: xianjingfeng <583872483@qq.com> Date: Fri, 29 Jul 2022 11:49:34 +0800 Subject: [PATCH] [Improvement] ShuffleBlock should be release when finished reading (#74) ### **What changes were proposed in this pull request?** Release the shuffle block as soon as reading it has finished. ### **Why are the changes needed?** We found that Spark executors are easily killed by YARN, and I found that this is because the executor uses too much off-heap memory when reading shuffle data. Most of the off-heap memory is used to store uncompressed shuffle data, and this memory is released only when GC is triggered. ### **Does this PR introduce any user-facing change?** No ### **How was this patch tested?** Added a new unit test (UT). --- .../reader/RssShuffleDataIterator.java | 13 ++++++++- .../uniffle/common/RssShuffleUtils.java | 28 +++++++++++++++++++ .../common/exception/RssException.java | 4 +++ .../uniffle/common/RssShuffleUtilsTest.java | 25 +++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/client-spark/common/src/main/java/org/apache/spark/shuffle/reader/RssShuffleDataIterator.java b/client-spark/common/src/main/java/org/apache/spark/shuffle/reader/RssShuffleDataIterator.java index 775285ca2f..bd8184c5b3 100644 --- a/client-spark/common/src/main/java/org/apache/spark/shuffle/reader/RssShuffleDataIterator.java +++ b/client-spark/common/src/main/java/org/apache/spark/shuffle/reader/RssShuffleDataIterator.java @@ -38,6 +38,7 @@ import org.apache.uniffle.client.api.ShuffleReadClient; import org.apache.uniffle.client.response.CompressedShuffleBlock; import org.apache.uniffle.common.RssShuffleUtils; +import org.apache.uniffle.common.exception.RssException; public class RssShuffleDataIterator extends AbstractIterator> { @@ -54,6 +55,7 @@ public class RssShuffleDataIterator extends AbstractIterator