From 49b47f04150d7c6fd428631228fa1428a2978e9d Mon Sep 17 00:00:00 2001
From: CrazyJvm <crazyjvm@gmail.com>
Date: Tue, 19 Aug 2014 15:59:36 +0800
Subject: [PATCH 1/5] add configuration `spark.shuffle.spill.batchSize` and fix
 the value of spark.shuffle.manager

---
 docs/configuration.md | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/docs/configuration.md b/docs/configuration.md
index 981170d8b49b7..f8b92bc44c255 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -283,12 +283,19 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.shuffle.manager</code></td>
-  <td>HASH</td>
+  <td>hash</td>
   <td>
     Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
     starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more 
     memory-efficient in environments with small executors, such as YARN. To use that, change
-    this value to <code>SORT</code>.
+    this value to <code>sort</code>.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.shuffle.spill.batchSize</code></td>
+  <td>10000</td>
+  <td>
+    Number of objects per batch when reading from or writing to serializers.
   </td>
 </tr>
 <tr>

From 8ad9955bf26f5a20940dfecbb2d46cb70fa067ec Mon Sep 17 00:00:00 2001
From: CrazyJvm <crazyjvm@gmail.com>
Date: Tue, 19 Aug 2014 16:14:51 +0800
Subject: [PATCH 2/5] fix typo

---
 docs/configuration.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index f8b92bc44c255..c647ef5178656 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -285,10 +285,10 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.shuffle.manager</code></td>
   <td>hash</td>
   <td>
-    Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
-    starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more 
-    memory-efficient in environments with small executors, such as YARN. To use that, change
-    this value to <code>sort</code>.
+    Implementation to use for shuffling data. A hash-based shuffle manager is the default,
+    but starting in Spark 1.1 there is an experimental sort-based shuffle manager that is
+    more memory-efficient in environments with small executors, such as YARN.
+    To use that, change this value to <code>sort</code>.
   </td>
 </tr>
 <tr>

From 876256dbd1a92cb5a4a300cb54d4c2f40a068ff9 Mon Sep 17 00:00:00 2001
From: CrazyJvm <crazyjvm@gmail.com>
Date: Tue, 19 Aug 2014 16:57:28 +0800
Subject: [PATCH 3/5] add spark.shuffle.safetyFraction in doc

---
 docs/configuration.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/docs/configuration.md b/docs/configuration.md
index c647ef5178656..495caf3f64e83 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -256,6 +256,14 @@ Apart from these, the following properties are also available, and may be useful
     <code>spark.storage.memoryFraction</code>.
   </td>
 </tr>
+<tr>
+  <td><code>spark.shuffle.safetyFraction</code></td>
+  <td>0.8</td>
+  <td>
+    An additional safety margin, given as a fraction of the Java heap used for aggregation and
+    cogroups during shuffles, in case the size estimate of the shuffle maps is not accurate enough.
+  </td>
+</tr>
 <tr>
   <td><code>spark.shuffle.compress</code></td>
   <td>true</td>

From 181c512c7d2340d0e7252793622cb1faddf30e78 Mon Sep 17 00:00:00 2001
From: CrazyJvm <crazyjvm@gmail.com>
Date: Wed, 20 Aug 2014 15:09:17 +0800
Subject: [PATCH 4/5] the config value of spark.shuffle.manager is case-insensitive

---
 docs/configuration.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 495caf3f64e83..315575345e13d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -291,13 +291,13 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.shuffle.manager</code></td>
-  <td>hash</td>
-  <td>
-    Implementation to use for shuffling data. A hash-based shuffle manager is the default,
-    but starting in Spark 1.1 there is an experimental sort-based shuffle manager that is
-    more memory-efficient in environments with small executors, such as YARN.
-    To use that, change this value to <code>sort</code>.
-  </td>
+   <td>HASH</td>
+   <td>
+     Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
+     starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more
+     memory-efficient in environments with small executors, such as YARN. To use that, change
+     this value to <code>SORT</code>.
+   </td>
 </tr>
 <tr>
   <td><code>spark.shuffle.spill.batchSize</code></td>

From 117e9bfe214372ff49f299425b5a56bfd2b8e58b Mon Sep 17 00:00:00 2001
From: CrazyJvm <crazyjvm@gmail.com>
Date: Thu, 21 Aug 2014 13:38:12 +0800
Subject: [PATCH 5/5] style fix

---
 docs/configuration.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 315575345e13d..55d8ae0a867ef 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -291,13 +291,13 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.shuffle.manager</code></td>
-   <td>HASH</td>
-   <td>
-     Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
-     starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more
-     memory-efficient in environments with small executors, such as YARN. To use that, change
-     this value to <code>SORT</code>.
-   </td>
+  <td>HASH</td>
+  <td>
+    Implementation to use for shuffling data. A hash-based shuffle manager is the default, but
+    starting in Spark 1.1 there is an experimental sort-based shuffle manager that is more
+    memory-efficient in environments with small executors, such as YARN. To use that, change
+    this value to <code>SORT</code>.
+  </td>
 </tr>
 <tr>
   <td><code>spark.shuffle.spill.batchSize</code></td>