From 978ad148ea5572a3dfcb3b307a9790aa49754a38 Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Sat, 3 Nov 2018 20:00:53 -0700
Subject: [PATCH 01/11] update train_mnist

---
 .../examples/scripts/run_train_mnist.sh       | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/scala-package/examples/scripts/run_train_mnist.sh b/scala-package/examples/scripts/run_train_mnist.sh
index ea53c1ade66..dbf6ae85419 100755
--- a/scala-package/examples/scripts/run_train_mnist.sh
+++ b/scala-package/examples/scripts/run_train_mnist.sh
@@ -19,15 +19,31 @@
 
 set -e
 
+hw_type=cpu  # default build flavour; pass "gpu" as $1 to select the GPU assembly
+if [[ $1 = gpu ]]
+then
+    hw_type=gpu
+fi
+
+platform=linux-x86_64
+
+if [[ $OSTYPE = darwin* ]]  # prefix match: bash reports darwin<version> on macOS
+then
+    platform=osx-x86_64
+    hw_type=cpu  # always cpu on macOS, overriding a "gpu" argument
+fi
+
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 echo $MXNET_ROOT
-CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-cpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*:$MXNET_ROOT/scala-package/infer/target/*
+CLASS_PATH=$MXNET_ROOT/scala-package/assembly/$platform-$hw_type/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*:$MXNET_ROOT/scala-package/infer/target/*
 
 # model dir
 DATA_PATH=$2
 
-java -XX:+PrintGC -Xms256M -Xmx512M -Dmxnet.traceLeakedObjects=false -cp $CLASS_PATH \
-        org.apache.mxnetexamples.imclassification.TrainMnist \
-        --data-dir /home/ubuntu/mxnet_scala/scala-package/examples/mnist/ \
+java -XX:+PrintGC -Dmxnet.traceLeakedObjects=false -cp "$CLASS_PATH" \
+        org.apache.mxnetexamples.imclassification.TrainModel \
+        --data-dir "$MXNET_ROOT/scala-package/examples/mnist/" \
+        --network mlp \
+        --num-layers 50 \
         --num-epochs 10000000 \
         --batch-size 1024
\ No newline at end of file

From b1d57fa479d3a2b433629dab5e282f2d043ad661 Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Sat, 3 Nov 2018 21:40:44 -0700
Subject: [PATCH 02/11] Add documentation for JVM Memory Management

---
 scala-package/memory-management.md | 117 +++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 scala-package/memory-management.md

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
new file mode 100644
index 00000000000..d3c8cd35bd9
--- /dev/null
+++ b/scala-package/memory-management.md
@@ -0,0 +1,117 @@
+# JVM Memory Management
+The Scala and Java binding of Apache MXNet uses native memory(C++ Heap either in RAM or GPU memory) in most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,. the Scala classes associated with them act as wrappers, 
+the operations on these objects are directed to the MXNet C++ backend via JNI for performance , so the bytes are also stored in the native heap for fast access.   
+
+The JVM using the Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory, hence allocation/deAllocation of the native memory has to be managed by MXNet Scala.  
+Allocating native memory is straight forward and is done during the construction of the object by a calling the associated C++ API through JNI, however since JVM languages do not have destructors, De-Allocation of these objects becomes problematic and has to explicitly de-allocated. 
+To make it easy, MXNet Scala provides a few modes of operation.
+
+## Memory Management in Scala 
+### [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
+`ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and also extends to automatically manage the memory of all the MXNet objects created in the code block (`body`) associated with it by tracking the allocations in a stack. 
+If an MXNet object or an Iterable containing MXNet objects is returned from the code-block, it is automatically excluded from de-allocation in the current scope and moved to 
+an outer scope if ResourceScope's are stacked.  
+
+**Usage** 
+```
+ResourceScope.using() {
+    ResourceScope.using() {
+        val r1 = NDArray.ones(Shape(2, 2))
+        val r2 = NDArray.ones(Shape(3, 4))
+        val r3 = NDArray.ones(Shape(5, 6))
+        val r4 = NDArray.ones(Shape(7, 8))
+        (r3, r4)
+    }
+    r4
+}
+```
+In the example above, we have two ResourceScopes stacked together, 4 NDArrays `(r1, r2, r3, r4)` are created in the inner scope, the inner scope returns 
+`(r3, r4)`. The ResourceScope code recognizes that it should not de-allocate these objects and automatically moves `r3` and  `r4` to the outer scope. The outer scope 
+returns `r4` from its code-block, so ResourceScope.using removes this from its list of objects to be de-allocated. All other objects are automatically released(native memory) by calling the C++ Backend to free the memory. 
+
+**Note:**
+You should consider stacking ResourceScope when you have layers of functionality in your application code which creates a lot of MXNet objects like NDArray. 
+This is because you don't want to hold onto all the memory that is created for the entire training loop and you will most likely run out of memory especially on GPUs which have limited memory in order 8 to 16 GB. 
+For example if you were writing Training code in MXNet Scala, it is recommended not to use one-uber ResourceScope block that runs the entire training code, 
+instead you should stack multiple scopes one where you run forward backward passes on each batch, 
+and 2nd scope for each epoch and an outer scope that runs the entire training script, like the example below
+```
+ResourceScope.using() {
+ val m = Module(...)
+ m.bind()
+ val k = KVStore(...)
+ ResourceScope.using() {
+     val itr = MXIterator(..)
+     val num_epochs: Int = 100
+     ... 
+     for (i <- 0 until num_epoch) {
+     ResourceScope.using() {
+        val dataBatch = itr.next()
+        while(itr.next()) {
+           m.forward(dataBatch)
+           m.backward(dataBatch)
+           m.update()
+        }
+     }
+ }
+}
+
+```  
+       
+### Using Phantom References (Recommended for some use cases)
+
+Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that has native memory associated with it. 
+When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them, 
+we take advantage of Garbage Collector which enqueues objects into a reference queue that are ready to be reclaimed, 
+at which point we do pre-mortem clean up by call the MXNet backend C++ API to free the native memory. 
+ 
+In this approach, you do not have to write any special code to have native memory cleaned up, however this approach solely depends on the Garbage collector to run and find unreachable objects.
+You can control the frequency of Garbage Collector by calling System.gc() at strategic points such as at the end of an epoch or at the end of a mini-batch in Training.
+
+This approach could be suitable for use-cases such as inference on CPUs and you have large amount of Memory(RAM) on your system.  
+
+**Note:**   
+Calling GC too frequently can cause your application to perform poorly. This approach might not be suitable   
+when you have large number of large NDArray allocations too quickly such as training a GAN model
+
+### Using dispose Pattern (least Recommended)
+ 
+There might be situations where you want to manage the lifecycle of Apache MXNet objects, for such use-cases we have provided `dispose()` method that you can call and it will deallocate associated native memory, we have also
+made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html), if you are using Java8 and above you can use it with try-with-resources or call close() in the finally block.
+
+**Note:**   
+We recommend to avoid manually managing MXNet objects and instead to use `ResourceScope.using` as this could leak memory if you miss calling dispose( at some point GC will kick in and be cleaned up due to Phantom Reference)
+and create unreadable code.   
+
+```
+def showDispose(): Unit = {
+    val r = NDArray.ones(Shape (2, 2))
+    r.dispose()
+}
+```
+
+## Memory Management in Java
+Memory Management in MXNet Java is similar to Scala, We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
+Java 8 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block is automatically closed. 
+The above discussed ResourceScope implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
+
+**Note:**  
+The capability of not de-allocating returned MXNet objects and Iterables containing MXNet objects 
+It is highly recommended to use a stack of try-with-resource ResourceScope's for the reason discussed in Scala's ResourceScope Note section.
+
+```
+try(ResourceScope scope = new ResourceScope()) {
+    NDArray test = NDArray.ones((Shape (2,2))
+}
+```
+or 
+```
+try {
+    ResourceScope scope = new ResourceScope()
+    NDArray r = NDArray.ones((Shape(2,2))
+} finally {
+   scope.close()
+}
+``` 
+
+

From 96ff9f9284b09eb0fc45e1bcff903de87f573005 Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Tue, 27 Nov 2018 16:51:34 -0800
Subject: [PATCH 03/11] update doc

---
 .../examples/scripts/run_train_mnist.sh       |  2 +-
 scala-package/memory-management.md            | 41 ++++++++++---------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/scala-package/examples/scripts/run_train_mnist.sh b/scala-package/examples/scripts/run_train_mnist.sh
index dbf6ae85419..d27b7cbb365 100755
--- a/scala-package/examples/scripts/run_train_mnist.sh
+++ b/scala-package/examples/scripts/run_train_mnist.sh
@@ -35,7 +35,7 @@ fi
 
 MXNET_ROOT=$(cd "$(dirname $0)/../../.."; pwd)
 echo $MXNET_ROOT
-CLASS_PATH=$MXNET_ROOT/scala-package/assembly/$platform-$hw_type/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*:$MXNET_ROOT/scala-package/infer/target/*
+CLASS_PATH=$MXNET_ROOT/scala-package/assembly/$platform-$hw_type/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
 
 # model dir
 DATA_PATH=$2
diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index d3c8cd35bd9..e7a17fc65ec 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -1,13 +1,13 @@
 # JVM Memory Management
-The Scala and Java binding of Apache MXNet uses native memory(C++ Heap either in RAM or GPU memory) in most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,. the Scala classes associated with them act as wrappers, 
-the operations on these objects are directed to the MXNet C++ backend via JNI for performance , so the bytes are also stored in the native heap for fast access.   
+The Scala and Java bindings of Apache MXNet uses native memory(C++ heap either in RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,. 
+The Scala classes associated with them act as wrappers, the operations on these objects are directed to the MXNet C++ backend via JNI for performance , so the bytes are also stored in the native heap for fast access.
 
-The JVM using the Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory, hence allocation/deAllocation of the native memory has to be managed by MXNet Scala.  
-Allocating native memory is straight forward and is done during the construction of the object by a calling the associated C++ API through JNI, however since JVM languages do not have destructors, De-Allocation of these objects becomes problematic and has to explicitly de-allocated. 
-To make it easy, MXNet Scala provides a few modes of operation.
+The JVM using the Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory, hence allocation/deallocation of the native memory has to be managed by MXNet Scala.
+Allocating native memory is straight forward and is done during the construction of the object by a calling the associated C++ API through JNI, However since JVM languages do not have destructors, deallocation of these objects needs to be done explicitly.
+To make it easy, MXNet Scala provides a few modes of operation, explained in detail below.
 
 ## Memory Management in Scala 
-### [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
+### 1.  [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
 `ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and also extends to automatically manage the memory of all the MXNet objects created in the code block (`body`) associated with it by tracking the allocations in a stack. 
 If an MXNet object or an Iterable containing MXNet objects is returned from the code-block, it is automatically excluded from de-allocation in the current scope and moved to 
 an outer scope if ResourceScope's are stacked.  
@@ -27,7 +27,7 @@ ResourceScope.using() {
 ```
 In the example above, we have two ResourceScopes stacked together, 4 NDArrays `(r1, r2, r3, r4)` are created in the inner scope, the inner scope returns 
 `(r3, r4)`. The ResourceScope code recognizes that it should not de-allocate these objects and automatically moves `r3` and  `r4` to the outer scope. The outer scope 
-returns `r4` from its code-block, so ResourceScope.using removes this from its list of objects to be de-allocated. All other objects are automatically released(native memory) by calling the C++ Backend to free the memory. 
+returns `r4` from its code-block and deallocates `r3`, so ResourceScope.using removes this from its list of objects to be de-allocated. All other objects are automatically released(native memory) by calling the C++ Backend to free the memory.
 
 **Note:**
 You should consider stacking ResourceScope when you have layers of functionality in your application code which creates a lot of MXNet objects like NDArray. 
@@ -35,15 +35,15 @@ This is because you don't want to hold onto all the memory that is created for t
 For example if you were writing Training code in MXNet Scala, it is recommended not to use one-uber ResourceScope block that runs the entire training code, 
 instead you should stack multiple scopes one where you run forward backward passes on each batch, 
 and 2nd scope for each epoch and an outer scope that runs the entire training script, like the example below
-```
+```scala
 ResourceScope.using() {
- val m = Module(...)
+ val m = Module()
  m.bind()
  val k = KVStore(...)
  ResourceScope.using() {
      val itr = MXIterator(..)
      val num_epochs: Int = 100
-     ... 
+     //... 
      for (i <- 0 until num_epoch) {
      ResourceScope.using() {
         val dataBatch = itr.next()
@@ -53,12 +53,13 @@ ResourceScope.using() {
            m.update()
         }
      }
+   }
  }
 }
 
 ```  
        
-### Using Phantom References (Recommended for some use cases)
+### 2.  Using Phantom References (Recommended for some use cases)
 
 Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that has native memory associated with it. 
 When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them, 
@@ -90,15 +91,11 @@ def showDispose(): Unit = {
 }
 ```
 
-## Memory Management in Java
+## 3. Memory Management in Java
 Memory Management in MXNet Java is similar to Scala, We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
-Java 8 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block is automatically closed. 
+Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block is automatically closed. 
 The above discussed ResourceScope implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
 
-**Note:**  
-The capability of not de-allocating returned MXNet objects and Iterables containing MXNet objects 
-It is highly recommended to use a stack of try-with-resource ResourceScope's for the reason discussed in Scala's ResourceScope Note section.
-
 ```
 try(ResourceScope scope = new ResourceScope()) {
     NDArray test = NDArray.ones((Shape (2,2))
@@ -108,10 +105,16 @@ or
 ```
 try {
     ResourceScope scope = new ResourceScope()
-    NDArray r = NDArray.ones((Shape(2,2))
+    NDArray test = NDArray.ones((Shape(2,2))
 } finally {
-   scope.close()
+    scope.close()
 }
 ``` 
+**Note:**
+ResourceScope within a try block tracks all MXNet Native Object Allocations (NDArray, Symbol, Executor, etc.,) and deallocates at
+the end of the try block even the objects that are returned, ie., in the above even if `test` were to be returned the native memory associated
+with it would be deallocated and if you use it outside of the try block, the process might crash due to illegal memory access.
 
+If you want to retain certain objects created within the try block, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`
+It is highly recommended to use a stack of try-with-resource ResourceScope's so you do not have explicitly manage the lifecycle of the Native objects.
 

From a47d750727c187e598930c6226af5162be2482de Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Tue, 27 Nov 2018 17:15:09 -0800
Subject: [PATCH 04/11] address nit picks

---
 scala-package/memory-management.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index e7a17fc65ec..d9905fa74a0 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -1,6 +1,6 @@
 # JVM Memory Management
-The Scala and Java bindings of Apache MXNet uses native memory(C++ heap either in RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,. 
-The Scala classes associated with them act as wrappers, the operations on these objects are directed to the MXNet C++ backend via JNI for performance , so the bytes are also stored in the native heap for fast access.
+The Scala and Java bindings of Apache MXNet uses native memory(C++ heap either in RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,
+The Scala classes associated with them act as wrappers, the operations on these objects are directed to the MXNet C++ backend via JNI for performance, so the bytes are also stored in the native heap for fast access.
 
 The JVM using the Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory, hence allocation/deallocation of the native memory has to be managed by MXNet Scala.
 Allocating native memory is straight forward and is done during the construction of the object by a calling the associated C++ API through JNI, However since JVM languages do not have destructors, deallocation of these objects needs to be done explicitly.
@@ -61,10 +61,10 @@ ResourceScope.using() {
        
 ### 2.  Using Phantom References (Recommended for some use cases)
 
-Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that has native memory associated with it. 
+Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that have native memory associated with it. 
 When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them, 
 we take advantage of Garbage Collector which enqueues objects into a reference queue that are ready to be reclaimed, 
-at which point we do pre-mortem clean up by call the MXNet backend C++ API to free the native memory. 
+at which point we do pre-mortem clean up by calling the corresponding MXNet backend API to free the native memory. 
  
 In this approach, you do not have to write any special code to have native memory cleaned up, however this approach solely depends on the Garbage collector to run and find unreachable objects.
 You can control the frequency of Garbage Collector by calling System.gc() at strategic points such as at the end of an epoch or at the end of a mini-batch in Training.
@@ -93,7 +93,7 @@ def showDispose(): Unit = {
 
 ## 3. Memory Management in Java
 Memory Management in MXNet Java is similar to Scala, We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
-Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block is automatically closed. 
+Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block are automatically closed. 
 The above discussed ResourceScope implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
 
 ```

From 3ac82693bd6e2296305758fa96327f30c201469e Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Tue, 27 Nov 2018 17:17:03 -0800
Subject: [PATCH 05/11] address nit picks

---
 scala-package/memory-management.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index d9905fa74a0..773ec5017c9 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -13,7 +13,7 @@ If an MXNet object or an Iterable containing MXNet objects is returned from the
 an outer scope if ResourceScope's are stacked.  
 
 **Usage** 
-```
+```scala
 ResourceScope.using() {
     ResourceScope.using() {
         val r1 = NDArray.ones(Shape(2, 2))
@@ -84,7 +84,7 @@ made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api
 We recommend to avoid manually managing MXNet objects and instead to use `ResourceScope.using` as this could leak memory if you miss calling dispose( at some point GC will kick in and be cleaned up due to Phantom Reference)
 and create unreadable code.   
 
-```
+```scala
 def showDispose(): Unit = {
     val r = NDArray.ones(Shape (2, 2))
     r.dispose()
@@ -96,13 +96,13 @@ Memory Management in MXNet Java is similar to Scala, We recommend to use [Resour
 Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block are automatically closed. 
 The above discussed ResourceScope implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
 
-```
+```java
 try(ResourceScope scope = new ResourceScope()) {
     NDArray test = NDArray.ones((Shape (2,2))
 }
 ```
 or 
-```
+```java
 try {
     ResourceScope scope = new ResourceScope()
     NDArray test = NDArray.ones((Shape(2,2))

From bcd24c294dfb6027fbce21350a71d23d58ff1f55 Mon Sep 17 00:00:00 2001
From: Ayres <andayr@amazon.com>
Date: Wed, 28 Nov 2018 01:55:50 -0800
Subject: [PATCH 06/11] Grammar and clarity edits for memory management doc

---
 scala-package/memory-management.md | 61 +++++++++++++++---------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index 773ec5017c9..ed077758e58 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -1,16 +1,15 @@
 # JVM Memory Management
-The Scala and Java bindings of Apache MXNet uses native memory(C++ heap either in RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.,
-The Scala classes associated with them act as wrappers, the operations on these objects are directed to the MXNet C++ backend via JNI for performance, so the bytes are also stored in the native heap for fast access.
+The Scala and Java bindings of Apache MXNet use native memory (C++ heap in either RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.
+The associated Scala classes act as wrappers. For performance, operations on these objects are directed to the MXNet C++ backend via JNI. Therefore, the bytes are also stored in the native heap allowing for fast access.
 
-The JVM using the Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory, hence allocation/deallocation of the native memory has to be managed by MXNet Scala.
-Allocating native memory is straight forward and is done during the construction of the object by a calling the associated C++ API through JNI, However since JVM languages do not have destructors, deallocation of these objects needs to be done explicitly.
+The JVM Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory. Hence, the allocation/deallocation of native memory must be managed by MXNet Scala.
+Allocating native memory is straightforward and is done during the construction of the object by calling the associated C++ API through JNI. However, since JVM languages do not have destructors, the deallocation of these objects needs to be done explicitly.
 To make it easy, MXNet Scala provides a few modes of operation, explained in detail below.
 
 ## Memory Management in Scala 
 ### 1.  [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
-`ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and also extends to automatically manage the memory of all the MXNet objects created in the code block (`body`) associated with it by tracking the allocations in a stack. 
-If an MXNet object or an Iterable containing MXNet objects is returned from the code-block, it is automatically excluded from de-allocation in the current scope and moved to 
-an outer scope if ResourceScope's are stacked.  
+`ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and is extended to automatically manage the memory of all the MXNet objects created in the associated code block (`body`). This is accomplished by tracking the allocations in a stack. 
+An MXNet object, or iterable containing MXNet objects, is automatically excluded from deallocation when it is returned by the code block. If ResourceScopes are stacked then it will be added to the outer scope.
 
 **Usage** 
 ```scala
@@ -25,16 +24,16 @@ ResourceScope.using() {
     r4
 }
 ```
-In the example above, we have two ResourceScopes stacked together, 4 NDArrays `(r1, r2, r3, r4)` are created in the inner scope, the inner scope returns 
-`(r3, r4)`. The ResourceScope code recognizes that it should not de-allocate these objects and automatically moves `r3` and  `r4` to the outer scope. The outer scope 
-returns `r4` from its code-block and deallocates `r3`, so ResourceScope.using removes this from its list of objects to be de-allocated. All other objects are automatically released(native memory) by calling the C++ Backend to free the memory.
+In the example above, we have two ResourceScopes stacked together. In the inner scope, 4 NDArrays `(r1, r2, r3, r4)` are created and the NDArrays 
+`(r3, r4)` are returned. The inner ResourceScope recognizes that it should not deallocate these objects and automatically moves `r3` and  `r4` to the outer scope. The outer scope 
+returns `r4` from its code-block. The outer ResourceScope.using will deallocate `r3` and remove `r4` from its list of objects to be deallocated. All other objects are automatically released by calling the C++ backend to free the native memory.
 
 **Note:**
-You should consider stacking ResourceScope when you have layers of functionality in your application code which creates a lot of MXNet objects like NDArray. 
-This is because you don't want to hold onto all the memory that is created for the entire training loop and you will most likely run out of memory especially on GPUs which have limited memory in order 8 to 16 GB. 
-For example if you were writing Training code in MXNet Scala, it is recommended not to use one-uber ResourceScope block that runs the entire training code, 
-instead you should stack multiple scopes one where you run forward backward passes on each batch, 
-and 2nd scope for each epoch and an outer scope that runs the entire training script, like the example below
+You should consider stacking ResourceScope when you have layers of functionality in your application code which create a lot of MXNet objects like NDArray. 
+This is because you don't want to hold onto all the memory that is created for an entire training loop, which could result in running out of memory (this is especially true on GPUs which have limited memory on the order of 8 to 16 GB). 
+For example, if you were writing training code in MXNet Scala, it is recommended not to use a single ResourceScope block which spans the entire training code. 
+Instead, you should stack multiple scopes: one where you run forward and backward passes on each batch, 
+a 2nd scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
 ```scala
 ResourceScope.using() {
  val m = Module()
@@ -62,27 +61,27 @@ ResourceScope.using() {
 ### 2.  Using Phantom References (Recommended for some use cases)
 
 Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that have native memory associated with it. 
-When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them, 
-we take advantage of Garbage Collector which enqueues objects into a reference queue that are ready to be reclaimed, 
-at which point we do pre-mortem clean up by calling the corresponding MXNet backend API to free the native memory. 
+When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them. 
+The Garbage Collector enqueues objects which are ready to be reclaimed into a reference queue. We take advantage of this and do a 
+pre-mortem cleanup on these objects by calling the corresponding MXNet backend API to free the native memory.
  
-In this approach, you do not have to write any special code to have native memory cleaned up, however this approach solely depends on the Garbage collector to run and find unreachable objects.
-You can control the frequency of Garbage Collector by calling System.gc() at strategic points such as at the end of an epoch or at the end of a mini-batch in Training.
+In this approach, you do not have to write any special code to have native memory cleaned up. However, this approach solely depends on the Garbage collector to run and find unreachable objects.
+You can control the frequency of Garbage Collector by calling System.gc() at strategic points, such as at the end of an epoch or at the end of a mini-batch in training.
 
 This approach could be suitable for use-cases such as inference on CPUs and you have large amount of Memory(RAM) on your system.  
 
 **Note:**   
-Calling GC too frequently can cause your application to perform poorly. This approach might not be suitable   
-when you have large number of large NDArray allocations too quickly such as training a GAN model
+Calling GC too frequently can cause your application to perform poorly. This approach might not be suitable 
+for use cases which quickly allocate a large number of large NDArrays, such as when training a GAN model.
 
 ### Using dispose Pattern (least Recommended)
  
-There might be situations where you want to manage the lifecycle of Apache MXNet objects, for such use-cases we have provided `dispose()` method that you can call and it will deallocate associated native memory, we have also
-made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html), if you are using Java8 and above you can use it with try-with-resources or call close() in the finally block.
+There might be situations where you want to manually manage the lifecycle of Apache MXNet objects. For such use-cases, we have provided the `dispose()` method which will deallocate the associated native memory when called. We have also
+made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html). If you are using Java8 and above you can use it with try-with-resources or call close() in the finally block.
 
 **Note:**   
-We recommend to avoid manually managing MXNet objects and instead to use `ResourceScope.using` as this could leak memory if you miss calling dispose( at some point GC will kick in and be cleaned up due to Phantom Reference)
-and create unreadable code.   
+We recommend to avoid manually managing MXNet objects and instead to use `ResourceScope.using`. As this could leak memory if you miss calling dispose (at some point GC will kick in and be cleaned up due to Phantom Reference)
+and creates less readable code.   
 
 ```scala
 def showDispose(): Unit = {
@@ -92,9 +91,9 @@ def showDispose(): Unit = {
 ```
 
 ## 3. Memory Management in Java
-Memory Management in MXNet Java is similar to Scala, We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
+Memory Management in MXNet Java is similar to Scala. We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
 Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block are automatically closed. 
-The above discussed ResourceScope implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
+The ResourceScope discussed above implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
 
 ```java
 try(ResourceScope scope = new ResourceScope()) {
@@ -112,9 +111,9 @@ try {
 ``` 
 **Note:**
 ResourceScope within a try block tracks all MXNet Native Object Allocations (NDArray, Symbol, Executor, etc.,) and deallocates at
-the end of the try block even the objects that are returned, ie., in the above even if `test` were to be returned the native memory associated
-with it would be deallocated and if you use it outside of the try block, the process might crash due to illegal memory access.
+the end of the try block. This is also true of the objects that are returned, i.e., in the example above the native memory associated with `test` would be deallocated even if it were to be returned. 
+If you use it outside of the try block, the process might crash due to illegal memory access.
 
-If you want to retain certain objects created within the try block, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`
+If you want to retain certain objects created within the try block, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`.
 It is highly recommended to use a stack of try-with-resource ResourceScope's so you do not have explicitly manage the lifecycle of the Native objects.
 

From f6efe66c51d884a41b6a6819f60b9843ef4421f2 Mon Sep 17 00:00:00 2001
From: Zach Kimberg <kimbergz@amazon.com>
Date: Wed, 28 Nov 2018 18:23:48 -0800
Subject: [PATCH 07/11] Edits for scala memory management

---
 scala-package/memory-management.md | 66 +++++++++++++++---------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index ed077758e58..05667eb2070 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -1,15 +1,15 @@
 # JVM Memory Management
-The Scala and Java bindings of Apache MXNet use native memory (C++ heap in either RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.
-The associated Scala classes act as wrappers. For performance, operations on these objects are directed to the MXNet C++ backend via JNI. Therefore, the bytes are also stored in the native heap allowing for fast access.
+The Scala and Java bindings of Apache MXNet use native memory (memory from the C++ heap in either RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.
+The associated Scala classes act only as wrappers. The operations done on these wrapper objects are then directed to the high performance MXNet C++ backend via the Java Native Interface (JNI). Therefore, the bytes are stored in the C++ native heap which allows for fast access.
 
-The JVM Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory. Hence, the allocation/deallocation of native memory must be managed by MXNet Scala.
-Allocating native memory is straight forward and is done during the construction of the object by calling the associated C++ API through JNI. However, since JVM languages do not have destructors, the deallocation of these objects needs to be done explicitly.
-To make it easy, MXNet Scala provides a few modes of operation, explained in detail below.
+However, the JVM Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory. Hence, the allocation/deallocation of native memory must be managed by MXNet Scala.
+Allocating native memory is straightforward and is done during the construction of the object by calling the associated C++ API through JNI. However, since JVM languages do not have destructors, the deallocation of these objects must be done explicitly.
+MXNet Scala provides a few easy modes of operation which are explained in detail below.
 
 ## Memory Management in Scala 
 ### 1.  [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
-`ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and is extended to automatically manage the memory of all the MXNet objects created in the associated code block (`body`). This is accomplished by tracking the allocations in a stack. 
-An MXNet object, or iterable containing MXNet objects, is automatically excluded from deallocation when it is returned by the code block. If ResourceScopes are stacked then it will be added to the outer scope.
+`ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and will automatically manage the memory of all the MXNet objects created in the associated code block (`body`). It works by tracking the allocations performed inside the code block deallocating when exiting the block. 
+Passing MXNet objects out of a using block can be easily accomplished by simply returning an object or an interable containing multiple MXNet objects. If you have nested using blocks, then the returned objects will be moved into the parent scope as well.
 
 **Usage** 
 ```scala
@@ -25,15 +25,15 @@ ResourceScope.using() {
 }
 ```
 In the example above, we have two ResourceScopes stacked together. In the inner scope, 4 NDArrays `(r1, r2, r3, r4)` are created and the NDArrays 
-`(r3, r4)` are returned. The inner ResourceScope recognizes that it should not deallocate these objects and automatically moves `r3` and  `r4` to the outer scope. The outer scope 
-returns `r4` from its code-block. The outer ResourceScope.using will deallocate `r3` and remove `r4` from its list of objects to be deallocated. All other objects are automatically released by calling the C++ backend to free the native memory.
+`(r3, r4)` are returned. The inner ResourceScope recognizes that it should not deallocate these objects and automatically moves `r3` and  `r4` to the outer scope. When the outer scope 
+returns `r4` from its code-block, it will only deallocate `r3` and will remove `r4` from its list of objects to be deallocated. All other objects are automatically released by calling the C++ backend to free the native memory.
 
 **Note:**
-You should consider stacking ResourceScope when you have layers of functionality in your application code which create a lot of MXNet objects like NDArray. 
-This is because you don't want to hold onto all the memory that is created for an entire training loop, which could result in running out of memory (this is especially true on GPUs which have limited memory on the order of 8 to 16 GB). 
+You should consider nesting ResourceScopes when you have layers of functionality in your application code or create a lot of MXNet objects such as NDArrays. 
+Holding onto all the memory that is created for an entire training loop can result in running out of memory, especially when training on GPUs which might only have 8 to 16 GB. 
 For example, if you were writing training code in MXNet Scala, it is recommended not to use a single ResourceScope block which spans the entire training code. 
-Instead you should stack multiple scopes, one where you run forward backward passes on each batch, 
-a 2nd scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
+You should instead stack multiple scopes: an innermost scope where you run forward-backward passes on each batch, 
+a middle scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
 ```scala
 ResourceScope.using() {
  val m = Module()
@@ -61,27 +61,26 @@ ResourceScope.using() {
 ### 2.  Using Phantom References (Recommended for some use cases)
 
 Apache MXNet uses [Phantom References](https://docs.oracle.com/javase/8/docs/api/java/lang/ref/PhantomReference.html) to track all MXNet Objects that have native memory associated with it. 
-When the Garbage Collector runs, GC identifies unreachable Scala/Java objects in the JVM Heap and finalizes them. 
-The Garbage Collector enqueues objects which are ready to be reclaimed into a reference queue. We take advantage of this and do a 
-pre-mortem cleanup on these objects by calling the corresponding MXNet backend API to free the native memory.
+When the Garbage Collector runs, it identifies unreachable Scala/Java objects in the JVM Heap and finalizes them. 
+It then enqueues objects which are ready to be reclaimed into a reference queue. We take advantage of this and do a 
+pre-mortem cleanup on these wrapper objects by freeing the corresponding native memory as well.
  
-In this approach, you do not have to write any special code to have native memory cleaned up. However, this approach solely depends on the Garbage collector to run and find unreachable objects.
-You can control the frequency of Garbage Collector by calling System.gc() at strategic points, such as at the end of an epoch or at the end of a mini-batch in training.
+This approach is automatic and does not require any special code to clean up the native memory. However, the Garbage Collector is not aware of the potentially large amount of native memory used and therefore may not free up memory often enough with its standard behavior.
+You can control the frequency of garbage collection by calling System.gc() at strategic points such as the end of an epoch or the end of a mini-batch.
 
-This approach could be suitable for use-cases such as inference on CPUs and you have large amount of Memory(RAM) on your system.  
+This approach could be suitable for some use cases such as inference on CPUs where you have a large amount of Memory (RAM) on your system.
 
-**Note:**   
-Calling GC too frequently can cause your application to perform poorly. This approach might not be suitable 
-for use cases which quickly allocate a large number of large NDArrays, such as when training a GAN model.
+**Note:**
+Calling GC too frequently can also cause your application to perform poorly. This approach might not be suitable 
+for use cases which quickly allocate a large number of large NDArrays such as when training a GAN model.
 
 ### Using dispose Pattern (least Recommended)
  
-There might be situations where you want to manually manage the lifecycle of Apache MXNet objects. For such use-cases, we have provided the `dispose()` method which will deallocate the associated native memory when called. We have also
+There might be situations where you want to manually manage the lifecycle of Apache MXNet objects. For such use-cases, we have provided the `dispose()` method which will manually deallocate the associated native memory when called. We have also
 made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html). If you are using Java8 and above you can use it with try-with-resources or call close() in the finally block.
 
-**Note:**   
-We recommend to avoid manually managing MXNet objects and instead to use `ResourceScope.using`. As this could leak memory if you miss calling dispose (at some point GC will kick in and be cleaned up due to Phantom Reference)
-and creates less readable code.   
+**Note:**
+We recommend you avoid manually managing MXNet objects and instead use `ResourceScope.using`. Manual management creates less readable code and could leak memory if you miss calling dispose (until the object is cleaned up by the Garbage Collector through the Phantom References).
 
 ```scala
 def showDispose(): Unit = {
@@ -91,8 +90,8 @@ def showDispose(): Unit = {
 ```
 
 ## 3. Memory Management in Java
-Memory Management in MXNet Java is similar to Scala. We recommend to use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.   
-Java 7 onwards supports [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) where the resources declared in the try block are automatically closed. 
+Memory Management in MXNet Java is similar to Scala. We recommend you use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.
+The [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) tracks the resources declared in the try block and automatically closes them upon exiting (supported from Java 7 onwards). 
 The ResourceScope discussed above implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 
 
 ```java
@@ -109,11 +108,12 @@ try {
     scope.close()
 }
 ``` 
+
 **Note:**
-ResourceScope within a try block tracks all MXNet Native Object Allocations (NDArray, Symbol, Executor, etc.,) and deallocates at
-the end of the try block. This is also true of the objects that are returned, i.e., in the example above the native memory associated with `test` would be deallocated even if it were to be returned. 
-If you use it outside of the try block, the process might crash due to illegal memory access.
+A ResourceScope within a try block tracks all MXNet Native Object Allocations (NDArray, Symbol, Executor, etc.) and deallocates them at
+the end of the try block. This is also true of the objects that are returned; e.g., in the example above, the native memory associated with `test` would be deallocated even if it were to be returned. 
+If you use the object outside of the try block, the process might crash due to illegal memory access.
 
-If you want to retain certain objects created within the try block, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`.
-It is highly recommended to use a stack of try-with-resource ResourceScope's so you do not have explicitly manage the lifecycle of the Native objects.
+To retain certain objects created within try blocks, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`.
+It is highly recommended to use a stack of try-with-resource ResourceScopes so you do not have to explicitly manage the lifecycle of the Native objects.
 

From e030315999b47bba7d48402539fecfc41f50001d Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Thu, 29 Nov 2018 09:40:16 -0800
Subject: [PATCH 08/11] Update memory-management.md

---
 scala-package/memory-management.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index 05667eb2070..d29b448e6bf 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -1,5 +1,5 @@
 # JVM Memory Management
-The Scala and Java bindings of Apache MXNet use native memory (memory from the C++ heap in either RAM or GPU memory) for most of the MXNet Scala objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.
+The Scala and Java bindings of Apache MXNet use native memory (memory from the C++ heap in either RAM or GPU memory) for most of the MXNet objects such as NDArray, Symbol, Executor, KVStore, Data Iterators, etc.
 The associated Scala classes act only as wrappers. The operations done on these wrapper objects are then directed to the high performance MXNet C++ backend via the Java Native Interface (JNI). Therefore, the bytes are stored in the C++ native heap which allows for fast access.
 
 However, the JVM Garbage Collector only manages objects allocated in the JVM Heap and is not aware of the memory footprint of these objects in the native memory. Hence, the allocation/deallocation of native memory must be managed by MXNet Scala.
@@ -74,7 +74,7 @@ This approach could be suitable for some use cases such as inference on CPUs whe
 Calling GC too frequently can also cause your application to perform poorly. This approach might not be suitable 
 for use cases which quickly allocate a large number of large NDArrays such as when training a GAN model.
 
-### Using dispose Pattern (least Recommended)
+### 3. Using dispose Pattern (least Recommended)
  
 There might be situations where you want to manually manage the lifecycle of Apache MXNet objects. For such use-cases, we have provided the `dispose()` method which will manually deallocate the associated native memory when called. We have also
 made all MXNet objects [AutoCloseable](https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html). If you are using Java8 and above you can use it with try-with-resources or call close() in the finally block.
@@ -89,7 +89,7 @@ def showDispose(): Unit = {
 }
 ```
 
-## 3. Memory Management in Java
+## Memory Management in Java
 Memory Management in MXNet Java is similar to Scala. We recommend you use [ResourceScope](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L32) in a `try-with-resources` block or in a `try-finally` block.
 The [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) tracks the resources declared in the try block and automatically closes them upon exiting (supported from Java 7 onwards). 
 The ResourceScope discussed above implements AutoCloseable and tracks all MXNet Objects created at a Thread Local scope level. 

From 03ad8da1fb69e152c86d32cf84cffd282106ee2d Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Thu, 29 Nov 2018 09:41:31 -0800
Subject: [PATCH 09/11] Update memory-management.md

---
 scala-package/memory-management.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index d29b448e6bf..3eb09a2b03c 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -9,7 +9,7 @@ MXNet Scala provides a few easy modes of operation which are explained in detail
 ## Memory Management in Scala 
 ### 1.  [ResourceScope.using](https://github.com/apache/incubator-mxnet/blob/master/scala-package/core/src/main/scala/org/apache/mxnet/ResourceScope.scala#L106) (Recommended)
 `ResourceScope.using` provides the familiar Java try-with-resources primitive in Scala and will automatically manage the memory of all the MXNet objects created in the associated code block (`body`). It works by tracking the allocations performed inside the code block deallocating when exiting the block. 
-Passing MXNet objects out of a using block can be easily accomplished by simply returning an object or an interable containing multiple MXNet objects. If you have nested using blocks, then the returned objects will be moved into the parent scope as well.
+Passing MXNet objects out of a using block can be easily accomplished by simply returning an object or an iterable containing multiple MXNet objects. If you have nested using blocks, then the returned objects will be moved into the parent scope as well.
 
 **Usage** 
 ```scala

From 1b60a2d6946eedcf5ee233658019a3aeef199a48 Mon Sep 17 00:00:00 2001
From: Naveen Swamy <mn.naveen@gmail.com>
Date: Thu, 29 Nov 2018 09:47:44 -0800
Subject: [PATCH 10/11] Update memory-management.md

---
 scala-package/memory-management.md | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index 3eb09a2b03c..25348502ede 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -29,11 +29,9 @@ In the example above, we have two ResourceScopes stacked together. In the inner
 returns `r4` from its code-block, it will only deallocate `r3` and will remove `r4` from its list of objects to be deallocated. All other objects are automatically released by calling the C++ backend to free the native memory.
 
 **Note:**
-You should consider nesting ResourceScopes when you have layers of functionality in your application code or create a lot of MXNet objects such as NDArrays. 
-Holding onto all the memory that is created for an entire training loop can result in running out of memory, especially when training on GPUs which might only have 8 to 16 GB. 
-For example, if you were writing training code in MXNet Scala, it is recommended not to use a single ResourceScope block which spans the entire training code. 
-You should instead stack multiple scopes: an innermost scope where you run forward-backward passes on each batch, 
-a middle scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
+You should consider nesting ResourceScopes when you have layers of functionality in your application code or create a lot of MXNet objects such as NDArrays.  
+For example, Holding onto all the memory that is created for an entire training loop can result in running out of memory, especially when training on GPUs which might only have 8 to 16 GB.  
+It is recommended not to use a single ResourceScope block which spans the entire training code. You should instead nest multiple scopes: an innermost scope where you run forward-backward passes on each batch, a middle scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
 ```scala
 ResourceScope.using() {
  val m = Module()
@@ -115,5 +113,5 @@ the end of the try block. This is also true of the objects that are returned e.g
 If you use the object outside of the try block, the process might crash due to illegal memory access.
 
 To retain certain objects created within try blocks, you should explicitly remove them from the scope by calling `scope.moveToOuterScope`.
-It is highly recommended to use a stack of try-with-resource ResourceScopes so you do not have to explicitly manage the lifecycle of the Native objects.
+It is highly recommended to nest multiple try-with-resource ResourceScopes so you do not have to explicitly manage the lifecycle of the Native objects.
 

From e13f0a36a3081e4d757576bf19eaf5a7f8205ae0 Mon Sep 17 00:00:00 2001
From: Zach Kimberg <kimbergz@amazon.com>
Date: Thu, 29 Nov 2018 10:49:17 -0800
Subject: [PATCH 11/11] capitalization fix

---
 scala-package/memory-management.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scala-package/memory-management.md b/scala-package/memory-management.md
index 25348502ede..33c36b6e6ab 100644
--- a/scala-package/memory-management.md
+++ b/scala-package/memory-management.md
@@ -30,8 +30,9 @@ returns `r4` from its code-block, it will only deallocate `r3` and will remove `
 
 **Note:**
 You should consider nesting ResourceScopes when you have layers of functionality in your application code or create a lot of MXNet objects such as NDArrays.  
-For example, Holding onto all the memory that is created for an entire training loop can result in running out of memory, especially when training on GPUs which might only have 8 to 16 GB.  
+For example, holding onto all the memory that is created for an entire training loop can result in running out of memory, especially when training on GPUs which might only have 8 to 16 GB.  
 It is recommended not to use a single ResourceScope block which spans the entire training code. You should instead nest multiple scopes: an innermost scope where you run forward-backward passes on each batch, a middle scope for each epoch, and an outer scope that runs the entire training script. This is demonstrated in the example below:
+
 ```scala
 ResourceScope.using() {
  val m = Module()