From d3f8d280098f42615c4d63d64d8797c8c76a8970 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Mon, 26 Oct 2015 22:07:49 -0700 Subject: [PATCH 1/9] Support setting spark.driver.memory from sparkEnvir when launching JVM backend --- R/pkg/R/sparkR.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 043b0057bd04a..54716d5b2eb2d 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -123,16 +123,30 @@ sparkR.init <- function( uriSep <- "////" } + sparkEnvirMap <- convertNamedListToEnv(sparkEnvir) + existingPort <- Sys.getenv("EXISTING_SPARKR_BACKEND_PORT", "") if (existingPort != "") { backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") + submitOps <- Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell") + # spark.driver.memory cannot be set in env: + # http://spark.apache.org/docs/latest/configuration.html#application-properties + # Add spark.driver.memory if set in sparkEnvir and not already set in SPARKR_SUBMIT_ARGS + if (!grepl("--spark.driver.memory", submitOps)) { + driverMemory <- sparkEnvirMap[["spark.driver.memory"]] + # format for memory properties is 2 characters + if (!is.null(driverMemory) && nchar(driverMemory) > 1) { + # --option must be before the application class "sparkr-shell" + submitOps <- paste("--driver-memory", driverMemory, submitOps, sep = " ") + } + } launchBackend( args = path, sparkHome = sparkHome, jars = jars, - sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"), + sparkSubmitOpts = submitOps, packages = sparkPackages) # wait atmost 100 seconds for JVM to launch wait <- 0.1 @@ -171,8 +185,6 @@ sparkR.init <- function( sparkHome <- suppressWarnings(normalizePath(sparkHome)) } - sparkEnvirMap <- convertNamedListToEnv(sparkEnvir) - sparkExecutorEnvMap <- convertNamedListToEnv(sparkExecutorEnv) if(is.null(sparkExecutorEnvMap$LD_LIBRARY_PATH)) { sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- From 5ecc9e09874c1bd63d4712412777aef4f2eb5afb Mon Sep 17 00:00:00 2001 From: felixcheung Date: Mon, 26 Oct 2015 22:36:39 -0700 Subject: [PATCH 2/9] oops, missed one check --- R/pkg/R/sparkR.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 54716d5b2eb2d..3b7701b8b63e0 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -134,7 +134,7 @@ sparkR.init <- function( # spark.driver.memory cannot be set in env: # http://spark.apache.org/docs/latest/configuration.html#application-properties # Add spark.driver.memory if set in sparkEnvir and not already set in SPARKR_SUBMIT_ARGS - if (!grepl("--spark.driver.memory", submitOps)) { + if (!grepl("--driver-memory", submitOps)) { driverMemory <- sparkEnvirMap[["spark.driver.memory"]] # format for memory properties is 2 characters if (!is.null(driverMemory) && nchar(driverMemory) > 1) { From db8f7fd9eb556ce258e0dde0a1f1dc9451173591 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Tue, 27 Oct 2015 21:35:15 -0700 Subject: [PATCH 3/9] Update to include spark.driver.extraClassPath, extraJavaOptions, extraLibraryPath from feedback add quote " around parameter values --- R/pkg/R/sparkR.R | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 3b7701b8b63e0..d8fb4b6d4c1ef 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -130,18 +130,12 @@ sparkR.init <- function( backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") - submitOps <- 
Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell") - # spark.driver.memory cannot be set in env: + # A few Spark config cannot be set in env: # http://spark.apache.org/docs/latest/configuration.html#application-properties - # Add spark.driver.memory if set in sparkEnvir and not already set in SPARKR_SUBMIT_ARGS - if (!grepl("--driver-memory", submitOps)) { - driverMemory <- sparkEnvirMap[["spark.driver.memory"]] - # format for memory properties is 2 characters - if (!is.null(driverMemory) && nchar(driverMemory) > 1) { - # --option must be before the application class "sparkr-shell" - submitOps <- paste("--driver-memory", driverMemory, submitOps, sep = " ") - } - } + # Add them to spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS + submitOps <- getClientModeSparkSubmitOpts( + Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"), + sparkEnvirMap) launchBackend( args = path, sparkHome = sparkHome, @@ -332,3 +326,27 @@ clearJobGroup <- function(sc) { cancelJobGroup <- function(sc, groupId) { callJMethod(sc, "cancelJobGroup", groupId) } + +sparkConfToSubmitOps <- new.env() +sparkConfToSubmitOps[["spark.driver.memory"]] <- "--driver-memory" +sparkConfToSubmitOps[["spark.driver.extraClassPath"]] <- "--driver-class-path" +sparkConfToSubmitOps[["spark.driver.extraJavaOptions"]] <- "--driver-java-options" +sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-path" + +# Utility function that returns Spark Submit arguments as a string +getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) { + envirToOps <- lapply(ls(sparkConfToSubmitOps), function(conf) { + opsValue <- sparkEnvirMap[[conf]] + # process only if --option is not already specified + if (!is.null(opsValue) && + nchar(opsValue) > 1 && + !grepl(sparkConfToSubmitOps[[conf]], submitOps)) { + # put "" around value in case it has spaces + paste0(sparkConfToSubmitOps[[conf]], " \"", opsValue, "\" ") + } else { + "" + } + }) + # --option must be before the application class "sparkr-shell" in submitOps + paste0(paste0(envirToOps, collapse = ""), submitOps) +} From 9075eb68520b63a9b6b86aa9c0a56f651b140242 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Tue, 27 Oct 2015 22:04:36 -0700 Subject: [PATCH 4/9] add test --- R/pkg/inst/tests/test_sparkR.R | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 R/pkg/inst/tests/test_sparkR.R diff --git a/R/pkg/inst/tests/test_sparkR.R b/R/pkg/inst/tests/test_sparkR.R new file mode 100644 index 0000000000000..af5a23c436fae --- /dev/null +++ b/R/pkg/inst/tests/test_sparkR.R @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +context("functions in sparkR.R") + +test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { + e <- new.env() + e[["spark.driver.memory"]] <- "512m" + ops <- getClientModeSparkSubmitOpts("sparkrmain", e) + expect_equal("--driver-memory \"512m\" sparkrmain", ops) + + e[["spark.driver.memory"]] <- "5g" + e[["spark.driver.extraClassPath"]] <- "/opt/class_path" + e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" + e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" + e[["random"]] <- "skipthis" + ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) + expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", + "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", + "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) + + e[["spark.driver.extraClassPath"]] <- "/" # too short + ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) + expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", + "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"", + " --driver-memory 4g sparkr-shell2")) +}) From 2d4ffb081cc45e72014594506e64b3d37c62da2e Mon Sep 17 00:00:00 2001 From: felixcheung Date: Tue, 27 Oct 2015 22:09:37 -0700 Subject: [PATCH 5/9] Didn't realize the filename is different for test, fixing that --- R/pkg/inst/tests/test_context.R | 23 ++++++++++++++++++ R/pkg/inst/tests/test_sparkR.R | 41 --------------------------------- 2 files changed, 23 insertions(+), 41 deletions(-) delete mode 100644 R/pkg/inst/tests/test_sparkR.R diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R index e99815ed1562c..e645d17cc1ff9 100644 --- a/R/pkg/inst/tests/test_context.R +++ b/R/pkg/inst/tests/test_context.R @@ -65,3 +65,26 @@ test_that("job group functions can be called", { cancelJobGroup(sc, "groupId") clearJobGroup(sc) }) + +test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { + e <- new.env() + e[["spark.driver.memory"]] <- "512m" + ops <- getClientModeSparkSubmitOpts("sparkrmain", e) + expect_equal("--driver-memory \"512m\" sparkrmain", ops) + + e[["spark.driver.memory"]] <- "5g" + e[["spark.driver.extraClassPath"]] <- "/opt/class_path" + e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" + e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" + e[["random"]] <- "skipthis" + ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) + expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", + "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", + "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) + + e[["spark.driver.extraClassPath"]] <- "/" # too short + ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) + expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", + "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"", + " --driver-memory 4g sparkr-shell2")) +}) diff --git a/R/pkg/inst/tests/test_sparkR.R b/R/pkg/inst/tests/test_sparkR.R deleted file mode 100644 index af5a23c436fae..0000000000000 --- a/R/pkg/inst/tests/test_sparkR.R +++ /dev/null @@ -1,41 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -context("functions in sparkR.R") - -test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { - e <- new.env() - e[["spark.driver.memory"]] <- "512m" - ops <- getClientModeSparkSubmitOpts("sparkrmain", e) - expect_equal("--driver-memory \"512m\" sparkrmain", ops) - - e[["spark.driver.memory"]] <- "5g" - e[["spark.driver.extraClassPath"]] <- "/opt/class_path" - e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" - e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" - e[["random"]] <- "skipthis" - ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) - expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", - "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", - "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) - - e[["spark.driver.extraClassPath"]] <- "/" # too short - ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) - expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", - "-XX:+UseCompressedStrings\" --driver-library-path \"/usr/local/hadoop/lib\"", - " --driver-memory 4g sparkr-shell2")) -}) From ceeca816c9c96b22887202c73c6a24d15d05fa2b Mon Sep 17 00:00:00 2001 From: felixcheung Date: Wed, 28 Oct 2015 00:05:08 -0700 Subject: [PATCH 6/9] update for lint-r --- R/pkg/inst/tests/test_context.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R index e645d17cc1ff9..80c1b89a4c627 100644 --- a/R/pkg/inst/tests/test_context.R +++ b/R/pkg/inst/tests/test_context.R @@ -73,18 +73,22 @@ test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whiteli expect_equal("--driver-memory \"512m\" sparkrmain", ops) e[["spark.driver.memory"]] <- "5g" - e[["spark.driver.extraClassPath"]] <- "/opt/class_path" + e[["spark.driver.extraClassPath"]] <- "/opt/class_path" # nolint e[["spark.driver.extraJavaOptions"]] <- "-XX:+UseCompressedOops -XX:+UseCompressedStrings" - e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" + e[["spark.driver.extraLibraryPath"]] <- "/usr/local/hadoop/lib" # nolint e[["random"]] <- "skipthis" ops2 <- getClientModeSparkSubmitOpts("sparkr-shell", e) + # nolint start expect_equal(ops2, paste0("--driver-class-path \"/opt/class_path\" --driver-java-options \"", "-XX:+UseCompressedOops -XX:+UseCompressedStrings\" --driver-library-path \"", "/usr/local/hadoop/lib\" --driver-memory \"5g\" sparkr-shell")) + # nolint end e[["spark.driver.extraClassPath"]] <- "/" # too short ops3 <- getClientModeSparkSubmitOpts("--driver-memory 4g sparkr-shell2", e) + # nolint start expect_equal(ops3, paste0("--driver-java-options \"-XX:+UseCompressedOops ", "-XX:+UseCompressedStrings\" --driver-library-path 
\"/usr/local/hadoop/lib\"", " --driver-memory 4g sparkr-shell2")) + # nolint end }) From c21713ed32fd0bc68444ed410cce892d70a494fc Mon Sep 17 00:00:00 2001 From: felixcheung Date: Wed, 28 Oct 2015 16:17:35 -0700 Subject: [PATCH 7/9] Comment and doc updates --- R/pkg/R/sparkR.R | 12 ++++++++---- docs/sparkr.md | 24 +++++++++++++++++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index d8fb4b6d4c1ef..f6ccf09dc09ff 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -93,7 +93,7 @@ sparkR.stop <- function() { #' sc <- sparkR.init("local[2]", "SparkR", "/home/spark", #' list(spark.executor.memory="1g")) #' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="1g"), +#' list(spark.executor.memory="4g", spark.driver.memory="2g"), #' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"), #' c("jarfile1.jar","jarfile2.jar")) #'} @@ -130,9 +130,6 @@ sparkR.init <- function( backendPort <- existingPort } else { path <- tempfile(pattern = "backend_port") - # A few Spark config cannot be set in env: - # http://spark.apache.org/docs/latest/configuration.html#application-properties - # Add them to spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS submitOps <- getClientModeSparkSubmitOpts( Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"), sparkEnvirMap) @@ -334,6 +331,13 @@ sparkConfToSubmitOps[["spark.driver.extraJavaOptions"]] <- "--driver-java-option sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-path" # Utility function that returns Spark Submit arguments as a string +# +# A few Spark Application and Runtime environment properties cannot take effort after driver +# JVM has started, as documented in: +# http://spark.apache.org/docs/latest/configuration.html#application-properties +# When starting SparkR without using spark-submit, for example, in Rstudio, add them to +# spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS so that they can be +# effective. getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) { envirToOps <- lapply(ls(sparkConfToSubmitOps), function(conf) { opsValue <- sparkEnvirMap[[conf]] diff --git a/docs/sparkr.md b/docs/sparkr.md index 7139d16b4a068..88985e6d7c516 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -37,17 +37,27 @@ sc <- sparkR.init() sqlContext <- sparkRSQL.init(sc) {% endhighlight %} +In the event you are creating `SparkContext` instead of using `sparkR` shell or `spark-submit`, you +could also specify certain Spark driver properties. Normally these +[Application properties](configuration.html#application-properties) and [Runtime Environment](configuration.html#runtime-environment) cannot be set programmatically, as the +driver JVM process would have been started, in this case SparkR takes care of this for you. To set +them, pass them as you would other configuration properties in the `sparkEnvir` argument. + +{% highlight r %} +sc <- sparkR.init("local[*]", "SparkR", "/home/spark", list(spark.driver.memory="2g")) +{% endhighlight %} + ## Creating DataFrames With a `SQLContext`, applications can create `DataFrame`s from a local R data frame, from a [Hive table](sql-programming-guide.html#hive-tables), or from other [data sources](sql-programming-guide.html#data-sources). ### From local data frames -The simplest way to create a data frame is to convert a local R data frame into a SparkR DataFrame. 
Specifically we can use `createDataFrame` and pass in the local R data frame to create a SparkR DataFrame. As an example, the following creates a `DataFrame` based using the `faithful` dataset from R.
+The simplest way to create a data frame is to convert a local R data frame into a SparkR DataFrame. Specifically we can use `createDataFrame` and pass in the local R data frame to create a SparkR DataFrame. As an example, the following creates a `DataFrame` based on the `faithful` dataset from R.
 
{% highlight r %} -df <- createDataFrame(sqlContext, faithful) +df <- createDataFrame(sqlContext, faithful) # Displays the content of the DataFrame to stdout head(df) @@ -96,7 +106,7 @@ printSchema(people)
The data sources API can also be used to save out DataFrames into multiple file formats. For example we can save the DataFrame from the previous example -to a Parquet file using `write.df` +to a Parquet file using `write.df`
{% highlight r %} @@ -139,7 +149,7 @@ Here we include some basic examples and a complete list can be found in the [API
{% highlight r %} # Create the DataFrame -df <- createDataFrame(sqlContext, faithful) +df <- createDataFrame(sqlContext, faithful) # Get basic information about the DataFrame df @@ -152,7 +162,7 @@ head(select(df, df$eruptions)) ##2 1.800 ##3 3.333 -# You can also pass in column name as strings +# You can also pass in column name as strings head(select(df, "eruptions")) # Filter the DataFrame to only retain rows with wait times shorter than 50 mins @@ -166,7 +176,7 @@ head(filter(df, df$waiting < 50))
-### Grouping, Aggregation +### Grouping, Aggregation SparkR data frames support a number of commonly used functions to aggregate data after grouping. For example we can compute a histogram of the `waiting` time in the `faithful` dataset as shown below @@ -194,7 +204,7 @@ head(arrange(waiting_counts, desc(waiting_counts$count))) ### Operating on Columns -SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. +SparkR also provides a number of functions that can directly applied to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions.
{% highlight r %} From 557bbc14335a5957c9e87038bedd24222b9bda3f Mon Sep 17 00:00:00 2001 From: felixcheung Date: Wed, 28 Oct 2015 16:52:00 -0700 Subject: [PATCH 8/9] more text clean up --- R/pkg/R/sparkR.R | 7 +++---- docs/sparkr.md | 10 ++++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index f6ccf09dc09ff..b2acd52d91b71 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -332,12 +332,11 @@ sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-pat # Utility function that returns Spark Submit arguments as a string # -# A few Spark Application and Runtime environment properties cannot take effort after driver +# A few Spark Application and Runtime environment properties cannot take effect after driver # JVM has started, as documented in: # http://spark.apache.org/docs/latest/configuration.html#application-properties -# When starting SparkR without using spark-submit, for example, in Rstudio, add them to -# spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS so that they can be -# effective. +# When starting SparkR without using spark-submit, for example, from Rstudio, add them to +# spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS so that they can be effective. getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) { envirToOps <- lapply(ls(sparkConfToSubmitOps), function(conf) { opsValue <- sparkEnvirMap[[conf]] diff --git a/docs/sparkr.md b/docs/sparkr.md index 88985e6d7c516..497a276679f3b 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -29,7 +29,7 @@ All of the examples on this page use sample data included in R or the Spark dist The entry point into SparkR is the `SparkContext` which connects your R program to a Spark cluster. You can create a `SparkContext` using `sparkR.init` and pass in options such as the application name , any spark packages depended on, etc. Further, to work with DataFrames we will need a `SQLContext`, -which can be created from the SparkContext. If you are working from the SparkR shell, the +which can be created from the SparkContext. If you are working from the `sparkR` shell, the `SQLContext` and `SparkContext` should already be created for you. {% highlight r %} @@ -37,11 +37,13 @@ sc <- sparkR.init() sqlContext <- sparkRSQL.init(sc) {% endhighlight %} -In the event you are creating `SparkContext` instead of using `sparkR` shell or `spark-submit`, you +In the event you are creating `SparkContext` instead of using `sparkR` shell or `spark-submit`, you could also specify certain Spark driver properties. Normally these -[Application properties](configuration.html#application-properties) and [Runtime Environment](configuration.html#runtime-environment) cannot be set programmatically, as the +[Application properties](configuration.html#application-properties) and +[Runtime Environment](configuration.html#runtime-environment) cannot be set programmatically, as the driver JVM process would have been started, in this case SparkR takes care of this for you. To set -them, pass them as you would other configuration properties in the `sparkEnvir` argument. +them, pass them as you would other configuration properties in the `sparkEnvir` argument to +`sparkR.init()`. 
{% highlight r %} sc <- sparkR.init("local[*]", "SparkR", "/home/spark", list(spark.driver.memory="2g")) From 871e971247976e086958b1d2de730bd636be5193 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Thu, 29 Oct 2015 11:57:00 -0700 Subject: [PATCH 9/9] update comment from feedback --- R/pkg/R/sparkR.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index b2acd52d91b71..004d08e74e1cd 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -77,7 +77,9 @@ sparkR.stop <- function() { #' Initialize a new Spark Context. #' -#' This function initializes a new SparkContext. +#' This function initializes a new SparkContext. For details on how to initialize +#' and use SparkR, refer to SparkR programming guide at +#' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparkcontext-sqlcontext}. #' #' @param master The Spark master URL. #' @param appName Application name to register with cluster manager @@ -93,7 +95,7 @@ sparkR.stop <- function() { #' sc <- sparkR.init("local[2]", "SparkR", "/home/spark", #' list(spark.executor.memory="1g")) #' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="4g", spark.driver.memory="2g"), +#' list(spark.executor.memory="4g"), #' list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"), #' c("jarfile1.jar","jarfile2.jar")) #'}
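
Taken together, these patches let a client-mode SparkR session (for example, one started from RStudio rather than through `spark-submit`) set driver-side properties via `sparkEnvir`, since options such as `spark.driver.memory` cannot take effect once the driver JVM is already running. A minimal usage sketch follows; the values mirror those used in the docs and tests above, and the resulting command line is approximate.

{% highlight r %}
# Minimal sketch of the user-facing behavior introduced in this series.
# In client mode, the whitelisted driver properties in sparkEnvir are
# translated into spark-submit options before the backend JVM starts.
library(SparkR)

sc <- sparkR.init(
  master     = "local[*]",
  appName    = "SparkR",
  sparkHome  = "/home/spark",                        # placeholder path
  sparkEnvir = list(
    spark.driver.memory = "2g",                      # becomes --driver-memory "2g"
    spark.driver.extraClassPath = "/opt/class_path"  # becomes --driver-class-path "/opt/class_path"
  ))

# With SPARKR_SUBMIT_ARGS unset, the backend would be launched with roughly:
#   --driver-class-path "/opt/class_path" --driver-memory "2g" sparkr-shell
{% endhighlight %}

The generated options are prepended ahead of the application class (`sparkr-shell`) and skipped when the corresponding `--option` is already present in `SPARKR_SUBMIT_ARGS`, matching the expectations in `test_context.R`.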