From 6fc97e02975909cb72e27077aac97d4f90b332d5 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 15:48:55 -0800 Subject: [PATCH 01/31] Support for collect() on Columns --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R | 63 +++-- R/pkg/R/functions.R | 265 +++++++++++----------- R/pkg/R/generics.R | 2 +- R/pkg/inst/tests/testthat/test_sparkSQL.R | 41 +++- 5 files changed, 209 insertions(+), 164 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 3b7b8250b94f7..0d35429a9bcd3 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1177,7 +1177,7 @@ setMethod("foreachPartition", ############################## SELECT ################################## getColumn <- function(x, c) { - column(callJMethod(x@sdf, "col", c)) + column(callJMethod(x@sdf, "col", c), x) } #' @rdname select diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 3ffd9a9890b2e..08fa4960c3391 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +setOldClass("DataFrame") +setClassUnion("DataFrameOrNULL", c("DataFrame", "NULL")) # Column Class @@ -29,24 +31,43 @@ setOldClass("jobj") #' @slot jc reference to JVM DataFrame column #' @export setClass("Column", - slots = list(jc = "jobj")) + slots = list(jc = "jobj", df = "DataFrameOrNULL")) -setMethod("initialize", "Column", function(.Object, jc) { +setMethod("initialize", "Column", function(.Object, jc, df) { .Object@jc <- jc + + # Some Column objects don't have any referencing DataFrame. In such case, df will be NULL. + if (missing(df)) { + df <- NULL + } + .Object@df <- df .Object }) +setMethod("show", signature="Column", definition=function(object) { + MAX_ELEMENTS <- 20 + show(head(object, MAX_ELEMENTS)) + cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS) ," elements only.")) +}) + +setMethod("collect", signature="Column", definition=function(x) { + if (is.null(x@df)) { + stop("This column cannot be collected as it's not associated to any DataFrame.") + } + collect(select(x@df, x))[, 1] +}) + +setMethod("head", signature="Column", definition=function(x, n=6) { + if (is.null(x@df)) { + stop("This column cannot be collected as it's not associated to any DataFrame.") + } + head(select(x@df, x), n)[, 1] +}) + setMethod("column", signature(x = "jobj"), - function(x) { - new("Column", x) - }) - -#' @rdname show -#' @name show -setMethod("show", "Column", - function(object) { - cat("Column", callJMethod(object@jc, "toString"), "\n") + function(x, df=NA) { + new("Column", x, df) }) operators <- list( @@ -79,7 +100,7 @@ createOperator <- function(op) { callJMethod(e1@jc, operators[[op]], e2) } } - column(jc) + column(jc, e1@df) }) } @@ -87,7 +108,7 @@ createColumnFunction1 <- function(name) { setMethod(name, signature(x = "Column"), function(x) { - column(callJMethod(x@jc, name)) + column(callJMethod(x@jc, name), x@df) }) } @@ -99,7 +120,7 @@ createColumnFunction2 <- function(name) { data <- data@jc } jc <- callJMethod(x@jc, name, data) - column(jc) + column(jc, x@df) }) } @@ -129,7 +150,7 @@ setMethod("alias", signature(object = "Column"), function(object, data) { if (is.character(data)) { - column(callJMethod(object@jc, "as", data)) + column(callJMethod(object@jc, "as", data), object@df) } else { stop("data should be character") } @@ -148,7 +169,7 @@ setMethod("alias", setMethod("substr", signature(x = "Column"), function(x, start, stop) { jc <- callJMethod(x@jc, "substr", as.integer(start - 1), 
as.integer(stop - start + 1)) - column(jc) + column(jc, x@df) }) #' between @@ -164,7 +185,7 @@ setMethod("between", signature(x = "Column"), function(x, bounds) { if (is.vector(bounds) && length(bounds) == 2) { jc <- callJMethod(x@jc, "between", bounds[1], bounds[2]) - column(jc) + column(jc, x@df) } else { stop("bounds should be a vector of lower and upper bounds") } @@ -184,11 +205,11 @@ setMethod("cast", signature(x = "Column"), function(x, dataType) { if (is.character(dataType)) { - column(callJMethod(x@jc, "cast", dataType)) + column(callJMethod(x@jc, "cast", dataType), x@df) } else if (is.list(dataType)) { json <- tojson(dataType) jdataType <- callJStatic("org.apache.spark.sql.types.DataType", "fromJson", json) - column(callJMethod(x@jc, "cast", jdataType)) + column(callJMethod(x@jc, "cast", jdataType), x@df) } else { stop("dataType should be character or list") } @@ -210,7 +231,7 @@ setMethod("%in%", signature(x = "Column"), function(x, table) { jc <- callJMethod(x@jc, "isin", as.list(table)) - return(column(jc)) + column(jc, x@df) }) #' otherwise @@ -227,5 +248,5 @@ setMethod("otherwise", function(x, value) { value <- if (class(value) == "Column") { value@jc } else { value } jc <- callJMethod(x@jc, "otherwise", value) - column(jc) + column(jc, x@df) }) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index e5521f3cffadf..97ccebf42666a 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -54,7 +54,7 @@ setMethod("abs", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "abs", x@jc) - column(jc) + column(jc, x@df) }) #' acos @@ -71,7 +71,7 @@ setMethod("acos", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "acos", x@jc) - column(jc) + column(jc, x@df) }) #' approxCountDistinct @@ -87,7 +87,7 @@ setMethod("approxCountDistinct", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc) - column(jc) + column(jc, x@df) }) #' ascii @@ -104,7 +104,7 @@ setMethod("ascii", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "ascii", x@jc) - column(jc) + column(jc, x@df) }) #' asin @@ -121,7 +121,7 @@ setMethod("asin", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "asin", x@jc) - column(jc) + column(jc, x@df) }) #' atan @@ -137,7 +137,7 @@ setMethod("atan", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "atan", x@jc) - column(jc) + column(jc, x@df) }) #' avg @@ -153,7 +153,7 @@ setMethod("avg", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "avg", x@jc) - column(jc) + column(jc, x@df) }) #' base64 @@ -170,7 +170,7 @@ setMethod("base64", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "base64", x@jc) - column(jc) + column(jc, x@df) }) #' bin @@ -187,7 +187,7 @@ setMethod("bin", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "bin", x@jc) - column(jc) + column(jc, x@df) }) #' bitwiseNOT @@ -203,7 +203,7 @@ setMethod("bitwiseNOT", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "bitwiseNOT", x@jc) - column(jc) + column(jc, x@df) }) #' cbrt @@ -219,7 +219,7 @@ setMethod("cbrt", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "cbrt", x@jc) - column(jc) + column(jc, x@df) }) #' ceil @@ -235,7 +235,7 @@ 
setMethod("ceil", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "ceil", x@jc) - column(jc) + column(jc, x@df) }) #' Though scala functions has "col" function, we don't expose it in SparkR @@ -272,7 +272,7 @@ setMethod("corr", signature(x = "Column"), function(x, col2) { stopifnot(class(col2) == "Column") jc <- callJStatic("org.apache.spark.sql.functions", "corr", x@jc, col2@jc) - column(jc) + column(jc, x@df) }) #' cov @@ -301,12 +301,14 @@ setMethod("cov", signature(x = "characterOrColumn"), setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"), function(col1, col2) { stopifnot(class(col1) == class(col2)) + df <- NULL if (class(col1) == "Column") { + df <- col1@df col1 <- col1@jc col2 <- col2@jc } jc <- callJStatic("org.apache.spark.sql.functions", "covar_samp", col1, col2) - column(jc) + column(jc, df) }) #' covar_pop @@ -325,12 +327,14 @@ setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterO setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"), function(col1, col2) { stopifnot(class(col1) == class(col2)) + df <- NULL if (class(col1) == "Column") { + df <- col1@df col1 <- col1@jc col2 <- col2@jc } jc <- callJStatic("org.apache.spark.sql.functions", "covar_pop", col1, col2) - column(jc) + column(jc, df) }) #' cos @@ -346,7 +350,7 @@ setMethod("cos", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "cos", x@jc) - column(jc) + column(jc, x@df) }) #' cosh @@ -362,7 +366,7 @@ setMethod("cosh", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "cosh", x@jc) - column(jc) + column(jc, x@df) }) #' count @@ -378,7 +382,7 @@ setMethod("count", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "count", x@jc) - column(jc) + column(jc, x@df) }) #' crc32 @@ -395,7 +399,7 @@ setMethod("crc32", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "crc32", x@jc) - column(jc) + column(jc, x@df) }) #' hash @@ -415,7 +419,7 @@ setMethod("hash", x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "hash", jcols) - column(jc) + column(jc, x@df) }) #' dayofmonth @@ -431,7 +435,7 @@ setMethod("dayofmonth", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "dayofmonth", x@jc) - column(jc) + column(jc, x@df) }) #' dayofyear @@ -447,7 +451,7 @@ setMethod("dayofyear", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "dayofyear", x@jc) - column(jc) + column(jc, x@df) }) #' decode @@ -464,7 +468,7 @@ setMethod("decode", signature(x = "Column", charset = "character"), function(x, charset) { jc <- callJStatic("org.apache.spark.sql.functions", "decode", x@jc, charset) - column(jc) + column(jc, x@df) }) #' encode @@ -481,7 +485,7 @@ setMethod("encode", signature(x = "Column", charset = "character"), function(x, charset) { jc <- callJStatic("org.apache.spark.sql.functions", "encode", x@jc, charset) - column(jc) + column(jc, x@df) }) #' exp @@ -497,7 +501,7 @@ setMethod("exp", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "exp", x@jc) - column(jc) + column(jc, x@df) }) #' expm1 @@ -513,7 +517,7 @@ setMethod("expm1", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "expm1", x@jc) - column(jc) + column(jc, x@df) }) #' factorial @@ -529,7 +533,7 @@ 
setMethod("factorial", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "factorial", x@jc) - column(jc) + column(jc, x@df) }) #' first @@ -545,7 +549,7 @@ setMethod("first", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "first", x@jc) - column(jc) + column(jc, x@df) }) #' floor @@ -561,7 +565,7 @@ setMethod("floor", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "floor", x@jc) - column(jc) + column(jc, x@df) }) #' hex @@ -577,7 +581,7 @@ setMethod("hex", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "hex", x@jc) - column(jc) + column(jc, x@df) }) #' hour @@ -593,7 +597,7 @@ setMethod("hour", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "hour", x@jc) - column(jc) + column(jc, x@df) }) #' initcap @@ -612,7 +616,7 @@ setMethod("initcap", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "initcap", x@jc) - column(jc) + column(jc, x@df) }) #' is.nan @@ -640,7 +644,7 @@ setMethod("isnan", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "isnan", x@jc) - column(jc) + column(jc, x@df) }) #' kurtosis @@ -656,7 +660,7 @@ setMethod("kurtosis", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "kurtosis", x@jc) - column(jc) + column(jc, x@df) }) #' last @@ -672,7 +676,7 @@ setMethod("last", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "last", x@jc) - column(jc) + column(jc, x@df) }) #' last_day @@ -690,7 +694,7 @@ setMethod("last_day", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "last_day", x@jc) - column(jc) + column(jc, x@df) }) #' length @@ -706,7 +710,7 @@ setMethod("length", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "length", x@jc) - column(jc) + column(jc, x@df) }) #' log @@ -722,7 +726,7 @@ setMethod("log", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "log", x@jc) - column(jc) + column(jc, x@df) }) #' log10 @@ -738,7 +742,7 @@ setMethod("log10", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "log10", x@jc) - column(jc) + column(jc, x@df) }) #' log1p @@ -754,7 +758,7 @@ setMethod("log1p", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "log1p", x@jc) - column(jc) + column(jc, x@df) }) #' log2 @@ -770,7 +774,7 @@ setMethod("log2", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "log2", x@jc) - column(jc) + column(jc, x@df) }) #' lower @@ -786,7 +790,7 @@ setMethod("lower", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "lower", x@jc) - column(jc) + column(jc, x@df) }) #' ltrim @@ -802,7 +806,7 @@ setMethod("ltrim", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc) - column(jc) + column(jc, x@df) }) #' max @@ -818,7 +822,7 @@ setMethod("max", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "max", x@jc) - column(jc) + column(jc, x@df) }) #' md5 @@ -835,7 +839,7 @@ setMethod("md5", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "md5", x@jc) - column(jc) + 
column(jc, x@df) }) #' mean @@ -852,7 +856,7 @@ setMethod("mean", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "mean", x@jc) - column(jc) + column(jc, x@df) }) #' min @@ -868,7 +872,7 @@ setMethod("min", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "min", x@jc) - column(jc) + column(jc, x@df) }) #' minute @@ -884,7 +888,7 @@ setMethod("minute", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "minute", x@jc) - column(jc) + column(jc, x@df) }) #' month @@ -900,7 +904,7 @@ setMethod("month", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "month", x@jc) - column(jc) + column(jc, x@df) }) #' negate @@ -916,7 +920,7 @@ setMethod("negate", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "negate", x@jc) - column(jc) + column(jc, x@df) }) #' quarter @@ -932,7 +936,7 @@ setMethod("quarter", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "quarter", x@jc) - column(jc) + column(jc, x@df) }) #' reverse @@ -948,7 +952,7 @@ setMethod("reverse", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "reverse", x@jc) - column(jc) + column(jc, x@df) }) #' rint @@ -965,7 +969,7 @@ setMethod("rint", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "rint", x@jc) - column(jc) + column(jc, x@df) }) #' round @@ -981,7 +985,7 @@ setMethod("round", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "round", x@jc) - column(jc) + column(jc, x@df) }) #' rtrim @@ -997,7 +1001,7 @@ setMethod("rtrim", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc) - column(jc) + column(jc, x@df) }) #' sd @@ -1035,7 +1039,7 @@ setMethod("second", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "second", x@jc) - column(jc) + column(jc, x@df) }) #' sha1 @@ -1052,7 +1056,7 @@ setMethod("sha1", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sha1", x@jc) - column(jc) + column(jc, x@df) }) #' signum @@ -1068,7 +1072,7 @@ setMethod("signum", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "signum", x@jc) - column(jc) + column(jc, x@df) }) #' sin @@ -1084,7 +1088,7 @@ setMethod("sin", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sin", x@jc) - column(jc) + column(jc, x@df) }) #' sinh @@ -1100,7 +1104,7 @@ setMethod("sinh", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sinh", x@jc) - column(jc) + column(jc, x@df) }) #' skewness @@ -1116,7 +1120,7 @@ setMethod("skewness", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "skewness", x@jc) - column(jc) + column(jc, x@df) }) #' soundex @@ -1132,7 +1136,7 @@ setMethod("soundex", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "soundex", x@jc) - column(jc) + column(jc, x@df) }) #' @rdname sd @@ -1141,7 +1145,7 @@ setMethod("stddev", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "stddev", x@jc) - column(jc) + column(jc, x@df) }) #' stddev_pop @@ -1158,7 +1162,7 @@ setMethod("stddev_pop", signature(x = "Column"), function(x) 
{ jc <- callJStatic("org.apache.spark.sql.functions", "stddev_pop", x@jc) - column(jc) + column(jc, x@df) }) #' stddev_samp @@ -1175,7 +1179,7 @@ setMethod("stddev_samp", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "stddev_samp", x@jc) - column(jc) + column(jc, x@df) }) #' struct @@ -1197,10 +1201,11 @@ setMethod("struct", if (class(x) == "Column") { jcols <- lapply(list(x, ...), function(x) { x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "struct", jcols) + column(jc, x@df) } else { jc <- callJStatic("org.apache.spark.sql.functions", "struct", x, list(...)) + column(jc) } - column(jc) }) #' sqrt @@ -1216,7 +1221,7 @@ setMethod("sqrt", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sqrt", x@jc) - column(jc) + column(jc, x@df) }) #' sum @@ -1232,7 +1237,7 @@ setMethod("sum", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sum", x@jc) - column(jc) + column(jc, x@df) }) #' sumDistinct @@ -1248,7 +1253,7 @@ setMethod("sumDistinct", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "sumDistinct", x@jc) - column(jc) + column(jc, x@df) }) #' tan @@ -1264,7 +1269,7 @@ setMethod("tan", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "tan", x@jc) - column(jc) + column(jc, x@df) }) #' tanh @@ -1280,7 +1285,7 @@ setMethod("tanh", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "tanh", x@jc) - column(jc) + column(jc, x@df) }) #' toDegrees @@ -1296,7 +1301,7 @@ setMethod("toDegrees", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc) - column(jc) + column(jc, x@df) }) #' toRadians @@ -1312,7 +1317,7 @@ setMethod("toRadians", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc) - column(jc) + column(jc, x@df) }) #' to_date @@ -1328,7 +1333,7 @@ setMethod("to_date", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "to_date", x@jc) - column(jc) + column(jc, x@df) }) #' trim @@ -1344,7 +1349,7 @@ setMethod("trim", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc) - column(jc) + column(jc, x@df) }) #' unbase64 @@ -1361,7 +1366,7 @@ setMethod("unbase64", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "unbase64", x@jc) - column(jc) + column(jc, x@df) }) #' unhex @@ -1378,7 +1383,7 @@ setMethod("unhex", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "unhex", x@jc) - column(jc) + column(jc, x@df) }) #' upper @@ -1394,7 +1399,7 @@ setMethod("upper", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "upper", x@jc) - column(jc) + column(jc, x@df) }) #' var @@ -1425,7 +1430,7 @@ setMethod("variance", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "variance", x@jc) - column(jc) + column(jc, x@df) }) #' var_pop @@ -1442,7 +1447,7 @@ setMethod("var_pop", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "var_pop", x@jc) - column(jc) + column(jc, x@df) }) #' var_samp @@ -1459,7 +1464,7 @@ setMethod("var_samp", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "var_samp", x@jc) - column(jc) 
+ column(jc, x@df) }) #' weekofyear @@ -1475,7 +1480,7 @@ setMethod("weekofyear", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "weekofyear", x@jc) - column(jc) + column(jc, x@df) }) #' year @@ -1491,7 +1496,7 @@ setMethod("year", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "year", x@jc) - column(jc) + column(jc, x@df) }) #' atan2 @@ -1510,7 +1515,7 @@ setMethod("atan2", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "atan2", y@jc, x) - column(jc) + column(jc, y@df) }) #' datediff @@ -1528,7 +1533,7 @@ setMethod("datediff", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "datediff", y@jc, x) - column(jc) + column(jc, y@df) }) #' hypot @@ -1546,7 +1551,7 @@ setMethod("hypot", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "hypot", y@jc, x) - column(jc) + column(jc, y@df) }) #' levenshtein @@ -1564,7 +1569,7 @@ setMethod("levenshtein", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "levenshtein", y@jc, x) - column(jc) + column(jc, y@df) }) #' months_between @@ -1582,7 +1587,7 @@ setMethod("months_between", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "months_between", y@jc, x) - column(jc) + column(jc, y@df) }) #' nanvl @@ -1601,7 +1606,7 @@ setMethod("nanvl", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "nanvl", y@jc, x) - column(jc) + column(jc, y@df) }) #' pmod @@ -1620,7 +1625,7 @@ setMethod("pmod", signature(y = "Column"), x <- x@jc } jc <- callJStatic("org.apache.spark.sql.functions", "pmod", y@jc, x) - column(jc) + column(jc, y@df) }) @@ -1636,7 +1641,7 @@ setMethod("approxCountDistinct", signature(x = "Column"), function(x, rsd = 0.05) { jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd) - column(jc) + column(jc, x@df) }) #' Count Distinct @@ -1656,7 +1661,7 @@ setMethod("countDistinct", }) jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc, jcols) - column(jc) + column(jc, x@df) }) @@ -1677,7 +1682,7 @@ setMethod("concat", x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "concat", jcols) - column(jc) + column(jc, x@df) }) #' greatest @@ -1699,7 +1704,7 @@ setMethod("greatest", x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "greatest", jcols) - column(jc) + column(jc, x@df) }) #' least @@ -1721,7 +1726,7 @@ setMethod("least", x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "least", jcols) - column(jc) + column(jc, x@df) }) #' ceiling @@ -1796,7 +1801,7 @@ setMethod("n", signature(x = "Column"), setMethod("date_format", signature(y = "Column", x = "character"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "date_format", y@jc, x) - column(jc) + column(jc, y@df) }) #' from_utc_timestamp @@ -1811,7 +1816,7 @@ setMethod("date_format", signature(y = "Column", x = "character"), setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x) - column(jc) + column(jc, y@df) }) #' instr @@ -1830,7 +1835,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), setMethod("instr", signature(y = "Column", x = "character"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "instr", y@jc, x) - 
column(jc) + column(jc, y@df) }) #' next_day @@ -1856,7 +1861,7 @@ setMethod("instr", signature(y = "Column", x = "character"), setMethod("next_day", signature(y = "Column", x = "character"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "next_day", y@jc, x) - column(jc) + column(jc, y@df) }) #' to_utc_timestamp @@ -1871,7 +1876,7 @@ setMethod("next_day", signature(y = "Column", x = "character"), setMethod("to_utc_timestamp", signature(y = "Column", x = "character"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x) - column(jc) + column(jc, y@df) }) #' add_months @@ -1886,7 +1891,7 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"), setMethod("add_months", signature(y = "Column", x = "numeric"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "add_months", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' date_add @@ -1901,7 +1906,7 @@ setMethod("add_months", signature(y = "Column", x = "numeric"), setMethod("date_add", signature(y = "Column", x = "numeric"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "date_add", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' date_sub @@ -1916,7 +1921,7 @@ setMethod("date_add", signature(y = "Column", x = "numeric"), setMethod("date_sub", signature(y = "Column", x = "numeric"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "date_sub", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' format_number @@ -1939,7 +1944,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"), jc <- callJStatic("org.apache.spark.sql.functions", "format_number", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' sha2 @@ -1957,7 +1962,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"), setMethod("sha2", signature(y = "Column", x = "numeric"), function(y, x) { jc <- callJStatic("org.apache.spark.sql.functions", "sha2", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' shiftLeft @@ -1975,7 +1980,7 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"), jc <- callJStatic("org.apache.spark.sql.functions", "shiftLeft", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' shiftRight @@ -1993,7 +1998,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"), jc <- callJStatic("org.apache.spark.sql.functions", "shiftRight", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' shiftRightUnsigned @@ -2011,7 +2016,7 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"), jc <- callJStatic("org.apache.spark.sql.functions", "shiftRightUnsigned", y@jc, as.integer(x)) - column(jc) + column(jc, y@df) }) #' concat_ws @@ -2028,7 +2033,7 @@ setMethod("concat_ws", signature(sep = "character", x = "Column"), function(sep, x, ...) 
{ jcols <- lapply(list(x, ...), function(x) { x@jc }) jc <- callJStatic("org.apache.spark.sql.functions", "concat_ws", sep, jcols) - column(jc) + column(jc, x@df) }) #' conv @@ -2047,7 +2052,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri jc <- callJStatic("org.apache.spark.sql.functions", "conv", x@jc, fromBase, toBase) - column(jc) + column(jc, x@df) }) #' expr @@ -2081,7 +2086,7 @@ setMethod("format_string", signature(format = "character", x = "Column"), jc <- callJStatic("org.apache.spark.sql.functions", "format_string", format, jcols) - column(jc) + column(jc, x@df) }) #' from_unixtime @@ -2104,7 +2109,7 @@ setMethod("from_unixtime", signature(x = "Column"), jc <- callJStatic("org.apache.spark.sql.functions", "from_unixtime", x@jc, format) - column(jc) + column(jc, x@df) }) #' locate @@ -2123,7 +2128,7 @@ setMethod("locate", signature(substr = "character", str = "Column"), jc <- callJStatic("org.apache.spark.sql.functions", "locate", substr, str@jc, as.integer(pos)) - column(jc) + column(jc, str@df) }) #' lpad @@ -2140,7 +2145,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"), jc <- callJStatic("org.apache.spark.sql.functions", "lpad", x@jc, as.integer(len), pad) - column(jc) + column(jc, x@df) }) #' rand @@ -2206,7 +2211,7 @@ setMethod("regexp_extract", jc <- callJStatic("org.apache.spark.sql.functions", "regexp_extract", x@jc, pattern, as.integer(idx)) - column(jc) + column(jc, x@df) }) #' regexp_replace @@ -2224,7 +2229,7 @@ setMethod("regexp_replace", jc <- callJStatic("org.apache.spark.sql.functions", "regexp_replace", x@jc, pattern, replacement) - column(jc) + column(jc, x@df) }) #' rpad @@ -2241,7 +2246,7 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"), jc <- callJStatic("org.apache.spark.sql.functions", "rpad", x@jc, as.integer(len), pad) - column(jc) + column(jc, x@df) }) #' substring_index @@ -2266,7 +2271,7 @@ setMethod("substring_index", jc <- callJStatic("org.apache.spark.sql.functions", "substring_index", x@jc, delim, as.integer(count)) - column(jc) + column(jc, x@df) }) #' translate @@ -2286,7 +2291,7 @@ setMethod("translate", function(x, matchingString, replaceString) { jc <- callJStatic("org.apache.spark.sql.functions", "translate", x@jc, matchingString, replaceString) - column(jc) + column(jc, x@df) }) #' unix_timestamp @@ -2315,7 +2320,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"), setMethod("unix_timestamp", signature(x = "Column", format = "missing"), function(x, format) { jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp", x@jc) - column(jc) + column(jc, x@df) }) #' @rdname unix_timestamp @@ -2324,7 +2329,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"), setMethod("unix_timestamp", signature(x = "Column", format = "character"), function(x, format = "yyyy-MM-dd HH:mm:ss") { jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp", x@jc, format) - column(jc) + column(jc, x@df) }) #' when #' @@ -2339,10 +2344,9 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"), #' @examples \dontrun{when(df$age == 2, df$age + 1)} setMethod("when", signature(condition = "Column", value = "ANY"), function(condition, value) { - condition <- condition@jc value <- if (class(value) == "Column") { value@jc } else { value } - jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value) - column(jc) + jc <- callJStatic("org.apache.spark.sql.functions", 
"when", condition@jc, value) + column(jc, condition@df) }) #' ifelse @@ -2362,14 +2366,13 @@ setMethod("when", signature(condition = "Column", value = "ANY"), setMethod("ifelse", signature(test = "Column", yes = "ANY", no = "ANY"), function(test, yes, no) { - test <- test@jc yes <- if (class(yes) == "Column") { yes@jc } else { yes } no <- if (class(no) == "Column") { no@jc } else { no } jc <- callJMethod(callJStatic("org.apache.spark.sql.functions", "when", - test, yes), + test@jc, yes), "otherwise", no) - column(jc) + column(jc, test@df) }) ###################### Window functions###################### @@ -2579,7 +2582,7 @@ setMethod("array_contains", signature(x = "Column", value = "ANY"), function(x, value) { jc <- callJStatic("org.apache.spark.sql.functions", "array_contains", x@jc, value) - column(jc) + column(jc, x@df) }) #' explode @@ -2595,7 +2598,7 @@ setMethod("explode", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc) - column(jc) + column(jc, x@df) }) #' size @@ -2611,7 +2614,7 @@ setMethod("size", signature(x = "Column"), function(x) { jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc) - column(jc) + column(jc, x@df) }) #' sort_array @@ -2636,5 +2639,5 @@ setMethod("sort_array", signature(x = "Column"), function(x, asc = TRUE) { jc <- callJStatic("org.apache.spark.sql.functions", "sort_array", x@jc, asc) - column(jc) + column(jc, x@df) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 2dba71abec689..5d431ff471a64 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -754,7 +754,7 @@ setGeneric("ceil", function(x) { standardGeneric("ceil") }) #' @rdname col #' @export -setGeneric("column", function(x) { standardGeneric("column") }) +setGeneric("column", function(x, df) { standardGeneric("column") }) #' @rdname concat #' @export diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index cc118108f61cc..300e4e53cfdb1 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -811,9 +811,9 @@ test_that("select operators", { expect_is(df[[2]], "Column") expect_is(df[["age"]], "Column") - expect_is(df[,1], "DataFrame") - expect_equal(columns(df[,1]), c("name")) - expect_equal(columns(df[,"age"]), c("age")) + expect_is(df[,1,drop=F], "DataFrame") + expect_equal(columns(df[,1,drop=F]), c("name")) + expect_equal(columns(df[,"age",drop=F]), c("age")) df2 <- df[,c("age", "name")] expect_is(df2, "DataFrame") expect_equal(columns(df2), c("age", "name")) @@ -878,13 +878,13 @@ test_that("subsetting", { expect_equal(columns(filtered), c("name", "age")) expect_equal(collect(filtered)$name, "Andy") - df2 <- df[df$age == 19, 1] + df2 <- df[df$age == 19, 1, drop=F] expect_is(df2, "DataFrame") expect_equal(count(df2), 1) expect_equal(columns(df2), c("name")) expect_equal(collect(df2)$name, "Justin") - df3 <- df[df$age > 20, 2] + df3 <- df[df$age > 20, 2, drop=F] expect_equal(count(df3), 1) expect_equal(columns(df3), c("age")) @@ -900,7 +900,7 @@ test_that("subsetting", { expect_equal(count(df6), 1) expect_equal(columns(df6), c("name", "age")) - df7 <- subset(df, select = "name") + df7 <- subset(df, select = "name", drop=F) expect_equal(count(df7), 3) expect_equal(columns(df7), c("name")) @@ -1047,13 +1047,13 @@ test_that("column functions", { schema = c("a", "b", "c")) result <- collect(select(df, struct("a", "c"))) expected <- data.frame(row.names = 1:2) - expected$"struct(a, c)" <- list(listToStruct(list(a = 1L, c = 
3L)), + expected$"struct(a,c)" <- list(listToStruct(list(a = 1L, c = 3L)), listToStruct(list(a = 4L, c = 6L))) expect_equal(result, expected) result <- collect(select(df, struct(df$a, df$b))) expected <- data.frame(row.names = 1:2) - expected$"struct(a, b)" <- list(listToStruct(list(a = 1L, b = 2L)), + expected$"struct(a,b)" <- list(listToStruct(list(a = 1L, b = 2L)), listToStruct(list(a = 4L, b = 5L))) expect_equal(result, expected) @@ -1813,7 +1813,7 @@ test_that("attach() on a DataFrame", { stat2 <- summary(age) expect_equal(collect(stat2)[5, "age"], "30") detach("df") - stat3 <- summary(df[, "age"]) + stat3 <- summary(df[, "age", drop=F]) expect_equal(collect(stat3)[5, "age"], "30") expect_error(age) }) @@ -1853,7 +1853,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", { df1 <- select(df, cast(df$age, "integer")) coltypes(df) <- c("character", "integer") expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"))) - value <- collect(df[, 2])[[3, 1]] + value <- collect(df[, 2, drop=F])[[3, 1]] expect_equal(value, collect(df1)[[3, 1]]) expect_equal(value, 22) @@ -1897,6 +1897,27 @@ test_that("Method str()", { expect_equal(capture.output(utils:::str(iris)), capture.output(str(iris))) }) +test_that("collect/show/head on Columns", { + + # collect + x <- irisDF$Sepal_Length + 100 + y <- cos(x + irisDF$Sepal_Width) ^ 2 + z <- sin(x + irisDF$Sepal_Width) ^ 2 + expect_equal(any(collect(y + z) == 1), TRUE) + + # show and print + expect_equal(capture.output(show(z + y))[1], " [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1") + expect_equal(capture.output(print(z + y))[1], " [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1") + + # head + expect_equal(all(round(head(z + y)) == 1), TRUE) + expect_equal(length(head(z + y, 100)), 100) + + # Columns without parent DataFrame + expect_error(x <- collect(rand()), + "This column cannot be collected as it's not associated to any DataFrame.") +}) + unlink(parquetPath) unlink(jsonPath) unlink(jsonPathNa) From fbf9b02b478b8eb4845232e09932d068cb393fd8 Mon Sep 17 00:00:00 2001 From: "Oscar D. 
Lara Yejas" Date: Tue, 23 Feb 2016 16:00:52 -0800 Subject: [PATCH 02/31] Removed drop=F from other PR --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 300e4e53cfdb1..bc6fb2eebe2e7 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -811,9 +811,9 @@ test_that("select operators", { expect_is(df[[2]], "Column") expect_is(df[["age"]], "Column") - expect_is(df[,1,drop=F], "DataFrame") - expect_equal(columns(df[,1,drop=F]), c("name")) - expect_equal(columns(df[,"age",drop=F]), c("age")) + expect_is(df[,1], "DataFrame") + expect_equal(columns(df[,1]), c("name")) + expect_equal(columns(df[,"age"]), c("age")) df2 <- df[,c("age", "name")] expect_is(df2, "DataFrame") expect_equal(columns(df2), c("age", "name")) @@ -878,13 +878,13 @@ test_that("subsetting", { expect_equal(columns(filtered), c("name", "age")) expect_equal(collect(filtered)$name, "Andy") - df2 <- df[df$age == 19, 1, drop=F] + df2 <- df[df$age == 19, 1] expect_is(df2, "DataFrame") expect_equal(count(df2), 1) expect_equal(columns(df2), c("name")) expect_equal(collect(df2)$name, "Justin") - df3 <- df[df$age > 20, 2, drop=F] + df3 <- df[df$age > 20, 2] expect_equal(count(df3), 1) expect_equal(columns(df3), c("age")) @@ -900,7 +900,7 @@ test_that("subsetting", { expect_equal(count(df6), 1) expect_equal(columns(df6), c("name", "age")) - df7 <- subset(df, select = "name", drop=F) + df7 <- subset(df, select = "name") expect_equal(count(df7), 3) expect_equal(columns(df7), c("name")) @@ -1813,7 +1813,7 @@ test_that("attach() on a DataFrame", { stat2 <- summary(age) expect_equal(collect(stat2)[5, "age"], "30") detach("df") - stat3 <- summary(df[, "age", drop=F]) + stat3 <- summary(df[, "age"]) expect_equal(collect(stat3)[5, "age"], "30") expect_error(age) }) @@ -1853,7 +1853,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", { df1 <- select(df, cast(df$age, "integer")) coltypes(df) <- c("character", "integer") expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"))) - value <- collect(df[, 2, drop=F])[[3, 1]] + value <- collect(df[, 2])[[3, 1]] expect_equal(value, collect(df1)[[3, 1]]) expect_equal(value, 22) From 04e728b58ea1dde43fb12a1f458f2b175161c7de Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 17:35:34 -0800 Subject: [PATCH 03/31] Removed NA default value for df in column() method --- R/pkg/R/column.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 08fa4960c3391..2787d7e610fe7 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -66,7 +66,7 @@ setMethod("head", signature="Column", definition=function(x, n=6) { setMethod("column", signature(x = "jobj"), - function(x, df=NA) { + function(x, df) { new("Column", x, df) }) From 2d9ee18dbdbcd25882a18e8fa0fa404d87c497c1 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 19:51:17 -0800 Subject: [PATCH 04/31] Added docs to head(Column) --- R/pkg/R/DataFrame.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0d35429a9bcd3..17ee55f0583bd 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -966,13 +966,14 @@ setMethod("take", #' Head #' -#' Return the first NUM rows of a DataFrame as a data.frame. 
If NUM is NULL, -#' then head() returns the first 6 rows in keeping with the current data.frame -#' convention in R. +#' Return the first elements of a dataset. If \code{x} is a DataFrame, its first +#' rows will be returned as a data.frame. If the dataset is a \code{Column}, its first +#' elements will be returned as a vector. The number of elements to be returned +#' is given by parameter \code{num}. Default value for \code{num} is 6. #' -#' @param x A SparkSQL DataFrame +#' @param x A Spark DataFrame or Column #' @param num The number of rows to return. Default is 6. -#' @return A data.frame +#' @return A data.frame or vector #' #' @family DataFrame functions #' @rdname head @@ -980,11 +981,18 @@ setMethod("take", #' @export #' @examples #'\dontrun{ +#' # Initialize Spark context and SQL context #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) -#' head(df) +#' +#' # Create a DataFrame from the Iris dataset +#' irisDF <- createDataFrame(sqlContext, iris) +#' +#' # Get the first 6 elements of the DataFrame +#' head(irisDF) +#' +#' # Get the first 20 elements of a Column +#' head(irisDF$Sepal_Length) #' } setMethod("head", signature(x = "DataFrame"), From dc3df1926bb3931459a0ea381e5695b7bb56ca15 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 21:23:48 -0800 Subject: [PATCH 05/31] Still can't recreate build issue. Added default value df=NULL --- R/pkg/R/column.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 2787d7e610fe7..fac622f9954f4 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -66,7 +66,7 @@ setMethod("head", signature="Column", definition=function(x, n=6) { setMethod("column", signature(x = "jobj"), - function(x, df) { + function(x, df=NULL) { new("Column", x, df) }) From 1e06d3ccbb0cc441b9929f626812b8bc7dcbbda3 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 22:09:41 -0800 Subject: [PATCH 06/31] Noticed collate order is automatically changed after running install-dev.sh. Therefore, adding DESCRIPTION --- R/pkg/DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 465bc37788e5d..0cd0d75df0f70 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -18,10 +18,10 @@ Collate: 'schema.R' 'generics.R' 'jobj.R' - 'RDD.R' - 'pairRDD.R' 'column.R' 'group.R' + 'RDD.R' + 'pairRDD.R' 'DataFrame.R' 'SQLContext.R' 'backend.R' @@ -36,3 +36,4 @@ Collate: 'stats.R' 'types.R' 'utils.R' +RoxygenNote: 5.0.1 From d697d447ab8467eedade5d6e4c130dcf9a251e5e Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 23 Feb 2016 23:53:06 -0800 Subject: [PATCH 07/31] Still can't recreate build issue. 
Added minimal test case to debug it --- R/pkg/R/column.R | 7 +++++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index fac622f9954f4..d94ea0b3df2bf 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -66,8 +66,11 @@ setMethod("head", signature="Column", definition=function(x, n=6) { setMethod("column", signature(x = "jobj"), - function(x, df=NULL) { - new("Column", x, df) + function(x, df) { + if (missing(df)) { + df <- NULL + } + new("Column", jc=x, df=df) }) operators <- list( diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index bc6fb2eebe2e7..aa377d4d80277 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1918,6 +1918,11 @@ test_that("collect/show/head on Columns", { "This column cannot be collected as it's not associated to any DataFrame.") }) +test_that("Minimal column test.", { + x <- column(irisDF$Sepal_Length@jc) + expect_equal(collect(select(irisDF, x))[1, 1], 5.1) +}) + unlink(parquetPath) unlink(jsonPath) unlink(jsonPathNa) From 24b6154b127ca57e17fa79831b2fee1edcc38b1b Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 24 Feb 2016 00:32:03 -0800 Subject: [PATCH 08/31] Reverted removing spaces in test cases; this works in my environment but fails on Jenkins --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index aa377d4d80277..dbc3d6b4fe1ec 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1047,13 +1047,13 @@ test_that("column functions", { schema = c("a", "b", "c")) result <- collect(select(df, struct("a", "c"))) expected <- data.frame(row.names = 1:2) - expected$"struct(a,c)" <- list(listToStruct(list(a = 1L, c = 3L)), + expected$"struct(a, c)" <- list(listToStruct(list(a = 1L, c = 3L)), listToStruct(list(a = 4L, c = 6L))) expect_equal(result, expected) result <- collect(select(df, struct(df$a, df$b))) expected <- data.frame(row.names = 1:2) - expected$"struct(a,b)" <- list(listToStruct(list(a = 1L, b = 2L)), + expected$"struct(a, b)" <- list(listToStruct(list(a = 1L, b = 2L)), listToStruct(list(a = 4L, b = 5L))) expect_equal(result, expected) From 6a38a3ca79087538dd7d3452a3cb487f9b0b6dd2 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 24 Feb 2016 13:52:33 -0800 Subject: [PATCH 09/31] Added docs for collect(Column) --- R/pkg/R/DataFrame.R | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 17ee55f0583bd..5bb57153c9bca 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -846,9 +846,15 @@ setMethod("dim", c(count(x), ncol(x)) }) -#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame. +#' Download Spark datasets into R #' -#' @param x A SparkSQL DataFrame +#' If applied to a DataFrame, \code{collect} returns a data.frame. If applied to a +#' Column, it returns a vector of the same type. +#' +#' \strong{Note:} Since R data.frames and vectors are +#' held in memory, ensure that you have enough memory on your system to +#' accommodate the contents.
+#' @param x A Spark DataFrame or Column #' @param stringsAsFactors (Optional) A logical indicating whether or not string columns #' should be converted to factors. FALSE by default. @@ -858,12 +864,18 @@ setMethod("dim", #' @export #' @examples #'\dontrun{ +#' # Initialize Spark context and SQL context #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) -#' collected <- collect(df) -#' firstName <- collected[[1]]$name +#' +#' # Create a DataFrame from the Iris dataset +#' irisDF <- createDataFrame(sqlContext, iris) +#' +#' # Collect it +#' df <- collect(irisDF) +#' +#' # Collect a column +#' v <- collect(irisDF$Sepal_Length * 100) #' } setMethod("collect", signature(x = "DataFrame"), From bf4df2628f2dc214b607ddaa606728771aa685d0 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 30 Mar 2016 11:59:56 -0700 Subject: [PATCH 10/31] Handled case of 'orphan' Columns with no parent DataFrame --- R/pkg/R/column.R | 24 ++++++++++++++++++------ R/pkg/R/functions.R | 27 ++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index d94ea0b3df2bf..bea915ebca036 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -46,22 +46,34 @@ setMethod("initialize", "Column", function(.Object, jc, df) { setMethod("show", signature="Column", definition=function(object) { MAX_ELEMENTS <- 20 - show(head(object, MAX_ELEMENTS)) - cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS) ," elements only.")) + head.df <- head(object, MAX_ELEMENTS) + + if (length(head.df) == 0) { + colname <- callJMethod(object@jc, "toString") + cat(paste0(colname, "\n")) + cat(paste0("\n")) + } else { + show(head.df) + } + if (length(head.df) == MAX_ELEMENTS) { + cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS) ," elements only.")) + } }) setMethod("collect", signature="Column", definition=function(x) { if (is.null(x@df)) { - stop("This column cannot be collected as it's not associated to any DataFrame.") + character(0) + } else { + collect(select(x@df, x))[, 1] } - collect(select(x@df, x))[, 1] }) setMethod("head", signature="Column", definition=function(x, n=6) { if (is.null(x@df)) { - stop("This column cannot be collected as it's not associated to any DataFrame.") + collect(x) + } else { + head(select(x@df, x), n)[, 1] } - head(select(x@df, x), n)[, 1] }) setMethod("column", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 97ccebf42666a..a41f17b1d9c65 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2160,7 +2160,11 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"), setMethod("rand", signature(seed = "missing"), function(seed) { jc <- callJStatic("org.apache.spark.sql.functions", "rand") - column(jc) + + # By assigning a one-row data.frame, the result of this function can be collected + # returning a one-element Column + df <- as.DataFrame(sparkRSQL.init(), data.frame(0)) + column(jc, df) }) #' @rdname rand @@ -2169,7 +2173,11 @@ setMethod("rand", signature(seed = "numeric"), function(seed) { jc <- callJStatic("org.apache.spark.sql.functions", "rand", as.integer(seed)) - column(jc) + + # By assigning a one-row data.frame, the result of this function can be collected + # returning a one-element Column + df <- as.DataFrame(sparkRSQL.init(), data.frame(0)) + column(jc, df) }) #' randn @@ -2184,7 +2192,11 @@ setMethod("rand", signature(seed = 
"numeric"), setMethod("randn", signature(seed = "missing"), function(seed) { jc <- callJStatic("org.apache.spark.sql.functions", "randn") - column(jc) + + # By assigning a one-row data.frame, the result of this function can be collected + # returning a one-element Column + df <- as.DataFrame(sparkRSQL.init(), data.frame(0)) + column(jc, df) }) #' @rdname randn @@ -2193,7 +2205,8 @@ setMethod("randn", signature(seed = "missing"), setMethod("randn", signature(seed = "numeric"), function(seed) { jc <- callJStatic("org.apache.spark.sql.functions", "randn", as.integer(seed)) - column(jc) + df <- as.DataFrame(sparkRSQL.init(), data.frame(0)) + column(jc, df) }) #' regexp_extract @@ -2311,7 +2324,11 @@ setMethod("translate", setMethod("unix_timestamp", signature(x = "missing", format = "missing"), function(x, format) { jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp") - column(jc) + + # By assigning a one-row data.frame, the result of this function can be collected + # returning a one-element Column + df <- as.DataFrame(sparkRSQL.init(), data.frame(0)) + column(jc, df) }) #' @rdname unix_timestamp From c86bebb9ce02e5623d328d33109fff1ee8eeca56 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 30 Mar 2016 12:02:14 -0700 Subject: [PATCH 11/31] Added tests for orphan Columns --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index dbc3d6b4fe1ec..839b07dd1f10c 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1914,8 +1914,7 @@ test_that("collect/show/head on Columns", { expect_equal(length(head(z + y, 100)), 100) # Columns without parent DataFrame - expect_error(x <- collect(rand()), - "This column cannot be collected as it's not associated to any DataFrame.") + expect_equal(is.numeric(collect(rand())), TRUE) }) test_that("Minimal column test.", { From e5659ee85391937cdf301bd1acc01487bc55c129 Mon Sep 17 00:00:00 2001 From: "Oscar D. 
Lara Yejas" Date: Wed, 30 Mar 2016 15:30:21 -0700 Subject: [PATCH 12/31] Fixed style issues --- R/pkg/R/column.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index bea915ebca036..450026f54246b 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -44,7 +44,7 @@ setMethod("initialize", "Column", function(.Object, jc, df) { .Object }) -setMethod("show", signature="Column", definition=function(object) { +setMethod("show", signature = "Column", definition = function(object) { MAX_ELEMENTS <- 20 head.df <- head(object, MAX_ELEMENTS) @@ -56,11 +56,11 @@ setMethod("show", signature="Column", definition=function(object) { show(head.df) } if (length(head.df) == MAX_ELEMENTS) { - cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS) ," elements only.")) + cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS), " elements only.")) } }) -setMethod("collect", signature="Column", definition=function(x) { +setMethod("collect", signature = "Column", definition = function(x) { if (is.null(x@df)) { character(0) } else { @@ -68,7 +68,7 @@ setMethod("collect", signature="Column", definition=function(x) { } }) -setMethod("head", signature="Column", definition=function(x, n=6) { +setMethod("head", signature = "Column", definition = function(x, n=6) { if (is.null(x@df)) { collect(x) } else { @@ -82,7 +82,7 @@ setMethod("column", if (missing(df)) { df <- NULL } - new("Column", jc=x, df=df) + new("Column", jc = x, df = df) }) operators <- list( From 9c1661f0fe892fd3dcd40e8e489a940c9b129747 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Fri, 6 May 2016 16:04:31 -0700 Subject: [PATCH 13/31] pkg/R/columnR --- R/pkg/R/column.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 7e80f298992d1..f3df8c4247be8 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # -setOldClass("DataFrame") -setClassUnion("DataFrameOrNULL", c("DataFrame", "NULL")) +setOldClass("SparkDataFrame") +setClassUnion("SparkDataFrameOrNull", c("SparkDataFrame", "NULL")) # Column Class @@ -31,7 +31,7 @@ setOldClass("jobj") #' @slot jc reference to JVM SparkDataFrame column #' @export setClass("Column", - slots = list(jc = "jobj", df = "DataFrameOrNULL")) + slots = list(jc = "jobj", df = "SparkDataFrameOrNull")) setMethod("initialize", "Column", function(.Object, jc, df) { .Object@jc <- jc From ed0abf24d7f65ad2381f6d664ba23e440013c97a Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 11 Oct 2016 15:39:15 -0700 Subject: [PATCH 14/31] Removed method collect() --- R/pkg/R/DataFrame.R | 9 +++------ R/pkg/R/column.R | 8 -------- R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 +++--- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 6c5d8e486c2fb..b52610d557099 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1037,13 +1037,12 @@ setMethod("dim", #' Download Spark datasets into R #' -#' If applied to a SparkDataFrame, \code{collect} returns a data.frame. If applied to a -#' Column, it returns a vector of the same type. +#' Converts a SparkDataFrame into a data.frame. #' -#' \strong{Note:} Since R data.frames and vectors are +#' \strong{Note:} Since R data.frames are #' held in memory, ensure that you have enough memory on your system to #' accommodate the contents. 
-#' @param x A SparkDataFrame or Column +#' @param x A SparkDataFrame #' @param stringsAsFactors (Optional) A logical indicating whether or not string columns #' should be converted to factors. FALSE by default. @@ -1066,8 +1065,6 @@ setMethod("dim", #' # Collect it #' df <- collect(irisDF) #' -#' # Collect a column -#' v <- collect(irisDF$Sepal_Length * 100) #' } #' @note collect since 1.4.0 setMethod("collect", diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 6d9bed6fbb559..3140b3dfcf85d 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -67,14 +67,6 @@ setMethod("show", signature = "Column", definition = function(object) { } }) -setMethod("collect", signature = "Column", definition = function(x) { - if (is.null(x@df)) { - character(0) - } else { - collect(select(x@df, x))[, 1] - } -}) - setMethod("head", signature = "Column", definition = function(x, n=6) { if (is.null(x@df)) { collect(x) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 3eea9f8ae7925..b44a8f24a2506 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2258,7 +2258,7 @@ test_that("collect/show/head on Columns", { x <- irisDF$Sepal_Length + 100 y <- cos(x + irisDF$Sepal_Width) ^ 2 z <- sin(x + irisDF$Sepal_Width) ^ 2 - expect_equal(any(collect(y + z) == 1), TRUE) + expect_equal(any(head(y + z) == 1), TRUE) # show and print expect_equal(capture.output(show(z + y))[1], " [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1") @@ -2269,12 +2269,12 @@ test_that("collect/show/head on Columns", { expect_equal(length(head(z + y, 100)), 100) # Columns without parent DataFrame - expect_equal(is.numeric(collect(rand())), TRUE) + expect_equal(is.numeric(head(rand())), TRUE) }) test_that("Minimal column test.", { x <- column(irisDF$Sepal_Length@jc) - expect_equal(collect(select(irisDF, x))[1, 1], 5.1) + expect_equal(head(select(irisDF, x))[1, 1], 5.1) }) test_that("Histogram", { From d2470fa736e92ee907426a031bce98e34fa3eb86 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 12 Oct 2016 09:58:59 -0700 Subject: [PATCH 15/31] Removed spark.init() call, fixed docs --- R/pkg/R/DataFrame.R | 41 +++++++++++++---------------------------- R/pkg/R/column.R | 2 +- R/pkg/R/functions.R | 10 +++++----- R/pkg/R/generics.R | 4 ++++ 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b52610d557099..75b733f3ccb5f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1035,16 +1035,10 @@ setMethod("dim", c(count(x), ncol(x)) }) -#' Download Spark datasets into R +#' Collects all the elements of a SparkDataFrame and coerces them into an R data.frame. #' -#' Converts a SparkDataFrame into a data.frame. -#' -#' \strong{Note:} Since R data.frames are -#' held in memory, ensure that you have enough memory on your system to -#' accommodate the contents. -#' @param x A SparkDataFrame -#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns - +#' @param x a SparkDataFrame. +#' @param stringsAsFactors (Optional) a logical indicating whether or not string columns #' should be converted to factors. FALSE by default. #' @param ... further arguments to be passed to or from other methods. 
From d2470fa736e92ee907426a031bce98e34fa3eb86 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Wed, 12 Oct 2016 09:58:59 -0700
Subject: [PATCH 15/31] Removed sparkR.init() call, fixed docs

---
 R/pkg/R/DataFrame.R | 41 +++++++++++++----------------------
 R/pkg/R/column.R    |  2 +-
 R/pkg/R/functions.R | 10 +++++-----
 R/pkg/R/generics.R  |  4 ++++
 4 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b52610d557099..75b733f3ccb5f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1035,16 +1035,10 @@ setMethod("dim",
     c(count(x), ncol(x))
   })

-#' Download Spark datasets into R
+#' Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.
 #'
-#' Converts a SparkDataFrame into a data.frame.
-#'
-#' \strong{Note:} Since R data.frames are
-#' held in memory, ensure that you have enough memory on your system to
-#' accommodate the contents.
-#' @param x A SparkDataFrame
-#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns
-
+#' @param x a SparkDataFrame.
+#' @param stringsAsFactors (Optional) a logical indicating whether or not string columns
 #' should be converted to factors. FALSE by default.
 #' @param ... further arguments to be passed to or from other methods.
 #'
@@ -1055,16 +1049,11 @@
 #' @export
 #' @examples
 #'\dontrun{
-#' # Initialize Spark context and SQL context
-#' sc <- sparkR.init()
-#' sqlContext <- sparkRSQL.init(sc)
-#'
-#' # Create a DataFrame from the Iris dataset
-#' irisDF <- createDataFrame(sqlContext, iris)
-#'
-#' # Collect it
-#' df <- collect(irisDF)
-#'
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' collected <- collect(df)
+#' firstName <- collected[[1]]$name
 #' }
 #' @note collect since 1.4.0
 setMethod("collect",
@@ -1178,25 +1167,22 @@ setMethod("take",
   })

 #' Head
-#'
-#' Return the first elements of a dataset. If \code{x} is a SparkDataFrame, its first
+#' Return the first elements of a SparkDataFrame or Column. If \code{x} is a SparkDataFrame, its first
 #' rows will be returned as a data.frame. If the dataset is a \code{Column}, its first
 #' elements will be returned as a vector. The number of elements to be returned
 #' is given by parameter \code{num}. Default value for \code{num} is 6.
 #'
-#' @param x A SparkDataFrame or Column
-#' @param num The number of rows to return. Default is 6.
-#' @return A data.frame or vector
+#' @param num the number of rows to return. Default is 6.
+#' @return A data.frame.
 #'
 #' @family SparkDataFrame functions
 #' @aliases head,SparkDataFrame-method
-#' @rdname head
 #' @name head
 #' @export
 #' @examples
 #'\dontrun{
 #' # Initialize Spark context and SQL context
-#' sc <- sparkR.init()
+#' sc <- sparkR.session()
 #' sqlContext <- sparkRSQL.init(sc)
 #'
 #' # Create a DataFrame from the Iris dataset
@@ -1206,8 +1192,7 @@
 #' head(irisDF)
 #'
 #' # Get the first 20 elements of a Column
-#' head(irisDF$Sepal_Length)
-#' }
+#' head(irisDF$Sepal_Length, 20)
 #' @note head since 1.4.0
 setMethod("head",
           signature(x = "SparkDataFrame"),

diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 3140b3dfcf85d..d0429091385ea 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -51,7 +51,7 @@ setMethod("initialize", "Column", function(.Object, jc, df) {
   .Object
 })

-setMethod("show", signature = "Column", definition = function(object) {
+setMethod("show", signature = "Column", function(object) {
   MAX_ELEMENTS <- 20
   head.df <- head(object, MAX_ELEMENTS)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index d2e5a531b5f2c..a053994a5b762 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2848,7 +2848,7 @@ setMethod("rand", signature(seed = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(sparkRSQL.init(), data.frame(0))
+            df <- as.DataFrame(data.frame(0))
             column(jc, df)
           })

@@ -2863,7 +2863,7 @@ setMethod("rand", signature(seed = "numeric"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(sparkRSQL.init(), data.frame(0))
+            df <- as.DataFrame(data.frame(0))
             column(jc, df)
           })

@@ -2885,7 +2885,7 @@ setMethod("randn", signature(seed = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(sparkRSQL.init(), data.frame(0))
+            df <- as.DataFrame(data.frame(0))
             column(jc, df)
           })

@@ -2897,7 +2897,7 @@ setMethod("randn", signature(seed = "missing"),
 setMethod("randn", signature(seed = "numeric"),
           function(seed) {
             jc <- callJStatic("org.apache.spark.sql.functions", "randn", as.integer(seed))
-            df <- as.DataFrame(sparkRSQL.init(), data.frame(0))
+            df <- as.DataFrame(data.frame(0))
             column(jc, df)
           })

@@ -3051,7 +3051,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(sparkRSQL.init(), data.frame(0))
+            df <- as.DataFrame(data.frame(0))
             column(jc, df)
           })

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 24c062c8e366e..7cb2348d93614 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -550,6 +550,10 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
 #' @export
 setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })

+#' @param x a SparkDataFrame.
+#' @rdname head
+setGeneric("head")
+
 #' @rdname insertInto
 #' @export
 setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })

From bf739e426da714c9a4ac38fea1f7fc6aff5198a3 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Wed, 12 Oct 2016 10:33:33 -0700
Subject: [PATCH 16/31] Used a singleton to generate empty DataFrame

---
 R/pkg/R/DataFrame.R | 18 +++++++++++++-----
 R/pkg/R/functions.R | 10 +++++-----
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 75b733f3ccb5f..174058290765b 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1166,8 +1166,9 @@ setMethod("take",
     collect(limited)
   })

-#' Head
-#' Return the first elements of a SparkDataFrame or Column. If \code{x} is a SparkDataFrame, its first
+#' Return the first part of a SparkDataFrame or Column
+#'
+#' If \code{x} is a SparkDataFrame, its first
 #' rows will be returned as a data.frame. If the dataset is a \code{Column}, its first
 #' elements will be returned as a vector. The number of elements to be returned
 #' is given by parameter \code{num}. Default value for \code{num} is 6.
@@ -1182,11 +1183,10 @@ setMethod("take",
 #' @examples
 #'\dontrun{
 #' # Initialize Spark context and SQL context
-#' sc <- sparkR.session()
-#' sqlContext <- sparkRSQL.init(sc)
+#' sparkR.session()
 #'
 #' # Create a DataFrame from the Iris dataset
-#' irisDF <- createDataFrame(sqlContext, iris)
+#' irisDF <- as.DataFrame(iris)
 #'
 #' # Get the first 6 elements of the DataFrame
 #' head(irisDF)
@@ -3327,3 +3327,11 @@ setMethod("randomSplit",
   }
   sapply(sdfs, dataFrame)
 })
+
+# A global singleton for an empty SparkR DataFrame.
+getEmptySparkRDataFrame <- function() {
+  if (is.null(.sparkREnv$EMPTY_DF)) {
+    .sparkREnv$EMPTY_DF <- as.DataFrame(data.frame(0))
+  }
+  return(.sparkREnv$EMPTY_DF)
+}
\ No newline at end of file

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a053994a5b762..923c198fca293 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2848,7 +2848,7 @@ setMethod("rand", signature(seed = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(data.frame(0))
+            df <- getEmptySparkRDataFrame()
             column(jc, df)
           })

@@ -2863,7 +2863,7 @@ setMethod("rand", signature(seed = "numeric"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(data.frame(0))
+            df <- getEmptySparkRDataFrame()
             column(jc, df)
           })

@@ -2885,7 +2885,7 @@ setMethod("randn", signature(seed = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(data.frame(0))
+            df <- getEmptySparkRDataFrame()
             column(jc, df)
           })

@@ -2897,7 +2897,7 @@ setMethod("randn", signature(seed = "missing"),
 setMethod("randn", signature(seed = "numeric"),
           function(seed) {
             jc <- callJStatic("org.apache.spark.sql.functions", "randn", as.integer(seed))
-            df <- as.DataFrame(data.frame(0))
+            df <- getEmptySparkRDataFrame()
             column(jc, df)
           })

@@ -3051,7 +3051,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"),

             # By assigning a one-row data.frame, the result of this function can be collected
             # returning a one-element Column
-            df <- as.DataFrame(data.frame(0))
+            df <- getEmptySparkRDataFrame()
             column(jc, df)
           })

From 20e53e83a4d564c17d0b180ae5eab8ca9d6c1410 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Wed, 12 Oct 2016 10:37:36 -0700
Subject: [PATCH 17/31] Minor docs change

---
 R/pkg/R/generics.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 7cb2348d93614..27d5f97deda5f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -550,7 +550,7 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
 #' @export
 setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })

-#' @param x a SparkDataFrame.
+#' @param x a SparkDataFrame or Column
 #' @rdname head
 setGeneric("head")

From 266d5ffd87929adf081944de19ee36f892125977 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Wed, 12 Oct 2016 11:40:13 -0700
Subject: [PATCH 18/31] Added missing bracket

---
 R/pkg/R/DataFrame.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 174058290765b..4225de65a6f7d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1193,6 +1193,7 @@ setMethod("take",
 #'
 #' # Get the first 20 elements of a Column
 #' head(irisDF$Sepal_Length, 20)
+#' }
 #' @note head since 1.4.0
 setMethod("head",
           signature(x = "SparkDataFrame"),
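The point of PATCH 16 shows up in rand(), randn(), and unix_timestamp(), which mint Columns that have no parent SparkDataFrame: they now all share one cached one-row DataFrame instead of building a fresh one per call, so their results can still be materialized. A sketch of the observable behavior, under the same session assumptions as the earlier example:

    r <- rand()               # a Column backed by the shared one-row DataFrame
    head(r)                   # one random value, since the backing frame has one row
    is.numeric(head(rand()))  # TRUE, as asserted in the updated test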
Lara Yejas" Date: Wed, 12 Oct 2016 14:18:59 -0700 Subject: [PATCH 19/31] Added documentation for parameter df of class Column --- R/pkg/R/column.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index d0429091385ea..a93e9f0d36a0b 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -31,6 +31,7 @@ setOldClass("jobj") #' @rdname column #' #' @slot jc reference to JVM SparkDataFrame column +#' @slot df the parent SparkDataFrame of the Column object #' @export #' @note Column since 1.4.0 setClass("Column", From 0691c32cacc3218742c7f345b4bc498ba2f826e5 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 13 Oct 2016 12:04:58 -0700 Subject: [PATCH 20/31] Fixed docs issues --- R/pkg/R/column.R | 2 ++ R/pkg/R/generics.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a93e9f0d36a0b..249c03f8e796e 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -52,6 +52,7 @@ setMethod("initialize", "Column", function(.Object, jc, df) { .Object }) +#' @rdname show setMethod("show", signature = "Column", function(object) { MAX_ELEMENTS <- 20 head.df <- head(object, MAX_ELEMENTS) @@ -68,6 +69,7 @@ setMethod("show", signature = "Column", function(object) { } }) +#' @rdname head setMethod("head", signature = "Column", definition = function(x, n=6) { if (is.null(x@df)) { collect(x) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 27d5f97deda5f..65adedead9a2e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -552,7 +552,7 @@ setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") }) #' @param x a SparkDataFrame or Column #' @rdname head -setGeneric("head") +#setGeneric("head") #' @rdname insertInto #' @export From 445407caa11b0f555e32d51008fdf83d5c4ecd97 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 13 Oct 2016 13:49:40 -0700 Subject: [PATCH 21/31] Removed commented code --- R/pkg/R/generics.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 65adedead9a2e..57628f7e66922 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -552,7 +552,6 @@ setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") }) #' @param x a SparkDataFrame or Column #' @rdname head -#setGeneric("head") #' @rdname insertInto #' @export From 1ace2e5785030df790b3565d2a0b24358229d655 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 13 Oct 2016 17:00:01 -0700 Subject: [PATCH 22/31] Fixed docs issues. Renamed parameter n as num for method head to have consistency with DataFrame's head --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R | 4 ++-- R/pkg/R/generics.R | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 4225de65a6f7d..799bcd9d12a5e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1172,7 +1172,7 @@ setMethod("take", #' rows will be returned as a data.frame. If the dataset is a \code{Column}, its first #' elements will be returned as a vector. The number of elements to be returned #' is given by parameter \code{num}. Default value for \code{num} is 6. -#' +#' @param x a SparkDataFrame or Column #' @param num the number of rows to return. Default is 6. #' @return A data.frame. 
 #'
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 249c03f8e796e..5c490707c3521 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -70,11 +70,11 @@ setMethod("show", signature = "Column", function(object) {
 })

 #' @rdname head
-setMethod("head", signature = "Column", definition = function(x, n=6) {
+setMethod("head", signature = "Column", definition = function(x, num = 6) {
   if (is.null(x@df)) {
     collect(x)
   } else {
-    head(select(x@df, x), n)[, 1]
+    head(select(x@df, x), num)[, 1]
   }
 })

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 57628f7e66922..24c062c8e366e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -550,9 +550,6 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
 #' @export
 setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })

-#' @param x a SparkDataFrame or Column
-#' @rdname head
-
 #' @rdname insertInto
 #' @export
 setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })

From 2bfb8a62adf81e6953e398147e5f8d9122bc47ef Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Fri, 14 Oct 2016 10:32:06 -0700
Subject: [PATCH 23/31] Style fixes

---
 R/pkg/R/column.R                          | 16 +++++++++-------
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 5c490707c3521..d8369a2f8b48d 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -70,13 +70,15 @@ setMethod("show", signature = "Column", function(object) {
 })

 #' @rdname head
-setMethod("head", signature = "Column", definition = function(x, num = 6) {
-  if (is.null(x@df)) {
-    collect(x)
-  } else {
-    head(select(x@df, x), num)[, 1]
-  }
-})
+setMethod("head",
+          signature = "Column",
+          function(x, num = 6L) {
+            if (is.null(x@df)) {
+              collect(x)
+            } else {
+              head(select(x@df, x), num)[, 1]
+            }
+          })

 #' @rdname column
 #' @name column

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index b44a8f24a2506..8e8fdb42c4aa4 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2252,7 +2252,7 @@ test_that("Method str()", {
   expect_equal(capture.output(utils:::str(iris)), capture.output(str(iris)))
 })

-test_that("collect/show/head on Columns", {
+test_that("show/head on Columns", {

   # collect

From e0bba0a0655f750751a6b9b05a7f6cf5eb807e05 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Fri, 14 Oct 2016 11:50:45 -0700
Subject: [PATCH 24/31] Fixed docs issues

---
 R/pkg/R/column.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index d8369a2f8b48d..330d35c9cb6cc 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -82,6 +82,7 @@ setMethod("head",

 #' @rdname column
 #' @name column
+#' @param df the parent SparkDataFrame. This is used to retrieve the contents of the column through method head.
 #' @aliases column,jobj-method
 setMethod("column",
           signature(x = "jobj"),
Lara Yejas" Date: Wed, 19 Oct 2016 12:40:25 -0700 Subject: [PATCH 25/31] Cosmetic changes --- R/pkg/R/DataFrame.R | 10 +++++----- R/pkg/R/column.R | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 799bcd9d12a5e..5804d25b6b54c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1178,21 +1178,21 @@ setMethod("take", #' #' @family SparkDataFrame functions #' @aliases head,SparkDataFrame-method +#' @rdname head #' @name head #' @export #' @examples #'\dontrun{ -#' # Initialize Spark context and SQL context #' sparkR.session() #' #' # Create a DataFrame from the Iris dataset -#' irisDF <- as.DataFrame(iris) +#' df <- as.DataFrame(airquality) #' #' # Get the first 6 elements of the DataFrame -#' head(irisDF) +#' head(df) #' #' # Get the first 20 elements of a Column -#' head(irisDF$Sepal_Length, 20) +#' head(df$Ozone, 20) #' } #' @note head since 1.4.0 setMethod("head", @@ -3334,5 +3334,5 @@ getEmptySparkRDataFrame <- function() { if (is.null(.sparkREnv$EMPTY_DF)) { .sparkREnv$EMPTY_DF <- as.DataFrame(data.frame(0)) } - return(.sparkREnv$EMPTY_DF) + .sparkREnv$EMPTY_DF } \ No newline at end of file diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 330d35c9cb6cc..31ef5a632b21e 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -74,7 +74,7 @@ setMethod("head", signature = "Column", function(x, num = 6L) { if (is.null(x@df)) { - collect(x) + character(0) } else { head(select(x@df, x), num)[, 1] } From 76061ad8320c0ab1a52da1d0a7ea96e2db5d8a68 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Wed, 19 Oct 2016 15:42:29 -0700 Subject: [PATCH 26/31] More cosmetics --- R/pkg/R/DataFrame.R | 6 +++--- R/pkg/R/column.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 5804d25b6b54c..a4c29b80662ab 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3331,8 +3331,8 @@ setMethod("randomSplit", # A global singleton for an empty SparkR DataFrame. getEmptySparkRDataFrame <- function() { - if (is.null(.sparkREnv$EMPTY_DF)) { - .sparkREnv$EMPTY_DF <- as.DataFrame(data.frame(0)) + if (is.null(.sparkREnv$.emptyDataFrame)) { + .sparkREnv$.emptyDataFrame <- as.DataFrame(data.frame(0)) } - .sparkREnv$EMPTY_DF + .sparkREnv$.emptyDataFrame } \ No newline at end of file diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 31ef5a632b21e..91032ebdb7316 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -71,7 +71,7 @@ setMethod("show", signature = "Column", function(object) { #' @rdname head setMethod("head", - signature = "Column", + signature(x = "Column"), function(x, num = 6L) { if (is.null(x@df)) { character(0) From ed1b382524265340d0b692a89c6be388eddb549a Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Fri, 21 Oct 2016 12:38:13 -0700 Subject: [PATCH 27/31] More cosmetics --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a4c29b80662ab..4b53604b58af4 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3331,7 +3331,7 @@ setMethod("randomSplit", # A global singleton for an empty SparkR DataFrame. getEmptySparkRDataFrame <- function() { - if (is.null(.sparkREnv$.emptyDataFrame)) { + if (!exists(".emptyDataFrame", envir=.sparkREnv)) { .sparkREnv$.emptyDataFrame <- as.DataFrame(data.frame(0)) } .sparkREnv$.emptyDataFrame From 1338d71a8f8f1387e08326851e15fcc84d5092f9 Mon Sep 17 00:00:00 2001 From: "Oscar D. 
Lara Yejas" Date: Fri, 21 Oct 2016 13:55:31 -0700 Subject: [PATCH 28/31] Fixed style issues --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 4b53604b58af4..6cd6ec60a8433 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -3331,7 +3331,7 @@ setMethod("randomSplit", # A global singleton for an empty SparkR DataFrame. getEmptySparkRDataFrame <- function() { - if (!exists(".emptyDataFrame", envir=.sparkREnv)) { + if (!exists(".emptyDataFrame", envir = .sparkREnv)) { .sparkREnv$.emptyDataFrame <- as.DataFrame(data.frame(0)) } .sparkREnv$.emptyDataFrame From c0f1906f001a9df1efb9968a3c370f11e9378b7e Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 25 Oct 2016 15:29:49 -0700 Subject: [PATCH 29/31] Added test for Columns with no parent DataFrame --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 8e8fdb42c4aa4..01a6cbb1e5603 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2270,6 +2270,10 @@ test_that("show/head on Columns", { # Columns without parent DataFrame expect_equal(is.numeric(head(rand())), TRUE) + + # Columns with NULL parent DataFrame + expect_equal(length(head(column("abc"))), 0) + }) test_that("Minimal column test.", { From 777aee3e8f2adbc7fefca20a8d1fb14faffd96c2 Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Tue, 25 Oct 2016 15:35:16 -0700 Subject: [PATCH 30/31] Moved head documentation to generics.R --- R/pkg/R/DataFrame.R | 28 ---------------------------- R/pkg/R/generics.R | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 6cd6ec60a8433..3c4d44d1ef2a7 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1166,35 +1166,7 @@ setMethod("take", collect(limited) }) -#' Return the first part of a SparkDataFrame or Column -#' -#' If \code{x} is a SparkDataFrame, its first -#' rows will be returned as a data.frame. If the dataset is a \code{Column}, its first -#' elements will be returned as a vector. The number of elements to be returned -#' is given by parameter \code{num}. Default value for \code{num} is 6. -#' @param x a SparkDataFrame or Column -#' @param num the number of rows to return. Default is 6. -#' @return A data.frame. -#' -#' @family SparkDataFrame functions -#' @aliases head,SparkDataFrame-method #' @rdname head -#' @name head -#' @export -#' @examples -#'\dontrun{ -#' sparkR.session() -#' -#' # Create a DataFrame from the Iris dataset -#' df <- as.DataFrame(airquality) -#' -#' # Get the first 6 elements of the DataFrame -#' head(df) -#' -#' # Get the first 20 elements of a Column -#' head(df$Ozone, 20) -#' } -#' @note head since 1.4.0 setMethod("head", signature(x = "SparkDataFrame"), function(x, num = 6L) { diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 24c062c8e366e..1753251f96c03 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -980,6 +980,36 @@ setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime") #' @export setGeneric("greatest", function(x, ...) { standardGeneric("greatest") }) +#' Return the first part of a SparkDataFrame or Column +#' +#' If \code{x} is a SparkDataFrame, its first +#' rows will be returned as a data.frame. 
+#' elements will be returned as a vector. The number of elements to be returned
+#' is given by parameter \code{num}. Default value for \code{num} is 6.
+#' @param x a SparkDataFrame or Column
+#' @param num the number of rows to return. Default is 6.
+#' @return A data.frame.
+#'
+#' @family SparkDataFrame functions
+#' @aliases head,SparkDataFrame-method
+#' @rdname head
+#' @name head
+#' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#'
+#' # Create a DataFrame from the Iris dataset
+#' df <- as.DataFrame(airquality)
+#'
+#' # Get the first 6 elements of the DataFrame
+#' head(df)
+#'
+#' # Get the first 20 elements of a Column
+#' head(df$Ozone, 20)
+#' }
+#' @note head since 1.4.0
+
 #' @rdname hex
 #' @export
 setGeneric("hex", function(x) { standardGeneric("hex") })

From 619f23b367b94a47f3f63dea008ab4d674218b34 Mon Sep 17 00:00:00 2001
From: "Oscar D. Lara Yejas"
Date: Tue, 25 Oct 2016 15:40:29 -0700
Subject: [PATCH 31/31] Moved documentation for show method to generics.R

---
 R/pkg/R/DataFrame.R | 18 ------------------
 R/pkg/R/generics.R  | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 3c4d44d1ef2a7..6fa72767de23d 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -223,25 +223,7 @@ setMethod("showDF",
     cat(s)
   })

-#' show
-#'
-#' Print class and type information of a Spark object.
-#'
-#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
-#'
-#' @family SparkDataFrame functions
 #' @rdname show
-#' @aliases show,SparkDataFrame-method
-#' @name show
-#' @export
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' path <- "path/to/file.json"
-#' df <- read.json(path)
-#' show(df)
-#'}
-#' @note show(SparkDataFrame) since 1.4.0
 setMethod("show", "SparkDataFrame",
           function(object) {
             cols <- lapply(dtypes(object), function(l) {

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 1753251f96c03..fd94838942038 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -683,6 +683,27 @@ setGeneric("select", function(x, col, ...) { standardGeneric("select") } )
 #' @export
 setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") })

+#' show
+#'
+#' Print class and type information of a Spark object.
+#'
+#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
+#'
+#' @family SparkDataFrame functions
+#' @rdname show
+#' @aliases show,SparkDataFrame-method
+#' @name show
+#' @export
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' show(df)
+#'}
+#' @note show(SparkDataFrame) since 1.4.0
+NULL
+
 #' @rdname showDF
 #' @export
 setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
@@ -1009,6 +1030,7 @@ setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
 #' head(df$Ozone, 20)
 #' }
 #' @note head since 1.4.0
+NULL

 #' @rdname hex
 #' @export
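Taken together, the series leaves SparkR with the following user-visible behavior; a closing sketch, assuming Spark 2.x with these patches applied (airquality mirrors the final documentation examples):

    library(SparkR)
    sparkR.session()
    df <- as.DataFrame(airquality)

    head(df$Ozone)        # first 6 elements of the Column, as an R vector
    head(df$Ozone, 20)    # explicit element count
    df$Ozone              # show() prints at most 20 elements, then a "..." marker

    head(column("abc"))   # Column with no parent SparkDataFrame: length-0 result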