Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6fc97e0
Support for collect() on Columns
Feb 23, 2016
fbf9b02
Removed drop=F from other PR
Feb 24, 2016
04e728b
Removed NA default value for df in column() method
Feb 24, 2016
2d9ee18
Added docs to head(Column)
Feb 24, 2016
dc3df19
Still can't recreate build issue. Added default value df=NULL
Feb 24, 2016
1e06d3c
Noticed collate order is automatically changed after running install-…
Feb 24, 2016
d697d44
Still can't recreate build issue. Added minimal test case to debug it
Feb 24, 2016
24b6154
Reverted removing spaces in test cases. This works in my environment,…
Feb 24, 2016
6a38a3c
Added docs for collect(Column)
Feb 24, 2016
bf4df26
Handled case of 'orphan' Columns with no parent DataFrame
Mar 30, 2016
c86bebb
Added tests for orphan Columns
Mar 30, 2016
bf6c456
Merged with the base branch
Mar 30, 2016
e5659ee
Fixed style issues
Mar 30, 2016
97920c6
Merged upstream branch
May 6, 2016
9c1661f
pkg/R/columnR
May 6, 2016
6cbebc1
Merge branch 'master' of https://github.com/apache/spark into SPARK-9325
May 6, 2016
8f906a2
Merged with upstream
Oct 11, 2016
ed0abf2
Removed method collect()
Oct 11, 2016
d2470fa
Removed spark.init() call, fixed docs
Oct 12, 2016
bf739e4
Used a singleton to generate empty DataFrame
Oct 12, 2016
20e53e8
Minor docs change
Oct 12, 2016
266d5ff
Added missing bracket
Oct 12, 2016
257fa86
Added documentation for parameter df of class Column
Oct 12, 2016
0691c32
Fixed docs issues
Oct 13, 2016
445407c
Removed commented code
Oct 13, 2016
1ace2e5
Fixed docs issues. Renamed parameter n as num for method head to have…
Oct 14, 2016
2bfb8a6
Style fixes
Oct 14, 2016
e0bba0a
Fixed docs issues
Oct 14, 2016
b25deb1
Cosmetic changes
Oct 19, 2016
76061ad
More cosmetics
Oct 19, 2016
ed1b382
More cosmetics
Oct 21, 2016
1338d71
Fixed style issues
Oct 21, 2016
c0f1906
Added test for Columns with no parent DataFrame
Oct 25, 2016
777aee3
Moved head documentation to generics.R
Oct 25, 2016
619f23b
Moved documentation for show method to generics.R
Oct 25, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 9 additions & 40 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,25 +223,7 @@ setMethod("showDF",
cat(s)
})

#' show
#'
#' Print class and type information of a Spark object.
#'
#' @param object a Spark object. Can be a SparkDataFrame, Column, GroupedData, WindowSpec.
#'
#' @family SparkDataFrame functions
#' @rdname show
#' @aliases show,SparkDataFrame-method
#' @name show
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' show(df)
#'}
#' @note show(SparkDataFrame) since 1.4.0
setMethod("show", "SparkDataFrame",
function(object) {
cols <- lapply(dtypes(object), function(l) {
Expand Down Expand Up @@ -1166,28 +1148,7 @@ setMethod("take",
collect(limited)
})

#' Head
#'
#' Return the first \code{num} rows of a SparkDataFrame as a R data.frame. If \code{num} is not
#' specified, then head() returns the first 6 rows as with R data.frame.
#'
#' @param x a SparkDataFrame.
#' @param num the number of rows to return. Default is 6.
#' @return A data.frame.
#'
#' @family SparkDataFrame functions
#' @aliases head,SparkDataFrame-method
#' @rdname head
#' @name head
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' head(df)
#' }
#' @note head since 1.4.0
setMethod("head",
signature(x = "SparkDataFrame"),
function(x, num = 6L) {
Expand Down Expand Up @@ -1679,7 +1640,7 @@ setMethod("foreachPartition",
############################## SELECT ##################################

getColumn <- function(x, c) {
column(callJMethod(x@sdf, "col", c))
column(callJMethod(x@sdf, "col", c), x)
}

#' @param name name of a Column (without being wrapped by \code{""}).
Expand Down Expand Up @@ -3321,3 +3282,11 @@ setMethod("randomSplit",
}
sapply(sdfs, dataFrame)
})

# A global singleton for an empty SparkR DataFrame.
#
# Lazily builds a SparkDataFrame from data.frame(0) on first call and caches
# it as .sparkREnv$.emptyDataFrame; every later call returns the cached
# object, so only one JVM-side DataFrame is ever created for this purpose.
# NOTE(review): exists() is called without inherits = FALSE, so it also
# searches enclosing environments of .sparkREnv — presumably the name only
# ever lives in .sparkREnv itself; confirm.
getEmptySparkRDataFrame <- function() {
if (!exists(".emptyDataFrame", envir = .sparkREnv)) {
.sparkREnv$.emptyDataFrame <- as.DataFrame(data.frame(0))
}
.sparkREnv$.emptyDataFrame
}
78 changes: 54 additions & 24 deletions R/pkg/R/column.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
setOldClass("SparkDataFrame")
setClassUnion("SparkDataFrameOrNull", c("SparkDataFrame", "NULL"))

# Column Class

Expand All @@ -29,38 +31,66 @@ setOldClass("jobj")
#' @rdname column
#'
#' @slot jc reference to JVM SparkDataFrame column
#' @slot df the parent SparkDataFrame of the Column object
#' @export
#' @note Column since 1.4.0
setClass("Column",
slots = list(jc = "jobj"))
slots = list(jc = "jobj", df = "SparkDataFrameOrNull"))

#' A set of operations working with SparkDataFrame columns
#' @rdname columnfunctions
#' @name columnfunctions
NULL

setMethod("initialize", "Column", function(.Object, jc) {
setMethod("initialize", "Column", function(.Object, jc, df) {
.Object@jc <- jc

# Some Column objects don't have any referencing DataFrame. In such case, df will be NULL.
if (missing(df)) {
df <- NULL
}
.Object@df <- df
.Object
})

#' @rdname show
#' @aliases show,Column-method
#' @note show(Column) since 1.4.0
#
# Print up to MAX_ELEMENTS values of a Column by fetching them through
# head(). An "orphan" Column (no parent SparkDataFrame) yields length-0
# head() output; in that case only the column's JVM toString() name and an
# "<Empty column>" marker are printed. When exactly MAX_ELEMENTS values come
# back, a trailing note warns the display may be truncated.
setMethod("show", signature = "Column", function(object) {
  MAX_ELEMENTS <- 20
  head.df <- head(object, MAX_ELEMENTS)

  if (length(head.df) == 0) {
    colname <- callJMethod(object@jc, "toString")
    cat(paste0(colname, "\n"))
    cat(paste0("<Empty column>\n"))
  } else {
    show(head.df)
  }
  if (length(head.df) == MAX_ELEMENTS) {
    cat(paste0("\b...\nDisplaying up to ", as.character(MAX_ELEMENTS), " elements only."))
  }
})

#' @rdname head
#' @param x a Column.
#' @param num the number of values to return. Default is 6.
#' @return A vector of up to \code{num} values of the column, or
#'   \code{character(0)} when the Column has no parent SparkDataFrame.
setMethod("head",
signature(x = "Column"),
function(x, num = 6L) {
# An "orphan" Column carries no parent SparkDataFrame, so there is no
# data to retrieve; return an empty vector instead of failing.
if (is.null(x@df)) {
character(0)
} else {
# Materialize the column via its parent DataFrame, then drop the
# single-column data.frame down to a plain vector with [, 1].
head(select(x@df, x), num)[, 1]
}
})

#' @rdname column
#' @name column
#' @param df the parent SparkDataFrame. This is used to retrieve the contents of the column through method head.
#' @aliases column,jobj-method
setMethod("column",
signature(x = "jobj"),
function(x) {
new("Column", x)
})

#' @rdname show
#' @name show
#' @aliases show,Column-method
#' @export
#' @note show(Column) since 1.4.0
setMethod("show", "Column",
function(object) {
cat("Column", callJMethod(object@jc, "toString"), "\n")
function(x, df) {
if (missing(df)) {
df <- NULL
}
new("Column", jc = x, df = df)
})

operators <- list(
Expand Down Expand Up @@ -93,15 +123,15 @@ createOperator <- function(op) {
callJMethod(e1@jc, operators[[op]], e2)
}
}
column(jc)
column(jc, e1@df)
})
}

createColumnFunction1 <- function(name) {
setMethod(name,
signature(x = "Column"),
function(x) {
column(callJMethod(x@jc, name))
column(callJMethod(x@jc, name), x@df)
})
}

Expand All @@ -113,7 +143,7 @@ createColumnFunction2 <- function(name) {
data <- data@jc
}
jc <- callJMethod(x@jc, name, data)
column(jc)
column(jc, x@df)
})
}

Expand Down Expand Up @@ -148,7 +178,7 @@ setMethod("alias",
signature(object = "Column"),
function(object, data) {
if (is.character(data)) {
column(callJMethod(object@jc, "as", data))
column(callJMethod(object@jc, "as", data), object@df)
} else {
stop("data should be character")
}
Expand All @@ -170,7 +200,7 @@ setMethod("alias",
setMethod("substr", signature(x = "Column"),
function(x, start, stop) {
jc <- callJMethod(x@jc, "substr", as.integer(start - 1), as.integer(stop - start + 1))
column(jc)
column(jc, x@df)
})

#' startsWith
Expand Down Expand Up @@ -227,7 +257,7 @@ setMethod("between", signature(x = "Column"),
function(x, bounds) {
if (is.vector(bounds) && length(bounds) == 2) {
jc <- callJMethod(x@jc, "between", bounds[1], bounds[2])
column(jc)
column(jc, x@df)
} else {
stop("bounds should be a vector of lower and upper bounds")
}
Expand All @@ -253,7 +283,7 @@ setMethod("cast",
signature(x = "Column"),
function(x, dataType) {
if (is.character(dataType)) {
column(callJMethod(x@jc, "cast", dataType))
column(callJMethod(x@jc, "cast", dataType), x@df)
} else {
stop("dataType should be character")
}
Expand All @@ -278,7 +308,7 @@ setMethod("%in%",
signature(x = "Column"),
function(x, table) {
jc <- callJMethod(x@jc, "isin", as.list(table))
return(column(jc))
column(jc, x@df)
})

#' otherwise
Expand All @@ -300,5 +330,5 @@ setMethod("otherwise",
function(x, value) {
value <- if (class(value) == "Column") { value@jc } else { value }
jc <- callJMethod(x@jc, "otherwise", value)
column(jc)
column(jc, x@df)
})
Loading