From 99109c4b592c57d1ba00a002b3d4b71ece10f954 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Sun, 23 Aug 2015 03:06:33 -0700
Subject: [PATCH 1/4] R: Add support for subsetting + tests

---
 R/pkg/R/DataFrame.R              | 10 ++++++++++
 R/pkg/inst/tests/test_sparkSQL.R | 16 ++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 895603235011e..13558bc270475 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -945,6 +945,13 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
             select(x, j)
           })
 
+#' @rdname select
+setMethod("[", signature(x = "DataFrame"),
+          function(x, i, j, ...) {
+            filtered <- filter(x, i)
+            filtered[, j]
+          })
+
 #' Select
 #'
 #' Selects a set of columns with names or Column expressions.
@@ -963,8 +970,11 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
 #'   # Columns can also be selected using `[[` and `[`
 #'   df[[2]] == df[["age"]]
 #'   df[,2] == df[,"age"]
+#'   df[,c("name", "age")]
 #'   # Similar to R data frames columns can also be selected using `$`
 #'   df$age
+#'   # It can also be subset on rows and Columns
+#'   df[df$name == "Smith", 2]
 #' }
 setMethod("select", signature(x = "DataFrame", col = "character"),
           function(x, col, ...) {
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 556b8c5447054..19a776b96a2c4 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -587,6 +587,22 @@ test_that("select with column", {
   expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
 })
 
+test_that("subsetting", {
+  df <- jsonFile(sqlContext, jsonPath)
+  filtered <- df[df$age > 20,]
+  expect_equal(count(filtered), 1)
+  expect_equal(columns(filtered), c("name", "age"))
+  expect_equal(collect(filtered)$name, "Andy")
+  df2 <- df[df$age == 19,1]
+  expect_is(df2, "DataFrame")
+  expect_equal(count(df2), 1)
+  expect_equal(columns(df2), c("name"))
+  expect_equal(collect(df2)$name, "Justin")
+  df3 <- df[df$age > 20, 2]
+  expect_equal(count(df3), 1)
+  expect_equal(columns(df3), c("age"))
+}}
+
 test_that("selectExpr() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPath)
   selected <- selectExpr(df, "age * 2")

From 42e881a4e4c7d3e669400b44ec24c4af1e10f6da Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Mon, 24 Aug 2015 00:46:26 -0700
Subject: [PATCH 2/4] add support for d[d$something>0,], more tests

---
 R/pkg/R/DataFrame.R              | 16 ++++++++++++----
 R/pkg/inst/tests/test_sparkSQL.R | 13 ++++++++++---
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 13558bc270475..7f535f3490290 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -922,8 +922,10 @@ setMethod("$<-", signature(x = "DataFrame"),
             x
           })
 
+setClassUnion("numericOrcharacter", c("numeric", "character"))
+
 #' @rdname select
-setMethod("[[", signature(x = "DataFrame"),
+setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
           function(x, i) {
             if (is.numeric(i)) {
               cols <- columns(x)
@@ -946,10 +948,16 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
           })
 
 #' @rdname select
-setMethod("[", signature(x = "DataFrame"),
+setMethod("[", signature(x = "DataFrame", i = "Column"),
           function(x, i, j, ...) {
+            # Handling i as "character" is possible but seems confusing and is not required; see
+            # https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
             filtered <- filter(x, i)
-            filtered[, j]
+            if (!missing(j)) {
+              filtered[, j]
+            } else {
+              filtered
+            }
           })
 
 #' Select
@@ -974,7 +982,7 @@ setMethod("[", signature(x = "DataFrame"),
 #'   # Similar to R data frames columns can also be selected using `$`
 #'   df$age
 #'   # It can also be subset on rows and Columns
-#'   df[df$name == "Smith", 2]
+#'   df[df$name == "Smith", c(1,2)]
 #' }
 setMethod("select", signature(x = "DataFrame", col = "character"),
           function(x, col, ...) {
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 19a776b96a2c4..003b9778fcadc 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -588,12 +588,13 @@ test_that("select with column", {
 })
 
 test_that("subsetting", {
-  df <- jsonFile(sqlContext, jsonPath)
+  # jsonFile may return columns in any order, so select them explicitly to pin the order
+  df <- select(jsonFile(sqlContext, jsonPath), "name", "age")
   filtered <- df[df$age > 20,]
   expect_equal(count(filtered), 1)
   expect_equal(columns(filtered), c("name", "age"))
   expect_equal(collect(filtered)$name, "Andy")
-  df2 <- df[df$age == 19,1]
+  df2 <- df[df$age == 19, 1]
   expect_is(df2, "DataFrame")
   expect_equal(count(df2), 1)
   expect_equal(columns(df2), c("name"))
@@ -601,7 +602,13 @@ test_that("subsetting", {
   df3 <- df[df$age > 20, 2]
   expect_equal(count(df3), 1)
   expect_equal(columns(df3), c("age"))
-}}
+  df4 <- df[df$age %in% c(19, 30), 1:2]
+  expect_equal(count(df4), 2)
+  expect_equal(columns(df4), c("name", "age"))
+  df5 <- df[df$age %in% c(19), c(1,2)]
+  expect_equal(count(df5), 1)
+  expect_equal(columns(df5), c("name", "age"))
+})
 
 test_that("selectExpr() on a DataFrame", {
   df <- jsonFile(sqlContext, jsonPath)

From 16e0ba375ac12788478002ce96ca74206c2d437a Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Mon, 24 Aug 2015 00:47:15 -0700
Subject: [PATCH 3/4] update example

---
 R/pkg/R/DataFrame.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 7f535f3490290..c91d05ef0e604 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -983,6 +983,7 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
 #'   df$age
 #'   # It can also be subset on rows and Columns
 #'   df[df$name == "Smith", c(1,2)]
+#'   df[df$age %in% c(19, 30), 1:2]
 #' }
 setMethod("select", signature(x = "DataFrame", col = "character"),
           function(x, col, ...) {

From 3578ba29ef1bef3fead1232278fb23dd28d62c0f Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Mon, 24 Aug 2015 21:39:36 -0700
Subject: [PATCH 4/4] updating test formatting from feedback

---
 R/pkg/inst/tests/test_sparkSQL.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 003b9778fcadc..ee48a3dc0cc05 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -594,17 +594,21 @@ test_that("subsetting", {
   expect_equal(count(filtered), 1)
   expect_equal(columns(filtered), c("name", "age"))
   expect_equal(collect(filtered)$name, "Andy")
+
   df2 <- df[df$age == 19, 1]
   expect_is(df2, "DataFrame")
   expect_equal(count(df2), 1)
   expect_equal(columns(df2), c("name"))
   expect_equal(collect(df2)$name, "Justin")
+
   df3 <- df[df$age > 20, 2]
   expect_equal(count(df3), 1)
   expect_equal(columns(df3), c("age"))
+
   df4 <- df[df$age %in% c(19, 30), 1:2]
   expect_equal(count(df4), 2)
   expect_equal(columns(df4), c("name", "age"))
+
   df5 <- df[df$age %in% c(19), c(1,2)]
   expect_equal(count(df5), 1)
   expect_equal(columns(df5), c("name", "age"))