From 416e71af4d26e67afb715ea1d625341cdea4873d Mon Sep 17 00:00:00 2001 From: "Oscar D. Lara Yejas" Date: Thu, 10 Mar 2016 17:10:23 -0800 Subject: [PATCH] [SPARK-13327][SPARKR] Added parameter validations for colnames<- Author: Oscar D. Lara Yejas Author: Oscar D. Lara Yejas Closes #11220 from olarayej/SPARK-13312-3. --- R/pkg/R/DataFrame.R | 22 +++++++++++++++++++++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 11 +++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 3b7b8250b94f7..50655e9382325 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -303,8 +303,28 @@ setMethod("colnames", #' @rdname columns #' @name colnames<- setMethod("colnames<-", - signature(x = "DataFrame", value = "character"), + signature(x = "DataFrame"), function(x, value) { + + # Check parameter integrity + if (class(value) != "character") { + stop("Invalid column names.") + } + + if (length(value) != ncol(x)) { + stop( + "Column names must have the same length as the number of columns in the dataset.") + } + + if (any(is.na(value))) { + stop("Column names cannot be NA.") + } + + # Check if the column names have . in it + if (any(regexec(".", value, fixed=TRUE)[[1]][1] != -1)) { + stop("Colum names cannot contain the '.' symbol.") + } + sdf <- callJMethod(x@sdf, "toDF", as.list(value)) dataFrame(sdf) }) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 236bae6bded25..cad5766812aed 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -691,6 +691,17 @@ test_that("names() colnames() set the column names", { colnames(df) <- c("col3", "col4") expect_equal(names(df)[1], "col3") + expect_error(colnames(df) <- c("sepal.length", "sepal_width"), + "Colum names cannot contain the '.' symbol.") + expect_error(colnames(df) <- c(1, 2), "Invalid column names.") + expect_error(colnames(df) <- c("a"), + "Column names must have the same length as the number of columns in the dataset.") + expect_error(colnames(df) <- c("1", NA), "Column names cannot be NA.") + + # Note: if this test is broken, remove check for "." character on colnames<- method + irisDF <- suppressWarnings(createDataFrame(sqlContext, iris)) + expect_equal(names(irisDF)[1], "Sepal_Length") + # Test base::colnames base::names m2 <- cbind(1, 1:4) expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2"))