Skip to content

Commit

Permalink
[SPARK-11781][SPARKR] SparkR has problem in inferring type of raw type.
Browse files Browse the repository at this point in the history
Author: Sun Rui <rui.sun@intel.com>

Closes apache#9769 from sun-rui/SPARK-11781.
  • Loading branch information
Sun Rui authored and shivaram committed Nov 29, 2015
1 parent c793d2d commit cc7a1bc
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 32 deletions.
34 changes: 19 additions & 15 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,8 @@ setMethod("dim",
setMethod("collect",
signature(x = "DataFrame"),
function(x, stringsAsFactors = FALSE) {
names <- columns(x)
ncol <- length(names)
dtypes <- dtypes(x)
ncol <- length(dtypes)
if (ncol <= 0) {
# empty data.frame with 0 columns and 0 rows
data.frame()
Expand All @@ -817,25 +817,29 @@ setMethod("collect",
# data of complex type can be held. But getting a cell from a column
# of list type returns a list instead of a vector. So for columns of
# non-complex type, append them as vector.
#
# For columns of complex type, be careful to access them.
# Get a column of complex type returns a list.
# Get a cell from a column of complex type returns a list instead of a vector.
col <- listCols[[colIndex]]
colName <- dtypes[[colIndex]][[1]]
if (length(col) <= 0) {
df[[names[colIndex]]] <- col
df[[colName]] <- col
} else {
# TODO: more robust check on column of primitive types
vec <- do.call(c, col)
if (class(vec) != "list") {
df[[names[colIndex]]] <- vec
colType <- dtypes[[colIndex]][[2]]
# Note that "binary" columns behave like complex types.
if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") {
vec <- do.call(c, col)
stopifnot(class(vec) != "list")
df[[colName]] <- vec
} else {
# For columns of complex type, be careful to access them.
# Get a column of complex type returns a list.
# Get a cell from a column of complex type returns a list instead of a vector.
df[[names[colIndex]]] <- col
}
df[[colName]] <- col
}
}
}
df
}
df
}
})
})

#' Limit
#'
Expand Down
2 changes: 1 addition & 1 deletion R/pkg/R/SQLContext.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ infer_type <- function(x) {
})
type <- Reduce(paste0, type)
type <- paste0("struct<", substr(type, 1, nchar(type) - 1), ">")
} else if (length(x) > 1) {
} else if (length(x) > 1 && type != "binary") {
paste0("array<", infer_type(x[[1]]), ">")
} else {
type
Expand Down
37 changes: 21 additions & 16 deletions R/pkg/R/types.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,30 @@
# values are equivalent R types. This is stored in an environment to allow for
# more efficient look up (environments use hashmaps).
PRIMITIVE_TYPES <- as.environment(list(
"byte"="integer",
"tinyint"="integer",
"smallint"="integer",
"integer"="integer",
"bigint"="numeric",
"float"="numeric",
"double"="numeric",
"decimal"="numeric",
"string"="character",
"binary"="raw",
"boolean"="logical",
"timestamp"="POSIXct",
"date"="Date"))
"tinyint" = "integer",
"smallint" = "integer",
"int" = "integer",
"bigint" = "numeric",
"float" = "numeric",
"double" = "numeric",
"decimal" = "numeric",
"string" = "character",
"binary" = "raw",
"boolean" = "logical",
"timestamp" = "POSIXct",
"date" = "Date",
# following types are not SQL types returned by dtypes(). They are listed here for usage
# by checkType() in schema.R.
# TODO: refactor checkType() in schema.R.
"byte" = "integer",
"integer" = "integer"
))

# The complex data types. These do not have any direct mapping to R's types.
COMPLEX_TYPES <- list(
"map"=NA,
"array"=NA,
"struct"=NA)
"map" = NA,
"array" = NA,
"struct" = NA)

# The full list of data types.
DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
Expand Down
6 changes: 6 additions & 0 deletions R/pkg/inst/tests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ test_that("infer types and check types", {
expect_equal(infer_type(e), "map<string,integer>")

expect_error(checkType("map<integer,integer>"), "Key type in a map must be string or character")

expect_equal(infer_type(as.raw(c(1, 2, 3))), "binary")
})

test_that("structType and structField", {
Expand Down Expand Up @@ -250,6 +252,10 @@ test_that("create DataFrame from list or data.frame", {

mtcarsdf <- createDataFrame(sqlContext, mtcars)
expect_equivalent(collect(mtcarsdf), mtcars)

bytes <- as.raw(c(1, 2, 3))
df <- createDataFrame(sqlContext, list(list(bytes)))
expect_equal(collect(df)[[1]][[1]], bytes)
})

test_that("create DataFrame with different data types", {
Expand Down

0 comments on commit cc7a1bc

Please sign in to comment.