Skip to content

Commit

Permalink
resolve merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
yanboliang committed Aug 19, 2015
2 parents 5d2f75b + 865a3df commit 879474d
Show file tree
Hide file tree
Showing 600 changed files with 15,521 additions and 6,007 deletions.
1 change: 1 addition & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ INDEX
gen-java.*
.*avpr
org.apache.spark.sql.sources.DataSourceRegister
.*parquet
5 changes: 0 additions & 5 deletions R/install-dev.bat
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
rem Ensure the private R library directory exists before installing into it.
MKDIR %SPARK_HOME%\R\lib

rem Build and install the SparkR package from R\pkg into the private library.
R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\

rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
rem (jar cfM: create archive, no manifest; run from R\lib so paths are relative)
pushd %SPARK_HOME%\R\lib
%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
popd
3 changes: 2 additions & 1 deletion R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SparkR
Type: Package
Title: R frontend for Spark
Version: 1.5.0
Date: 2013-09-09
Author: The Apache Software Foundation
Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Expand Down Expand Up @@ -29,6 +29,7 @@ Collate:
'client.R'
'context.R'
'deserialize.R'
'functions.R'
'mllib.R'
'serialize.R'
'sparkR.R'
Expand Down
52 changes: 51 additions & 1 deletion R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -87,44 +87,87 @@ exportMethods("abs",
"alias",
"approxCountDistinct",
"asc",
"ascii",
"asin",
"atan",
"atan2",
"avg",
"base64",
"between",
"bin",
"bitwiseNOT",
"cast",
"cbrt",
"ceil",
"ceiling",
"concat",
"contains",
"cos",
"cosh",
"count",
"countDistinct",
"crc32",
"datediff",
"dayofmonth",
"dayofyear",
"desc",
"endsWith",
"exp",
"explode",
"expm1",
"factorial",
"first",
"floor",
"getField",
"getItem",
"greatest",
"hex",
"hour",
"hypot",
"initcap",
"isNaN",
"isNotNull",
"isNull",
"last",
"last_day",
"least",
"length",
"levenshtein",
"like",
"lit",
"log",
"log10",
"log1p",
"log2",
"lower",
"ltrim",
"max",
"md5",
"mean",
"min",
"minute",
"month",
"months_between",
"n",
"n_distinct",
"nanvl",
"negate",
"otherwise",
"pmod",
"quarter",
"reverse",
"rint",
"rlike",
"round",
"rtrim",
"second",
"sha1",
"sign",
"signum",
"sin",
"sinh",
"size",
"soundex",
"sqrt",
"startsWith",
"substr",
Expand All @@ -134,7 +177,14 @@ exportMethods("abs",
"tanh",
"toDegrees",
"toRadians",
"to_date",
"trim",
"unbase64",
"unhex",
"upper",
"weekofyear",
"when",
"year")

exportClasses("GroupedData")
exportMethods("agg")
Expand Down
83 changes: 8 additions & 75 deletions R/pkg/R/column.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ operators <- list(
)
# Column methods generated from a single unary template (no extra arguments).
column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
# Column methods that take one additional argument (pattern, field name, etc.).
column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
# Column methods that delegate to a same-named static function on
# org.apache.spark.sql.functions ("ceiling" and "sign" are remapped at
# registration time to the JVM names "ceil" and "signum").
functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
               "first", "last", "lower", "upper", "sumDistinct",
               "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
               "expm1", "floor", "log", "log10", "log1p", "rint", "sign",
               "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
# Binary math functions whose second operand may be a Column or a scalar.
binary_mathfunctions <- c("atan2", "hypot")

createOperator <- function(op) {
setMethod(op,
Expand Down Expand Up @@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) {
})
}

# Register a unary Column method `name` that delegates to the static JVM
# function of the same name on org.apache.spark.sql.functions.
createStaticFunction <- function(name) {
  setMethod(name, signature(x = "Column"), function(x) {
    # Spark SQL uses different names for two of the R-side functions.
    jvmName <- name
    if (jvmName == "ceiling") jvmName <- "ceil"
    if (jvmName == "sign") jvmName <- "signum"
    jc <- callJStatic("org.apache.spark.sql.functions", jvmName, x@jc)
    column(jc)
  })
}

# Register a binary math Column method (e.g. atan2, hypot) that delegates to
# the same-named static function on org.apache.spark.sql.functions.
#
# The second argument may be either a Column or a plain numeric scalar; a
# Column is unwrapped to its Java reference before the JVM call.
createBinaryMathfunctions <- function(name) {
  setMethod(name,
            signature(y = "Column"),
            function(y, x) {
              # inherits() is the robust class test; class(x) == "Column"
              # breaks for objects with multiple classes.
              if (inherits(x, "Column")) {
                x <- x@jc
              }
              jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
              column(jc)
            })
}

# Register every generated Column method at package load time.
#
# NOTE(review): the scraped source had diff-navigation text embedded in this
# function body; reconstructed from the dispatch tables defined above —
# confirm against the original file.
createMethods <- function() {
  for (op in names(operators)) {
    createOperator(op)
  }
  for (name in column_functions1) {
    createColumnFunction1(name)
  }
  for (name in column_functions2) {
    createColumnFunction2(name)
  }
  for (x in functions) {
    createStaticFunction(x)
  }
  for (name in binary_mathfunctions) {
    createBinaryMathfunctions(name)
  }
}

createMethods()
Expand Down Expand Up @@ -243,44 +204,16 @@ setMethod("%in%",
return(column(jc))
})

#' Approx Count Distinct
#'
#' Computes the approximate number of distinct items in a group by delegating
#' to the JVM-side approxCountDistinct with relative standard deviation rsd.
#'
#' @rdname column
#' @return the approximate number of distinct items in a group.
setMethod("approxCountDistinct",
          signature(x = "Column"),
          function(x, rsd = 0.95) {
            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
            column(jc)
          })

#' otherwise
#'
#' If values in the specified column are null, returns the value.
#' Can be used in conjunction with `when` to specify a default value for expressions.
#'
#' @rdname column
setMethod("otherwise",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # Unwrap a Column argument to its Java reference; pass any other
            # value through unchanged. Plain if/else with inherits() replaces
            # the scalar ifelse(class(value) == "Column", ...) pattern, which
            # strips attributes and misfires on multi-class objects.
            if (inherits(value, "Column")) {
              value <- value@jc
            }
            jc <- callJMethod(x@jc, "otherwise", value)
            column(jc)
          })

#' @rdname column
#' @aliases countDistinct
setMethod("n_distinct", signature(x = "Column"), function(x, ...) {
  # dplyr-style alias: identical to countDistinct(x, ...).
  countDistinct(x, ...)
})

#' @rdname column
#' @aliases count
setMethod("n", signature(x = "Column"), function(x) {
  # dplyr-style alias: identical to count(x).
  count(x)
})
16 changes: 10 additions & 6 deletions R/pkg/R/deserialize.R
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,14 @@ readRow <- function(inputCon) {

# Take a single column as Array[Byte] and deserialize it into an atomic vector
# Take a single column as Array[Byte] and deserialize it into an atomic vector
#
# The merge scrape had left both the old unguarded body and the new guarded
# body in this function, reading every value twice; only the guarded version
# is kept. Returns an empty vector when numRows == 0.
readCol <- function(inputCon, numRows) {
  if (numRows > 0) {
    # sapply can not work with POSIXlt, so concatenate with do.call(c, ...)
    # to preserve classes such as POSIXct across elements.
    do.call(c, lapply(seq_len(numRows), function(x) {
      value <- readObject(inputCon)
      # Replace NULL with NA so we can coerce to vectors
      if (is.null(value)) NA else value
    }))
  } else {
    vector()
  }
}
Loading

0 comments on commit 879474d

Please sign in to comment.