From 7c1d95b496784fd7db82e91e207323c071ccda4d Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Fri, 9 Dec 2016 16:07:25 -0800 Subject: [PATCH 1/5] changes to support env override url --- R/pkg/R/install.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 69b0a523b84e4..5691f8abdca86 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -182,16 +182,24 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - packageRemotePath <- paste0( - file.path(mirrorUrl, version, packageName), ".tgz") - fmt <- "Downloading %s for Hadoop %s from:\n- %s" - msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageRemotePath) - message(msg) + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { + packageRemotePath <- releaseUrl + baseUrl <- releaseUrl + message("Downloading from alternate URL:\n- %s", packageRemotePath) + } else { + packageRemotePath <- paste0( + file.path(mirrorUrl, version, packageName), ".tgz") + baseUrl <- mirrorUrl + fmt <- "Downloading %s for Hadoop %s from:\n- %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageRemotePath) + message(msg) + } isFail <- tryCatch(download.file(packageRemotePath, packageLocalPath), error = function(e) { - message(sprintf("Fetch failed from %s", mirrorUrl)) + message(sprintf("Fetch failed from %s", baseUrl)) print(e) TRUE }) From 32a010afb611130cad1b12b5d13f50cde95ba74b Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 10 Dec 2016 23:08:46 -0800 Subject: [PATCH 2/5] change to setup path correctly --- R/pkg/R/install.R | 54 +++++++++++++++----------- R/pkg/R/utils.R | 11 +++++- R/pkg/inst/tests/testthat/test_utils.R | 9 +++++ 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 5691f8abdca86..afeaab0c36308 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -79,19 +79,28 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, dir.create(localDir, recursive = TRUE) } - packageLocalDir <- file.path(localDir, packageName) - if (overwrite) { message(paste0("Overwrite = TRUE: download and overwrite the tar file", "and Spark package directory if they exist.")) } + releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") + if (releaseUrl != "") { + packageName <- basenameSansExtFromUrl(releaseUrl) + } + + packageLocalDir <- file.path(localDir, packageName) + # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { - fmt <- "%s for Hadoop %s found, with SPARK_HOME set to %s" - msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) - message(msg) + if (releaseUrl != "") { + message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) + } else { + fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageLocalDir) + message(msg) + } Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) } else { @@ -104,7 +113,12 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (tarExists && !overwrite) { message("tar file found.") } else { - robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) + if (releaseUrl != "") { + message("Downloading from alternate URL:\n- ", releaseUrl) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", mirrorUrl)) + } else { + robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) + } } message(sprintf("Installing to %s", localDir)) @@ -182,24 +196,18 @@ getPreferredMirror <- function(version, packageName) { } directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { - releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") - if (releaseUrl != "") { - packageRemotePath <- releaseUrl - baseUrl <- releaseUrl - message("Downloading from alternate URL:\n- %s", packageRemotePath) - } else { - packageRemotePath <- paste0( - file.path(mirrorUrl, version, packageName), ".tgz") - baseUrl <- mirrorUrl - fmt <- "Downloading %s for Hadoop %s from:\n- %s" - msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageRemotePath) - message(msg) - } + packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") + fmt <- "Downloading %s for Hadoop %s from:\n- %s" + msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), + packageRemotePath) + message(msg) + downloadUrl(packageRemotePath, packageLocalPath, paste0("Fetch failed from ", mirrorUrl)) +} - isFail <- tryCatch(download.file(packageRemotePath, packageLocalPath), +downloadUrl <- function(remotePath, localPath, errorMessage) { + isFail <- tryCatch(download.file(remotePath, localPath), error = function(e) { - message(sprintf("Fetch failed from %s", baseUrl)) + message(errorMessage) print(e) TRUE }) diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 098c0e3e31e95..352c8d9dba779 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -841,7 +841,7 @@ captureJVMException <- function(e, method) { # # @param inputData a list of rows, with each row a list # @return data.frame with raw columns as lists -rbindRaws <- function(inputData){ +rbindRaws <- function(inputData) { row1 <- inputData[[1]] rawcolumns <- ("raw" == sapply(row1, class)) @@ -851,3 +851,12 @@ rbindRaws <- function(inputData){ out[!rawcolumns] <- lapply(out[!rawcolumns], unlist) out } + +# Get basename without extension from URL +basenameSansExtFromUrl <- function(url) { + # split by '/' + splits <- unlist(strsplit(url, "^.+/")) + last <- tail(splits, 1) + # strip extension by the last '.' + sub("([^.]+)\\.[[:alnum:]]+$", "\\1", last) +} diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index 607c407f04f97..4e74cae501dde 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -228,4 +228,13 @@ test_that("varargsToStrEnv", { expect_warning(varargsToStrEnv(1, 2, 3, 4), "Unnamed arguments ignored: 1, 2, 3, 4.") }) +test_that("basenameSansExtFromUrl", { + x <- paste0("http://people.apache.org/~pwendell/spark-nightly/spark-branch-2.1-bin/spark-2.1.1-", + "SNAPSHOT-2016_12_09_11_08-eb2d9bf-bin/spark-2.1.1-SNAPSHOT-bin-hadoop2.7.tgz") + y <- paste0("http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-bin/spark-2.1.0-", + "bin-hadoop2.4-without-hive.tgz") + expect_equal(x, "spark-2.1.1-SNAPSHOT-bin-hadoop2.7") + expect_equal(y, "spark-2.1.0-bin-hadoop2.4-without-hive") +}) + sparkR.session.stop() From b5d58b1f4ce3dc9dd9ea2108679c5693235ba329 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 10 Dec 2016 23:38:51 -0800 Subject: [PATCH 3/5] missed in tests --- R/pkg/inst/tests/testthat/test_utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index 4e74cae501dde..90eae100c7394 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -233,8 +233,8 @@ test_that("basenameSansExtFromUrl", { "SNAPSHOT-2016_12_09_11_08-eb2d9bf-bin/spark-2.1.1-SNAPSHOT-bin-hadoop2.7.tgz") y <- paste0("http://people.apache.org/~pwendell/spark-releases/spark-2.1.0-rc2-bin/spark-2.1.0-", "bin-hadoop2.4-without-hive.tgz") - expect_equal(x, "spark-2.1.1-SNAPSHOT-bin-hadoop2.7") - expect_equal(y, "spark-2.1.0-bin-hadoop2.4-without-hive") + expect_equal(basenameSansExtFromUrl(x), "spark-2.1.1-SNAPSHOT-bin-hadoop2.7") + expect_equal(basenameSansExtFromUrl(y), "spark-2.1.0-bin-hadoop2.4-without-hive") }) sparkR.session.stop() From c68e3ee7efc8bf8295fb466c30e80e28e97b88fb Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sun, 11 Dec 2016 23:10:59 -0800 Subject: [PATCH 4/5] remove compression extension --- R/pkg/R/install.R | 2 +- R/pkg/R/utils.R | 7 +++++-- R/pkg/inst/tests/testthat/test_utils.R | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index afeaab0c36308..097b7ad4bea08 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -115,7 +115,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, } else { if (releaseUrl != "") { message("Downloading from alternate URL:\n- ", releaseUrl) - downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", mirrorUrl)) + downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) } else { robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) } diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 352c8d9dba779..1283449f3592a 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -857,6 +857,9 @@ basenameSansExtFromUrl <- function(url) { # split by '/' splits <- unlist(strsplit(url, "^.+/")) last <- tail(splits, 1) - # strip extension by the last '.' - sub("([^.]+)\\.[[:alnum:]]+$", "\\1", last) + # this is from file_path_sans_ext + # first, remove any compression extension + filename <- sub("[.](gz|bz2|xz)$", "", last) + # then, strip extension by the last '.' + sub("([^.]+)\\.[[:alnum:]]+$", "\\1", filename) } diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index 90eae100c7394..e6cbe9caa9fa2 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -235,6 +235,8 @@ test_that("basenameSansExtFromUrl", { "bin-hadoop2.4-without-hive.tgz") expect_equal(basenameSansExtFromUrl(x), "spark-2.1.1-SNAPSHOT-bin-hadoop2.7") expect_equal(basenameSansExtFromUrl(y), "spark-2.1.0-bin-hadoop2.4-without-hive") + z <- paste0("http://people.apache.org/~pwendell/spark-releases/spark-2.1.0--hive.tar.gz") + expect_equal(basenameSansExtFromUrl(z), "spark-2.1.0--hive") }) sparkR.session.stop() From 3e5034d18aa1edfe77310a8b52bccd2cd30ef130 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sun, 11 Dec 2016 23:11:42 -0800 Subject: [PATCH 5/5] minor --- R/pkg/inst/tests/testthat/test_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R index e6cbe9caa9fa2..c87524842876e 100644 --- a/R/pkg/inst/tests/testthat/test_utils.R +++ b/R/pkg/inst/tests/testthat/test_utils.R @@ -235,7 +235,7 @@ test_that("basenameSansExtFromUrl", { "bin-hadoop2.4-without-hive.tgz") expect_equal(basenameSansExtFromUrl(x), "spark-2.1.1-SNAPSHOT-bin-hadoop2.7") expect_equal(basenameSansExtFromUrl(y), "spark-2.1.0-bin-hadoop2.4-without-hive") - z <- paste0("http://people.apache.org/~pwendell/spark-releases/spark-2.1.0--hive.tar.gz") + z <- "http://people.apache.org/~pwendell/spark-releases/spark-2.1.0--hive.tar.gz" expect_equal(basenameSansExtFromUrl(z), "spark-2.1.0--hive") })