From 9a41d47dfa5b778744972163a53b2324a2bc54a2 Mon Sep 17 00:00:00 2001 From: Shubhanshu Mishra Date: Tue, 19 Jan 2016 14:13:28 -0600 Subject: [PATCH 1/2] Fixes SPARK-12910: R version for installing sparkR I have added an option for passing the R_HOME path to the `install-dev.sh` file which fixes SPARK-12910. The code works and has been tested. --- R/README.md | 9 +++++++++ R/install-dev.sh | 11 +++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/R/README.md b/R/README.md index 005f56da1670c..99182e555fe24 100644 --- a/R/README.md +++ b/R/README.md @@ -1,6 +1,15 @@ # R on Spark SparkR is an R package that provides a light-weight frontend to use Spark from R. +### Installing sparkR + +Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. +By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by giving the full path of the `$R_HOME` as the first argument to the install-dev.sh script. +Example: +``` +# where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript +./install-dev.sh /home/username/R +``` ### SparkR development diff --git a/R/install-dev.sh b/R/install-dev.sh index 4972bb9217072..a8efa86a20e57 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -35,12 +35,19 @@ LIB_DIR="$FWDIR/lib" mkdir -p $LIB_DIR pushd $FWDIR > /dev/null +if [ ! 
-z "$1" ] + then + R_HOME="$1/bin" + else + R_HOME="$(dirname $(which R))" +fi +echo "USING R_HOME = $R_HOME" # Generate Rd files if devtools is installed -Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' +"$R_HOME/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' # Install SparkR to $LIB_DIR -R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ +"$R_HOME/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ # Zip the SparkR package so that it can be distributed to worker nodes on YARN cd $LIB_DIR From 70782abbda7b46e9feb2952c5914ad76817b12f7 Mon Sep 17 00:00:00 2001 From: Shubhanshu Mishra Date: Tue, 19 Jan 2016 16:54:16 -0600 Subject: [PATCH 2/2] Fixes SPARK-12910: R version for installing sparkR Added the option for using the environment variable R_HOME which is used by `install-dev.sh` script for creating sparkR libraries. Fixes SPARK-12910. The code works and has been tested. --- R/README.md | 5 +++-- R/install-dev.sh | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/README.md b/R/README.md index 99182e555fe24..bb3464ba9955d 100644 --- a/R/README.md +++ b/R/README.md @@ -4,11 +4,12 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R ### Installing sparkR Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. -By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by giving the full path of the `$R_HOME` as the first argument to the install-dev.sh script. +By default the above script uses the system wide installation of R. 
However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` to the full path of the base directory where R is installed, before running the install-dev.sh script. Example: ``` # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript -./install-dev.sh /home/username/R +export R_HOME=/home/username/R +./install-dev.sh ``` ### SparkR development diff --git a/R/install-dev.sh b/R/install-dev.sh index a8efa86a20e57..befd413c4cd26 100755 --- a/R/install-dev.sh +++ b/R/install-dev.sh @@ -35,19 +35,19 @@ LIB_DIR="$FWDIR/lib" mkdir -p $LIB_DIR pushd $FWDIR > /dev/null -if [ ! -z "$1" ] +if [ ! -z "$R_HOME" ] then - R_HOME="$1/bin" + R_SCRIPT_PATH="$R_HOME/bin" else - R_HOME="$(dirname $(which R))" + R_SCRIPT_PATH="$(dirname $(which R))" fi echo "USING R_HOME = $R_HOME" # Generate Rd files if devtools is installed -"$R_HOME/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' +"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }' # Install SparkR to $LIB_DIR -"$R_HOME/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ +"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/ # Zip the SparkR package so that it can be distributed to worker nodes on YARN cd $LIB_DIR