Skip to content

Commit

Permalink
[SW-1211] Automatically download H2O Wheel during build (#1139)
Browse files Browse the repository at this point in the history
(cherry picked from commit ce6a134)
  • Loading branch information
jakubhava committed Apr 18, 2019
1 parent 5c19d86 commit e3e7bd7
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 47 deletions.
12 changes: 4 additions & 8 deletions doc/src/site/sphinx/devel/build.rst
Expand Up @@ -3,23 +3,19 @@ Build Sparkling Water

Download and install Spark, and point the environment variable ``SPARK_HOME`` to the installation path. Then use the provided ``gradlew`` to build the project.

In order to build the whole project, including PySparkling, one of the following properties needs to be set:

- ``H2O_HOME`` - should point to location of the local H2O project directory
- ``H2O_PYTHON_WHEEL`` - should point to H2O Python Wheel

If you are not sure which property to set, just run:
In order to build the whole project, including PySparkling, run:

.. code:: bash
./gradlew build
The commands that set the ``H2O_PYTHON_WHEEL`` will be shown on your console and can be copy-pasted into your terminal. After setting the property, the build needs to be rerun.

- To avoid running tests, use the ``-x test -x integTest`` or the ``-x check`` option.

- To build only a specific module, use, for example, ``./gradlew :sparkling-water-examples:build``.

- To build and test a specific module, use, for example, ``./gradlew :sparkling-water-examples:check``.

Sparkling Water SUBST_SW_VERSION is built with Scala 2.11.

Note: If you would like to build against a custom H2O Python package, specify the ``H2O_HOME`` environment variable. The variable
should point to the root directory of the H2O-3 repository. This is mainly used for integration testing with H2O-3.
13 changes: 3 additions & 10 deletions jenkins/sparklingWaterPipeline.groovy
Expand Up @@ -12,7 +12,6 @@ def call(params, body) {
"SPARK_HOME=${env.WORKSPACE}/spark",
"HADOOP_CONF_DIR=/etc/hadoop/conf",
"MASTER=yarn-client",
"H2O_PYTHON_WHEEL=${env.WORKSPACE}/private/h2o.whl",
"H2O_EXTENDED_JAR=${env.WORKSPACE}/assembly-h2o/private/extended/h2odriver-extended.jar",
// Properties used in case we are building against specific H2O version
"BUILD_HADOOP=true",
Expand Down Expand Up @@ -41,7 +40,6 @@ def call(params, body) {
"SPARK_HOME=${env.WORKSPACE}/spark",
"HADOOP_CONF_DIR=/etc/hadoop/conf",
"MASTER=yarn-client",
"H2O_PYTHON_WHEEL=${env.WORKSPACE}/private/h2o.whl",
"H2O_EXTENDED_JAR=${env.WORKSPACE}/assembly-h2o/private/extended/h2odriver-extended.jar",
"JAVA_HOME=/usr/lib/jvm/java-8-oracle/",
"PATH=/usr/lib/jvm/java-8-oracle/bin:${PATH}",
Expand Down Expand Up @@ -176,14 +174,9 @@ def prepareSparklingWaterEnvironment() {
# When extending from specific jar the jar has already the desired name
${getGradleCommand(config)} -q :sparkling-water-examples:build -x check -PdoExtend extendJar
fi
else
# Download h2o-python client, save it in private directory
# and export variable H2O_PYTHON_WHEEL driving building of pysparkling package
mkdir -p ${env.WORKSPACE}/private/
curl -s `${env.WORKSPACE}/gradlew -Dorg.gradle.internal.launcher.welcomeMessageEnabled=false -q printH2OWheelPackage` > ${env.WORKSPACE}/private/h2o.whl
if [ ${config.backendMode} = external ]; then
else if [ ${config.backendMode} = external ]; then
cp `${getGradleCommand(config)} -q :sparkling-water-examples:build -x check -PdoExtend extendJar -PdownloadH2O=${config.driverHadoopVersion}` ${env.H2O_EXTENDED_JAR}
fi
fi
fi
"""
Expand Down Expand Up @@ -262,7 +255,7 @@ def pyUnitTests() {
}
}
}

stage('QA: Python Unit Tests 2.7 - ' + config.backendMode) {
withDocker(config) {
if (config.runPyUnitTests.toBoolean()) {
Expand Down
50 changes: 21 additions & 29 deletions py/build.gradle
Expand Up @@ -17,6 +17,7 @@ ext {
pkgDir = file("$buildDir/pkg")
distDir = file("$buildDir/dist")
condaDir = file("$buildDir/conda/h2o_pysparkling_${sparkMajorVersion}")
wheelFile = file("${rootDir}/.gradle/wheels/h2o-${h2oMajorVersion}.${h2oBuild}-py2.py3-none-any.whl")
}

// Define the environment required to run tests
Expand All @@ -34,7 +35,7 @@ python {
pip "colorama:0.3.8"
pip "numpy:1.16.2"
pip "pyspark:${sparkVersion}"
envPath "${rootDir.toString()}/.gradle/python/${getPythonVersion()}/${sparkVersion}"
envPath "${rootDir}/.gradle/python/${getPythonVersion()}/${sparkVersion}"
}

configurations {
Expand Down Expand Up @@ -78,34 +79,32 @@ task createVersionFile {
//
// Represents a location of H2O Wheel Package
//
def h2oPythonWheelPackageLocation = "http://h2o-release.s3.amazonaws.com/h2o/${h2oMajorName != "master" ? "rel-${h2oMajorName}" : "master"}/${h2oBuild}/Python/h2o-${h2oMajorVersion}.${h2oBuild}-py2.py3-none-any.whl"
final String h2oPythonWheelPackageLocation = "http://h2o-release.s3.amazonaws.com/h2o/${h2oMajorName != "master" ? "rel-${h2oMajorName}" : "master"}/${h2oBuild}/Python/h2o-${h2oMajorVersion}.${h2oBuild}-py2.py3-none-any.whl"


//
// Downloads the H2O Python wheel from `url` into `out`, unless `out` already exists
// (an existing file is treated as a previously cached download and is left untouched).
//
// The payload is first streamed into a sibling "<out>.part" file and moved to `out`
// only after the whole transfer has finished. An interrupted or failed download
// therefore never leaves a truncated wheel at `out`, which later builds would
// otherwise mistake for a complete one because of the existence check above.
//
// url - location of the wheel to fetch
// out - destination file; missing parent directories are created
//
// Throws IOException (propagated from the stream or the move) when the download fails.
//
static def downloadH2OWheel(String url, File out) {
    if (out.exists()) {
        return
    }

    File parentDir = out.parentFile
    if (parentDir != null && !parentDir.exists()) {
        parentDir.mkdirs()
    }

    // Fully-qualified java.nio names are used below so that the build script
    // does not need any additional import statements.
    File partial = new File(out.path + ".part")
    try {
        new URL(url).withInputStream { input -> partial.withOutputStream { it << input } }
        java.nio.file.Files.move(
                partial.toPath(),
                out.toPath(),
                java.nio.file.StandardCopyOption.REPLACE_EXISTING)
    } finally {
        // After a successful move the partial file no longer exists and this is a no-op;
        // after a failure it removes the incomplete download.
        partial.delete()
    }
}

//
// Initial task checking setup of all properties required
// by Python build
//
task checkPythonEnv {
doLast {
def H2O_HOME = System.getenv("H2O_HOME")
def H2O_PYTHON_WHEEL = System.getenv("H2O_PYTHON_WHEEL")
def H2O_EXTENDED_JAR = System.getenv("H2O_EXTENDED_JAR")


if (H2O_HOME == null && H2O_PYTHON_WHEEL == null) {
throw new InvalidUserDataException("""
Both properties H2O_HOME and H2O_PYTHON_WHEEL were not found!
Please specify:
- H2O_HOME to point to H2O Git repo version ${h2oMajorVersion}.${h2oBuild}
or
- H2O_PYTHON_WHEEL to point to downloaded H2O Python Wheel package version ${h2oMajorVersion}.${h2oBuild}
For example:
mkdir -p \$(pwd)/private/
curl -s ${h2oPythonWheelPackageLocation} > \$(pwd)/private/h2o.whl
export H2O_PYTHON_WHEEL=\$(pwd)/private/h2o.whl
""")
if (H2O_HOME == null) {
downloadH2OWheel(h2oPythonWheelPackageLocation, wheelFile)
} else {
logger.info("Using \"H2O_HOME\" to specify H2O python package location!")
}

// if the spark.ext.h2o.backend.cluster.mode is set to external, then
Expand All @@ -119,10 +118,6 @@ export H2O_EXTENDED_JAR=`./gradlew -q extendJar -PdownloadH2O`
""")
}


if (H2O_HOME != null && H2O_PYTHON_WHEEL != null) {
logger.info("Both \"H2O_HOME\" and \"H2O_PYTHON_WHEEL\" properties are set. Using \"H2O_HOME\"!")
}
}
}

Expand Down Expand Up @@ -187,7 +182,7 @@ def copyH2OFromH2OHome(String h2oHome) {
}
}

def copyH2OFromH2OWheel(String h2oPythonWheel) {
def copyH2OFromH2OWheel(File h2oPythonWheel) {
copy {
from zipTree(h2oPythonWheel)
into file("${project.pkgDir}")
Expand All @@ -203,15 +198,12 @@ def copyH2OFromH2OWheel(String h2oPythonWheel) {
task distPython(type: Zip, dependsOn: [checkPythonEnv, configurations.sparklingWaterAssemblyJar]) {
doFirst {
def H2O_HOME = System.getenv("H2O_HOME")
def H2O_PYTHON_WHEEL = System.getenv("H2O_PYTHON_WHEEL")

// if both properties are set, give precedence to H2O_HOME
if (H2O_HOME != null && H2O_PYTHON_WHEEL != null) {
copyH2OFromH2OHome(H2O_HOME)
} else if (H2O_HOME != null) {
if (H2O_HOME != null) {
copyH2OFromH2OHome(H2O_HOME)
} else if (H2O_PYTHON_WHEEL != null) {
copyH2OFromH2OWheel(H2O_PYTHON_WHEEL)
} else {
copyH2OFromH2OWheel(wheelFile)
}
// Copy basic python setup
copyPySetup()
Expand Down

0 comments on commit e3e7bd7

Please sign in to comment.