From d4b7e372fd86ec06de772143df4653404e48ee48 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Fri, 17 Mar 2017 15:47:22 -0700
Subject: [PATCH 01/14] I'm pretty sure we can just add anaconda to the path
 because it's got python2.7 right now

---
 dev/run-tests-jenkins | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins
index e79accf9e987a..7bd1181a7c894 100755
--- a/dev/run-tests-jenkins
+++ b/dev/run-tests-jenkins
@@ -25,4 +25,5 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)"
 cd "$FWDIR"
 
+export PATH=/home/anaconda/bin:$PATH
 exec python -u ./dev/run-tests-jenkins.py "$@"
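
Patch 01 assumes the Jenkins workers have an Anaconda install at /home/anaconda whose bin/ directory provides python2.7; prepending it to PATH changes which interpreter the wrapper's `exec python` line picks up. A quick sanity check of that assumption on a worker might look like this (a sketch only; the expected outputs depend on the worker's actual Anaconda install):

    # Confirm which python wins after the prepend (path as assumed by the patch).
    export PATH=/home/anaconda/bin:$PATH
    command -v python    # expected: /home/anaconda/bin/python
    python --version     # expected: Python 2.7.x from Anaconda
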
From 050c0914a18e5d334633e345b1ba9f07f5777cba Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Fri, 17 Mar 2017 15:47:39 -0700
Subject: [PATCH 02/14] Oh right and check for python2.7

---
 python/run-tests.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/run-tests.py b/python/run-tests.py
index 53a0aef229b08..b2e50435bb192 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -111,9 +111,9 @@ def run_individual_python_test(test_name, pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python2.6", "python3.4", "pypy"] if which(x)]
-    if "python2.6" not in python_execs:
-        LOGGER.warning("Not testing against `python2.6` because it could not be found; falling"
+    python_execs = [x for x in ["python2.7", "python3.4", "pypy"] if which(x)]
+    if "python2.7" not in python_execs:
+        LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
                        " back to `python` instead")
         python_execs.insert(0, "python")
     return python_execs

From 3f0e0ef716221812d9c38e5bf2e488604d9c6c8f Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Fri, 17 Mar 2017 15:47:58 -0700
Subject: [PATCH 03/14] Start looking at using conda for the pip
 installability tests

---
 dev/run-pip-tests | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index af1b1feb70cd1..c238f537dcf13 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -35,9 +35,14 @@ function delete_virtualenv() {
 }
 trap delete_virtualenv EXIT
 
+USE_CONDA=0
 # Some systems don't have pip or virtualenv - in those cases our tests won't work.
 if ! hash virtualenv 2>/dev/null; then
-  echo "Missing virtualenv skipping pip installability tests."
+  echo "Missing virtualenv, checking for conda."
+  USE_CONDA=1
+fi
+if USE_CONDA && ! hash virtualenv 2>/dev/null; then
+  echo "Missing virtualenv & conda, skipping pip installability tests"
   exit 0
 fi
 if ! hash pip 2>/dev/null; then
@@ -76,7 +81,11 @@ for python in "${PYTHON_EXECS[@]}"; do
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
     mkdir -p "$VIRTUALENV_PATH"
-    virtualenv --python=$python "$VIRTUALENV_PATH"
+    if USE_CONDA; then
+      conda create -y -p "$VIRTUALENV_PATH" $python pandas
+    else
+      virtualenv --python=$python "$VIRTUALENV_PATH"
+    fi
     source "$VIRTUALENV_PATH"/bin/activate
     # Upgrade pip & friends
     pip install --upgrade pip pypandoc wheel

From 480846024c39dd4a8a03d1826e65bc4eeb48770a Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Sun, 19 Mar 2017 17:05:18 -0700
Subject: [PATCH 04/14] Update run pip tests to try and work with conda
 environments as well

---
 dev/run-pip-tests | 55 ++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index c238f537dcf13..42c5f7c4df5e6 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -35,13 +35,27 @@ function delete_virtualenv() {
 }
 trap delete_virtualenv EXIT
 
-USE_CONDA=0
+set -x
+PYTHON_EXECS=()
 # Some systems don't have pip or virtualenv - in those cases our tests won't work.
-if ! hash virtualenv 2>/dev/null; then
-  echo "Missing virtualenv, checking for conda."
-  USE_CONDA=1
-fi
-if USE_CONDA && ! hash virtualenv 2>/dev/null; then
+if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then
+  echo "virtualenv installed - using. Note if this is a conda virtual env you may wish to set USE_CONDA"
+  # Figure out which Python execs we should test pip installation with
+  if hash python2 2>/dev/null; then
+    # We do this since we are testing with virtualenv and the default virtual env python
+    # is in /usr/bin/python
+    PYTHON_EXECS+=('python2')
+  elif hash python 2>/dev/null; then
+    # If python2 isn't installed fallback to python if available
+    PYTHON_EXECS+=('python')
+  fi
+  if hash python3 2>/dev/null; then
+    PYTHON_EXECS+=('python3')
+  fi
+elif hash conda 2>/dev/null; then
+  echo "Using conda virtual environments"
+  PYTHON_EXECS=('2.7.12' '3')
+else
   echo "Missing virtualenv & conda, skipping pip installability tests"
   exit 0
 fi
@@ -50,20 +64,6 @@ if ! hash pip 2>/dev/null; then
   exit 0
 fi
 
-# Figure out which Python execs we should test pip installation with
-PYTHON_EXECS=()
-if hash python2 2>/dev/null; then
-  # We do this since we are testing with virtualenv and the default virtual env python
-  # is in /usr/bin/python
-  PYTHON_EXECS+=('python2')
-elif hash python 2>/dev/null; then
-  # If python2 isn't installed fallback to python if available
-  PYTHON_EXECS+=('python')
-fi
-if hash python3 2>/dev/null; then
-  PYTHON_EXECS+=('python3')
-fi
-
 # Determine which version of PySpark we are building for archive name
 PYSPARK_VERSION=$(python -c "exec(open('python/pyspark/version.py').read());print __version__")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
@@ -80,22 +80,23 @@ for python in "${PYTHON_EXECS[@]}"; do
     echo "Using $VIRTUALENV_BASE for virtualenv"
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
-    mkdir -p "$VIRTUALENV_PATH"
-    if USE_CONDA; then
-      conda create -y -p "$VIRTUALENV_PATH" $python pandas
+    if [ -n "$USE_CONDA" ]; then
+      conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip
     else
+      mkdir -p "$VIRTUALENV_PATH"
       virtualenv --python=$python "$VIRTUALENV_PATH"
     fi
     source "$VIRTUALENV_PATH"/bin/activate
-    # Upgrade pip & friends
-    pip install --upgrade pip pypandoc wheel
-    pip install numpy # Needed so we can verify mllib imports
+    # Upgrade pip & friends if using virtual env
+    if [ ! -n "$USE_CONDA" ]; then
+      pip install --upgrade pip pypandoc wheel numpy
+    fi
     echo "Creating pip installable source dist"
     cd "$FWDIR"/python
     # Delete the egg info file if it exists, this can cache the setup file.
     rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
-    $python setup.py sdist
+    python setup.py sdist
 
     echo "Installing dist into virtual env"
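
The conda and virtualenv code paths that patch 04 settles on take different kinds of "python" arguments: virtualenv wants an interpreter name it can resolve on PATH, while conda takes a version spec for its python package, which is why PYTHON_EXECS switches from executable names to version strings in the conda branch. A minimal illustration of the two invocations (the /tmp prefixes are placeholders, not paths the script uses):

    # virtualenv: the PYTHON_EXECS entries are executables resolved via PATH.
    virtualenv --python=python3 /tmp/demo-venv

    # conda: the entries are version specs for the python package; -p names the env prefix.
    conda create -y -p /tmp/demo-conda-env python=3.5 numpy pandas pip
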
From 267837cd741b9a1d50842e485c20033aa9b77f8f Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Sun, 19 Mar 2017 21:43:40 -0700
Subject: [PATCH 05/14] Remove set -x

---
 dev/run-pip-tests | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 42c5f7c4df5e6..7ba72dd720013 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -35,7 +35,6 @@ function delete_virtualenv() {
 }
 trap delete_virtualenv EXIT
 
-set -x
 PYTHON_EXECS=()
 # Some systems don't have pip or virtualenv - in those cases our tests won't work.
 if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then

From bc4f673a901ed218c6a5d1b7afac622a85a20eac Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 21 Mar 2017 20:41:16 -0700
Subject: [PATCH 06/14] Add USE_CONDA=1 and then put in set -x for debugging

---
 dev/run-pip-tests | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 7ba72dd720013..af6ed03f8cfbb 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -54,6 +54,7 @@ if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then
 elif hash conda 2>/dev/null; then
   echo "Using conda virtual environments"
   PYTHON_EXECS=('2.7.12' '3')
+  USE_CONDA=1
 else
   echo "Missing virtualenv & conda, skipping pip installability tests"
   exit 0
@@ -72,6 +73,7 @@ PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall "
 PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
               "pip install $PIP_OPTIONS -e python/")
 
+set -x
 for python in "${PYTHON_EXECS[@]}"; do
   for install_command in "${PIP_COMMANDS[@]}"; do
     echo "Testing pip installation with python $python"
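
Since the script only tests whether USE_CONDA is non-empty, the conda path can also be forced from the caller's environment, which is what patch 04's log message suggests for virtualenvs that are themselves conda-backed. A usage sketch, assuming it is run from the root of a Spark checkout:

    # Force the conda code path even when virtualenv is on PATH.
    USE_CONDA=1 ./dev/run-pip-tests
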
-n "$USE_CONDA" ]; then elif hash conda 2>/dev/null; then echo "Using conda virtual enviroments" PYTHON_EXECS=('2.7.12' '3') + USE_CONDA=1 else echo "Missing virtualenv & conda, skipping pip installability tests" exit 0 @@ -72,6 +73,7 @@ PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST" "pip install $PIP_OPTIONS -e python/") +set -x for python in "${PYTHON_EXECS[@]}"; do for install_command in "${PIP_COMMANDS[@]}"; do echo "Testing pip installation with python $python" From a722140c895c0066502d0f4d6c7e120cb2092d1e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 22 Mar 2017 11:31:51 -0700 Subject: [PATCH 07/14] Use python3 for determining version number, move set -x above determining version for debugging --- dev/run-pip-tests | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index af6ed03f8cfbb..28e6d1ca23257 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -64,8 +64,9 @@ if ! hash pip 2>/dev/null; then exit 0 fi +set -x # Determine which version of PySpark we are building for archive name -PYSPARK_VERSION=$(python -c "exec(open('python/pyspark/version.py').read());print __version__") +PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print __version__") PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz" # The pip install options we use for all the pip commands PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " @@ -73,7 +74,6 @@ PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST" "pip install $PIP_OPTIONS -e python/") -set -x for python in "${PYTHON_EXECS[@]}"; do for install_command in "${PIP_COMMANDS[@]}"; do echo "Testing pip installation with python $python" From 57a1f6e27132d66d2f5e7d1915d7c9e53eb86471 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 22 Mar 2017 14:58:26 -0700 Subject: [PATCH 08/14] Use python3 print --- dev/run-pip-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 28e6d1ca23257..88da95f3b7567 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -66,7 +66,7 @@ fi set -x # Determine which version of PySpark we are building for archive name -PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print __version__") +PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)") PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz" # The pip install options we use for all the pip commands PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " From 6f33633348b9bf735074f2596e6f130b5d8dba04 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 22 Mar 2017 17:29:40 -0700 Subject: [PATCH 09/14] Change how we activate/deactivate for conda envs --- dev/run-pip-tests | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 88da95f3b7567..16e2d1ae6db72 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -83,11 +83,12 @@ for python in "${PYTHON_EXECS[@]}"; do rm -rf "$VIRTUALENV_PATH" if [ -n "$USE_CONDA" ]; then conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip + source activate "$VIRTUALENV_PATH" else mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" + source "$VIRTUALENV_PATH"/bin/activate fi - source "$VIRTUALENV_PATH"/bin/activate # Upgrade pip & friends if using virutal env 
if [ ! -n "USE_CONDA" ]; then pip install --upgrade pip pypandoc wheel numpy @@ -123,6 +124,13 @@ for python in "${PYTHON_EXECS[@]}"; do cd "$FWDIR" + # conda / virtualenv enviroments need to be deactivated differently + if [ -n "$USE_CONDA" ]; then + source deactivate + else + deactivate + fi + done done From 16d2773f4154a7b2324e9083c2f7d2b61da2ac35 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 22 Mar 2017 22:14:26 -0700 Subject: [PATCH 10/14] I'm lazy for conda packaging lets just look at python 3, also install a current setuptools --- dev/run-pip-tests | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 16e2d1ae6db72..f7add08ad7d4f 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -53,7 +53,7 @@ if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then fi elif hash conda 2>/dev/null; then echo "Using conda virtual enviroments" - PYTHON_EXECS=('2.7.12' '3') + PYTHON_EXECS=('3') USE_CONDA=1 else echo "Missing virtualenv & conda, skipping pip installability tests" @@ -82,7 +82,7 @@ for python in "${PYTHON_EXECS[@]}"; do VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python rm -rf "$VIRTUALENV_PATH" if [ -n "$USE_CONDA" ]; then - conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip + conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools source activate "$VIRTUALENV_PATH" else mkdir -p "$VIRTUALENV_PATH" From 8fe8adab7abc09dc595c2ee724b615a12107e7a6 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 23 Mar 2017 19:50:17 -0700 Subject: [PATCH 11/14] Explicitly force python 3.5 for testing, install pypandoc for conda, remove ml.stat --- dev/run-pip-tests | 3 ++- python/setup.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index f7add08ad7d4f..8a3ab8adb4ac1 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -53,7 +53,7 @@ if hash virtualenv 2>/dev/null && [ ! 
-n "$USE_CONDA" ]; then fi elif hash conda 2>/dev/null; then echo "Using conda virtual enviroments" - PYTHON_EXECS=('3') + PYTHON_EXECS=('3.5') USE_CONDA=1 else echo "Missing virtualenv & conda, skipping pip installability tests" @@ -84,6 +84,7 @@ for python in "${PYTHON_EXECS[@]}"; do if [ -n "$USE_CONDA" ]; then conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools source activate "$VIRTUALENV_PATH" + conda install -y -c conda-forge pypandoc else mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" diff --git a/python/setup.py b/python/setup.py index 47eab98e0f7b3..f50035435e26b 100644 --- a/python/setup.py +++ b/python/setup.py @@ -167,7 +167,6 @@ def _supports_symlinks(): 'pyspark.ml', 'pyspark.ml.linalg', 'pyspark.ml.param', - 'pyspark.ml.stat', 'pyspark.sql', 'pyspark.streaming', 'pyspark.bin', From f99f222f2979a72f44520442e594d090200bf4ec Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 25 Mar 2017 15:04:02 -0700 Subject: [PATCH 12/14] Don't install pypandoc since pandoc isn't available on all the workers just yet --- dev/run-pip-tests | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 8a3ab8adb4ac1..bdb14a43c80b0 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -84,7 +84,6 @@ for python in "${PYTHON_EXECS[@]}"; do if [ -n "$USE_CONDA" ]; then conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools source activate "$VIRTUALENV_PATH" - conda install -y -c conda-forge pypandoc else mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" From 5db1bc7a0fe1c511af1e2a9b26909c593bc01316 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 27 Mar 2017 12:12:41 -0700 Subject: [PATCH 13/14] Remove backtick usage in run-test-jenkins found during shellcheck of changed shellscripts --- dev/run-tests-jenkins | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 7bd1181a7c894..f41f1ac79e381 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -22,7 +22,7 @@ # Environment variables are populated by the code here: #+ https://github.com/jenkinsci/ghprb-plugin/blob/master/src/main/java/org/jenkinsci/plugins/ghprb/GhprbTrigger.java#L139 -FWDIR="$(cd "`dirname $0`"/..; pwd)" +FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" export PATH=/home/anaconda/bin:$PATH From a7bf53f1b0f3c7104d23a0c1153b15eddceb9169 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 28 Mar 2017 18:01:57 -0700 Subject: [PATCH 14/14] Remove debugging --- dev/run-pip-tests | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index bdb14a43c80b0..d51dde12a03c5 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -64,7 +64,6 @@ if ! hash pip 2>/dev/null; then exit 0 fi -set -x # Determine which version of PySpark we are building for archive name PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)") PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"