From 7a9cf6718ae2b7d266ba2e67923fa7fe8ccf8fae Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 15 Jul 2020 13:40:51 +0900
Subject: [PATCH] Debug flaky pip installation test failure

---
 dev/run-pip-tests | 21 ++++++++----
 dev/run-tests.py  | 86 +++++++++++++++++++++++------------------------
 2 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 5fd0be7476f29..05374984169da 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -63,7 +63,7 @@ fi
 PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
 # The pip install options we use for all the pip commands
-PIP_OPTIONS="--user --upgrade --no-cache-dir --force-reinstall "
+PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall"
 # Test both regular user and edit/dev install modes.
 PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
               "pip install $PIP_OPTIONS -e python/")
@@ -75,13 +75,19 @@ for python in "${PYTHON_EXECS[@]}"; do
     echo "Using $VIRTUALENV_BASE for virtualenv"
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
+    USE_CONDA_CMD=0
     if [ -n "$USE_CONDA" ]; then
       if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
         # See also https://github.com/conda/conda/issues/7980
+        USE_CONDA_CMD=1
         source "$CONDA_PREFIX/etc/profile.d/conda.sh"
       fi
       conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
-      conda activate "$VIRTUALENV_PATH" || (echo "Falling back to 'source activate'" && source activate "$VIRTUALENV_PATH")
+      if [ $USE_CONDA_CMD == 1 ]; then
+        conda activate "$VIRTUALENV_PATH"
+      else
+        source activate "$VIRTUALENV_PATH"
+      fi
     else
       mkdir -p "$VIRTUALENV_PATH"
       virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -96,8 +102,8 @@ for python in "${PYTHON_EXECS[@]}"; do
     cd "$FWDIR"/python
     # Delete the egg info file if it exists, this can cache the setup file.
     rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
-    # Also, delete the symbolic link if exists. It can be left over from the previous editable mode installation.
-    python3 -c "from distutils.sysconfig import get_python_lib; import os; f = os.path.join(get_python_lib(), 'pyspark.egg-link'); os.unlink(f) if os.path.isfile(f) else 0"
+    # Also delete .local in case it was already installed via --user.
+    rm -rf "$(python3 -m site --user-site)/pyspark.egg-link" || echo "No existing PySpark installation at user site-packages."
     python3 setup.py sdist


@@ -116,7 +122,6 @@ for python in "${PYTHON_EXECS[@]}"; do
     cd /

     echo "Run basic sanity check on pip installed version with spark-submit"
-    export PATH="$(python3 -m site --user-base)/bin:$PATH"
     spark-submit "$FWDIR"/dev/pip-sanity-check.py
     echo "Run basic sanity check with import based"
     python3 "$FWDIR"/dev/pip-sanity-check.py
@@ -127,7 +132,11 @@ for python in "${PYTHON_EXECS[@]}"; do

     # conda / virtualenv environments need to be deactivated differently
     if [ -n "$USE_CONDA" ]; then
-      conda deactivate || (echo "Falling back to 'source deactivate'" && source deactivate)
+      if [ $USE_CONDA_CMD == 1 ]; then
+        conda deactivate
+      else
+        source deactivate
+      fi
     else
       deactivate
     fi
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 8e29f89c3a0d0..08b9c57c220fd 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -711,61 +711,61 @@ def main():
         setup_test_environ(test_environ)

     should_run_java_style_checks = False
-    if not should_only_test_modules:
-        # license checks
-        run_apache_rat_checks()
-
-        # style checks
-        if not changed_files or any(f.endswith(".scala")
-                                    or f.endswith("scalastyle-config.xml")
-                                    for f in changed_files):
-            run_scala_style_checks(extra_profiles)
-        if not changed_files or any(f.endswith(".java")
-                                    or f.endswith("checkstyle.xml")
-                                    or f.endswith("checkstyle-suppressions.xml")
-                                    for f in changed_files):
-            # Run SBT Checkstyle after the build to prevent a side-effect to the build.
-            should_run_java_style_checks = True
-        if not changed_files or any(f.endswith("lint-python")
-                                    or f.endswith("tox.ini")
-                                    or f.endswith(".py")
-                                    for f in changed_files):
-            run_python_style_checks()
-        if not changed_files or any(f.endswith(".R")
-                                    or f.endswith("lint-r")
-                                    or f.endswith(".lintr")
-                                    for f in changed_files):
-            run_sparkr_style_checks()
-
-        # determine if docs were changed and if we're inside the amplab environment
-        # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
-        # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
-        #     build_spark_documentation()
-
-        if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
-            run_build_tests()
+    # if not should_only_test_modules:
+    #     # license checks
+    #     run_apache_rat_checks()
+    #
+    #     # style checks
+    #     if not changed_files or any(f.endswith(".scala")
+    #                                 or f.endswith("scalastyle-config.xml")
+    #                                 for f in changed_files):
+    #         run_scala_style_checks(extra_profiles)
+    #     if not changed_files or any(f.endswith(".java")
+    #                                 or f.endswith("checkstyle.xml")
+    #                                 or f.endswith("checkstyle-suppressions.xml")
+    #                                 for f in changed_files):
+    #         # Run SBT Checkstyle after the build to prevent a side-effect to the build.
+    #         should_run_java_style_checks = True
+    #     if not changed_files or any(f.endswith("lint-python")
+    #                                 or f.endswith("tox.ini")
+    #                                 or f.endswith(".py")
+    #                                 for f in changed_files):
+    #         run_python_style_checks()
+    #     if not changed_files or any(f.endswith(".R")
+    #                                 or f.endswith("lint-r")
+    #                                 or f.endswith(".lintr")
+    #                                 for f in changed_files):
+    #         run_sparkr_style_checks()
+    #
+    #     # determine if docs were changed and if we're inside the amplab environment
+    #     # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
+    #     # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
+    #     #     build_spark_documentation()
+    #
+    #     if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
+    #         run_build_tests()

     # spark build
     build_apache_spark(build_tool, extra_profiles)

-    # backwards compatibility checks
-    if build_tool == "sbt":
-        # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(extra_profiles)
-        # Since we did not build assembly/package before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+    # # backwards compatibility checks
+    # if build_tool == "sbt":
+    #     # Note: compatibility tests only supported in sbt for now
+    #     detect_binary_inop_with_mima(extra_profiles)
+    #     # Since we did not build assembly/package before running dev/mima, we need to
+    #     # do it here because the tests still rely on it; see SPARK-13294 for details.
+    #     build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

     # run the test suites
-    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
+    # run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)

     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:
         # We only run PySpark tests with coverage report in one specific job with
         # Spark master with SBT in Jenkins.
         is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
-        run_python_tests(
-            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
+        # run_python_tests(
+        #     modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
         run_python_packaging_tests()
     if any(m.should_run_r_tests for m in test_modules):
         run_sparkr_tests()
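
Note on the conda activation change in dev/run-pip-tests: `conda activate` only
exists as a shell function after "$CONDA_PREFIX/etc/profile.d/conda.sh" has been
sourced (see conda/conda#7980), and the old `|| ( ... && source activate ... )`
fallback ran `source activate` inside a `( ... )` subshell, so any environment it
set was discarded when the subshell exited; the new USE_CONDA_CMD branch keeps the
activation in the current shell. A minimal sketch of the subshell pitfall (the
ACTIVATED variable is illustrative only; the if/else mirrors the patch):

    # Changes made inside ( ... ) do not survive the subshell:
    (export ACTIVATED=1)
    echo "${ACTIVATED:-unset}"               # prints "unset"

    # Branching in the current shell instead, as the patch does:
    if [ $USE_CONDA_CMD == 1 ]; then
      conda activate "$VIRTUALENV_PATH"      # conda >= 4.4, after sourcing conda.sh
    else
      source activate "$VIRTUALENV_PATH"     # legacy activation script
    fi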
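
Note on dropping --user from PIP_OPTIONS: pip now installs into whatever
virtualenv/conda environment is active, so the removed
`export PATH="$(python3 -m site --user-base)/bin:$PATH"` is no longer needed for
spark-submit to be found. A stale editable install from an earlier --user run can
still shadow the fresh copy, which is what the new user-site cleanup guards
against. A quick check for such leftovers (a sketch; only the egg-link path is
taken from the patch):

    # Where `pip install --user` puts packages for this interpreter:
    python3 -m site --user-site

    # A leftover `pip install --user -e` leaves an egg-link behind:
    test -f "$(python3 -m site --user-site)/pyspark.egg-link" \
      && echo "stale user-site editable install of pyspark found"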