From 7a9cf6718ae2b7d266ba2e67923fa7fe8ccf8fae Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Wed, 15 Jul 2020 13:40:51 +0900
Subject: [PATCH] Debug flaky pip installation test failure

---
 dev/run-pip-tests | 21 ++++++++----
 dev/run-tests.py  | 86 +++++++++++++++++++++++------------------------
 2 files changed, 58 insertions(+), 49 deletions(-)

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index 5fd0be7476f29..05374984169da 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -63,7 +63,7 @@ fi
 PYSPARK_VERSION=$(python3 -c "exec(open('python/pyspark/version.py').read());print(__version__)")
 PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz"
 # The pip install options we use for all the pip commands
-PIP_OPTIONS="--user --upgrade --no-cache-dir --force-reinstall "
+PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall"
 # Test both regular user and edit/dev install modes.
 PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST"
               "pip install $PIP_OPTIONS -e python/")
@@ -75,13 +75,19 @@ for python in "${PYTHON_EXECS[@]}"; do
     echo "Using $VIRTUALENV_BASE for virtualenv"
     VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python
     rm -rf "$VIRTUALENV_PATH"
+    USE_CONDA_CMD=0
     if [ -n "$USE_CONDA" ]; then
       if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then
         # See also https://github.com/conda/conda/issues/7980
+        USE_CONDA_CMD=1
         source "$CONDA_PREFIX/etc/profile.d/conda.sh"
       fi
       conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools
-      conda activate "$VIRTUALENV_PATH" || (echo "Falling back to 'source activate'" && source activate "$VIRTUALENV_PATH")
+      if [ $USE_CONDA_CMD == 1 ]; then
+        conda activate "$VIRTUALENV_PATH"
+      else
+        source activate "$VIRTUALENV_PATH"
+      fi
     else
       mkdir -p "$VIRTUALENV_PATH"
       virtualenv --python=$python "$VIRTUALENV_PATH"
@@ -96,8 +102,8 @@ for python in "${PYTHON_EXECS[@]}"; do
     cd "$FWDIR"/python
     # Delete the egg info file if it exists, this can cache the setup file.
     rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
-    # Also, delete the symbolic link if exists. It can be left over from the previous editable mode installation.
-    python3 -c "from distutils.sysconfig import get_python_lib; import os; f = os.path.join(get_python_lib(), 'pyspark.egg-link'); os.unlink(f) if os.path.isfile(f) else 0"
+    # Also delete .local in case it was already installed via --user.
+    rm -rf "$(python3 -m site --user-site)/pyspark.egg-link" || echo "No existing PySpark installation at user site-packages."
     python3 setup.py sdist


@@ -116,7 +122,6 @@ for python in "${PYTHON_EXECS[@]}"; do
     cd /

     echo "Run basic sanity check on pip installed version with spark-submit"
-    export PATH="$(python3 -m site --user-base)/bin:$PATH"
     spark-submit "$FWDIR"/dev/pip-sanity-check.py
     echo "Run basic sanity check with import based"
     python3 "$FWDIR"/dev/pip-sanity-check.py
@@ -127,7 +132,11 @@ for python in "${PYTHON_EXECS[@]}"; do

     # conda / virtualenv environments need to be deactivated differently
     if [ -n "$USE_CONDA" ]; then
-      conda deactivate || (echo "Falling back to 'source deactivate'" && source deactivate)
+      if [ $USE_CONDA_CMD == 1 ]; then
+        conda deactivate
+      else
+        source deactivate
+      fi
     else
       deactivate
     fi
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 8e29f89c3a0d0..08b9c57c220fd 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -711,61 +711,61 @@ def main():
         setup_test_environ(test_environ)

     should_run_java_style_checks = False
-    if not should_only_test_modules:
-        # license checks
-        run_apache_rat_checks()
-
-        # style checks
-        if not changed_files or any(f.endswith(".scala")
-                                    or f.endswith("scalastyle-config.xml")
-                                    for f in changed_files):
-            run_scala_style_checks(extra_profiles)
-        if not changed_files or any(f.endswith(".java")
-                                    or f.endswith("checkstyle.xml")
-                                    or f.endswith("checkstyle-suppressions.xml")
-                                    for f in changed_files):
-            # Run SBT Checkstyle after the build to prevent a side-effect to the build.
-            should_run_java_style_checks = True
-        if not changed_files or any(f.endswith("lint-python")
-                                    or f.endswith("tox.ini")
-                                    or f.endswith(".py")
-                                    for f in changed_files):
-            run_python_style_checks()
-        if not changed_files or any(f.endswith(".R")
-                                    or f.endswith("lint-r")
-                                    or f.endswith(".lintr")
-                                    for f in changed_files):
-            run_sparkr_style_checks()
-
-        # determine if docs were changed and if we're inside the amplab environment
-        # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
-        # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
-        #     build_spark_documentation()
-
-        if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
-            run_build_tests()
+    # if not should_only_test_modules:
+    #     # license checks
+    #     run_apache_rat_checks()
+    #
+    #     # style checks
+    #     if not changed_files or any(f.endswith(".scala")
+    #                                 or f.endswith("scalastyle-config.xml")
+    #                                 for f in changed_files):
+    #         run_scala_style_checks(extra_profiles)
+    #     if not changed_files or any(f.endswith(".java")
+    #                                 or f.endswith("checkstyle.xml")
+    #                                 or f.endswith("checkstyle-suppressions.xml")
+    #                                 for f in changed_files):
+    #         # Run SBT Checkstyle after the build to prevent a side-effect to the build.
+    #         should_run_java_style_checks = True
+    #     if not changed_files or any(f.endswith("lint-python")
+    #                                 or f.endswith("tox.ini")
+    #                                 or f.endswith(".py")
+    #                                 for f in changed_files):
+    #         run_python_style_checks()
+    #     if not changed_files or any(f.endswith(".R")
+    #                                 or f.endswith("lint-r")
+    #                                 or f.endswith(".lintr")
+    #                                 for f in changed_files):
+    #         run_sparkr_style_checks()
+    #
+    #     # determine if docs were changed and if we're inside the amplab environment
+    #     # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
+    #     # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
+    #     #     build_spark_documentation()
+    #
+    #     if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
+    #         run_build_tests()

     # spark build
     build_apache_spark(build_tool, extra_profiles)

-    # backwards compatibility checks
-    if build_tool == "sbt":
-        # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(extra_profiles)
-        # Since we did not build assembly/package before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+    # # backwards compatibility checks
+    # if build_tool == "sbt":
+    #     # Note: compatibility tests only supported in sbt for now
+    #     detect_binary_inop_with_mima(extra_profiles)
+    #     # Since we did not build assembly/package before running dev/mima, we need to
+    #     # do it here because the tests still rely on it; see SPARK-13294 for details.
+    #     build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

     # run the test suites
-    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
+    # run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)

     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:
         # We only run PySpark tests with coverage report in one specific job with
         # Spark master with SBT in Jenkins.
         is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
-        run_python_tests(
-            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
+        # run_python_tests(
+        #     modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
         run_python_packaging_tests()
     if any(m.should_run_r_tests for m in test_modules):
         run_sparkr_tests()
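
Note on the conda activation change in dev/run-pip-tests: `conda activate` only
exists as a shell function after "$CONDA_PREFIX/etc/profile.d/conda.sh" has been
sourced (see conda/conda#7980), and the old `|| ( ... && source activate ... )`
fallback ran `source activate` inside a `( ... )` subshell, so any environment it
set was discarded when the subshell exited; the new USE_CONDA_CMD branch keeps the
activation in the current shell. A minimal sketch of the subshell pitfall (the
ACTIVATED variable is illustrative only; the if/else mirrors the patch):

    # Changes made inside ( ... ) do not survive the subshell:
    (export ACTIVATED=1)
    echo "${ACTIVATED:-unset}"               # prints "unset"

    # Branching in the current shell instead, as the patch does:
    if [ $USE_CONDA_CMD == 1 ]; then
      conda activate "$VIRTUALENV_PATH"      # conda >= 4.4, after sourcing conda.sh
    else
      source activate "$VIRTUALENV_PATH"     # legacy activation script
    fi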
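
Note on dropping --user from PIP_OPTIONS: pip now installs into whatever
virtualenv/conda environment is active, so the removed
`export PATH="$(python3 -m site --user-base)/bin:$PATH"` is no longer needed for
spark-submit to be found. A stale editable install from an earlier --user run can
still shadow the fresh copy, which is what the new user-site cleanup guards
against. A quick check for such leftovers (a sketch; only the egg-link path is
taken from the patch):

    # Where `pip install --user` puts packages for this interpreter:
    python3 -m site --user-site

    # A leftover `pip install --user -e` leaves an egg-link behind:
    test -f "$(python3 -m site --user-site)/pyspark.egg-link" \
      && echo "stale user-site editable install of pyspark found"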