diff --git a/dev/run-tests.py b/dev/run-tests.py
index a1e6f1bdb560e..3ba13d0da23c7 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -323,7 +323,7 @@ def get_hadoop_profiles(hadoop_version):
 def build_spark_maven(hadoop_version):
     # Enable all of the profiles for the build:
     build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    mvn_goals = ["clean", "package", "-DskipTests"]
+    mvn_goals = ["clean", "package", "-DskipTests", "-pl", "!assembly"]
     profiles_and_goals = build_profiles + mvn_goals
 
     print("[info] Building Spark (w/Hive 1.2.1) using Maven with these arguments: ",
@@ -349,16 +349,6 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)
 
 
-def build_spark_assembly_sbt(hadoop_version):
-    # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-    sbt_goals = ["assembly/assembly"]
-    profiles_and_goals = build_profiles + sbt_goals
-    print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
-          " ".join(profiles_and_goals))
-    exec_sbt(profiles_and_goals)
-
-
 def build_apache_spark(build_tool, hadoop_version):
     """Will build Spark against Hive v1.2.1 given the passed in build tool
     (either `sbt` or `maven`). Defaults to using `sbt`."""
@@ -574,9 +564,6 @@ def main():
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
         detect_binary_inop_with_mima()
-        # Since we did not build assembly/assembly before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version)
 
     # run the test suites
     run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index f6c7e07654ee9..605fd35d6cb00 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -144,10 +144,38 @@ List<String> buildClassPath(String appClassPath) throws IOException {
     boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
     if (prependClasses || isTesting) {
       String scala = getScalaVersion();
-      List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
-        "streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
-        "yarn", "launcher",
-        "common/network-common", "common/network-shuffle", "common/network-yarn");
+      // All projects except assemblies:
+      List<String> projects = Arrays.asList(
+        "common/network-common",
+        "common/network-shuffle",
+        "common/network-yarn",
+        "common/sketch",
+        "common/tags",
+        "common/unsafe",
+        "core",
+        "examples",
+        "external/akka",
+        "external/docker-integration-tests",
+        "external/flume",
+        "external/flume-sink",
+        "external/kafka",
+        "external/kinesis-asl",
+        "external/mqtt",
+        "external/spark-ganglia-lgpl",
+        "external/twitter",
+        "external/zeromq",
+        "graphx",
+        "launcher",
+        "mllib",
+        "repl",
+        "sql/catalyst",
+        "sql/core",
+        "sql/hive",
+        "sql/hive-thriftserver",
+        "streaming",
+        "tools",
+        "yarn"
+      );
       if (prependClasses) {
         if (!isTesting) {
           System.err.println(
diff --git a/python/run-tests.py b/python/run-tests.py
index a9f8854e6f66a..6f99d89693335 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -54,10 +54,27 @@ def print_red(text):
 LOGGER = logging.getLogger()
 
 
-def run_individual_python_test(test_name, pyspark_python):
+def get_spark_dist_classpath():
+    original_working_dir = os.getcwd()
+    os.chdir(SPARK_HOME)
+    cp = subprocess_check_output(
+        ["./build/sbt", "export assembly/managedClasspath"], universal_newlines=True)
+    cp = cp.strip().split("\n")[-1]
+    os.chdir(original_working_dir)
+    return cp
+
+
+def run_individual_python_test(test_name, pyspark_python, spark_dist_classpath):
     env = dict(os.environ)
-    env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
-                'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
+    env.update({
+        # Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
+        # launched by the tests have access to the correct test-time classpath.
+        'SPARK_DIST_CLASSPATH': spark_dist_classpath,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
+    })
     LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
     start_time = time.time()
     try:
@@ -175,6 +192,8 @@ def main():
             priority = 100
         task_queue.put((priority, (python_exec, test_goal)))
 
+    spark_dist_classpath = get_spark_dist_classpath()
+
    def process_queue(task_queue):
         while True:
             try:
@@ -182,7 +201,7 @@ def process_queue(task_queue):
             except Queue.Empty:
                 break
             try:
-                run_individual_python_test(test_goal, python_exec)
+                run_individual_python_test(test_goal, python_exec, spark_dist_classpath)
             finally:
                 task_queue.task_done()
 
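
Note (not part of the patch): the new get_spark_dist_classpath() helper keeps only the last line of the sbt output because `./build/sbt "export assembly/managedClasspath"` prints ordinary sbt log lines before the exported, path-separator-joined classpath. A minimal standalone sketch of the same idea, assuming it is run from a Spark checkout and that the hypothetical spark_home argument points at it:

    import subprocess

    def sbt_managed_classpath(spark_home):
        # sbt's "export <task>" prints the task's value in shell-usable form;
        # the classpath is the final stdout line, after sbt's own log output.
        out = subprocess.check_output(
            ["./build/sbt", "export assembly/managedClasspath"],
            cwd=spark_home, universal_newlines=True)
        return out.strip().split("\n")[-1]

The resulting string is what the tests export as SPARK_DIST_CLASSPATH so that child JVMs see the same test-time classpath without a pre-built assembly jar.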