Commit

Remove assembly in tests.
JoshRosen committed Mar 14, 2016
1 parent 310981d commit 2c10193
Showing 3 changed files with 56 additions and 22 deletions.
15 changes: 1 addition & 14 deletions dev/run-tests.py
@@ -323,7 +323,7 @@ def get_hadoop_profiles(hadoop_version):
def build_spark_maven(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
mvn_goals = ["clean", "package", "-DskipTests"]
mvn_goals = ["clean", "package", "-DskipTests", "-pl", "!assembly"]
profiles_and_goals = build_profiles + mvn_goals

print("[info] Building Spark (w/Hive 1.2.1) using Maven with these arguments: ",
@@ -349,16 +349,6 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)


def build_spark_assembly_sbt(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
sbt_goals = ["assembly/assembly"]
profiles_and_goals = build_profiles + sbt_goals
print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
" ".join(profiles_and_goals))
exec_sbt(profiles_and_goals)


def build_apache_spark(build_tool, hadoop_version):
"""Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
`maven`). Defaults to using `sbt`."""
@@ -574,9 +564,6 @@ def main():
if build_tool == "sbt":
# Note: compatibility tests only supported in sbt for now
detect_binary_inop_with_mima()
# Since we did not build assembly/assembly before running dev/mima, we need to
# do it here because the tests still rely on it; see SPARK-13294 for details.
build_spark_assembly_sbt(hadoop_version)

# run the test suites
run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
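
For readers skimming the diff: the new "-pl", "!assembly" arguments tell Maven to exclude the assembly module from the reactor, so the pre-test build no longer produces the assembly jar at all. A minimal sketch of the resulting invocation, where the profile flags are illustrative assumptions rather than values taken from this change:

# Illustrative sketch only: the profile flags below are example values; in
# dev/run-tests.py they come from get_hadoop_profiles() and
# modules.root.build_profile_flags.
import subprocess

build_profiles = ["-Phadoop-2.6", "-Pyarn", "-Phive", "-Phive-thriftserver"]
mvn_goals = ["clean", "package", "-DskipTests", "-pl", "!assembly"]  # "!assembly" drops that module from the reactor
subprocess.check_call(["./build/mvn"] + build_profiles + mvn_goals)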
36 changes: 32 additions & 4 deletions launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -144,10 +144,38 @@ List<String> buildClassPath(String appClassPath) throws IOException {
boolean isTesting = "1".equals(getenv("SPARK_TESTING"));
if (prependClasses || isTesting) {
String scala = getScalaVersion();
List<String> projects = Arrays.asList("core", "repl", "mllib", "graphx",
"streaming", "tools", "sql/catalyst", "sql/core", "sql/hive", "sql/hive-thriftserver",
"yarn", "launcher",
"common/network-common", "common/network-shuffle", "common/network-yarn");
// All projects except assemblies:
List<String> projects = Arrays.asList(
"common/network-common",
"common/network-shuffle",
"common/network-yarn",
"common/sketch",
"common/tags",
"common/unsafe",
"core",
"examples",
"external/akka",
"external/docker-integration-tests",
"external/flume",
"external/flume-sink",
"external/kafka",
"external/kinesis-asl",
"external/mqtt",
"external/spark-ganglia-lgpl",
"external/twitter",
"external/zeromq",
"graphx",
"launcher",
"mllib",
"repl",
"sql/catalyst",
"sql/core",
"sql/hive",
"sql/hive-thriftserver",
"streaming",
"tools",
"yarn"
);
if (prependClasses) {
if (!isTesting) {
System.err.println(
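
For context, this expanded project list feeds the code path (only partly shown in this hunk) that prepends each module's compiled classes, and under SPARK_TESTING its test-classes, to the launcher classpath, which is what lets the test suites run without an assembly jar. A rough Python rendering of that idea; the target/scala-<version> directory layout here is an assumption for illustration, not a quote from the Java code:

# Rough sketch (assumed layout): build a classpath from per-module output directories.
import os

def prepend_class_dirs(spark_home, projects, scala_version, include_test_classes=True):
    entries = []
    for project in projects:
        base = os.path.join(spark_home, project, "target", "scala-" + scala_version)
        entries.append(os.path.join(base, "classes"))
        if include_test_classes:
            entries.append(os.path.join(base, "test-classes"))
    return os.pathsep.join(entries)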
27 changes: 23 additions & 4 deletions python/run-tests.py
@@ -54,10 +54,27 @@ def print_red(text):
LOGGER = logging.getLogger()


def run_individual_python_test(test_name, pyspark_python):
def get_spark_dist_classpath():
original_working_dir = os.getcwd()
os.chdir(SPARK_HOME)
cp = subprocess_check_output(
["./build/sbt", "export assembly/managedClasspath"], universal_newlines=True)
cp = cp.strip().split("\n")[-1]
os.chdir(original_working_dir)
return cp


def run_individual_python_test(test_name, pyspark_python, spark_dist_classpath):
env = dict(os.environ)
env.update({'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python),
'PYSPARK_DRIVER_PYTHON': which(pyspark_python)})
env.update({
# Setting SPARK_DIST_CLASSPATH is a simple way to make sure that any child processes
# launched by the tests have access to the correct test-time classpath.
'SPARK_DIST_CLASSPATH': spark_dist_classpath,
'SPARK_TESTING': '1',
'SPARK_PREPEND_CLASSES': '1',
'PYSPARK_PYTHON': which(pyspark_python),
'PYSPARK_DRIVER_PYTHON': which(pyspark_python),
})
LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name)
start_time = time.time()
try:
@@ -175,14 +192,16 @@ def main():
priority = 100
task_queue.put((priority, (python_exec, test_goal)))

spark_dist_classpath = get_spark_dist_classpath()

def process_queue(task_queue):
while True:
try:
(priority, (python_exec, test_goal)) = task_queue.get_nowait()
except Queue.Empty:
break
try:
run_individual_python_test(test_goal, python_exec)
run_individual_python_test(test_goal, python_exec, spark_dist_classpath)
finally:
task_queue.task_done()

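
Two notes on the new wiring above: the dependency classpath is computed once in main() via a single sbt invocation and then passed into every run_individual_python_test() call, rather than being recomputed per test; and SPARK_DIST_CLASSPATH is an existing Spark environment variable that the launcher folds into the JVM classpath of child processes. A self-contained sketch of that second idea, where the helper function is hypothetical and only the variable name comes from the diff:

# Hypothetical helper: fold SPARK_DIST_CLASSPATH into a classpath the way a
# launcher script might; Spark's real launcher does this in Java.
import os

def build_child_classpath(base_entries):
    entries = list(base_entries)
    dist = os.environ.get("SPARK_DIST_CLASSPATH", "")
    if dist:
        entries.extend(dist.split(os.pathsep))
    return os.pathsep.join(entries)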
