From 69877a20ceb9156ecf8ed7062ba3b64f56ab9134 Mon Sep 17 00:00:00 2001 From: Dan LaRocque Date: Thu, 17 Nov 2016 11:33:58 -0500 Subject: [PATCH] Limit JVM system props passed around in Spark jobs Prior to this commit, SGC indiscriminately set the entire job config as JVM system properties on Spark executors. It didn't account for the fact that some config values (e.g. spark.job.description) could have spaces. A value with a space wouldn't get quoted. This led to Spark workers failing to start, because part of the unquoted value would be erroneously interpreted as the JVM main class. This commit makes SGC only pass two config settings as system props: * gremlin.io.registry * gremlin.io.kryoShimService --- .../process/computer/SparkGraphComputer.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java index 4e7408847a0..ee3ebe19688 100644 --- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java +++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java @@ -71,11 +71,16 @@ import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage; import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; import org.apache.tinkerpop.gremlin.structure.Direction; +import org.apache.tinkerpop.gremlin.structure.io.IoRegistry; import org.apache.tinkerpop.gremlin.structure.io.Storage; import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; import java.io.File; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -91,6 +96,16 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer { private boolean workersSet = false; private final ThreadFactory threadFactoryBoss = new BasicThreadFactory.Builder().namingPattern(SparkGraphComputer.class.getSimpleName() + "-boss").build(); + private static final Set KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES; + + static + { + Set s = new HashSet<>(); + s.add(KryoShimServiceLoader.KRYO_SHIM_SERVICE); + s.add(IoRegistry.IO_REGISTRY); + KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES = Collections.unmodifiableSet(s); + } + /** * An {@code ExecutorService} that schedules up background work. Since a {@link GraphComputer} is only used once * for a {@link VertexProgram} a single threaded executor is sufficient. @@ -146,7 +161,7 @@ private Future submitWithExecutor(Executor exec) { /////////// final StringBuilder params = new StringBuilder(); this.sparkConfiguration.getKeys().forEachRemaining(key -> { - if (key.startsWith("gremlin") || key.startsWith("spark")) { + if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) { params.append(" -D").append("tinkerpop.").append(key).append("=").append(this.sparkConfiguration.getProperty(key)); System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString()); }