From 89ed7b19d3aa8250f02ffa99b44743130983a722 Mon Sep 17 00:00:00 2001
From: Michael Mior
Date: Wed, 28 Mar 2018 16:57:41 -0400
Subject: [PATCH] [SPARK-23820][CORE] Enable use of long form of callsite in logs

---
Review notes (this section is ignored by `git am`):

* RDDInfo no longer reads the setting in a `private val` of `object RDDInfo`.
  That field was evaluated once, in the object initializer, via
  `SparkEnv.get.conf`; when no SparkEnv has been set `SparkEnv.get` is null,
  so the initializer threw and RDDInfo became unusable for the rest of the
  JVM. It also pinned the first value seen, ignoring later SparkContexts.
  The flag is now resolved inside `fromRdd` through `Option(SparkEnv.get)`
  and falls back to the short form.
* The RDDInfo hunk header and the diffstat were recomputed for the new line
  counts. The post-image blob id on the RDDInfo `index` line still refers to
  the previous revision of this patch.

 .../org/apache/spark/internal/config/package.scala    |  3 +++
 .../main/scala/org/apache/spark/storage/RDDInfo.scala | 14 +++++++++++++-
 docs/configuration.md                                 |  7 +++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index bf0391cc9185b..8d827189ebb57 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -77,6 +77,9 @@ package object config {
   private[spark] val EVENT_LOG_OVERWRITE =
     ConfigBuilder("spark.eventLog.overwrite").booleanConf.createWithDefault(false)
 
+  private[spark] val EVENT_LOG_CALLSITE_LONG_FORM =
+    ConfigBuilder("spark.eventLog.longForm.enabled").booleanConf.createWithDefault(false)
+
   private[spark] val EXECUTOR_CLASS_PATH =
     ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH).stringConf.createOptional
 
diff --git a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
index e5abbf745cc41..19f86569c1e3c 100644
--- a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
+++ b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.storage
 
+import org.apache.spark.SparkEnv
 import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.internal.config._
 import org.apache.spark.rdd.{RDD, RDDOperationScope}
 import org.apache.spark.util.Utils
 
@@ -53,10 +55,20 @@ class RDDInfo(
 }
 
 private[spark] object RDDInfo {
   def fromRdd(rdd: RDD[_]): RDDInfo = {
     val rddName = Option(rdd.name).getOrElse(Utils.getFormattedClassName(rdd))
     val parentIds = rdd.dependencies.map(_.rdd.id)
+    // SparkEnv.get is null when no SparkEnv has been set, so resolve the flag on each
+    // call and fall back to the short form instead of failing in the object initializer.
+    val callsiteLongForm = Option(SparkEnv.get)
+      .map(_.conf.get(EVENT_LOG_CALLSITE_LONG_FORM))
+      .getOrElse(false)
+    val callSite = if (callsiteLongForm) {
+      rdd.creationSite.longForm
+    } else {
+      rdd.creationSite.shortForm
+    }
     new RDDInfo(rdd.id, rddName, rdd.partitions.length,
-      rdd.getStorageLevel, parentIds, rdd.creationSite.shortForm, rdd.scope)
+      rdd.getStorageLevel, parentIds, callSite, rdd.scope)
   }
 }
diff --git a/docs/configuration.md b/docs/configuration.md
index 3a8d56776e9e8..782ccff667076 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -746,6 +746,13 @@ Apart from these, the following properties are also available, and may be useful
     *Warning*: This will increase the size of the event log considerably.
   </td>
 </tr>
+<tr>
+  <td><code>spark.eventLog.longForm.enabled</code></td>
+  <td>false</td>
+  <td>
+    If true, use the long form of call sites in the event log. Otherwise use the short form.
+  </td>
+</tr>
 <tr>
   <td><code>spark.eventLog.compress</code></td>
   <td>false</td>