-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-56326][SS][CORE] Include streaming query and batch ids in scheduling logs #55166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9ed6cd6
4a53e4b
92d2f95
4845215
53099c9
280d11b
f1497f6
f5a1915
413c21c
fedfcdc
dc6bd59
0b0f5f1
3951124
8272bc4
6f40341
c54701f
5607bab
47368d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,278 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.scheduler | ||
|
|
||
| import java.util.{HashMap, Locale, Properties} | ||
|
|
||
| import org.apache.spark.internal.{LogEntry, Logging, LogKeys, MessageWithContext} | ||
|
|
||
| /** | ||
| * A logging trait for scheduler components where log messages should include | ||
| * structured streaming identifiers (query ID and batch ID). | ||
| * | ||
| * Streaming execution sets these identifiers via | ||
| * [[org.apache.spark.SparkContext#setLocalProperty]], which is thread-local. | ||
| * Scheduler code typically runs on a different thread (e.g. the | ||
| * task-scheduler-event-loop-worker), so `getLocalProperty` would not have | ||
| * the streaming context. This trait instead reads the identifiers from the | ||
| * task's [[java.util.Properties]], which are propagated with the | ||
| * [[org.apache.spark.scheduler.TaskSet]] across thread boundaries. | ||
| * | ||
| * Mix this trait into any scheduler component that has access to task | ||
| * properties and needs streaming-aware log output. | ||
| */ | ||
| private[scheduler] trait StructuredStreamingIdAwareSchedulerLogging extends Logging { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use a trait here so all logs published from TaskSetManager will include the query and batch Id when present |
||
| // we gather the query and batch Id from the properties of a given TaskSet | ||
| protected def properties: Properties | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we can't rely on thread local properties, we need to gather the query and batch Id from the taskSet's properties, this must be set by class which mixes in the trait |
||
| protected def streamingIdAwareLoggingEnabled: Boolean | ||
| protected def streamingQueryIdLength: Int | ||
|
|
||
| override protected def logInfo(msg: => String): Unit = | ||
| super.logInfo( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
|
|
||
| override protected def logInfo(entry: LogEntry): Unit = { | ||
| super.logInfo( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
| } | ||
|
|
||
| override protected def logInfo(msg: => String, t: Throwable): Unit = | ||
| super.logInfo( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
|
|
||
| override protected def logInfo(entry: LogEntry, t: Throwable): Unit = { | ||
| super.logInfo( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
| } | ||
|
|
||
| override protected def logWarning(msg: => String): Unit = | ||
| super.logWarning( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
|
|
||
| override protected def logWarning(entry: LogEntry): Unit = { | ||
| super.logWarning( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
| } | ||
|
|
||
| override protected def logWarning(msg: => String, t: Throwable): Unit = | ||
| super.logWarning( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
|
|
||
| override protected def logWarning(entry: LogEntry, t: Throwable): Unit = { | ||
| super.logWarning( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
| } | ||
|
|
||
| override protected def logDebug(msg: => String): Unit = | ||
| super.logDebug( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
|
|
||
| override protected def logDebug(entry: LogEntry): Unit = { | ||
| super.logDebug( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
| } | ||
|
|
||
| override protected def logDebug(msg: => String, t: Throwable): Unit = | ||
| super.logDebug( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
|
|
||
| override protected def logDebug(entry: LogEntry, t: Throwable): Unit = { | ||
| super.logDebug( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
| } | ||
|
|
||
| override protected def logError(msg: => String): Unit = | ||
| super.logError( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
|
|
||
| override protected def logError(entry: LogEntry): Unit = { | ||
| super.logError( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
| } | ||
|
|
||
| override protected def logError(msg: => String, t: Throwable): Unit = | ||
| super.logError( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
|
|
||
| override protected def logError(entry: LogEntry, t: Throwable): Unit = { | ||
| super.logError( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
| } | ||
|
|
||
| override protected def logTrace(msg: => String): Unit = | ||
| super.logTrace( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
|
|
||
| override protected def logTrace(entry: LogEntry): Unit = { | ||
| super.logTrace( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength)) | ||
| } | ||
|
|
||
| override protected def logTrace(msg: => String, t: Throwable): Unit = | ||
| super.logTrace( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, msg, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
|
|
||
| override protected def logTrace(entry: LogEntry, t: Throwable): Unit = { | ||
| super.logTrace( | ||
| StructuredStreamingIdAwareSchedulerLogging | ||
| .constructStreamingLogEntry( | ||
| properties, entry, streamingIdAwareLoggingEnabled, streamingQueryIdLength), t) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Helpers for constructing log entries enriched with structured streaming | ||
| * identifiers extracted from task properties. | ||
| */ | ||
| private[scheduler] object StructuredStreamingIdAwareSchedulerLogging extends Logging { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. uses a companion object here so that we can call the methods from SchedulableBuilder which can not set one Properties object at construction as it's shared across tasks |
||
| val QUERY_ID_KEY = "sql.streaming.queryId" | ||
| val BATCH_ID_KEY = "streaming.sql.batchId" | ||
|
|
||
| private[scheduler] def constructStreamingLogEntry( | ||
| properties: Properties, | ||
| entry: LogEntry, | ||
| enabled: Boolean, | ||
| queryIdLength: Int): LogEntry = { | ||
| if (!enabled || properties == null) { | ||
| return entry | ||
| } | ||
| // wrap in log entry to defer until log is evaluated | ||
| new LogEntry({ | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we wrap this in a LogEntry since Claude pointed out that we were forcing eager evaluation of the provided logEntry even if the logging level was disabled in the environment. By wrapping everything in a LogEntry, now the logic is only called when the logging level is enabled for the environment. This is important for things like debugging and trace logs |
||
| val (queryId: Option[String], batchId: Option[String]) = | ||
| getStreamingProperties(properties, queryIdLength) | ||
|
|
||
| formatMessage( | ||
| queryId, | ||
| batchId, | ||
| entry | ||
| ) | ||
| }) | ||
| } | ||
|
|
||
| private[scheduler] def constructStreamingLogEntry( | ||
| properties: Properties, | ||
| msg: => String, | ||
| enabled: Boolean, | ||
| queryIdLength: Int): LogEntry = { | ||
| if (!enabled || properties == null) { | ||
| return new LogEntry( | ||
| MessageWithContext(msg, java.util.Collections.emptyMap()) | ||
| ) | ||
| } | ||
|
|
||
| new LogEntry({ | ||
| val (queryId: Option[String], batchId: Option[String]) = | ||
| getStreamingProperties(properties, queryIdLength) | ||
|
|
||
| MessageWithContext( | ||
| formatMessage( | ||
| queryId, | ||
| batchId, | ||
| msg | ||
| ), | ||
| constructStreamingContext(queryId, batchId) | ||
| ) | ||
| }) | ||
| } | ||
|
|
||
| private def constructStreamingContext( | ||
| queryId: Option[String], | ||
| batchId: Option[String]): HashMap[String, String] = { | ||
| val streamingContext = new HashMap[String, String]() | ||
| // MDC places the log key in the context as all lowercase, so we do the same here | ||
| queryId.foreach(streamingContext.put(LogKeys.QUERY_ID.name.toLowerCase(Locale.ROOT), _)) | ||
| batchId.foreach(streamingContext.put(LogKeys.BATCH_ID.name.toLowerCase(Locale.ROOT), _)) | ||
|
Comment on lines
+235
to
+237
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if the lowercase is necessary here or not, but wanted to match the behavior of the log interpolator |
||
| streamingContext | ||
| } | ||
|
|
||
| private def formatMessage( | ||
| queryId: Option[String], | ||
| batchId: Option[String], | ||
| msg: => String): String = { | ||
| val msgWithBatchId = batchId.map(bid => s"[batchId = $bid] $msg").getOrElse(msg) | ||
| queryId.map(qId => s"[queryId = $qId] $msgWithBatchId").getOrElse(msgWithBatchId) | ||
| } | ||
|
|
||
| private def formatMessage( | ||
| queryId: Option[String], | ||
| batchId: Option[String], | ||
| msg: => LogEntry): MessageWithContext = { | ||
| val msgWithBatchId: MessageWithContext = batchId.map( | ||
| bId => log"[batchId = ${MDC(LogKeys.BATCH_ID, bId)}] " + toMessageWithContext(msg) | ||
| ).getOrElse(toMessageWithContext(msg)) | ||
| queryId.map( | ||
| qId => log"[queryId = ${MDC(LogKeys.QUERY_ID, qId)}] " + msgWithBatchId | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ended up removing the truncation of the query Id here. The reason is that, before, when we added the hashmap to the context containing the full query Id, any log renderer which used the log context would place the full query Id into the log message anyways. Ultimately I think it's probably better to have the full query Id in the log context and subsequently the log, then to have the truncated version. Open to reverting this change back if others feel differently.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also the code is simpler without the truncation
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does this impact the log message? I'm not very sure we really want to make log message to contain almost 40 chars of query ID every time. This would be more painful when we have to look at mixed log (we can't ensure every single log message to onboard this). So it's OK if this isn't reflected to log message (especially prefix) but if this will go to the prefix of log message, I'm not sure I prefer it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pushed update where we default to truncating the query Id to 5 characters and a new config can be used to control how much we truncate, or no truncating at all |
||
| ).getOrElse(msgWithBatchId) | ||
| } | ||
|
|
||
| private def toMessageWithContext(entry: LogEntry): MessageWithContext = { | ||
| MessageWithContext(entry.message, entry.context) | ||
| } | ||
|
|
||
| private def getStreamingProperties( | ||
| properties: Properties, | ||
| queryIdLength: Int): (Option[String], Option[String]) = { | ||
| val queryId = Option(properties.getProperty(QUERY_ID_KEY)).filter(_.nonEmpty).map { id => | ||
| if (queryIdLength == -1) { | ||
| id | ||
| } else { | ||
| id.take(queryIdLength) | ||
| } | ||
| } | ||
| val batchId = Option(properties.getProperty(BATCH_ID_KEY)).filter(_.nonEmpty) | ||
| (queryId, batchId) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like I said above, since this method is called for many different TaskSets we have to use the companion object's method