KAFKA-6927: Message down-conversion causes Out Of Memory on broker #4871

Merged
47 commits merged on May 31, 2018

Commits
2185e03
LazyDownConversionRecords implementation.
dhruvilshah3 Apr 11, 2018
19c30f5
Remove topic configuration changes.
dhruvilshah3 Apr 11, 2018
e619124
Maintain metrics for lazy down-conversion.
dhruvilshah3 Apr 13, 2018
4b04c09
Code cleanup.
dhruvilshah3 Apr 13, 2018
1226496
Down-conversion unit tests
dhruvilshah3 Apr 24, 2018
2b4e36f
Use `SerializableRecords` for `LazyDownConversionRecords`. Verified w…
dhruvilshah3 Apr 27, 2018
90e87bc
LazyDownConversionRecords implementation.
dhruvilshah3 Apr 11, 2018
ca7c623
Remove topic configuration changes.
dhruvilshah3 Apr 11, 2018
3dcb201
Maintain metrics for lazy down-conversion.
dhruvilshah3 Apr 13, 2018
d88fd93
Code cleanup.
dhruvilshah3 Apr 13, 2018
910525a
Down-conversion unit tests
dhruvilshah3 Apr 24, 2018
53a76dd
Use `SerializableRecords` for `LazyDownConversionRecords`. Verified w…
dhruvilshah3 Apr 27, 2018
4ea1deb
Rebase against trunk.
dhruvilshah3 Apr 28, 2018
46c067e
Minor refactoring and checkStyle changes.
dhruvilshah3 Apr 28, 2018
69cee5f
Added logging to `LazyDownConversionRecords`.
dhruvilshah3 Apr 30, 2018
ad7daf4
Unit test
dhruvilshah3 May 1, 2018
6d44315
Always down-convert first batch.
dhruvilshah3 May 3, 2018
1289818
Added some comments.
dhruvilshah3 May 3, 2018
a0474a8
checkstyle
dhruvilshah3 May 3, 2018
b31b6ec
Fix failing unit tests.
dhruvilshah3 May 8, 2018
8d6b5b9
Fix FetchRequestTest
dhruvilshah3 May 8, 2018
aea9548
Merge branch 'trunk' into KAFKA-6709
dhruvilshah3 May 17, 2018
78c5327
Rework inheritance
dhruvilshah3 May 18, 2018
99471e2
checkstyle
dhruvilshah3 May 18, 2018
459c7c9
More refactoring
dhruvilshah3 May 19, 2018
127d6ee
Minor cleanup
dhruvilshah3 May 19, 2018
72df999
Fix bug in LazyDownConversionRecords
dhruvilshah3 May 19, 2018
3aa8887
Introduce LazyDownConversionRecordsSend
dhruvilshah3 May 23, 2018
b187fe2
AbstractRecords --> Records
dhruvilshah3 May 23, 2018
e45dde0
Change documentation
dhruvilshah3 May 23, 2018
da99558
Fix tests
dhruvilshah3 May 23, 2018
9b61658
checkstyle
dhruvilshah3 May 24, 2018
d0068d5
Fix casting bug
dhruvilshah3 May 24, 2018
2e76d91
Address review comments.
dhruvilshah3 May 24, 2018
3e9f9ba
Revert to using RecordsWriter
dhruvilshah3 May 25, 2018
40d598c
Merge branch 'trunk' into KAFKA-6709
dhruvilshah3 May 25, 2018
de1d294
Address review comments.
dhruvilshah3 May 25, 2018
5ca9365
Null out converted batch after first use; move MultiRecordsSend to `r…
dhruvilshah3 May 26, 2018
88437a0
Drop use of AtomicReference
dhruvilshah3 May 26, 2018
84f7b78
Address review comments.
dhruvilshah3 May 29, 2018
cc03a52
data --> unconvertedFetchResponse
dhruvilshah3 May 29, 2018
fed927f
Address review comments
dhruvilshah3 May 30, 2018
002b5e5
Address review comments.
dhruvilshah3 May 30, 2018
af74c13
checkstyle
dhruvilshah3 May 30, 2018
74f714d
Merge branch 'trunk' into KAFKA-6709
dhruvilshah3 May 30, 2018
9deb553
Fix ThrottledChannelExpirationTest
dhruvilshah3 May 30, 2018
2e35d5c
Address review comments
dhruvilshah3 May 30, 2018
@@ -17,7 +17,6 @@
package org.apache.kafka.common.protocol.types;

import org.apache.kafka.common.record.BaseRecords;
import org.apache.kafka.common.record.FileRecords;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.Records;
import org.apache.kafka.common.utils.ByteUtils;
@@ -550,8 +549,8 @@ public boolean isNullable() {

@Override
public void write(ByteBuffer buffer, Object o) {
if (o instanceof FileRecords)
throw new IllegalArgumentException("FileRecords must be written to the channel directly");
if (!(o instanceof MemoryRecords))
throw new IllegalArgumentException("Unexpected record type: " + o.getClass());
MemoryRecords records = (MemoryRecords) o;
NULLABLE_BYTES.write(buffer, records.buffer().duplicate());
}
2 changes: 1 addition & 1 deletion core/src/main/scala/kafka/network/RequestChannel.scala
@@ -247,7 +247,7 @@ object RequestChannel extends Logging {
class SendResponse(request: Request,
val responseSend: Send,
val responseAsString: Option[String],
val processingStatsCallback: Option[Map[TopicPartition, RecordConversionStats] => Unit]) extends Response(request) {
val onComplete: Option[Send => Unit]) extends Response(request) {
override def toString: String = toString(Some(responseSend), responseAsString)
}

21 changes: 9 additions & 12 deletions core/src/main/scala/kafka/network/SocketServer.scala
@@ -704,7 +704,15 @@ private[kafka] class Processor(val id: Int,
val response = inflightResponses.remove(send.destination).getOrElse {
throw new IllegalStateException(s"Send for ${send.destination} completed, but not in `inflightResponses`")
}
updateRequestMetrics(response, send)
updateRequestMetrics(response)

// Invoke send completion callback
response match {
Contributor review comment: Similar to the comment above. If you find yourself matching on types, it is worth considering whether we can instead add a method to the type. Here we could have `Response.onComplete`, and the default implementation can do nothing.
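As a rough illustration of that suggestion (editorial sketch only; the types below are simplified stand-ins, not Kafka's actual `RequestChannel` classes):

```scala
// Sketch of the reviewer's suggestion: give Response an onComplete hook with a
// no-op default so the Processor never pattern-matches on the response type.
// `Send` and `Request` here are simplified stand-ins, not Kafka's real classes.
trait Send { def destination: String }
class Request

abstract class Response(val request: Request) {
  // Default implementation: nothing to do when the send completes.
  def onComplete(send: Send): Unit = ()
}

class SendResponse(request: Request,
                   val responseSend: Send,
                   val onCompleteCallback: Option[Send => Unit]) extends Response(request) {
  // Only SendResponse carries a completion callback, so only it overrides the hook.
  override def onComplete(send: Send): Unit = onCompleteCallback.foreach(_(send))
}

// The Processor would then call response.onComplete(send) unconditionally,
// with no match on the concrete response type.
```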

case response: RequestChannel.SendResponse => response.onComplete.foreach {
onComplete => onComplete(send)
}
case _ =>
}
selector.unmute(send.destination)
} catch {
case e: Throwable => processChannelException(send.destination,
@@ -713,17 +721,6 @@
}
}

private def updateRequestMetrics(response: RequestChannel.Response, send: Send): Unit = {
// If we have a callback for updating record processing statistics, invoke that now. This is used for cases
// where network threads process records, for example when we lazily down-convert records.
(response, send) match {
case (response: RequestChannel.SendResponse, send: MultiRecordsSend) if (send.recordConversionStats().size() > 0) =>
response.processingStatsCallback.foreach(callback => callback(send.recordConversionStats().asScala.toMap))
case _ =>
}
updateRequestMetrics(response)
}

private def updateRequestMetrics(response: RequestChannel.Response): Unit = {
// Record the amount of time this request spent on the network thread
Contributor review comment: Was this comment added intentionally? The request metrics include more than how much time was spent on the network thread.

val request = response.request
74 changes: 40 additions & 34 deletions core/src/main/scala/kafka/server/KafkaApis.scala
@@ -43,9 +43,9 @@ import org.apache.kafka.common.errors._
import org.apache.kafka.common.internals.FatalExitError
import org.apache.kafka.common.internals.Topic.{GROUP_METADATA_TOPIC_NAME, TRANSACTION_STATE_TOPIC_NAME, isInternal}
import org.apache.kafka.common.metrics.Metrics
import org.apache.kafka.common.network.ListenerName
import org.apache.kafka.common.network.{ListenerName, Send}
import org.apache.kafka.common.protocol.{ApiKeys, Errors}
import org.apache.kafka.common.record.{BaseRecords, ControlRecordType, EndTransactionMarker, LazyDownConversionRecords, MemoryRecords, RecordBatch, RecordConversionStats, Records}
import org.apache.kafka.common.record.{BaseRecords, ControlRecordType, EndTransactionMarker, LazyDownConversionRecords, MemoryRecords, MultiRecordsSend, RecordBatch, RecordConversionStats, Records}
import org.apache.kafka.common.requests.CreateAclsResponse.AclCreationResponse
import org.apache.kafka.common.requests.DeleteAclsResponse.{AclDeletionResult, AclFilterResponse}
import org.apache.kafka.common.requests.DescribeLogDirsResponse.LogDirInfo
@@ -459,7 +459,7 @@ class KafkaApis(val requestChannel: RequestChannel,

def processingStatsCallback(processingStats: FetchResponseStats): Unit = {
processingStats.foreach { case (tp, info) =>
updateRecordsProcessingStats(request, tp, info)
updateRecordConversionStats(request, tp, info)
}
}

@@ -533,8 +533,7 @@ class KafkaApis(val requestChannel: RequestChannel,
})
}

def convertedPartitionData(tp: TopicPartition,
unconvertedFetchResponse: FetchResponse.PartitionData[Records]): FetchResponse.PartitionData[BaseRecords] = {
def convertRecords(tp: TopicPartition, unconvertedRecords: Records): BaseRecords = {
// Down-conversion of the fetched records is needed when the stored magic version is
// greater than that supported by the client (as indicated by the fetch request version). If the
// configured magic version for the topic is less than or equal to that supported by the version of the
@@ -544,9 +543,9 @@ class KafkaApis(val requestChannel: RequestChannel,
// which were written in the new format prior to the version downgrade.
replicaManager.getMagic(tp).flatMap { magic =>
val downConvertMagic = {
if (magic > RecordBatch.MAGIC_VALUE_V0 && versionId <= 1 && !unconvertedFetchResponse.records.hasCompatibleMagic(RecordBatch.MAGIC_VALUE_V0))
if (magic > RecordBatch.MAGIC_VALUE_V0 && versionId <= 1 && !unconvertedRecords.hasCompatibleMagic(RecordBatch.MAGIC_VALUE_V0))
Some(RecordBatch.MAGIC_VALUE_V0)
else if (magic > RecordBatch.MAGIC_VALUE_V1 && versionId <= 3 && !unconvertedFetchResponse.records.hasCompatibleMagic(RecordBatch.MAGIC_VALUE_V1))
else if (magic > RecordBatch.MAGIC_VALUE_V1 && versionId <= 3 && !unconvertedRecords.hasCompatibleMagic(RecordBatch.MAGIC_VALUE_V1))
Some(RecordBatch.MAGIC_VALUE_V1)
else
None
@@ -559,14 +558,9 @@ class KafkaApis(val requestChannel: RequestChannel,
// as possible. With KIP-283, we have the ability to lazily down-convert in a chunked manner. The lazy, chunked
// down-conversion always guarantees that at least one batch of messages is down-converted and sent out to the
// client.
val converted = new LazyDownConversionRecords(tp, unconvertedFetchResponse.records, magic, fetchContext.getFetchOffset(tp).get, Time.SYSTEM)
new FetchResponse.PartitionData[BaseRecords](unconvertedFetchResponse.error, unconvertedFetchResponse.highWatermark,
FetchResponse.INVALID_LAST_STABLE_OFFSET, unconvertedFetchResponse.logStartOffset, unconvertedFetchResponse.abortedTransactions,
converted)
new LazyDownConversionRecords(tp, unconvertedRecords, magic, fetchContext.getFetchOffset(tp).get, time)
}
}.getOrElse(new FetchResponse.PartitionData[BaseRecords](unconvertedFetchResponse.error, unconvertedFetchResponse.highWatermark,
unconvertedFetchResponse.lastStableOffset, unconvertedFetchResponse.logStartOffset, unconvertedFetchResponse.abortedTransactions,
unconvertedFetchResponse.records))
}.getOrElse(unconvertedRecords)
}

// the callback for process a fetch response, invoked before throttling
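For context, the KIP-283 behavior described in the comment inside `convertRecords` above can be sketched as follows (hypothetical types only; this is not the PR's actual `LazyDownConversionRecords` API): each batch is down-converted only when it is about to be written to the network, so at most one converted chunk is buffered at a time and at least the first batch is always converted.

```scala
// Editorial sketch of lazy, chunked down-conversion (hypothetical types, not
// the PR's LazyDownConversionRecords): convert one batch at a time while the
// response is being written, instead of materializing the whole partition.
import java.nio.ByteBuffer

trait RecordBatchLike {
  def downConvert(toMagic: Byte): ByteBuffer // produce one converted chunk
}

final class LazyChunkedDownConversion(batches: Iterator[RecordBatchLike], toMagic: Byte) {
  // Writes the next converted chunk to the channel, returning false when done.
  // Only the chunk currently being written is held in memory.
  def writeNextChunk(writeToChannel: ByteBuffer => Unit): Boolean = {
    if (batches.hasNext) {
      writeToChannel(batches.next().downConvert(toMagic))
      true
    } else false
  }
}
```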
@@ -584,13 +578,20 @@ class KafkaApis(val requestChannel: RequestChannel,
// fetch response callback invoked after any throttling
def fetchResponseCallback(bandwidthThrottleTimeMs: Int) {
def createResponse(requestThrottleTimeMs: Int): FetchResponse[BaseRecords] = {
// Down-convert messages for each partition, if needed
val convertedData = new util.LinkedHashMap[TopicPartition, FetchResponse.PartitionData[BaseRecords]]
unconvertedFetchResponse.responseData().asScala.foreach { case (tp, partitionData) =>
if (partitionData.error != Errors.NONE)
unconvertedFetchResponse.responseData().asScala.foreach { case (tp, unconvertedPartitionData) =>
if (unconvertedPartitionData.error != Errors.NONE)
debug(s"Fetch request with correlation id ${request.header.correlationId} from client $clientId " +
s"on partition $tp failed due to ${partitionData.error.exceptionName}")
convertedData.put(tp, convertedPartitionData(tp, partitionData))
s"on partition $tp failed due to ${unconvertedPartitionData.error.exceptionName}")
val convertedRecords = convertRecords(tp, unconvertedPartitionData.records)
val convertedPartitionData = new FetchResponse.PartitionData[BaseRecords](unconvertedPartitionData.error,
unconvertedPartitionData.highWatermark, FetchResponse.INVALID_LAST_STABLE_OFFSET, unconvertedPartitionData.logStartOffset,
unconvertedPartitionData.abortedTransactions, convertedRecords)
convertedData.put(tp, convertedPartitionData)
}

// Prepare fetch response from converted data
val response = new FetchResponse(unconvertedFetchResponse.error(), convertedData,
bandwidthThrottleTimeMs + requestThrottleTimeMs, unconvertedFetchResponse.sessionId())
response.responseData.asScala.foreach { case (topicPartition, data) =>
Expand All @@ -603,16 +604,20 @@ class KafkaApis(val requestChannel: RequestChannel,
trace(s"Sending Fetch response with partitions.size=${unconvertedFetchResponse.responseData().size()}, " +
s"metadata=${unconvertedFetchResponse.sessionId()}")

def processingStatsCallback(recordConversionStats: FetchResponseStats): Unit = {
recordConversionStats.foreach { case (tp, info) =>
updateRecordsProcessingStats(request, tp, info)
def onComplete(send: Send): Unit = {
Contributor review comment: nit: we could name this something more descriptive like updateConversionStats.

send match {
case send: MultiRecordsSend if send.recordConversionStats != null =>
send.recordConversionStats.asScala.toMap.foreach {
case (tp, stats) => updateRecordConversionStats(request, tp, stats)
}
case _ =>
}
}

if (fetchRequest.isFromFollower)
sendResponseExemptThrottle(request, createResponse(0), Some(processingStatsCallback))
sendResponseExemptThrottle(request, createResponse(0), Some(onComplete))
else
sendResponseMaybeThrottle(request, requestThrottleMs => createResponse(requestThrottleMs), Some(processingStatsCallback))
sendResponseMaybeThrottle(request, requestThrottleMs => createResponse(requestThrottleMs), Some(onComplete))
}

// When this callback is triggered, the remote API call has completed.
@@ -2207,9 +2212,10 @@ class KafkaApis(val requestChannel: RequestChannel,
throw new ClusterAuthorizationException(s"Request $request is not authorized.")
}

private def updateRecordsProcessingStats(request: RequestChannel.Request, tp: TopicPartition,
processingStats: RecordConversionStats): Unit = {
val conversionCount = processingStats.numRecordsConverted
private def updateRecordConversionStats(request: RequestChannel.Request,
tp: TopicPartition,
conversionStats: RecordConversionStats): Unit = {
val conversionCount = conversionStats.numRecordsConverted
if (conversionCount > 0) {
request.header.apiKey match {
case ApiKeys.PRODUCE =>
@@ -2221,9 +2227,9 @@ class KafkaApis(val requestChannel: RequestChannel,
case _ =>
throw new IllegalStateException("Message conversion info is recorded only for Produce/Fetch requests")
}
request.messageConversionsTimeNanos = processingStats.conversionTimeNanos
request.messageConversionsTimeNanos = conversionStats.conversionTimeNanos
}
request.temporaryMemoryBytes = processingStats.temporaryMemoryBytes
request.temporaryMemoryBytes = conversionStats.temporaryMemoryBytes
}

private def handleError(request: RequestChannel.Request, e: Throwable) {
@@ -2237,9 +2243,9 @@ class KafkaApis(val requestChannel: RequestChannel,

private def sendResponseMaybeThrottle(request: RequestChannel.Request,
createResponse: Int => AbstractResponse,
processingStatsCallback: Option[FetchResponseStats => Unit] = None): Unit = {
onComplete: Option[Send => Unit] = None): Unit = {
quotas.request.maybeRecordAndThrottle(request,
throttleTimeMs => sendResponse(request, Some(createResponse(throttleTimeMs)), processingStatsCallback))
throttleTimeMs => sendResponse(request, Some(createResponse(throttleTimeMs)), onComplete))
}

private def sendErrorResponseMaybeThrottle(request: RequestChannel.Request, error: Throwable) {
@@ -2248,9 +2254,9 @@ class KafkaApis(val requestChannel: RequestChannel,

private def sendResponseExemptThrottle(request: RequestChannel.Request,
response: AbstractResponse,
processingStatsCallback: Option[FetchResponseStats => Unit] = None): Unit = {
onComplete: Option[Send => Unit] = None): Unit = {
quotas.request.maybeRecordExempt(request)
sendResponse(request, Some(response), processingStatsCallback)
sendResponse(request, Some(response), onComplete)
}

private def sendErrorResponseExemptThrottle(request: RequestChannel.Request, error: Throwable): Unit = {
@@ -2281,7 +2287,7 @@ class KafkaApis(val requestChannel: RequestChannel,

private def sendResponse(request: RequestChannel.Request,
responseOpt: Option[AbstractResponse],
processingStatsCallback: Option[FetchResponseStats => Unit]): Unit = {
onComplete: Option[Send => Unit]): Unit = {
// Update error metrics for each error code in the response including Errors.NONE
responseOpt.foreach(response => requestChannel.updateErrorMetrics(request.header.apiKey, response.errorCounts.asScala))

Expand All @@ -2291,7 +2297,7 @@ class KafkaApis(val requestChannel: RequestChannel,
val responseString =
if (RequestChannel.isRequestLoggingEnabled) Some(response.toString(request.context.apiVersion))
else None
new RequestChannel.SendResponse(request, responseSend, responseString, processingStatsCallback)
new RequestChannel.SendResponse(request, responseSend, responseString, onComplete)
case None =>
new RequestChannel.NoOpResponse(request)
}
1 change: 0 additions & 1 deletion core/src/main/scala/kafka/server/ReplicaManager.scala
@@ -16,7 +16,6 @@
*/
package kafka.server


import java.io.File
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong}
@@ -27,7 +27,7 @@ import kafka.security.auth._
import kafka.server.{BaseRequestTest, KafkaConfig}
import kafka.utils.TestUtils
import org.apache.kafka.clients.admin.NewPartitions
import org.apache.kafka.clients.consumer.{OffsetAndMetadata, _}
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener
import org.apache.kafka.clients.producer._
import org.apache.kafka.common.acl.{AccessControlEntry, AccessControlEntryFilter, AclBinding, AclBindingFilter, AclOperation, AclPermissionType}