-
Notifications
You must be signed in to change notification settings - Fork 13.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
KAFKA-16207; KRaft's internal log listener to update voter set #15671
Changes from 31 commits
dd84479
87a43c6
c9de8a1
54b164e
04fcf4c
702e739
fae8b84
25d9dfd
b3e65bd
9a6b4c2
24661b1
cd3277c
f3a0fcd
08e1831
89ac698
2889b42
1188b94
92909fb
dd28ef4
deb7920
0db1a06
ab1886a
a962188
1829e06
a108e9c
795858f
a79660e
625aabd
855a13e
629c7da
bc26f59
b6fc7f6
742c969
e55564d
de19062
66c3793
c821a83
c2a7a9a
d1c17e0
bc80295
c363bf1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,11 +44,15 @@ public enum ControlRecordType { | |
ABORT((short) 0), | ||
COMMIT((short) 1), | ||
|
||
// Raft quorum related control messages. | ||
// KRaft quorum related control messages | ||
LEADER_CHANGE((short) 2), | ||
SNAPSHOT_HEADER((short) 3), | ||
SNAPSHOT_FOOTER((short) 4), | ||
|
||
// KRaft membership changes messages | ||
KRAFT_VERSION((short) 5), | ||
VOTERS((short) 6), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe we can start using the prefix consistently. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good. Fixed for KRAFT_VOTERS. I'll fix the rest in another PR. |
||
|
||
// UNKNOWN is used to indicate a control type which the client is not aware of and should be ignored | ||
UNKNOWN((short) -1); | ||
|
||
|
@@ -108,6 +112,10 @@ public static ControlRecordType fromTypeId(short typeId) { | |
return SNAPSHOT_HEADER; | ||
case 4: | ||
return SNAPSHOT_FOOTER; | ||
case 5: | ||
return KRAFT_VERSION; | ||
case 6: | ||
return VOTERS; | ||
|
||
default: | ||
return UNKNOWN; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,11 @@ | |
|
||
import org.apache.kafka.common.TopicPartition; | ||
import org.apache.kafka.common.errors.CorruptRecordException; | ||
import org.apache.kafka.common.message.KRaftVersionRecord; | ||
import org.apache.kafka.common.message.LeaderChangeMessage; | ||
import org.apache.kafka.common.message.SnapshotHeaderRecord; | ||
import org.apache.kafka.common.message.SnapshotFooterRecord; | ||
import org.apache.kafka.common.message.SnapshotHeaderRecord; | ||
import org.apache.kafka.common.message.VotersRecord; | ||
import org.apache.kafka.common.network.TransferableChannel; | ||
import org.apache.kafka.common.record.MemoryRecords.RecordFilter.BatchRetention; | ||
import org.apache.kafka.common.record.MemoryRecords.RecordFilter.BatchRetentionResult; | ||
|
@@ -154,7 +156,7 @@ public FilterResult filterTo(TopicPartition partition, RecordFilter filter, Byte | |
|
||
/** | ||
* Note: This method is also used to convert the first timestamp of the batch (which is usually the timestamp of the first record) | ||
* to the delete horizon of the tombstones or txn markers which are present in the batch. | ||
* to the delete horizon of the tombstones or txn markers which are present in the batch. | ||
*/ | ||
private static FilterResult filterTo(TopicPartition partition, Iterable<MutableRecordBatch> batches, | ||
RecordFilter filter, ByteBuffer destinationBuffer, int maxRecordBatchSize, | ||
|
@@ -807,4 +809,62 @@ private static void writeSnapshotFooterRecord( | |
builder.appendSnapshotFooterMessage(timestamp, snapshotFooterRecord); | ||
} | ||
} | ||
|
||
public static MemoryRecords withKRaftVersionRecord( | ||
long initialOffset, | ||
long timestamp, | ||
int leaderEpoch, | ||
ByteBuffer buffer, | ||
KRaftVersionRecord kraftVersionRecord | ||
) { | ||
writeKRaftVersionRecord(buffer, initialOffset, timestamp, leaderEpoch, kraftVersionRecord); | ||
buffer.flip(); | ||
return MemoryRecords.readableRecords(buffer); | ||
} | ||
|
||
private static void writeKRaftVersionRecord( | ||
ByteBuffer buffer, | ||
long initialOffset, | ||
long timestamp, | ||
int leaderEpoch, | ||
KRaftVersionRecord kraftVersionRecord | ||
) { | ||
try (MemoryRecordsBuilder builder = new MemoryRecordsBuilder( | ||
buffer, RecordBatch.CURRENT_MAGIC_VALUE, CompressionType.NONE, | ||
TimestampType.CREATE_TIME, initialOffset, timestamp, | ||
RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, | ||
false, true, leaderEpoch, buffer.capacity()) | ||
) { | ||
builder.appendKRaftVersionMessage(timestamp, kraftVersionRecord); | ||
} | ||
} | ||
|
||
public static MemoryRecords withVotersRecord( | ||
long initialOffset, | ||
long timestamp, | ||
int leaderEpoch, | ||
ByteBuffer buffer, | ||
VotersRecord votersRecord | ||
) { | ||
writeVotersRecord(buffer, initialOffset, timestamp, leaderEpoch, votersRecord); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: This looks a little odd. We create two There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean we by "we create two MemoryRecords instances"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are discarding the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I see that. Looks like this is an existing issue with existing control record builders. Let me fix the ones that are specific for KRaft. We can fix the other ones in another PR. |
||
buffer.flip(); | ||
return MemoryRecords.readableRecords(buffer); | ||
} | ||
|
||
private static void writeVotersRecord( | ||
ByteBuffer buffer, | ||
long initialOffset, | ||
long timestamp, | ||
int leaderEpoch, | ||
VotersRecord votersRecord | ||
) { | ||
try (MemoryRecordsBuilder builder = new MemoryRecordsBuilder( | ||
buffer, RecordBatch.CURRENT_MAGIC_VALUE, CompressionType.NONE, | ||
TimestampType.CREATE_TIME, initialOffset, timestamp, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: as long as we're separating args into lines, how about one argument per line? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, | ||
false, true, leaderEpoch, buffer.capacity()) | ||
) { | ||
builder.appendVotersMessage(timestamp, votersRecord); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one or more | ||
// contributor license agreements. See the NOTICE file distributed with | ||
// this work for additional information regarding copyright ownership. | ||
// The ASF licenses this file to You under the Apache License, Version 2.0 | ||
// (the "License"); you may not use this file except in compliance with | ||
// the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
{ | ||
"type": "data", | ||
"name": "KRaftVersionRecord", | ||
"validVersions": "0", | ||
"flexibleVersions": "0+", | ||
"fields": [ | ||
{ "name": "Version", "type": "int16", "versions": "0+", | ||
"about": "The version of the kraft version record" }, | ||
{ "name": "KRaftVersion", "type": "int16", "versions": "0+", | ||
"about": "The kraft protocol version" } | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was it a bug that we only allowed version 1 and above? I'm wondering if we really need to change it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I think this was missed during the original implementation. The default value for any feature version is 0 but that cannot be expressed in the range of supported versions since it doesn't allow 0 as the min or max value.