From 1b5879653e0d956c79556301d1d11987baf6f2d7 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 09:48:02 -0700 Subject: [PATCH 001/267] Changing version to 0.10.0.0 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index b058e58cbe30..7f30b4d4d7ed 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. -version=0.10.0.0-SNAPSHOT +version=0.10.0.0 scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m From 4f8b3aed8dc6a77293c526253123d999a23af149 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Mon, 21 Mar 2016 12:06:07 -0700 Subject: [PATCH 002/267] MINOR: Add InterfaceStability.Unstable annotations to some Kafka Streams public APIs Also improves Java docs for the Streams high-level DSL. Author: Guozhang Wang Reviewers: Ismael Juma, Michael G. Noll Closes #1097 from guozhangwang/KNewJavaDoc (cherry picked from commit b6c29e3810bd59f39fa93c429817396cf8c324b7) Signed-off-by: Guozhang Wang --- .../kafka/streams/kstream/Aggregator.java | 6 +- .../kafka/streams/kstream/Initializer.java | 4 +- .../kafka/streams/kstream/JoinWindows.java | 15 +- .../apache/kafka/streams/kstream/KStream.java | 270 ++++++++++-------- .../kafka/streams/kstream/KStreamBuilder.java | 44 ++- .../apache/kafka/streams/kstream/KTable.java | 165 ++++++----- .../kafka/streams/kstream/KeyValueMapper.java | 6 +- .../kafka/streams/kstream/Predicate.java | 4 +- .../apache/kafka/streams/kstream/Reducer.java | 2 +- .../kafka/streams/kstream/Transformer.java | 6 +- .../kafka/streams/kstream/ValueJoiner.java | 6 +- .../kafka/streams/kstream/ValueMapper.java | 4 +- .../streams/kstream/ValueTransformer.java | 4 +- .../kafka/streams/kstream/Windowed.java | 5 +- .../apache/kafka/streams/kstream/Windows.java | 33 +-- .../processor/DefaultPartitionGrouper.java | 8 +- .../streams/processor/ProcessorContext.java | 2 + .../kafka/streams/state/KeyValueStore.java | 2 + .../kafka/streams/state/WindowStore.java | 2 + 19 files changed, 303 insertions(+), 285 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Aggregator.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Aggregator.java index 0d29409bc116..9ec9f966442a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Aggregator.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Aggregator.java @@ -20,9 +20,9 @@ /** * The Aggregator interface for aggregating values of the given key. * - * @param Key type. - * @param Receiving value type. - * @param Aggregate value type. + * @param key type + * @param original value type + * @param aggregate value type */ public interface Aggregator { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Initializer.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Initializer.java index fdd522030a7a..67c1c2163840 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Initializer.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Initializer.java @@ -18,9 +18,9 @@ package org.apache.kafka.streams.kstream; /** - * The Initializer interface for creating an initial value for aggregations. + * The Initializer interface for creating an initial value in aggregations. * - * @param Aggregate value type. 
+ * @param aggregate value type */ public interface Initializer { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java index 70294a80d327..24dbdd33b254 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java @@ -45,8 +45,7 @@ public static JoinWindows of(String name) { * Specifies that records of the same key are joinable if their timestamp stamps are within * timeDifference. * - * @param timeDifference - * @return + * @param timeDifference join window interval in milliseconds */ public JoinWindows within(long timeDifference) { return new JoinWindows(this.name, timeDifference, timeDifference); @@ -54,11 +53,10 @@ public JoinWindows within(long timeDifference) { /** * Specifies that records of the same key are joinable if their timestamp stamps are within - * timeDifference, and if the timestamp of a record from the secondary stream is - * is earlier than or equal to the timestamp of a record from the first stream. + * the join window interval, and if the timestamp of a record from the secondary stream is + * earlier than or equal to the timestamp of a record from the first stream. * - * @param timeDifference - * @return + * @param timeDifference join window interval in milliseconds */ public JoinWindows before(long timeDifference) { return new JoinWindows(this.name, timeDifference, this.after); @@ -66,11 +64,10 @@ public JoinWindows before(long timeDifference) { /** * Specifies that records of the same key are joinable if their timestamp stamps are within - * timeDifference, and if the timestamp of a record from the secondary stream is + * the join window interval, and if the timestamp of a record from the secondary stream * is later than or equal to the timestamp of a record from the first stream. * - * @param timeDifference - * @return + * @param timeDifference join window interval in milliseconds */ public JoinWindows after(long timeDifference) { return new JoinWindows(this.name, this.before, timeDifference); diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java index 6f05c3b7213d..c4188de44c50 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java @@ -17,116 +17,110 @@ package org.apache.kafka.streams.kstream; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.ProcessorSupplier; /** - * KStream is an abstraction of an event stream in key-value pairs. + * KStream is an abstraction of a record stream of key-value pairs. * * @param Type of keys * @param Type of values */ +@InterfaceStability.Unstable public interface KStream { /** - * Creates a new instance of KStream consists of all elements of this stream which satisfy a predicate + * Create a new instance of {@link KStream} that consists of all elements of this stream which satisfy a predicate. 
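For illustration, a minimal sketch (not taken from the patch) of the filter() call documented above, assuming a KStream<String, Long> named "stream":

    KStream<String, Long> positives = stream.filter((key, value) -> value > 0);
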
* - * @param predicate the instance of Predicate - * @return the instance of KStream with only those elements that satisfy the predicate + * @param predicate the instance of {@link Predicate} */ KStream filter(Predicate predicate); /** - * Creates a new instance of KStream consists all elements of this stream which do not satisfy a predicate + * Create a new instance of {@link KStream} that consists all elements of this stream which do not satisfy a predicate. * - * @param predicate the instance of Predicate - * @return the instance of KStream with only those elements that do not satisfy the predicate + * @param predicate the instance of {@link Predicate} */ KStream filterOut(Predicate predicate); /** - * Creates a new instance of KStream by applying transforming each element in this stream into a different element in the new stream. + * Create a new instance of {@link KStream} by transforming each element in this stream into a different element in the new stream. * - * @param mapper the instance of KeyValueMapper - * @param the key type of the new stream - * @param the value type of the new stream - * @return the instance of KStream + * @param mapper the instance of {@link KeyValueMapper} + * @param the key type of the new stream + * @param the value type of the new stream */ KStream map(KeyValueMapper> mapper); /** - * Creates a new instance of KStream by transforming each value in this stream into a different value in the new stream. + * Create a new instance of {@link KStream} by transforming the value of each element in this stream into a new value in the new stream. * - * @param mapper the instance of ValueMapper - * @param the value type of the new stream - * @return the instance of KStream + * @param mapper the instance of {@link ValueMapper} + * @param the value type of the new stream */ KStream mapValues(ValueMapper mapper); /** - * Creates a new instance of KStream by transforming each element in this stream into zero or more elements in the new stream. + * Create a new instance of {@link KStream} by transforming each element in this stream into zero or more elements in the new stream. * - * @param mapper the instance of KeyValueMapper - * @param the key type of the new stream - * @param the value type of the new stream - * @return the instance of KStream + * @param mapper the instance of {@link KeyValueMapper} + * @param the key type of the new stream + * @param the value type of the new stream */ KStream flatMap(KeyValueMapper>> mapper); /** - * Creates a new stream by transforming each value in this stream into zero or more values in the new stream. + * Create a new instance of {@link KStream} by transforming the value of each element in this stream into zero or more values with the same key in the new stream. * - * @param processor the instance of Processor - * @param the value type of the new stream - * @return the instance of KStream + * @param processor the instance of {@link ValueMapper} + * @param the value type of the new stream */ KStream flatMapValues(ValueMapper> processor); /** - * Creates an array of streams from this stream. Each stream in the array corresponds to a predicate in - * supplied predicates in the same order. Predicates are evaluated in order. An element is streamed to - * a corresponding stream for the first predicate is evaluated true. - * An element will be dropped if none of the predicates evaluate true. + * Creates an array of {@link KStream} from this stream by branching the elements in the original stream based on the supplied predicates. 
+ * Each element is evaluated against the supplied predicates, and predicates are evaluated in order. Each stream in the result array + * corresponds position-wise (index) to the predicate in the supplied predicates. The branching happens on first-match: An element + * in the original stream is assigned to the corresponding result stream for the first predicate that evaluates to true, and + * assigned to this stream only. An element will be dropped if none of the predicates evaluate to true. * - * @param predicates the ordered list of Predicate instances - * @return the instances of KStream that each contain those elements for which their Predicate evaluated to true. + * @param predicates the ordered list of {@link Predicate} instances */ KStream[] branch(Predicate... predicates); /** - * Sends key-value to a topic, also creates a new instance of KStream from the topic. - * This is equivalent to calling to(topic) and from(topic). + * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic + * using default serializers and deserializers. + * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(String...)}. * * @param topic the topic name - * - * @return the instance of {@link KStream} that consumes the given topic */ KStream through(String topic); /** - * Sends key-value to a topic, also creates a new instance of KStream from the topic. - * This is equivalent to calling to(topic) and from(topic). + * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic. + * This is equivalent to calling {@link #to(Serde, Serde, String)} and + * {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(Serde, Serde, String...)}. * * @param keySerde key serde used to send key-value pairs, * if not specified the default key serde defined in the configuration will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default value serde defined in the configuration will be used * @param topic the topic name - * - * @return the instance of {@link KStream} that consumes the given topic */ KStream through(Serde keySerde, Serde valSerde, String topic); /** - * Sends key-value to a topic using default serializers specified in the config. + * Materialize this stream to a topic using default serializers specified in the config. * * @param topic the topic name */ void to(String topic); /** - * Sends key-value to a topic. + * Materialize this stream to a topic. * * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used @@ -137,45 +131,43 @@ public interface KStream { void to(Serde keySerde, Serde valSerde, String topic); /** - * Applies a stateful transformation to all elements in this stream. + * Create a new {@link KStream} instance by applying a {@link org.apache.kafka.streams.kstream.Transformer} to all elements in this stream, one element at a time. 
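For illustration, a sketch (not taken from the patch) of the first-match branching described above, assuming a KStream<String, Long> named "stream":

    KStream<String, Long>[] branches = stream.branch(
        (key, value) -> value > 0,     // positive values land here (first matching predicate wins)
        (key, value) -> value < 0);    // negative values land here; zeros match neither and are dropped
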
* - * @param transformerSupplier the class of valueTransformerSupplier - * @param stateStoreNames the names of the state store used by the processor - * @return the instance of KStream that contains transformed keys and values + * @param transformerSupplier the instance of {@link TransformerSupplier} that generates {@link org.apache.kafka.streams.kstream.Transformer} + * @param stateStoreNames the names of the state store used by the processor */ KStream transform(TransformerSupplier> transformerSupplier, String... stateStoreNames); /** - * Applies a stateful transformation to all values in this stream. + * Create a new {@link KStream} instance by applying a {@link org.apache.kafka.streams.kstream.ValueTransformer} to all values in this stream, one element at a time. * - * @param valueTransformerSupplier the class of valueTransformerSupplier - * @param stateStoreNames the names of the state store used by the processor - * @return the instance of KStream that contains the keys and transformed values + * @param valueTransformerSupplier the instance of {@link ValueTransformerSupplier} that generates {@link org.apache.kafka.streams.kstream.ValueTransformer} + * @param stateStoreNames the names of the state store used by the processor */ KStream transformValues(ValueTransformerSupplier valueTransformerSupplier, String... stateStoreNames); /** - * Processes all elements in this stream by applying a processor. + * Process all elements in this stream, one element at a time, by applying a {@link org.apache.kafka.streams.processor.Processor}. * - * @param processorSupplier the supplier of the Processor to use - * @param stateStoreNames the names of the state store used by the processor + * @param processorSupplier the supplier of {@link ProcessorSupplier} that generates {@link org.apache.kafka.streams.processor.Processor} + * @param stateStoreNames the names of the state store used by the processor */ void process(ProcessorSupplier processorSupplier, String... stateStoreNames); /** - * Combines values of this stream with another KStream using Windowed Inner Join. + * Combine element values of this stream with another {@link KStream}'s elements of the same key using windowed Inner Join. 
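For illustration, a sketch of the windowed inner join summarized above, using the default-serde overload and assuming two streams "left" and "right" of type KStream<String, Long>; the window name and interval are made up for the example:

    KStream<String, String> joined = left.join(right,
        (leftValue, rightValue) -> leftValue + "/" + rightValue,    // ValueJoiner
        JoinWindows.of("join-window").within(60 * 1000L));          // records within one minute are joinable
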
* - * @param otherStream the instance of KStream joined with this stream - * @param joiner ValueJoiner - * @param windows the specification of the join window - * @param keySerde key serdes, - * if not specified the default serdes defined in the configs will be used - * @param thisValueSerde value serdes for this stream, - * if not specified the default serdes defined in the configs will be used - * @param otherValueSerde value serdes for other stream, - * if not specified the default serdes defined in the configs will be used - * @param the value type of the other stream - * @param the value type of the new stream + * @param otherStream the instance of {@link KStream} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param windows the specification of the {@link JoinWindows} + * @param keySerde key serdes for materializing both streams, + * if not specified the default serdes defined in the configs will be used + * @param thisValueSerde value serdes for materializing this stream, + * if not specified the default serdes defined in the configs will be used + * @param otherValueSerde value serdes for materializing the other stream, + * if not specified the default serdes defined in the configs will be used + * @param the value type of the other stream + * @param the value type of the new stream */ KStream join( KStream otherStream, @@ -186,7 +178,8 @@ KStream join( Serde otherValueSerde); /** - * Combines values of this stream with another KStream using Windowed Inner Join. + * Combine element values of this stream with another {@link KStream}'s elements of the same key using windowed Inner Join + * with default serializers and deserializers. * * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} @@ -200,19 +193,19 @@ KStream join( JoinWindows windows); /** - * Combines values of this stream with another KStream using Windowed Outer Join. + * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Outer Join. 
* - * @param otherStream the instance of KStream joined with this stream - * @param joiner ValueJoiner - * @param windows the specification of the join window - * @param keySerde key serdes, - * if not specified the default serdes defined in the configs will be used - * @param thisValueSerde value serdes for this stream, - * if not specified the default serdes defined in the configs will be used - * @param otherValueSerde value serdes for other stream, - * if not specified the default serdes defined in the configs will be used - * @param the value type of the other stream - * @param the value type of the new stream + * @param otherStream the instance of {@link KStream} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param windows the specification of the {@link JoinWindows} + * @param keySerde key serdes for materializing both streams, + * if not specified the default serdes defined in the configs will be used + * @param thisValueSerde value serdes for materializing this stream, + * if not specified the default serdes defined in the configs will be used + * @param otherValueSerde value serdes for materializing the other stream, + * if not specified the default serdes defined in the configs will be used + * @param the value type of the other stream + * @param the value type of the new stream */ KStream outerJoin( KStream otherStream, @@ -223,7 +216,8 @@ KStream outerJoin( Serde otherValueSerde); /** - * Combines values of this stream with another KStream using Windowed Outer Join. + * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Outer Join + * with default serializers and deserializers. * * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} @@ -237,17 +231,17 @@ KStream outerJoin( JoinWindows windows); /** - * Combines values of this stream with another KStream using Windowed Left Join. + * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Left Join. * - * @param otherStream the instance of KStream joined with this stream - * @param joiner ValueJoiner - * @param windows the specification of the join window - * @param keySerde key serdes, - * if not specified the default serdes defined in the configs will be used - * @param otherValueSerde value serdes for other stream, - * if not specified the default serdes defined in the configs will be used - * @param the value type of the other stream - * @param the value type of the new stream + * @param otherStream the instance of {@link KStream} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param windows the specification of the {@link JoinWindows} + * @param keySerde key serdes for materializing the other stream, + * if not specified the default serdes defined in the configs will be used + * @param otherValueSerde value serdes for materializing the other stream, + * if not specified the default serdes defined in the configs will be used + * @param the value type of the other stream + * @param the value type of the new stream */ KStream leftJoin( KStream otherStream, @@ -257,7 +251,8 @@ KStream leftJoin( Serde otherValueSerde); /** - * Combines values of this stream with another KStream using Windowed Left Join. + * Combine values of this stream with another {@link KStream}'s elements of the same key using windowed Left Join + * with default serializers and deserializers. 
* * @param otherStream the instance of {@link KStream} joined with this stream * @param joiner the instance of {@link ValueJoiner} @@ -271,7 +266,7 @@ KStream leftJoin( JoinWindows windows); /** - * Combines values of this stream with KTable using Left Join. + * Combine values of this stream with {@link KTable}'s elements of the same key using non-windowed Left Join. * * @param table the instance of {@link KTable} joined with this stream * @param joiner the instance of {@link ValueJoiner} @@ -281,48 +276,63 @@ KStream leftJoin( KStream leftJoin(KTable table, ValueJoiner joiner); /** - * Aggregate values of this stream by key on a window basis. + * Combine values of this stream by key on a window basis into a new instance of windowed {@link KTable}. * - * @param reducer the class of {@link Reducer} - * @param windows the specification of the aggregation {@link Windows} + * @param reducer the instance of {@link Reducer} + * @param windows the specification of the aggregation {@link Windows} + * @param keySerde key serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param valueSerde value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used */ KTable, V> reduceByKey(Reducer reducer, Windows windows, Serde keySerde, - Serde aggValueSerde); + Serde valueSerde); /** - * Aggregate values of this stream by key on a window basis. + * Combine values of this stream by key on a window basis into a new instance of windowed {@link KTable} + * with default serializers and deserializers. * - * @param reducer the class of {@link Reducer} + * @param reducer the instance of {@link Reducer} * @param windows the specification of the aggregation {@link Windows} */ KTable, V> reduceByKey(Reducer reducer, Windows windows); /** - * Aggregate values of this stream by key on a window basis. + * Combine values of this stream by key into a new instance of ever-updating {@link KTable}. * - * @param reducer the class of Reducer + * @param reducer the instance of {@link Reducer} + * @param keySerde key serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param valueSerde value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted {@link KTable} */ KTable reduceByKey(Reducer reducer, Serde keySerde, - Serde aggValueSerde, + Serde valueSerde, String name); /** - * Aggregate values of this stream by key on a window basis. + * Combine values of this stream by key into a new instance of ever-updating {@link KTable} with default serializers and deserializers. * - * @param reducer the class of {@link Reducer} + * @param reducer the instance of {@link Reducer} + * @param name the name of the resulted {@link KTable} */ KTable reduceByKey(Reducer reducer, String name); /** - * Aggregate values of this stream by key on a window basis. + * Aggregate values of this stream by key on a window basis into a new instance of windowed {@link KTable}. 
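For illustration, a sketch (not from the patch) of the non-windowed aggregateByKey overload declared later in this interface, assuming a KStream<String, Long> named "stream"; the store name is made up:

    KTable<String, Long> sums = stream.aggregateByKey(
        () -> 0L,                                      // Initializer: starting aggregate per key
        (key, value, aggregate) -> aggregate + value,  // Aggregator: fold each new value into the sum
        "sums-store");

Passing a Windows specification instead of the name yields a windowed KTable<Windowed<K>, T>, as described above.
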
* - * @param initializer the class of Initializer - * @param aggregator the class of Aggregator - * @param windows the specification of the aggregation {@link Windows} - * @param the value type of the aggregated table + * @param initializer the instance of {@link Initializer} + * @param aggregator the instance of {@link Aggregator} + * @param windows the specification of the aggregation {@link Windows} + * @param keySerde key serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param aggValueSerde aggregate value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param the value type of the resulted {@link KTable} */ KTable, T> aggregateByKey(Initializer initializer, Aggregator aggregator, @@ -331,25 +341,29 @@ KTable, T> aggregateByKey(Initializer initi Serde aggValueSerde); /** - * Aggregate values of this stream by key on a window basis. + * Aggregate values of this stream by key on a window basis into a new instance of windowed {@link KTable} + * with default serializers and deserializers. * - * @param initializer the class of {@link Initializer} - * @param aggregator the class of {@link Aggregator} + * @param initializer the instance of {@link Initializer} + * @param aggregator the instance of {@link Aggregator} * @param windows the specification of the aggregation {@link Windows} - * @param the value type of the aggregated table + * @param the value type of the resulted {@link KTable} */ KTable, T> aggregateByKey(Initializer initializer, Aggregator aggregator, Windows windows); /** - * Aggregate values of this stream by key without a window basis, and hence - * return an ever updating table + * Aggregate values of this stream by key into a new instance of ever-updating {@link KTable}. * * @param initializer the class of {@link Initializer} * @param aggregator the class of {@link Aggregator} - * @param name the name of the aggregated table - * @param the value type of the aggregated table + * @param keySerde key serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param aggValueSerde aggregate value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted {@link KTable} + * @param the value type of the resulted {@link KTable} */ KTable aggregateByKey(Initializer initializer, Aggregator aggregator, @@ -358,45 +372,49 @@ KTable aggregateByKey(Initializer initializer, String name); /** - * Aggregate values of this stream by key without a window basis, and hence - * return an ever updating table + * Aggregate values of this stream by key into a new instance of ever-updating {@link KTable} + * with default serializers and deserializers. * * @param initializer the class of {@link Initializer} * @param aggregator the class of {@link Aggregator} - * @param name the name of the aggregated table - * @param the value type of the aggregated table + * @param name the name of the resulted {@link KTable} + * @param the value type of the resulted {@link KTable} */ KTable aggregateByKey(Initializer initializer, Aggregator aggregator, String name); /** - * Count number of messages of this stream by key on a window basis. + * Count number of messages of this stream by key on a window basis into a new instance of windowed {@link KTable}. 
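Similarly, a one-line sketch of the ever-updating countByKey variant (same assumed stream, made-up store name):

    KTable<String, Long> counts = stream.countByKey("counts-store");
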
* * @param windows the specification of the aggregation {@link Windows} + * @param keySerde key serdes for materializing the counting table, + * if not specified the default serdes defined in the configs will be used */ KTable, Long> countByKey(Windows windows, Serde keySerde); /** - * Count number of messages of this stream by key on a window basis. + * Count number of messages of this stream by key on a window basis into a new instance of windowed {@link KTable} + * with default serializers and deserializers. * * @param windows the specification of the aggregation {@link Windows} */ KTable, Long> countByKey(Windows windows); /** - * Count number of messages of this stream by key without a window basis, and hence - * return a ever updating counting table. + * Count number of messages of this stream by key into a new instance of ever-updating {@link KTable}. * - * @param name the name of the aggregated table + * @param keySerde key serdes for materializing the counting table, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted {@link KTable} */ KTable countByKey(Serde keySerde, String name); /** - * Count number of messages of this stream by key without a window basis, and hence - * return a ever updating counting table. + * Count number of messages of this stream by key into a new instance of ever-updating {@link KTable} + * with default serializers and deserializers. * - * @param name the name of the aggregated table + * @param name the name of the resulted {@link KTable} */ KTable countByKey(String name); diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java index dfd9281e711e..6b770b4006e4 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java @@ -28,8 +28,8 @@ import java.util.concurrent.atomic.AtomicInteger; /** - * KStreamBuilder is a subclass of {@link TopologyBuilder} that provides the {@link KStream} DSL - * for users to specify computational logic and translates the given logic to a processor topology. + * KStreamBuilder is a subclass of {@link TopologyBuilder} that provides the Kafka Streams DSL + * for users to specify computational logic and translates the given logic to a {@link org.apache.kafka.streams.processor.internals.ProcessorTopology}. */ public class KStreamBuilder extends TopologyBuilder { @@ -40,25 +40,23 @@ public KStreamBuilder() { } /** - * Creates a KStream instance for the specified topic. + * Creates a {@link KStream} instance from the specified topics. * The default deserializers specified in the config are used. * - * @param topics the topic names, if empty default to all the topics in the config - * @return KStream + * @param topics the topic names; must contain at least one topic name */ public KStream stream(String... topics) { return stream(null, null, topics); } /** - * Creates a KStream instance for the specified topic. + * Creates a {@link KStream} instance for the specified topics. 
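For illustration, a sketch (not part of the patch) of creating a source stream with the builder; the topic name is made up:

    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, String> textLines = builder.stream("input-topic");   // default serdes from the config
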
* - * @param keySerde key serde used to read this source KStream, - * if not specified the default serde defined in the configs will be used - * @param valSerde value serde used to read this source KStream, - * if not specified the default serde defined in the configs will be used - * @param topics the topic names, if empty default to all the topics in the config - * @return KStream + * @param keySerde key serde used to read this source {@link KStream}, + * if not specified the default serde defined in the configs will be used + * @param valSerde value serde used to read this source {@link KStream}, + * if not specified the default serde defined in the configs will be used + * @param topics the topic names; must contain at least one topic name */ public KStream stream(Serde keySerde, Serde valSerde, String... topics) { String name = newName(KStreamImpl.SOURCE_NAME); @@ -69,25 +67,23 @@ public KStream stream(Serde keySerde, Serde valSerde, String. } /** - * Creates a KTable instance for the specified topic. + * Creates a {@link KTable} instance for the specified topic. * The default deserializers specified in the config are used. * - * @param topic the topic name - * @return KTable + * @param topic the topic name; cannot be null */ public KTable table(String topic) { return table(null, null, topic); } /** - * Creates a KTable instance for the specified topic. + * Creates a {@link KTable} instance for the specified topic. * * @param keySerde key serde used to send key-value pairs, - * if not specified the default key serde defined in the configuration will be used + * if not specified the default key serde defined in the configuration will be used * @param valSerde value serde used to send key-value pairs, - * if not specified the default value serde defined in the configuration will be used - * @param topic the topic name - * @return KStream + * if not specified the default value serde defined in the configuration will be used + * @param topic the topic name; cannot be null */ public KTable table(Serde keySerde, Serde valSerde, String topic) { String source = newName(KStreamImpl.SOURCE_NAME); @@ -102,10 +98,9 @@ public KTable table(Serde keySerde, Serde valSerde, String to } /** - * Creates a new stream by merging the given streams + * Creates a new instance of {@link KStream} by merging the given streams * - * @param streams the streams to be merged - * @return KStream + * @param streams the instances of {@link KStream} to be merged */ public KStream merge(KStream... streams) { return KStreamImpl.merge(this, streams); @@ -115,8 +110,7 @@ public KStream merge(KStream... streams) { * Create a unique processor name used for translation into the processor topology. * This function is only for internal usage. * - * @param prefix Processor name prefix. - * @return The unique processor name. 
+ * @param prefix processor name prefix */ public String newName(String prefix) { return prefix + String.format("%010d", index.getAndIncrement()); diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java index 997cb4dc9f10..9a2a8a8b7e8a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java @@ -17,73 +17,72 @@ package org.apache.kafka.streams.kstream; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; /** - * KTable is an abstraction of a change log stream from a primary-keyed table. + * KTable is an abstraction of a changelog stream from a primary-keyed table. * * @param Type of primary keys * @param Type of value changes */ +@InterfaceStability.Unstable public interface KTable { /** - * Creates a new instance of KTable consists of all elements of this stream which satisfy a predicate + * Create a new instance of {@link KTable} that consists of all elements of this stream which satisfy a predicate. * - * @param predicate the instance of Predicate - * @return the instance of KTable with only those elements that satisfy the predicate + * @param predicate the instance of {@link Predicate} */ KTable filter(Predicate predicate); /** - * Creates a new instance of KTable consists all elements of this stream which do not satisfy a predicate + * Create a new instance of {@link KTable} that consists all elements of this stream which do not satisfy a predicate * - * @param predicate the instance of Predicate - * @return the instance of KTable with only those elements that do not satisfy the predicate + * @param predicate the instance of {@link Predicate} */ KTable filterOut(Predicate predicate); /** - * Creates a new instance of KTable by transforming each value in this stream into a different value in the new stream. + * Create a new instance of {@link KTable} by transforming the value of each element in this stream into a new value in the new stream. * - * @param mapper the instance of ValueMapper - * @param the value type of the new stream - * @return the instance of KTable + * @param mapper the instance of {@link ValueMapper} + * @param the value type of the new stream */ KTable mapValues(ValueMapper mapper); /** - * Sends key-value to a topic, also creates a new instance of KTable from the topic. - * This is equivalent to calling to(topic) and table(topic). + * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic + * using default serializers and deserializers. + * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(String)}. * - * @param topic the topic name - * @return the instance of KTable that consumes the given topic + * @param topic the topic name */ KTable through(String topic); /** - * Sends key-value to a topic, also creates a new instance of KTable from the topic. - * This is equivalent to calling to(topic) and table(topic). + * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic. + * This is equivalent to calling {@link #to(Serde, Serde, String)} and + * {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(Serde, Serde, String)}. 
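For illustration, a sketch of reading and transforming a KTable (not part of the patch), assuming a KStreamBuilder named "builder" and a made-up topic name:

    KTable<String, Long> table = builder.table("state-topic");
    KTable<String, String> rendered = table.mapValues(value -> "value=" + value);
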
* * @param keySerde key serde used to send key-value pairs, * if not specified the default key serde defined in the configuration will be used * @param valSerde value serde used to send key-value pairs, * if not specified the default value serde defined in the configuration will be used * @param topic the topic name - * @return the new stream that consumes the given topic */ KTable through(Serde keySerde, Serde valSerde, String topic); /** - * Sends key-value to a topic using default serializers specified in the config. + * Materialize this stream to a topic using default serializers specified in the config. * * @param topic the topic name */ void to(String topic); /** - * Sends key-value to a topic. + * Materialize this stream to a topic. * * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used @@ -94,55 +93,53 @@ public interface KTable { void to(Serde keySerde, Serde valSerde, String topic); /** - * Creates a new instance of KStream from this KTable - * - * @return the instance of KStream + * Convert this stream to a new instance of {@link KStream}. */ KStream toStream(); /** - * Combines values of this KTable with another KTable using Inner Join. + * Combine values of this stream with another {@link KTable} stream's elements of the same key using Inner Join. * - * @param other the instance of KTable joined with this stream - * @param joiner ValueJoiner - * @param the value type of the other stream - * @param the value type of the new stream - * @return the instance of KTable + * @param other the instance of {@link KTable} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param the value type of the other stream + * @param the value type of the new stream */ KTable join(KTable other, ValueJoiner joiner); /** - * Combines values of this KTable with another KTable using Outer Join. + * Combine values of this stream with another {@link KTable} stream's elements of the same key using Outer Join. * - * @param other the instance of KTable joined with this stream - * @param joiner ValueJoiner - * @param the value type of the other stream - * @param the value type of the new stream - * @return the instance of KTable + * @param other the instance of {@link KTable} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param the value type of the other stream + * @param the value type of the new stream */ KTable outerJoin(KTable other, ValueJoiner joiner); /** - * Combines values of this KTable with another KTable using Left Join. + * Combine values of this stream with another {@link KTable} stream's elements of the same key using Left Join. * - * @param other the instance of KTable joined with this stream - * @param joiner ValueJoiner - * @param the value type of the other stream - * @param the value type of the new stream - * @return the instance of KTable + * @param other the instance of {@link KTable} joined with this stream + * @param joiner the instance of {@link ValueJoiner} + * @param the value type of the other stream + * @param the value type of the new stream */ KTable leftJoin(KTable other, ValueJoiner joiner); /** - * Reduce values of this table by the selected key. + * Combine updating values of this stream by the selected key into a new instance of {@link KTable}. 
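For illustration, a sketch of the reduce() call summarized above (default-serde overload, not part of the patch), assuming a KTable<String, Long> named "table" and a made-up store name:

    KTable<String, Long> totals = table.reduce(
        (v1, v2) -> v1 + v2,                                          // adder
        (v1, v2) -> v1 - v2,                                          // subtractor
        (key, value) -> new KeyValue<>(key.substring(0, 1), value),   // selector: re-key by first character
        "totals-store");
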
* - * @param adder the class of Reducer - * @param subtractor the class of Reducer - * @param selector the KeyValue mapper that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @param the value type of the aggregated table - * @return the instance of KTable + * @param adder the instance of {@link Reducer} for addition + * @param subtractor the instance of {@link Reducer} for subtraction + * @param selector the instance of {@link KeyValueMapper} that select the aggregate key + * @param keySerde key serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param valueSerde value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted {@link KTable} + * @param the key type of the aggregated {@link KTable} + * @param the value type of the aggregated {@link KTable} */ KTable reduce(Reducer adder, Reducer subtractor, @@ -152,15 +149,15 @@ KTable reduce(Reducer adder, String name); /** - * Reduce values of this table by the selected key. + * Combine updating values of this stream by the selected key into a new instance of {@link KTable} + * using default serializers and deserializers. * * @param adder the instance of {@link Reducer} for addition * @param subtractor the instance of {@link Reducer} for subtraction * @param selector the instance of {@link KeyValueMapper} that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @param the value type of the aggregated table - * @return the instance of KTable + * @param name the name of the resulted {@link KTable} + * @param the key type of the aggregated {@link KTable} + * @param the value type of the aggregated {@link KTable} */ KTable reduce(Reducer adder, Reducer subtractor, @@ -168,20 +165,26 @@ KTable reduce(Reducer adder, String name); /** - * Aggregate values of this table by the selected key. + * Aggregate updating values of this stream by the selected key into a new instance of {@link KTable}. 
* - * @param initializer the class of Initializer - * @param add the class of Aggregator - * @param remove the class of Aggregator - * @param selector the KeyValue mapper that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @param the value type of the aggregated table - * @return the instance of KTable + * @param initializer the instance of {@link Initializer} + * @param adder the instance of {@link Aggregator} for addition + * @param substractor the instance of {@link Aggregator} for subtraction + * @param selector the instance of {@link KeyValueMapper} that select the aggregate key + * @param keySerde key serdes for materializing this stream and the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param valueSerde value serdes for materializing this stream, + * if not specified the default serdes defined in the configs will be used + * @param aggValueSerde value serdes for materializing the aggregated table, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted table + * @param the key type of this {@link KTable} + * @param the value type of this {@link KTable} + * @param the value type of the aggregated {@link KTable} */ KTable aggregate(Initializer initializer, - Aggregator add, - Aggregator remove, + Aggregator adder, + Aggregator substractor, KeyValueMapper> selector, Serde keySerde, Serde valueSerde, @@ -189,16 +192,17 @@ KTable aggregate(Initializer initializer, String name); /** - * Aggregate values of this table by the selected key. + * Aggregate updating values of this stream by the selected key into a new instance of {@link KTable} + * using default serializers and deserializers. * * @param initializer the instance of {@link Initializer} * @param adder the instance of {@link Aggregator} for addition * @param substractor the instance of {@link Aggregator} for subtraction * @param selector the instance of {@link KeyValueMapper} that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @param the value type of the aggregated table - * @return the instance of aggregated {@link KTable} + * @param name the name of the resulted {@link KTable} + * @param the key type of the aggregated {@link KTable} + * @param the value type of the aggregated {@link KTable} + * @param the value type of the aggregated {@link KTable} */ KTable aggregate(Initializer initializer, Aggregator adder, @@ -207,12 +211,15 @@ KTable aggregate(Initializer initializer, String name); /** - * Count number of records of this table by the selected key. + * Count number of records of this stream by the selected key into a new instance of {@link KTable}. 
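For illustration, a sketch of the count() call summarized above (default-serde overload, not part of the patch), again assuming a KTable<String, Long> named "table":

    KTable<Long, Long> countsPerValue = table.count((key, value) -> value, "counts-per-value");
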
* - * @param selector the KeyValue mapper that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @return the instance of KTable + * @param selector the instance of {@link KeyValueMapper} that select the aggregate key + * @param keySerde key serdes for materializing this stream, + * if not specified the default serdes defined in the configs will be used + * @param valueSerde value serdes for materializing this stream, + * if not specified the default serdes defined in the configs will be used + * @param name the name of the resulted table + * @param the key type of the aggregated {@link KTable} */ KTable count(KeyValueMapper selector, Serde keySerde, @@ -220,12 +227,12 @@ KTable count(KeyValueMapper selector, String name); /** - * Count number of records of this table by the selected key. + * Count number of records of this stream by the selected key into a new instance of {@link KTable} + * using default serializers and deserializers. * * @param selector the instance of {@link KeyValueMapper} that select the aggregate key - * @param name the name of the resulted table - * @param the key type of the aggregated table - * @return the instance of aggregated {@link KTable} + * @param name the name of the resulted {@link KTable} + * @param the key type of the aggregated {@link KTable} */ KTable count(KeyValueMapper selector, String name); } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KeyValueMapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KeyValueMapper.java index 9c04ef5aa7c8..a4aed91f92c4 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KeyValueMapper.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KeyValueMapper.java @@ -20,9 +20,9 @@ /** * The KeyValueMapper interface for mapping a key-value pair to a new value (could be another key-value pair). * - * @param Original key type. - * @param Original value type. - * @param Mapped value type. + * @param original key type + * @param original value type + * @param mapped value type */ public interface KeyValueMapper { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Predicate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Predicate.java index 784f5b1a0a08..c90554b192cc 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Predicate.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Predicate.java @@ -20,8 +20,8 @@ /** * The Predicate interface represents a predicate (boolean-valued function) of a key-value pair. * - * @param Key type. - * @param Value type. + * @param key type + * @param value type */ public interface Predicate { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Reducer.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Reducer.java index bf25f7360d11..551a6720e66c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Reducer.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Reducer.java @@ -20,7 +20,7 @@ /** * The Reducer interface for combining two values of the same type into a new value. * - * @param Value type. 
+ * @param value type */ public interface Reducer { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Transformer.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Transformer.java index 47198e4991d9..8069dca60602 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Transformer.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Transformer.java @@ -22,9 +22,9 @@ /** * A stateful Transformer interface for transform a key-value pair into a new value. * - * @param Key type. - * @param Value type. - * @param Return type. + * @param key type + * @param value type + * @param return type */ public interface Transformer { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueJoiner.java b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueJoiner.java index 41005b3997a5..5f00a1a7ff20 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueJoiner.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueJoiner.java @@ -20,9 +20,9 @@ /** * The ValueJoiner interface for joining two values and return a the joined new value. * - * @param First value type. - * @param Second value type. - * @param Joined value type. + * @param first value type + * @param second value type + * @param joined value type */ public interface ValueJoiner { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueMapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueMapper.java index d507c870c1ec..6e62a55774f4 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueMapper.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueMapper.java @@ -20,8 +20,8 @@ /** * The KeyValueMapper interface for mapping an original value to a new value (could be another key-value pair). * - * @param Original value type. - * @param Mapped value type. + * @param original value type + * @param mapped value type */ public interface ValueMapper { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueTransformer.java b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueTransformer.java index b4d2b38213cb..1a0679d68159 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/ValueTransformer.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/ValueTransformer.java @@ -22,8 +22,8 @@ /** * A stateful Value Transformer interface for transform a value into a new value. * - * @param Value type. - * @param Return type. + * @param value type + * @param return type */ public interface ValueTransformer { diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Windowed.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Windowed.java index eed5fe1f4022..369128277733 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Windowed.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Windowed.java @@ -19,9 +19,8 @@ /** * The windowed key interface used in {@link KTable}, used for representing a windowed table result from windowed stream aggregations, - * i.e. {@link KStream#aggregateByKey(Initializer, Aggregator, Windows, org.apache.kafka.common.serialization.Serializer, - * org.apache.kafka.common.serialization.Serializer, org.apache.kafka.common.serialization.Deserializer, - * org.apache.kafka.common.serialization.Deserializer)} + * i.e. 
{@link KStream#aggregateByKey(Initializer, Aggregator, Windows, org.apache.kafka.common.serialization.Serde, + * org.apache.kafka.common.serialization.Serde)} * * @param Type of the key */ diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java index 678e3516ad36..e7dc23ec9a73 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java @@ -24,7 +24,7 @@ /** * The window specification interface that can be extended for windowing operation in joins and aggregations. * - * @param Type of the window instance + * @param type of the window instance */ public abstract class Windows { @@ -38,17 +38,17 @@ public abstract class Windows { protected String name; - private long emitDuration; + private long emitDurationMs; - private long maintainDuration; + private long maintainDurationMs; public int segments; protected Windows(String name) { this.name = name; this.segments = DEFAULT_NUM_SEGMENTS; - this.emitDuration = DEFAULT_EMIT_DURATION; - this.maintainDuration = DEFAULT_MAINTAIN_DURATION; + this.emitDurationMs = DEFAULT_EMIT_DURATION; + this.maintainDurationMs = DEFAULT_MAINTAIN_DURATION; } public String name() { @@ -56,29 +56,26 @@ public String name() { } /** - * Set the window emit duration in milliseconds of system time + * Set the window emit duration in milliseconds of system time. */ - public Windows emit(long duration) { - this.emitDuration = duration; + public Windows emit(long durationMs) { + this.emitDurationMs = durationMs; return this; } /** - * Set the window maintain duration in milliseconds of system time + * Set the window maintain duration in milliseconds of system time. */ - public Windows until(long duration) { - this.maintainDuration = duration; + public Windows until(long durationMs) { + this.maintainDurationMs = durationMs; return this; } /** - * Specifies the number of segments to be used for rolling the window store, - * this function is not exposed to users but can be called by developers that extend this JoinWindows specs - * - * @param segments - * @return + * Specify the number of segments to be used for rolling the window store, + * this function is not exposed to users but can be called by developers that extend this JoinWindows specs. */ protected Windows segments(int segments) { this.segments = segments; @@ -87,11 +84,11 @@ protected Windows segments(int segments) { } public long emitEveryMs() { - return this.emitDuration; + return this.emitDurationMs; } public long maintainMs() { - return this.maintainDuration; + return this.maintainDurationMs; } protected String newName(String prefix) { diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java b/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java index 06681ac51e2e..999f6a94f3bf 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/DefaultPartitionGrouper.java @@ -39,11 +39,11 @@ public class DefaultPartitionGrouper implements PartitionGrouper { /** - * Generate tasks with the assigned topic partitions + * Generate tasks with the assigned topic partitions. 
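Returning to the Windows setters renamed above, an illustrative sketch of configuring emit and retention durations on a concrete Windows subclass (JoinWindows is used only because it appears earlier in this patch; the values are made up):

    JoinWindows joinWindows = JoinWindows.of("join-window").within(60 * 1000L);
    joinWindows.until(24 * 60 * 60 * 1000L);   // maintain window state for one day of system time
    joinWindows.emit(10 * 1000L);              // emit duration of ten seconds of system time
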
* - * @param topicGroups {@link TopologyBuilder#topicGroups()} where topics of the same group need to be joined together - * @param metadata Metadata of the consuming cluster - * @return The map from generated task ids to the assigned partitions. + * @param topicGroups group of topics that need to be joined together + * @param metadata metadata of the consuming cluster + * @return The map from generated task ids to the assigned partitions */ public Map> partitionGroups(Map> topicGroups, Cluster metadata) { Map> groups = new HashMap<>(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java index fdcff19e01bc..434996e2fd56 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/ProcessorContext.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.processor; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.StreamsMetrics; @@ -25,6 +26,7 @@ /** * Processor context interface. */ +@InterfaceStability.Unstable public interface ProcessorContext { /** diff --git a/streams/src/main/java/org/apache/kafka/streams/state/KeyValueStore.java b/streams/src/main/java/org/apache/kafka/streams/state/KeyValueStore.java index 3e7f6fbc8c24..908e116e9fb2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/KeyValueStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/KeyValueStore.java @@ -19,6 +19,7 @@ package org.apache.kafka.streams.state; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.StateStore; @@ -30,6 +31,7 @@ * @param The key type * @param The value type */ +@InterfaceStability.Unstable public interface KeyValueStore extends StateStore { /** diff --git a/streams/src/main/java/org/apache/kafka/streams/state/WindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/WindowStore.java index cbd373cf8bc3..c7a882fc3ac0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/WindowStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/WindowStore.java @@ -19,6 +19,7 @@ package org.apache.kafka.streams.state; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.streams.processor.StateStore; /** @@ -27,6 +28,7 @@ * @param Type of keys * @param Type of values */ +@InterfaceStability.Unstable public interface WindowStore extends StateStore { /** From 3710b12e6e6a6d8dddb7402087ee550cd14909db Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 13:15:53 -0700 Subject: [PATCH 003/267] MINOR: fix documentation version This will need to be double-committed. Author: Gwen Shapira Reviewers: Ismael Juma , Ewen Cheslack-Postava Closes #1107 from gwenshap/fix-doc-version (cherry picked from commit 69df734470f1db9ba1732cb4bedaa1541d721103) Signed-off-by: Ewen Cheslack-Postava --- docs/documentation.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/documentation.html b/docs/documentation.html index 4ce759988bc1..67a2954acbf1 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -17,8 +17,8 @@ -

-Kafka 0.9.0 Documentation
-Prior releases: 0.7.x, 0.8.0, 0.8.1.X, 0.8.2.X.
+Kafka 0.10.0 Documentation
+Prior releases: 0.7.x, 0.8.0, 0.8.1.X, 0.8.2.X, 0.9.0.X.
    From 3cfd20b7b9d609c1055f6c23de86b6133e5cac75 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 18:58:45 -0700 Subject: [PATCH 004/267] MINOR: update new version in additional places matching set of version fixes. ewencp junrao Author: Gwen Shapira Reviewers: Ismael Juma , Geoff Anderson , Ewen Cheslack-Postava Closes #1110 from gwenshap/minor-fix-version-010 --- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index e12410511b65..2345dbbd4516 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.0") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.1") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index df1a6129dbc8..c509eff0cb2b 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.0.dev0' +__version__ = '0.10.0.0' From 808f85f03d8f69047914eb21438d1458e23c4325 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 21 Mar 2016 20:47:25 -0700 Subject: [PATCH 005/267] KAFKA-3412: multiple asynchronous commits causes send failures Author: Jason Gustafson Reviewers: Ismael Juma , Ewen Cheslack-Postava Closes #1108 from hachikuji/KAFKA-3412 (cherry picked from commit 8d8e3aaa6172d314230a8d61e6892e9c09dc45b6) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/clients/consumer/KafkaConsumer.java | 2 +- .../internals/ConsumerCoordinator.java | 4 ++++ .../internals/ConsumerNetworkClient.java | 5 ++-- .../internals/ConsumerCoordinatorTest.java | 8 ------- .../kafka/api/BaseConsumerTest.scala | 24 ++++++++++++------- .../kafka/api/PlaintextConsumerTest.scala | 15 ++++++++++++ 6 files changed, 39 insertions(+), 19 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index b7eafbe346f4..c36b7f1aa5de 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -870,7 +870,7 @@ public ConsumerRecords poll(long timeout) { // must return these records to users to process before being interrupted or // auto-committing offsets fetcher.sendFetches(metadata.fetch()); - client.quickPoll(); + client.quickPoll(false); return this.interceptors == null ? 
new ConsumerRecords<>(records) : this.interceptors.onConsume(new ConsumerRecords<>(records)); } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index cf935309cd38..e582ce3fc71b 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -345,6 +345,10 @@ public void onFailure(RuntimeException e) { cb.onComplete(offsets, e); } }); + + // ensure commit has a chance to be transmitted (without blocking on its completion) + // note that we allow delayed tasks to be executed in case heartbeats need to be sent + client.quickPoll(true); } /** diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java index 4492306fd3a8..b70994d87fc4 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java @@ -196,10 +196,11 @@ public void poll(long timeout) { /** * Poll for network IO and return immediately. This will not trigger wakeups, * nor will it execute any delayed tasks. + * @param executeDelayedTasks Whether to allow delayed task execution (true allows) */ - public void quickPoll() { + public void quickPoll(boolean executeDelayedTasks) { disableWakeups(); - poll(0, time.milliseconds(), false); + poll(0, time.milliseconds(), executeDelayedTasks); enableWakeups(); } diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java index 260ee7a081ed..8844adc80b96 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java @@ -627,7 +627,6 @@ public void testCommitOffsetOnly() { AtomicBoolean success = new AtomicBoolean(false); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), callback(success)); - consumerClient.poll(0); assertTrue(success.get()); assertEquals(100L, subscriptions.committed(tp).offset()); @@ -644,7 +643,6 @@ public void testCommitOffsetMetadata() { AtomicBoolean success = new AtomicBoolean(false); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L, "hello")), callback(success)); - consumerClient.poll(0); assertTrue(success.get()); assertEquals(100L, subscriptions.committed(tp).offset()); @@ -658,7 +656,6 @@ public void testCommitOffsetAsyncWithDefaultCallback() { coordinator.ensureCoordinatorKnown(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), null); - consumerClient.poll(0); assertEquals(invokedBeforeTest + 1, defaultOffsetCommitCallback.invoked); assertNull(defaultOffsetCommitCallback.exception); } @@ -693,7 +690,6 @@ public boolean matches(ClientRequest request) { AtomicBoolean success = new AtomicBoolean(false); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), callback(success)); 
- consumerClient.poll(0); assertTrue(success.get()); } @@ -704,7 +700,6 @@ public void testCommitOffsetAsyncFailedWithDefaultCallback() { coordinator.ensureCoordinatorKnown(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), null); - consumerClient.poll(0); assertEquals(invokedBeforeTest + 1, defaultOffsetCommitCallback.invoked); assertEquals(Errors.GROUP_COORDINATOR_NOT_AVAILABLE.exception(), defaultOffsetCommitCallback.exception); } @@ -718,7 +713,6 @@ public void testCommitOffsetAsyncCoordinatorNotAvailable() { MockCommitCallback cb = new MockCommitCallback(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), cb); - consumerClient.poll(0); assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); @@ -734,7 +728,6 @@ public void testCommitOffsetAsyncNotCoordinator() { MockCommitCallback cb = new MockCommitCallback(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NOT_COORDINATOR_FOR_GROUP.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), cb); - consumerClient.poll(0); assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); @@ -750,7 +743,6 @@ public void testCommitOffsetAsyncDisconnected() { MockCommitCallback cb = new MockCommitCallback(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code())), true); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), cb); - consumerClient.poll(0); assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); diff --git a/core/src/test/scala/integration/kafka/api/BaseConsumerTest.scala b/core/src/test/scala/integration/kafka/api/BaseConsumerTest.scala index 9939309dc41b..1408cd96a165 100644 --- a/core/src/test/scala/integration/kafka/api/BaseConsumerTest.scala +++ b/core/src/test/scala/integration/kafka/api/BaseConsumerTest.scala @@ -81,7 +81,7 @@ abstract class BaseConsumerTest extends IntegrationTestHarness with Logging { // shouldn't make progress until poll is invoked Thread.sleep(10) - assertEquals(0, commitCallback.count) + assertEquals(0, commitCallback.successCount) awaitCommitCallback(this.consumers(0), commitCallback) } @@ -330,18 +330,26 @@ abstract class BaseConsumerTest extends IntegrationTestHarness with Logging { records } - protected def awaitCommitCallback[K, V](consumer: Consumer[K, V], commitCallback: CountConsumerCommitCallback): Unit = { - val startCount = commitCallback.count + protected def awaitCommitCallback[K, V](consumer: Consumer[K, V], + commitCallback: CountConsumerCommitCallback, + count: Int = 1): Unit = { + val startCount = commitCallback.successCount val started = System.currentTimeMillis() - while (commitCallback.count == startCount && System.currentTimeMillis() - started < 10000) + while (commitCallback.successCount < startCount + count && System.currentTimeMillis() - started < 10000) consumer.poll(50) - assertEquals(startCount + 1, commitCallback.count) + assertEquals(startCount + count, commitCallback.successCount) } protected class CountConsumerCommitCallback extends OffsetCommitCallback { - var count = 0 - - override def onComplete(offsets: util.Map[TopicPartition, OffsetAndMetadata], 
exception: Exception): Unit = count += 1 + var successCount = 0 + var failCount = 0 + + override def onComplete(offsets: util.Map[TopicPartition, OffsetAndMetadata], exception: Exception): Unit = { + if (exception == null) + successCount += 1 + else + failCount += 1 + } } protected class ConsumerAssignmentPoller(consumer: Consumer[Array[Byte], Array[Byte]], diff --git a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala index 9c560107e411..ff2e63d366aa 100644 --- a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala +++ b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala @@ -232,6 +232,21 @@ class PlaintextConsumerTest extends BaseConsumerTest { assertEquals(nullMetadata, this.consumers(0).committed(tp)) } + @Test + def testAsyncCommit() { + val consumer = this.consumers(0) + consumer.assign(List(tp).asJava) + consumer.poll(0) + + val callback = new CountConsumerCommitCallback + val count = 5 + for (i <- 1 to count) + consumer.commitAsync(Map(tp -> new OffsetAndMetadata(i)).asJava, callback) + + awaitCommitCallback(consumer, callback, count=count) + assertEquals(new OffsetAndMetadata(count), consumer.committed(tp)) + } + @Test def testExpandingTopicSubscriptions() { val otherTopic = "other" From c0f870f8509253564c73ca76cd9fabb7df4ce6ad Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Tue, 22 Mar 2016 11:53:12 -0700 Subject: [PATCH 006/267] KAFKA-3435: Remove `Unstable` annotation from new Java Consumer Author: Grant Henke Reviewers: Gwen Shapira Closes #1113 from granthenke/remove-unstable (cherry picked from commit 99cfb99aa161007f2ceae33945d11b968d932175) Signed-off-by: Gwen Shapira --- .../org/apache/kafka/clients/consumer/Consumer.java | 5 ++--- .../apache/kafka/clients/consumer/KafkaConsumer.java | 11 +++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java index 0862c326c1ad..ef80606232b1 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java @@ -3,9 +3,9 @@ * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the * License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. 
@@ -29,7 +29,6 @@ * @see KafkaConsumer * @see MockConsumer */ -@InterfaceStability.Unstable public interface Consumer extends Closeable { /** diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index c36b7f1aa5de..103125298b8a 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -165,11 +165,11 @@ * In this example the client is subscribing to the topics foo and bar as part of a group of consumers * called test as described above. *

    - * The broker will automatically detect failed processes in the test group by using a heartbeat mechanism. The - * consumer will automatically ping the cluster periodically, which lets the cluster know that it is alive. Note that - * the consumer is single-threaded, so periodic heartbeats can only be sent when {@link #poll(long)} is called. As long as - * the consumer is able to do this it is considered alive and retains the right to consume from the partitions assigned - * to it. If it stops heartbeating by failing to call {@link #poll(long)} for a period of time longer than session.timeout.ms + * The broker will automatically detect failed processes in the test group by using a heartbeat mechanism. The + * consumer will automatically ping the cluster periodically, which lets the cluster know that it is alive. Note that + * the consumer is single-threaded, so periodic heartbeats can only be sent when {@link #poll(long)} is called. As long as + * the consumer is able to do this it is considered alive and retains the right to consume from the partitions assigned + * to it. If it stops heartbeating by failing to call {@link #poll(long)} for a period of time longer than session.timeout.ms * then it will be considered dead and its partitions will be assigned to another process. *

    * The deserializer settings specify how to turn bytes into objects. For example, by specifying string deserializers, we @@ -452,7 +452,6 @@ * commit. * */ -@InterfaceStability.Unstable public class KafkaConsumer implements Consumer { private static final Logger log = LoggerFactory.getLogger(KafkaConsumer.class); From 66ac5157d914aacf9b544fe7de91020f445cef3c Mon Sep 17 00:00:00 2001 From: gaob13 Date: Tue, 22 Mar 2016 11:58:01 -0700 Subject: [PATCH 007/267] MINOR: Remove the very misleading comment lines It is not true in practice. Maybe the implied feature is not yet implemented or removed. These lines can be super misleading. Please merge. Thank you. Author: gaob13 Reviewers: Ismael Juma, Ewen Cheslack-Postava Closes #793 from gaob13/trunk (cherry picked from commit 73470b028cd659c1c405e6b6ba52bf483113b90b) Signed-off-by: Gwen Shapira --- config/server.properties | 30 +++++++++---------- .../main/scala/kafka/server/KafkaConfig.scala | 20 +++++++++---- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/config/server.properties b/config/server.properties index ddb695aff7a1..aebcb87ca9e4 100644 --- a/config/server.properties +++ b/config/server.properties @@ -21,22 +21,18 @@ broker.id=0 ############################# Socket Server Settings ############################# -listeners=PLAINTEXT://:9092 - -# The port the socket server listens on -#port=9092 - -# Hostname the broker will bind to. If not set, the server will bind to all interfaces -#host.name=localhost - -# Hostname the broker will advertise to producers and consumers. If not set, it uses the -# value for "host.name" if configured. Otherwise, it will use the value returned from -# java.net.InetAddress.getCanonicalHostName(). -#advertised.host.name= - -# The port to publish to ZooKeeper for clients to use. If this is not set, -# it will publish the same port that the broker binds to. -#advertised.port= +# The address the socket server listens on. It will get the value returned from +# java.net.InetAddress.getCanonicalHostName() if not configured. +# FORMAT: +# listeners = security_protocol://host_name:port +# EXAMPLE: +# listeners = PLAINTEXT://your.host.name:9092 +#listeners=PLAINTEXT://:9092 + +# Hostname and port the broker will advertise to producers and consumers. If not set, +# it uses the value for "listeners" if configured. Otherwise, it will use the value +# returned from java.net.InetAddress.getCanonicalHostName(). +#advertised.listeners=PLAINTEXT://your.host.name:9092 # The number of threads handling network requests num.network.threads=3 @@ -117,3 +113,5 @@ zookeeper.connect=localhost:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=6000 + + diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala index 7e1013e22a5c..a6018ad45b8d 100755 --- a/core/src/main/scala/kafka/server/KafkaConfig.scala +++ b/core/src/main/scala/kafka/server/KafkaConfig.scala @@ -366,24 +366,32 @@ object KafkaConfig { /************* Authorizer Configuration ***********/ val AuthorizerClassNameDoc = "The authorizer class that should be used for authorization" /** ********* Socket Server Configuration ***********/ - val PortDoc = "the port to listen and accept connections on" - val HostNameDoc = "hostname of broker. If this is set, it will only bind to this address. If this is not set, it will bind to all interfaces" + val PortDoc = "DEPRECATED: only used when `listeners` is not set. " + + "Use `listeners` instead. 
\n" + + "the port to listen and accept connections on" + val HostNameDoc = "DEPRECATED: only used when `listeners` is not set. " + + "Use `listeners` instead. \n" + + "hostname of broker. If this is set, it will only bind to this address. If this is not set, it will bind to all interfaces" val ListenersDoc = "Listener List - Comma-separated list of URIs we will listen on and their protocols.\n" + " Specify hostname as 0.0.0.0 to bind to all interfaces.\n" + " Leave hostname empty to bind to default interface.\n" + " Examples of legal listener lists:\n" + " PLAINTEXT://myhost:9092,TRACE://:9091\n" + " PLAINTEXT://0.0.0.0:9092, TRACE://localhost:9093\n" - val AdvertisedHostNameDoc = "Hostname to publish to ZooKeeper for clients to use. In IaaS environments, this may " + + val AdvertisedHostNameDoc = "DEPRECATED: only used when `advertised.listeners` or `listeners` are not set. " + + "Use `advertised.listeners` instead. \n" + + "Hostname to publish to ZooKeeper for clients to use. In IaaS environments, this may " + "need to be different from the interface to which the broker binds. If this is not set, " + - "it will use the value for \"host.name\" if configured. Otherwise " + + "it will use the value for `host.name` if configured. Otherwise " + "it will use the value returned from java.net.InetAddress.getCanonicalHostName()." - val AdvertisedPortDoc = "The port to publish to ZooKeeper for clients to use. In IaaS environments, this may " + + val AdvertisedPortDoc = "DEPRECATED: only used when `advertised.listeners` or `listeners` are not set. " + + "Use `advertised.listeners` instead. \n" + + "The port to publish to ZooKeeper for clients to use. In IaaS environments, this may " + "need to be different from the port to which the broker binds. If this is not set, " + "it will publish the same port that the broker binds to." val AdvertisedListenersDoc = "Listeners to publish to ZooKeeper for clients to use, if different than the listeners above." + " In IaaS environments, this may need to be different from the interface to which the broker binds." + - " If this is not set, the value for \"listeners\" will be used." + " If this is not set, the value for `listeners` will be used." 
val SocketSendBufferBytesDoc = "The SO_SNDBUF buffer of the socket sever sockets" val SocketReceiveBufferBytesDoc = "The SO_RCVBUF buffer of the socket sever sockets" val SocketRequestMaxBytesDoc = "The maximum number of bytes in a socket request" From e2d7f9e445a2cbb5e1e0ed8437af849b731677bf Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 22 Mar 2016 12:39:04 -0700 Subject: [PATCH 008/267] KAFKA-3426; Improve protocol type errors when invalid sizes are received Author: Ismael Juma Reviewers: Grant Henke, Gwen Shapira Closes #1100 from ijuma/kafka-3426-invalid-protocol-type-errors-invalid-sizes (cherry picked from commit 73c79000edddd929cd0af25f4a29fcc682a1c9c0) Signed-off-by: Gwen Shapira --- .../kafka/common/protocol/types/ArrayOf.java | 2 + .../kafka/common/protocol/types/Type.java | 22 ++++- .../types/ProtocolSerializationTest.java | 93 ++++++++++++++++++- 3 files changed, 112 insertions(+), 5 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/ArrayOf.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/ArrayOf.java index 4a36cb71d2c7..a08f876abdb3 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/types/ArrayOf.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/ArrayOf.java @@ -41,6 +41,8 @@ public void write(ByteBuffer buffer, Object o) { @Override public Object read(ByteBuffer buffer) { int size = buffer.getInt(); + if (size < 0) + throw new SchemaException("Array size " + size + " cannot be negative"); if (size > buffer.remaining()) throw new SchemaException("Error reading array of size " + size + ", only " + buffer.remaining() + " bytes available"); Object[] objs = new Object[size]; diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java index c4bcb1e49201..92c1f7c67b17 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java @@ -184,14 +184,19 @@ public Long validate(Object item) { public void write(ByteBuffer buffer, Object o) { byte[] bytes = Utils.utf8((String) o); if (bytes.length > Short.MAX_VALUE) - throw new SchemaException("String is longer than the maximum string length."); + throw new SchemaException("String length " + bytes.length + " is larger than the maximum string length."); buffer.putShort((short) bytes.length); buffer.put(bytes); } @Override public Object read(ByteBuffer buffer) { - int length = buffer.getShort(); + short length = buffer.getShort(); + if (length < 0) + throw new SchemaException("String length " + length + " cannot be negative"); + if (length > buffer.remaining()) + throw new SchemaException("Error reading string of length " + length + ", only " + buffer.remaining() + " bytes available"); + byte[] bytes = new byte[length]; buffer.get(bytes); return Utils.utf8(bytes); @@ -231,16 +236,18 @@ public void write(ByteBuffer buffer, Object o) { byte[] bytes = Utils.utf8((String) o); if (bytes.length > Short.MAX_VALUE) - throw new SchemaException("String is longer than the maximum string length."); + throw new SchemaException("String length " + bytes.length + " is larger than the maximum string length."); buffer.putShort((short) bytes.length); buffer.put(bytes); } @Override public Object read(ByteBuffer buffer) { - int length = buffer.getShort(); + short length = buffer.getShort(); if (length < 0) return null; + if (length > buffer.remaining()) + throw new 
SchemaException("Error reading string of length " + length + ", only " + buffer.remaining() + " bytes available"); byte[] bytes = new byte[length]; buffer.get(bytes); @@ -285,6 +292,11 @@ public void write(ByteBuffer buffer, Object o) { @Override public Object read(ByteBuffer buffer) { int size = buffer.getInt(); + if (size < 0) + throw new SchemaException("Bytes size " + size + " cannot be negative"); + if (size > buffer.remaining()) + throw new SchemaException("Error reading bytes of size " + size + ", only " + buffer.remaining() + " bytes available"); + ByteBuffer val = buffer.slice(); val.limit(size); buffer.position(buffer.position() + size); @@ -336,6 +348,8 @@ public Object read(ByteBuffer buffer) { int size = buffer.getInt(); if (size < 0) return null; + if (size > buffer.remaining()) + throw new SchemaException("Error reading bytes of size " + size + ", only " + buffer.remaining() + " bytes available"); ByteBuffer val = buffer.slice(); val.limit(size); diff --git a/clients/src/test/java/org/apache/kafka/common/protocol/types/ProtocolSerializationTest.java b/clients/src/test/java/org/apache/kafka/common/protocol/types/ProtocolSerializationTest.java index e20aa1093258..5c342776b48f 100644 --- a/clients/src/test/java/org/apache/kafka/common/protocol/types/ProtocolSerializationTest.java +++ b/clients/src/test/java/org/apache/kafka/common/protocol/types/ProtocolSerializationTest.java @@ -117,7 +117,7 @@ private void checkNullableDefault(Type type, Object defaultValue) { } @Test - public void testArray() { + public void testReadArraySizeTooLarge() { Type type = new ArrayOf(Type.INT8); int size = 10; ByteBuffer invalidBuffer = ByteBuffer.allocate(4 + size); @@ -133,6 +133,97 @@ public void testArray() { } } + @Test + public void testReadNegativeArraySize() { + Type type = new ArrayOf(Type.INT8); + int size = 10; + ByteBuffer invalidBuffer = ByteBuffer.allocate(4 + size); + invalidBuffer.putInt(-1); + for (int i = 0; i < size; i++) + invalidBuffer.put((byte) i); + invalidBuffer.rewind(); + try { + type.read(invalidBuffer); + fail("Array size not validated"); + } catch (SchemaException e) { + // Expected exception + } + } + + @Test + public void testReadStringSizeTooLarge() { + byte[] stringBytes = "foo".getBytes(); + ByteBuffer invalidBuffer = ByteBuffer.allocate(2 + stringBytes.length); + invalidBuffer.putShort((short) (stringBytes.length * 5)); + invalidBuffer.put(stringBytes); + invalidBuffer.rewind(); + try { + Type.STRING.read(invalidBuffer); + fail("String size not validated"); + } catch (SchemaException e) { + // Expected exception + } + invalidBuffer.rewind(); + try { + Type.NULLABLE_STRING.read(invalidBuffer); + fail("String size not validated"); + } catch (SchemaException e) { + // Expected exception + } + } + + @Test + public void testReadNegativeStringSize() { + byte[] stringBytes = "foo".getBytes(); + ByteBuffer invalidBuffer = ByteBuffer.allocate(2 + stringBytes.length); + invalidBuffer.putShort((short) -1); + invalidBuffer.put(stringBytes); + invalidBuffer.rewind(); + try { + Type.STRING.read(invalidBuffer); + fail("String size not validated"); + } catch (SchemaException e) { + // Expected exception + } + } + + @Test + public void testReadBytesSizeTooLarge() { + byte[] stringBytes = "foo".getBytes(); + ByteBuffer invalidBuffer = ByteBuffer.allocate(4 + stringBytes.length); + invalidBuffer.putInt(stringBytes.length * 5); + invalidBuffer.put(stringBytes); + invalidBuffer.rewind(); + try { + Type.BYTES.read(invalidBuffer); + fail("Bytes size not validated"); + } catch 
(SchemaException e) { + // Expected exception + } + invalidBuffer.rewind(); + try { + Type.NULLABLE_BYTES.read(invalidBuffer); + fail("Bytes size not validated"); + } catch (SchemaException e) { + // Expected exception + } + } + + @Test + public void testReadNegativeBytesSize() { + byte[] stringBytes = "foo".getBytes(); + ByteBuffer invalidBuffer = ByteBuffer.allocate(4 + stringBytes.length); + invalidBuffer.putInt(-20); + invalidBuffer.put(stringBytes); + invalidBuffer.rewind(); + try { + Type.BYTES.read(invalidBuffer); + fail("Bytes size not validated"); + } catch (SchemaException e) { + // Expected exception + } + } + private Object roundtrip(Type type, Object obj) { ByteBuffer buffer = ByteBuffer.allocate(type.sizeOf(obj)); type.write(buffer, obj); From 687d2494ff27b05f21ea29e645a0b1b000334db6 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Tue, 22 Mar 2016 13:09:13 -0700 Subject: [PATCH 009/267] KAFKA-3319: improve session timeout broker/client config documentation Author: Jason Gustafson Reviewers: Grant Henke, Ismael Juma, Guozhang Wang Closes #1106 from hachikuji/KAFKA-3319 (cherry picked from commit ca77d67058726fc9df9bdd7cc0217ee62ccc5106) Signed-off-by: Guozhang Wang --- .../kafka/clients/consumer/ConsumerConfig.java | 16 ++++++++++++---- .../org/apache/kafka/common/protocol/Errors.java | 3 ++- .../main/scala/kafka/server/KafkaConfig.scala | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java index 9101307cc0c5..c97c8fb47024 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java @@ -43,11 +43,22 @@ public class ConsumerConfig extends AbstractConfig { public static final String GROUP_ID_CONFIG = "group.id"; private static final String GROUP_ID_DOC = "A unique string that identifies the consumer group this consumer belongs to. This property is required if the consumer uses either the group management functionality by using subscribe(topic) or the Kafka-based offset management strategy."; + /** max.poll.records */ + public static final String MAX_POLL_RECORDS_CONFIG = "max.poll.records"; + private static final String MAX_POLL_RECORDS_DOC = "The maximum number of records returned in a single call to poll()."; + /** * session.timeout.ms */ public static final String SESSION_TIMEOUT_MS_CONFIG = "session.timeout.ms"; - private static final String SESSION_TIMEOUT_MS_DOC = "The timeout used to detect failures when using Kafka's group management facilities."; + private static final String SESSION_TIMEOUT_MS_DOC = "The timeout used to detect failures when using Kafka's " + + "group management facilities. When a consumer's heartbeat is not received within the session timeout, " + + "the broker will mark the consumer as failed and rebalance the group. Since heartbeats are sent only " + + "when poll() is invoked, a higher session timeout allows more time for message processing in the consumer's " + + "poll loop at the cost of a longer time to detect hard failures. See also " + MAX_POLL_RECORDS_CONFIG + " for " + + "another option to control the processing time in the poll loop. 
Note that the value must be in the " + + "allowable range as configured in the broker configuration by group.min.session.timeout.ms " + + "and group.max.session.timeout.ms."; /** * heartbeat.interval.ms @@ -168,9 +179,6 @@ public class ConsumerConfig extends AbstractConfig { + "Implementing the ConsumerInterceptor interface allows you to intercept (and possibly mutate) records " + "received by the consumer. By default, there are no interceptors."; - /** max.poll.records */ - public static final String MAX_POLL_RECORDS_CONFIG = "max.poll.records"; - private static final String MAX_POLL_RECORDS_DOC = "The maximum number of records returned in a single call to poll()."; /** exclude.internal.topics */ public static final String EXCLUDE_INTERNAL_TOPICS_CONFIG = "exclude.internal.topics"; diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java index 90be0144109a..0f33516b934c 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java @@ -117,7 +117,8 @@ public enum Errors { UNKNOWN_MEMBER_ID(25, new UnknownMemberIdException("The coordinator is not aware of this member.")), INVALID_SESSION_TIMEOUT(26, - new InvalidSessionTimeoutException("The session timeout is not within an acceptable range.")), + new InvalidSessionTimeoutException("The session timeout is not within the range allowed by the broker " + + "(as configured by group.min.session.timeout.ms and group.max.session.timeout.ms).")), REBALANCE_IN_PROGRESS(27, new RebalanceInProgressException("The group is rebalancing, so a rejoin is needed.")), INVALID_COMMIT_OFFSET_SIZE(28, diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala index a6018ad45b8d..dc2a0a0814bf 100755 --- a/core/src/main/scala/kafka/server/KafkaConfig.scala +++ b/core/src/main/scala/kafka/server/KafkaConfig.scala @@ -480,8 +480,8 @@ object KafkaConfig { val ControlledShutdownRetryBackoffMsDoc = "Before each retry, the system needs time to recover from the state that caused the previous failure (Controller fail over, replica lag etc). This config determines the amount of time to wait before retrying." val ControlledShutdownEnableDoc = "Enable controlled shutdown of the server" /** ********* Consumer coordinator configuration ***********/ - val ConsumerMinSessionTimeoutMsDoc = "The minimum allowed session timeout for registered consumers" - val ConsumerMaxSessionTimeoutMsDoc = "The maximum allowed session timeout for registered consumers" + val ConsumerMinSessionTimeoutMsDoc = "The minimum allowed session timeout for registered consumers. Shorter timeouts leader to quicker failure detection at the cost of more frequent consumer heartbeating, which can overwhelm broker resources." + val ConsumerMaxSessionTimeoutMsDoc = "The maximum allowed session timeout for registered consumers. Longer timeouts give consumers more time to process messages in between heartbeats at the cost of a longer time to detect failures." /** ********* Offset management configuration ***********/ val OffsetMetadataMaxSizeDoc = "The maximum size for a metadata entry associated with an offset commit" val OffsetsLoadBufferSizeDoc = "Batch size for reading from the offsets segments when loading offsets into the cache." 
From 05984de69ee8eb06f214a2e42abd789a4fbc8cef Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Tue, 22 Mar 2016 13:10:07 -0700 Subject: [PATCH 010/267] KAFKA-3219: Fix long topic name validation This fixes an issue with long topic names by considering, during topic validation, the '-' and the partition id that is appended to the log folder created for each topic partition. Author: Vahid Hashemian Reviewers: Gwen Shapira, Grant Henke Closes #898 from vahidhashemian/KAFKA-3219 (cherry picked from commit ad3dfc6ab25c3f80d2425e24e72ae732b850dc60) Signed-off-by: Gwen Shapira --- core/src/main/scala/kafka/common/Topic.scala | 2 +- core/src/test/scala/unit/kafka/common/TopicTest.scala | 3 ++- docs/ops.html | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/common/Topic.scala b/core/src/main/scala/kafka/common/Topic.scala index 930d0e441568..6067712762af 100644 --- a/core/src/main/scala/kafka/common/Topic.scala +++ b/core/src/main/scala/kafka/common/Topic.scala @@ -22,7 +22,7 @@ import kafka.coordinator.GroupCoordinator object Topic { val legalChars = "[a-zA-Z0-9\\._\\-]" - private val maxNameLength = 255 + private val maxNameLength = 249 private val rgx = new Regex(legalChars + "+") def validate(topic: String) { diff --git a/core/src/test/scala/unit/kafka/common/TopicTest.scala b/core/src/test/scala/unit/kafka/common/TopicTest.scala index da6083a2dd9f..66549afe4b3d 100644 --- a/core/src/test/scala/unit/kafka/common/TopicTest.scala +++ b/core/src/test/scala/unit/kafka/common/TopicTest.scala @@ -31,6 +31,7 @@ class TopicTest { for (i <- 1 to 6) longName += longName invalidTopicNames += longName + invalidTopicNames += longName.drop(6) val badChars = Array('/', '\\', ',', '\u0000', ':', "\"", '\'', ';', '*', '?', ' ', '\t', '\r', '\n', '=') for (weirdChar <- badChars) { invalidTopicNames += "Is" + weirdChar + "illegal" @@ -47,7 +48,7 @@ class TopicTest { } val validTopicNames = new ArrayBuffer[String]() - validTopicNames += ("valid", "TOPIC", "nAmEs", "ar6", "VaL1d", "_0-9_.") + validTopicNames += ("valid", "TOPIC", "nAmEs", "ar6", "VaL1d", "_0-9_.", longName.drop(7)) for (i <- 0 until validTopicNames.size) { try { Topic.validate(validTopicNames(i)) diff --git a/docs/ops.html b/docs/ops.html index 4cfe17b1899a..541a01ddcaf8 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -34,6 +34,8 @@

Adding and removing topics

The partition count controls how many logs the topic will be sharded into. There are several impacts of the partition count. First, each partition must fit entirely on a single server, so if you have 20 partitions the full data set (and read and write load) will be handled by no more than 20 servers (not counting replicas). Finally, the partition count caps the maximum parallelism of your consumers. This is discussed in greater detail in the concepts section.

+Each sharded partition log is placed into its own folder under the Kafka log directory. Such folders are named after the topic, followed by a dash (-) and the partition id. Since a typical folder name cannot be longer than 255 characters, this limits the length of topic names. Assuming the number of partitions never exceeds 100,000, topic names can be at most 249 characters, leaving just enough room in the folder name for a dash and a potentially 5 digit long partition id.

    The configurations added on the command line override the default settings the server has for things like the length of time data should be retained. The complete set of per-topic configurations is documented here.

    Modifying topics

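The paragraph added to ops.html above derives the 249-character topic name limit from a 255-character folder name limit, minus one character for the dash and five for the partition id. A self-contained sketch of that arithmetic follows; the class and method names are hypothetical and do not reproduce Kafka's own kafka.common.Topic validation.

public class TopicNameLengthSketch {
    // A typical file system caps folder names at 255 characters. The partition folder
    // is named "<topic>-<partition>", so reserve 1 character for the dash and 5 for a
    // partition id (ids stay at most 5 digits while partition counts stay under 100,000).
    private static final int MAX_TOPIC_NAME_LENGTH = 255 - 1 - 5;  // 249

    static void requireValidLength(String topic) {
        if (topic.length() > MAX_TOPIC_NAME_LENGTH)
            throw new IllegalArgumentException("topic name is " + topic.length()
                    + " characters, longer than the maximum of " + MAX_TOPIC_NAME_LENGTH);
    }

    public static void main(String[] args) {
        requireValidLength("my-topic");  // passes
        StringBuilder tooLong = new StringBuilder();
        for (int i = 0; i < 250; i++)
            tooLong.append('a');
        requireValidLength(tooLong.toString());  // throws IllegalArgumentException
    }
}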
    From a81e5d05e6fea7fb74dfb19b67729b6dd4ca2423 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Tue, 22 Mar 2016 14:09:10 -0700 Subject: [PATCH 011/267] KAFKA-3435: Follow up to fix checkstyle Author: Ewen Cheslack-Postava Reviewers: Gwen Shapira Closes #1116 from ewencp/kafka-3435-follow-up (cherry picked from commit d6b450ced57bb5ab8a7164358e6a360f97a7b5cc) Signed-off-by: Gwen Shapira --- .../main/java/org/apache/kafka/clients/consumer/Consumer.java | 1 - .../java/org/apache/kafka/clients/consumer/KafkaConsumer.java | 1 - 2 files changed, 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java index ef80606232b1..6f5a6b64c9bd 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/Consumer.java @@ -16,7 +16,6 @@ import org.apache.kafka.common.MetricName; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.annotation.InterfaceStability; import java.io.Closeable; import java.util.Collection; diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index 103125298b8a..b15d07f80969 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -28,7 +28,6 @@ import org.apache.kafka.common.MetricName; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.config.ConfigException; import org.apache.kafka.common.metrics.JmxReporter; import org.apache.kafka.common.metrics.MetricConfig; From d9cbe566371ed1f519fa983c14fc48d088f66b6a Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Tue, 22 Mar 2016 15:01:05 -0700 Subject: [PATCH 012/267] =?UTF-8?q?KAFKA-3301;=20CommonClientConfigs.METRI?= =?UTF-8?q?CS=5FSAMPLE=5FWINDOW=5FMS=5FDOC=20is=20incor=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …rect Author: Grant Henke Reviewers: Jun Rao Closes #1114 from granthenke/window-docs (cherry picked from commit 983e9486ef139f8c9797679732d09feb3f7bfb87) Signed-off-by: Jun Rao --- .../main/java/org/apache/kafka/clients/CommonClientConfigs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java b/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java index 2c5e67c5d78b..298e1d8c541b 100644 --- a/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java +++ b/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java @@ -53,7 +53,7 @@ public class CommonClientConfigs { public static final String RETRY_BACKOFF_MS_DOC = "The amount of time to wait before attempting to retry a failed fetch request to a given topic partition. 
This avoids repeated fetching-and-failing in a tight loop."; public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = "metrics.sample.window.ms"; - public static final String METRICS_SAMPLE_WINDOW_MS_DOC = "The number of samples maintained to compute metrics."; + public static final String METRICS_SAMPLE_WINDOW_MS_DOC = "The window of time a metrics sample is computed over."; public static final String METRICS_NUM_SAMPLES_CONFIG = "metrics.num.samples"; public static final String METRICS_NUM_SAMPLES_DOC = "The number of samples maintained to compute metrics."; From d1e74890caaf3b76b0d9a888742a2cf1c6e6e883 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 22 Mar 2016 18:20:07 -0700 Subject: [PATCH 013/267] KAFKA-3447; partitionState in UpdateMetadataRequest not logged properly state-change log Author: Ismael Juma Reviewers: Jun Rao Closes #1117 from ijuma/kafka-3447-metadata-cache-logging (cherry picked from commit adca4d7df3155ac29ae510c237adf06c9521d221) Signed-off-by: Jun Rao --- .../scala/kafka/server/MetadataCache.scala | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/kafka/server/MetadataCache.scala b/core/src/main/scala/kafka/server/MetadataCache.scala index 4b68f707320e..06fae42116e5 100755 --- a/core/src/main/scala/kafka/server/MetadataCache.scala +++ b/core/src/main/scala/kafka/server/MetadataCache.scala @@ -44,7 +44,7 @@ private[server] class MetadataCache(brokerId: Int) extends Logging { private val aliveNodes = mutable.Map[Int, collection.Map[SecurityProtocol, Node]]() private val partitionMetadataLock = new ReentrantReadWriteLock() - this.logIdent = "[Kafka Metadata Cache on broker %d] ".format(brokerId) + this.logIdent = s"[Kafka Metadata Cache on broker $brokerId] " private def getAliveEndpoints(brokers: Iterable[Int], protocol: SecurityProtocol): Seq[Node] = { val result = new mutable.ArrayBuffer[Node](math.min(aliveBrokers.size, brokers.size)) @@ -73,7 +73,7 @@ private[server] class MetadataCache(brokerId: Int) extends Logging { maybeLeader match { case None => - debug("Error while fetching metadata for %s: leader not available".format(topicPartition)) + debug(s"Error while fetching metadata for $topicPartition: leader not available") new MetadataResponse.PartitionMetadata(Errors.LEADER_NOT_AVAILABLE, partitionId, Node.noNode(), replicaInfo.asJava, java.util.Collections.emptyList()) @@ -82,14 +82,14 @@ private[server] class MetadataCache(brokerId: Int) extends Logging { val isrInfo = getAliveEndpoints(isr, protocol) if (replicaInfo.size < replicas.size) { - debug("Error while fetching metadata for %s: replica information not available for following brokers %s" - .format(topicPartition, replicas.filterNot(replicaInfo.map(_.id).contains).mkString(","))) + debug(s"Error while fetching metadata for $topicPartition: replica information not available for " + + s"following brokers ${replicas.filterNot(replicaInfo.map(_.id).contains).mkString(",")}") new MetadataResponse.PartitionMetadata(Errors.REPLICA_NOT_AVAILABLE, partitionId, leader, replicaInfo.asJava, isrInfo.asJava) } else if (isrInfo.size < isr.size) { - debug("Error while fetching metadata for %s: in sync replica information not available for following brokers %s" - .format(topicPartition, isr.filterNot(isrInfo.map(_.id).contains).mkString(","))) + debug(s"Error while fetching metadata for $topicPartition: in sync replica information not available for " + + s"following brokers ${isr.filterNot(isrInfo.map(_.id).contains).mkString(",")}") new 
MetadataResponse.PartitionMetadata(Errors.REPLICA_NOT_AVAILABLE, partitionId, leader, replicaInfo.asJava, isrInfo.asJava) } else { @@ -167,19 +167,17 @@ private[server] class MetadataCache(brokerId: Int) extends Logging { } updateMetadataRequest.partitionStates.asScala.foreach { case (tp, info) => + val controllerId = updateMetadataRequest.controllerId + val controllerEpoch = updateMetadataRequest.controllerEpoch if (info.leader == LeaderAndIsr.LeaderDuringDelete) { removePartitionInfo(tp.topic, tp.partition) - stateChangeLogger.trace(("Broker %d deleted partition %s from metadata cache in response to UpdateMetadata request " + - "sent by controller %d epoch %d with correlation id %d") - .format(brokerId, tp, updateMetadataRequest.controllerId, - updateMetadataRequest.controllerEpoch, correlationId)) + stateChangeLogger.trace(s"Broker $brokerId deleted partition $tp from metadata cache in response to UpdateMetadata " + + s"request sent by controller $controllerId epoch $controllerEpoch with correlation id $correlationId") } else { val partitionInfo = partitionStateToPartitionStateInfo(info) addOrUpdatePartitionInfo(tp.topic, tp.partition, partitionInfo) - stateChangeLogger.trace(("Broker %d cached leader info %s for partition %s in response to UpdateMetadata request " + - "sent by controller %d epoch %d with correlation id %d") - .format(brokerId, info, tp, updateMetadataRequest.controllerId, - updateMetadataRequest.controllerEpoch, correlationId)) + stateChangeLogger.trace(s"Broker $brokerId cached leader info $partitionInfo for partition $tp in response to " + + s"UpdateMetadata request sent by controller $controllerId epoch $controllerEpoch with correlation id $correlationId") } } } From aead28a54e55ca0541d696abced523b453bb50f7 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 22 Mar 2016 19:13:26 -0700 Subject: [PATCH 014/267] KAFKA-3431: Remove `o.a.k.common.BrokerEndPoint` in favour of `Node` Also included a minor efficiency improvement in kafka.cluster.EndPoint. Author: Ismael Juma Reviewers: Gwen Shapira Closes #1105 from ijuma/kafka-3431-replace-broker-end-point-with-node (cherry picked from commit 255b5e13863a95cfc327236856db2df188f04d49) Signed-off-by: Gwen Shapira --- .../apache/kafka/common/BrokerEndPoint.java | 88 ------------------- .../common/requests/LeaderAndIsrRequest.java | 14 +-- .../requests/UpdateMetadataRequest.java | 8 +- .../common/requests/RequestResponseTest.java | 13 ++- .../main/scala/kafka/cluster/EndPoint.scala | 3 +- .../controller/ControllerChannelManager.scala | 12 +-- .../kafka/api/AuthorizerIntegrationTest.scala | 8 +- .../kafka/server/LeaderElectionTest.scala | 17 ++-- .../kafka/server/ReplicaManagerTest.scala | 6 +- 9 files changed, 38 insertions(+), 131 deletions(-) delete mode 100644 clients/src/main/java/org/apache/kafka/common/BrokerEndPoint.java diff --git a/clients/src/main/java/org/apache/kafka/common/BrokerEndPoint.java b/clients/src/main/java/org/apache/kafka/common/BrokerEndPoint.java deleted file mode 100644 index d5275c419a66..000000000000 --- a/clients/src/main/java/org/apache/kafka/common/BrokerEndPoint.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.common; - -import java.io.Serializable; - -/** - * Broker id, host and port - */ -public final class BrokerEndPoint implements Serializable { - - private int hash = 0; - private final int id; - private final String host; - private final int port; - - public BrokerEndPoint(int id, String host, int port) { - this.id = id; - this.host = host; - this.port = port; - } - - public int id() { - return id; - } - - public String host() { - return host; - } - - public int port() { - return port; - } - - @Override - public int hashCode() { - if (hash != 0) - return hash; - final int prime = 31; - int result = 1; - result = prime * result + id; - result = prime * result + ((host == null) ? 0 : host.hashCode()); - result = prime * result + port; - this.hash = result; - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - BrokerEndPoint other = (BrokerEndPoint) obj; - if (id != other.id) - return false; - if (port != other.port) - return false; - if (host == null) { - if (other.host != null) - return false; - } else if (!host.equals(other.host)) - return false; - return true; - } - - @Override - public String toString() { - return "[" + id + ", " + host + ":" + port + "]"; - } - -} diff --git a/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java index 264af903b7ea..fee3c2111410 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java @@ -17,7 +17,7 @@ package org.apache.kafka.common.requests; -import org.apache.kafka.common.BrokerEndPoint; +import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; @@ -78,10 +78,10 @@ public PartitionState(int controllerEpoch, int leader, int leaderEpoch, List partitionStates; - private final Set liveLeaders; + private final Set liveLeaders; public LeaderAndIsrRequest(int controllerId, int controllerEpoch, Map partitionStates, - Set liveLeaders) { + Set liveLeaders) { super(new Struct(CURRENT_SCHEMA)); struct.set(CONTROLLER_ID_KEY_NAME, controllerId); struct.set(CONTROLLER_EPOCH_KEY_NAME, controllerEpoch); @@ -104,7 +104,7 @@ public LeaderAndIsrRequest(int controllerId, int controllerEpoch, Map leadersData = new ArrayList<>(liveLeaders.size()); - for (BrokerEndPoint leader : liveLeaders) { + for (Node leader : liveLeaders) { Struct leaderData = struct.instance(LIVE_LEADERS_KEY_NAME); leaderData.set(END_POINT_ID_KEY_NAME, leader.id()); leaderData.set(HOST_KEY_NAME, leader.host()); @@ -148,13 +148,13 @@ public LeaderAndIsrRequest(Struct struct) { } - Set leaders = new HashSet<>(); + Set leaders = new HashSet<>(); for (Object leadersDataObj : struct.getArray(LIVE_LEADERS_KEY_NAME)) { Struct leadersData = (Struct) leadersDataObj; int id = 
leadersData.getInt(END_POINT_ID_KEY_NAME); String host = leadersData.getString(HOST_KEY_NAME); int port = leadersData.getInt(PORT_KEY_NAME); - leaders.add(new BrokerEndPoint(id, host, port)); + leaders.add(new Node(id, host, port)); } controllerId = struct.getInt(CONTROLLER_ID_KEY_NAME); @@ -191,7 +191,7 @@ public Map partitionStates() { return partitionStates; } - public Set liveLeaders() { + public Set liveLeaders() { return liveLeaders; } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java index 4c3d0a74740d..27f89fa5796f 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java @@ -13,7 +13,7 @@ package org.apache.kafka.common.requests; -import org.apache.kafka.common.BrokerEndPoint; +import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; @@ -113,15 +113,15 @@ public EndPoint(String host, int port) { * Constructor for version 0. */ @Deprecated - public UpdateMetadataRequest(int controllerId, int controllerEpoch, Set liveBrokers, + public UpdateMetadataRequest(int controllerId, int controllerEpoch, Set liveBrokers, Map partitionStates) { this(0, controllerId, controllerEpoch, partitionStates, brokerEndPointsToBrokers(liveBrokers)); } - private static Set brokerEndPointsToBrokers(Set brokerEndPoints) { + private static Set brokerEndPointsToBrokers(Set brokerEndPoints) { Set brokers = new HashSet<>(brokerEndPoints.size()); - for (BrokerEndPoint brokerEndPoint : brokerEndPoints) { + for (Node brokerEndPoint : brokerEndPoints) { Map endPoints = Collections.singletonMap(SecurityProtocol.PLAINTEXT, new EndPoint(brokerEndPoint.host(), brokerEndPoint.port())); brokers.add(new Broker(brokerEndPoint.id(), endPoints, null)); diff --git a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java index a4c5238bcf02..9def5577a5bc 100644 --- a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java +++ b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java @@ -13,7 +13,6 @@ package org.apache.kafka.common.requests; -import org.apache.kafka.common.BrokerEndPoint; import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.UnknownServerException; @@ -374,9 +373,9 @@ private AbstractRequest createLeaderAndIsrRequest() { partitionStates.put(new TopicPartition("topic20", 1), new LeaderAndIsrRequest.PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas))); - Set leaders = new HashSet<>(Arrays.asList( - new BrokerEndPoint(0, "test0", 1223), - new BrokerEndPoint(1, "test1", 1223) + Set leaders = new HashSet<>(Arrays.asList( + new Node(0, "test0", 1223), + new Node(1, "test1", 1223) )); return new LeaderAndIsrRequest(1, 10, partitionStates, leaders); @@ -401,9 +400,9 @@ private AbstractRequest createUpdateMetadataRequest(int version, String rack) { new UpdateMetadataRequest.PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas))); if (version == 0) { - Set liveBrokers = new HashSet<>(Arrays.asList( - new BrokerEndPoint(0, "host1", 1223), - new BrokerEndPoint(1, "host2", 1234) + Set 
liveBrokers = new HashSet<>(Arrays.asList( + new Node(0, "host1", 1223), + new Node(1, "host2", 1234) )); return new UpdateMetadataRequest(1, 10, liveBrokers, partitionStates); diff --git a/core/src/main/scala/kafka/cluster/EndPoint.scala b/core/src/main/scala/kafka/cluster/EndPoint.scala index 32c27ed37717..3d248628ada4 100644 --- a/core/src/main/scala/kafka/cluster/EndPoint.scala +++ b/core/src/main/scala/kafka/cluster/EndPoint.scala @@ -26,6 +26,8 @@ import org.apache.kafka.common.utils.Utils object EndPoint { + private val uriParseExp = """^(.*)://\[?([0-9a-zA-Z\-.:]*)\]?:(-?[0-9]+)""".r + def readFrom(buffer: ByteBuffer): EndPoint = { val port = buffer.getInt() val host = readShortString(buffer) @@ -42,7 +44,6 @@ object EndPoint { * @return */ def createEndPoint(connectionString: String): EndPoint = { - val uriParseExp = """^(.*)://\[?([0-9a-zA-Z\-.:]*)\]?:(-?[0-9]+)""".r connectionString match { case uriParseExp(protocol, "", port) => new EndPoint(null, port.toInt, SecurityProtocol.forName(protocol)) case uriParseExp(protocol, host, port) => new EndPoint(host, port.toInt, SecurityProtocol.forName(protocol)) diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala index ea156fa66fd2..b376d15e4eb1 100755 --- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala +++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala @@ -30,7 +30,7 @@ import org.apache.kafka.common.network.{ChannelBuilders, LoginType, Mode, Networ import org.apache.kafka.common.protocol.{ApiKeys, SecurityProtocol} import org.apache.kafka.common.requests.{UpdateMetadataRequest, _} import org.apache.kafka.common.utils.Time -import org.apache.kafka.common.{BrokerEndPoint, Node, TopicPartition} +import org.apache.kafka.common.{Node, TopicPartition} import scala.collection.JavaConverters._ import scala.collection.{Set, mutable} @@ -351,9 +351,8 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging topicPartition.topic, topicPartition.partition)) } val leaderIds = partitionStateInfos.map(_._2.leaderIsrAndControllerEpoch.leaderAndIsr.leader).toSet - val leaders = controllerContext.liveOrShuttingDownBrokers.filter(b => leaderIds.contains(b.id)).map { b => - val brokerEndPoint = b.getBrokerEndPoint(controller.config.interBrokerSecurityProtocol) - new BrokerEndPoint(brokerEndPoint.id, brokerEndPoint.host, brokerEndPoint.port) + val leaders = controllerContext.liveOrShuttingDownBrokers.filter(b => leaderIds.contains(b.id)).map { + _.getNode(controller.config.interBrokerSecurityProtocol) } val partitionStates = partitionStateInfos.map { case (topicPartition, partitionStateInfo) => val LeaderIsrAndControllerEpoch(leaderIsr, controllerEpoch) = partitionStateInfo.leaderIsrAndControllerEpoch @@ -387,10 +386,7 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging val updateMetadataRequest = if (version == 0) { - val liveBrokers = controllerContext.liveOrShuttingDownBrokers.map { broker => - val brokerEndPoint = broker.getBrokerEndPoint(SecurityProtocol.PLAINTEXT) - new BrokerEndPoint(brokerEndPoint.id, brokerEndPoint.host, brokerEndPoint.port) - } + val liveBrokers = controllerContext.liveOrShuttingDownBrokers.map(_.getNode(SecurityProtocol.PLAINTEXT)) new UpdateMetadataRequest(controllerId, controllerEpoch, liveBrokers.asJava, partitionStates.asJava) } else { diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala 
b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala index fad76575939d..bc705f13f10d 100644 --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala @@ -17,6 +17,7 @@ import java.net.Socket import java.nio.ByteBuffer import java.util.concurrent.ExecutionException import java.util.{ArrayList, Collections, Properties} + import kafka.cluster.EndPoint import kafka.common.TopicAndPartition import kafka.coordinator.GroupCoordinator @@ -24,15 +25,16 @@ import kafka.integration.KafkaServerTestHarness import kafka.security.auth._ import kafka.server.KafkaConfig import kafka.utils.TestUtils -import org.apache.kafka.clients.consumer.{OffsetAndMetadata, Consumer, ConsumerRecord, KafkaConsumer} +import org.apache.kafka.clients.consumer.{Consumer, ConsumerRecord, KafkaConsumer, OffsetAndMetadata} import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.errors._ import org.apache.kafka.common.protocol.{ApiKeys, Errors, SecurityProtocol} import org.apache.kafka.common.requests._ import org.apache.kafka.common.security.auth.KafkaPrincipal -import org.apache.kafka.common.{BrokerEndPoint, TopicPartition, requests} +import org.apache.kafka.common.{Node, TopicPartition, requests} import org.junit.Assert._ import org.junit.{After, Assert, Before, Test} + import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.Buffer @@ -214,7 +216,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { private def createLeaderAndIsrRequest = { new requests.LeaderAndIsrRequest(brokerId, Int.MaxValue, Map(tp -> new requests.LeaderAndIsrRequest.PartitionState(Int.MaxValue, brokerId, Int.MaxValue, List(brokerId).asJava, 2, Set(brokerId).asJava)).asJava, - Set(new BrokerEndPoint(brokerId,"localhost", 0)).asJava) + Set(new Node(brokerId, "localhost", 0)).asJava) } private def createStopReplicaRequest = { diff --git a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala index 94013bcb225d..e84780aaa2a1 100755 --- a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala +++ b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala @@ -17,22 +17,22 @@ package kafka.server -import org.apache.kafka.common.{BrokerEndPoint, TopicPartition} +import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.requests.LeaderAndIsrRequest.PartitionState import scala.collection.JavaConverters._ import kafka.api.LeaderAndIsr -import org.apache.kafka.common.requests.{LeaderAndIsrResponse, LeaderAndIsrRequest, AbstractRequestResponse} +import org.apache.kafka.common.requests.{AbstractRequestResponse, LeaderAndIsrRequest, LeaderAndIsrResponse} import org.junit.Assert._ -import kafka.utils.{TestUtils, CoreUtils} +import kafka.utils.{CoreUtils, TestUtils} import kafka.cluster.Broker import kafka.controller.{ControllerChannelManager, ControllerContext} import kafka.utils.TestUtils._ import kafka.zk.ZooKeeperTestHarness import org.apache.kafka.common.metrics.Metrics -import org.apache.kafka.common.protocol.{Errors, ApiKeys, SecurityProtocol} +import org.apache.kafka.common.protocol.{ApiKeys, Errors, SecurityProtocol} import org.apache.kafka.common.utils.SystemTime -import org.junit.{Test, After, Before} +import org.junit.{After, Before, Test} class LeaderElectionTest extends ZooKeeperTestHarness { val brokerId1 = 0 @@ 
-130,10 +130,7 @@ class LeaderElectionTest extends ZooKeeperTestHarness { val controllerConfig = KafkaConfig.fromProps(TestUtils.createBrokerConfig(controllerId, zkConnect)) val brokers = servers.map(s => new Broker(s.config.brokerId, "localhost", s.boundPort())) - val brokerEndPoints = brokers.map { b => - val brokerEndPoint = b.getBrokerEndPoint(SecurityProtocol.PLAINTEXT) - new BrokerEndPoint(brokerEndPoint.id, brokerEndPoint.host, brokerEndPoint.port) - } + val nodes = brokers.map(_.getNode(SecurityProtocol.PLAINTEXT)) val controllerContext = new ControllerContext(zkUtils, 6000) controllerContext.liveBrokers = brokers.toSet @@ -148,7 +145,7 @@ class LeaderElectionTest extends ZooKeeperTestHarness { Set(0, 1).map(Integer.valueOf).asJava) ) val leaderAndIsrRequest = new LeaderAndIsrRequest(controllerId, staleControllerEpoch, partitionStates.asJava, - brokerEndPoints.toSet.asJava) + nodes.toSet.asJava) controllerChannelManager.sendRequest(brokerId2, ApiKeys.LEADER_AND_ISR, None, leaderAndIsrRequest, staleControllerEpochCallback) diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala index a5a8df1e32aa..ee14af4af518 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala @@ -33,7 +33,7 @@ import org.apache.kafka.common.requests.LeaderAndIsrRequest import org.apache.kafka.common.requests.LeaderAndIsrRequest.PartitionState import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse import org.apache.kafka.common.utils.{MockTime => JMockTime} -import org.apache.kafka.common.{BrokerEndPoint, TopicPartition} +import org.apache.kafka.common.{Node, TopicPartition} import org.easymock.EasyMock import org.junit.Assert.{assertEquals, assertTrue} import org.junit.Test @@ -162,7 +162,7 @@ class ReplicaManagerTest { // Make this replica the leader. val leaderAndIsrRequest1 = new LeaderAndIsrRequest(0, 0, collection.immutable.Map(new TopicPartition(topic, 0) -> new PartitionState(0, 0, 0, brokerList, 0, brokerSet)).asJava, - Set(new BrokerEndPoint(0, "host1", 0), new BrokerEndPoint(1, "host2", 1)).asJava) + Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava) rm.becomeLeaderOrFollower(0, leaderAndIsrRequest1, metadataCache, (_, _) => {}) rm.getLeaderReplicaIfLocal(topic, 0) @@ -185,7 +185,7 @@ class ReplicaManagerTest { // Make this replica the follower val leaderAndIsrRequest2 = new LeaderAndIsrRequest(0, 0, collection.immutable.Map(new TopicPartition(topic, 0) -> new PartitionState(0, 1, 1, brokerList, 0, brokerSet)).asJava, - Set(new BrokerEndPoint(0, "host1", 0), new BrokerEndPoint(1, "host2", 1)).asJava) + Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava) rm.becomeLeaderOrFollower(1, leaderAndIsrRequest2, metadataCache, (_, _) => {}) assertTrue(produceCallbackFired) From e5a1f704d7ed59f9a6a423bbc36510d13a5df1de Mon Sep 17 00:00:00 2001 From: Jiangjie Qin Date: Wed, 23 Mar 2016 07:15:59 -0700 Subject: [PATCH 015/267] KAFKA-3442; Fix FileMessageSet iterator. 
Author: Jiangjie Qin Reviewers: Ismael Juma , Jun Rao Closes #1112 from becketqin/KAFKA-3442 (cherry picked from commit 7af67ce22aa02121d6b82dc54dad42358282e524) Signed-off-by: Jun Rao --- .../main/scala/kafka/log/FileMessageSet.scala | 28 +++++++++++-------- .../unit/kafka/log/FileMessageSetTest.scala | 17 +++++++++-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/kafka/log/FileMessageSet.scala b/core/src/main/scala/kafka/log/FileMessageSet.scala index 45b3df9970e1..a164b4b96730 100755 --- a/core/src/main/scala/kafka/log/FileMessageSet.scala +++ b/core/src/main/scala/kafka/log/FileMessageSet.scala @@ -206,7 +206,7 @@ class FileMessageSet private[kafka](@volatile var file: File, /** * Convert this message set to use the specified message format. */ - def toMessageFormat(toMagicValue: Byte): ByteBufferMessageSet = { + def toMessageFormat(toMagicValue: Byte): MessageSet = { val offsets = new ArrayBuffer[Long] val newMessages = new ArrayBuffer[Message] this.foreach { messageAndOffset => @@ -224,11 +224,16 @@ class FileMessageSet private[kafka](@volatile var file: File, } } - // We use the offset seq to assign offsets so the offset of the messages does not change. - new ByteBufferMessageSet( - compressionCodec = this.headOption.map(_.message.compressionCodec).getOrElse(NoCompressionCodec), - offsetSeq = offsets, - newMessages: _*) + if (sizeInBytes > 0 && newMessages.size == 0) { + // This indicates that the message is too large. We just return all the bytes in the file message set. + this + } else { + // We use the offset seq to assign offsets so the offset of the messages does not change. + new ByteBufferMessageSet( + compressionCodec = this.headOption.map(_.message.compressionCodec).getOrElse(NoCompressionCodec), + offsetSeq = offsets, + newMessages: _*) + } } /** @@ -245,10 +250,11 @@ class FileMessageSet private[kafka](@volatile var file: File, def iterator(maxMessageSize: Int): Iterator[MessageAndOffset] = { new IteratorTemplate[MessageAndOffset] { var location = start - val sizeOffsetBuffer = ByteBuffer.allocate(12) + val sizeOffsetLength = 12 + val sizeOffsetBuffer = ByteBuffer.allocate(sizeOffsetLength) override def makeNext(): MessageAndOffset = { - if(location >= end) + if(location + sizeOffsetLength >= end) return allDone() // read the size of the item @@ -260,20 +266,20 @@ class FileMessageSet private[kafka](@volatile var file: File, sizeOffsetBuffer.rewind() val offset = sizeOffsetBuffer.getLong() val size = sizeOffsetBuffer.getInt() - if(size < Message.MinMessageOverhead) + if(size < Message.MinMessageOverhead || location + sizeOffsetLength + size > end) return allDone() if(size > maxMessageSize) throw new CorruptRecordException("Message size exceeds the largest allowable message size (%d).".format(maxMessageSize)) // read the item itself val buffer = ByteBuffer.allocate(size) - channel.read(buffer, location + 12) + channel.read(buffer, location + sizeOffsetLength) if(buffer.hasRemaining) return allDone() buffer.rewind() // increment the location and return the item - location += size + 12 + location += size + sizeOffsetLength new MessageAndOffset(new Message(buffer), offset) } } diff --git a/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala b/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala index a3e5b2d4f01e..534443ce3203 100644 --- a/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala +++ b/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala @@ -133,11 +133,13 @@ class FileMessageSetTest extends 
BaseMessageSetTestCases { def testIteratorWithLimits() { val message = messageSet.toList(1) val start = messageSet.searchFor(1, 0).position - val size = message.message.size + val size = message.message.size + 12 val slice = messageSet.read(start, size) assertEquals(List(message), slice.toList) + val slice2 = messageSet.read(start, size - 1) + assertEquals(List(), slice2.toList) } - + /** * Test the truncateTo method lops off messages and appropriately updates the size */ @@ -202,6 +204,17 @@ class FileMessageSetTest extends BaseMessageSetTestCases { assertEquals(oldposition, tempReopen.length) } + @Test + def testFormatConversionWithPartialMessage() { + val message = messageSet.toList(1) + val start = messageSet.searchFor(1, 0).position + val size = message.message.size + 12 + val slice = messageSet.read(start, size - 1) + val messageV0 = slice.toMessageFormat(Message.MagicValue_V0) + assertEquals("No message should be there", 0, messageV0.size) + assertEquals(s"There should be ${size - 1} bytes", size - 1, messageV0.sizeInBytes) + } + @Test def testMessageFormatConversion() { From 2835e73000a44c9586d085275b62eeeb2ce1fcec Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Wed, 23 Mar 2016 09:47:48 -0700 Subject: [PATCH 016/267] KAFKA-3409: handle CommitFailedException in MirrorMaker Author: Jason Gustafson Reviewers: Ismael Juma, Ashish Singh, Guozhang Wang Closes #1115 from hachikuji/KAFKA-3409 (cherry picked from commit 20c313526a0518a51142d3abc5ee2a4d2ef3cb34) Signed-off-by: Guozhang Wang --- .../main/scala/kafka/tools/MirrorMaker.scala | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/kafka/tools/MirrorMaker.scala b/core/src/main/scala/kafka/tools/MirrorMaker.scala index 26f4826982e6..87f3cc53bafb 100755 --- a/core/src/main/scala/kafka/tools/MirrorMaker.scala +++ b/core/src/main/scala/kafka/tools/MirrorMaker.scala @@ -25,14 +25,13 @@ import java.util.{Collections, Properties} import com.yammer.metrics.core.Gauge import joptsimple.OptionParser -import kafka.client.ClientUtils import kafka.consumer.{BaseConsumerRecord, ConsumerIterator, BaseConsumer, Blacklist, ConsumerConfig, ConsumerThreadId, ConsumerTimeoutException, TopicFilter, Whitelist, ZookeeperConsumerConnector} import kafka.javaapi.consumer.ConsumerRebalanceListener -import kafka.message.MessageAndMetadata import kafka.metrics.KafkaMetricsGroup import kafka.serializer.DefaultDecoder import kafka.utils.{CommandLineUtils, CoreUtils, Logging} -import org.apache.kafka.clients.consumer.{OffsetAndMetadata, Consumer, ConsumerRecord, KafkaConsumer} +import org.apache.kafka.clients.consumer +import org.apache.kafka.clients.consumer.{OffsetAndMetadata, Consumer, ConsumerRecord, KafkaConsumer, CommitFailedException} import org.apache.kafka.clients.producer.internals.ErrorLoggingCallback import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata} import org.apache.kafka.common.TopicPartition @@ -356,6 +355,12 @@ object MirrorMaker extends Logging with KafkaMetricsGroup { // and re-throw to break the loop mirrorMakerConsumer.commit() throw e + + case e: CommitFailedException => + warn("Failed to commit offsets because the consumer group has rebalanced and assigned partitions to " + + "another instance. 
If you see this regularly, it could indicate that you need to either increase " + + s"the consumer's ${consumer.ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG} or reduce the number of records " + + s"handled on each iteration with ${consumer.ConsumerConfig.MAX_POLL_RECORDS_CONFIG}") } } else { info("Exiting on send failure, skip committing offsets.") @@ -422,10 +427,15 @@ object MirrorMaker extends Logging with KafkaMetricsGroup { case t: Throwable => fatal("Mirror maker thread failure due to ", t) } finally { - info("Flushing producer.") - producer.flush() - info("Committing consumer offsets.") - CoreUtils.swallow(commitOffsets(mirrorMakerConsumer)) + CoreUtils.swallow { + info("Flushing producer.") + producer.flush() + + // note that this commit is skipped if flush() fails which ensures that we don't lose messages + info("Committing consumer offsets.") + commitOffsets(mirrorMakerConsumer) + } + info("Shutting down consumer connectors.") CoreUtils.swallow(mirrorMakerConsumer.stop()) CoreUtils.swallow(mirrorMakerConsumer.cleanup()) From f5e1ca625332572923b42a9ae8d660f2452ae92f Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Wed, 23 Mar 2016 12:54:26 -0700 Subject: [PATCH 017/267] KAFKA-3441: 0.10.0 documentation still says "0.9.0" Author: Grant Henke Reviewers: Gwen Shapira Closes #1122 from granthenke/docs-10 (cherry picked from commit d57847641037823a7306f53251b063869f7affb6) Signed-off-by: Gwen Shapira --- docs/api.html | 8 ++++---- docs/quickstart.html | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/api.html b/docs/api.html index 254155392027..d303244427d7 100644 --- a/docs/api.html +++ b/docs/api.html @@ -24,12 +24,12 @@

    2.1 Producer API

    <dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka-clients</artifactId> - <version>0.9.0.0</version> + <version>0.10.0.0</version> </dependency> Examples showing how to use the producer are given in the -javadocs. +javadocs.

For those interested in the legacy Scala producer api, information can be found
@@ -159,9 +159,9 @@

    2.2.3 New Consumer API

Examples showing how to use the consumer are given in the
-javadocs.
+javadocs.
diff --git a/docs/quickstart.html b/docs/quickstart.html
index 1238316dbee6..1e7b62ce3c0a 100644
--- a/docs/quickstart.html
+++ b/docs/quickstart.html
@@ -21,11 +21,11 @@

    1.3 Quick Start

    Step 1: Download the code

-Download the 0.9.0.0 release and un-tar it.
+Download the 0.10.0.0 release and un-tar it.
    -> tar -xzf kafka_2.11-0.9.0.0.tgz
    -> cd kafka_2.11-0.9.0.0
    +> tar -xzf kafka_2.11-0.10.0.0.tgz
    +> cd kafka_2.11-0.10.0.0
     

    Step 2: Start the server

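The patch that follows (KAFKA-3432) replaces the mutating Cluster.update() with withPartitions(), which returns a combined copy so a Cluster instance can be shared across threads without synchronization. A minimal usage sketch of that call pattern (not part of the patch itself): the Node and three-argument Cluster constructors and withPartitions(Map) are taken from the diffs in this series, while the five-argument PartitionInfo constructor, the topic name and the broker address are illustrative assumptions.

import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class WithPartitionsSketch {
    public static void main(String[] args) {
        Node broker = new Node(0, "localhost", 9092);

        // A cluster that knows about one broker and no partitions yet.
        Cluster original = new Cluster(
                Collections.singletonList(broker),
                Collections.<PartitionInfo>emptyList(),
                Collections.<String>emptySet());

        // Partition metadata discovered later (e.g. for internal topics) is merged
        // into a copy instead of being written into the shared instance.
        Map<TopicPartition, PartitionInfo> extra = new HashMap<>();
        extra.put(new TopicPartition("internal-topic", 0),
                new PartitionInfo("internal-topic", 0, broker, new Node[]{broker}, new Node[]{broker}));

        Cluster combined = original.withPartitions(extra);
        // 'combined' sees the extra partition; 'original' is left untouched.
    }
}

Returning a copy trades a small allocation for thread-safety, which is why the StreamPartitionAssignor hunk below hands the partition grouper the metadataWithInternalTopics copy rather than an updated shared metadata object.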
    From 63e9d246b7978152e12aa4ea8b1ba6bcc40f4498 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Wed, 23 Mar 2016 13:53:37 -0700 Subject: [PATCH 018/267] KAFKA-3432; Cluster.update() thread-safety Replace `update` with `withPartitions`, which returns a copy instead of mutating the instance. Author: Ismael Juma Reviewers: Guozhang Wang Closes #1118 from ijuma/kafka-3432-cluster-update-thread-safety (cherry picked from commit d4d5920ed40736d21f056188efa8a86c93e22506) Signed-off-by: Guozhang Wang --- .../java/org/apache/kafka/common/Cluster.java | 40 ++++--------------- .../internals/StreamPartitionAssignor.java | 12 +++++- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/Cluster.java b/clients/src/main/java/org/apache/kafka/common/Cluster.java index 4f3735850fc1..8e85df8f0903 100644 --- a/clients/src/main/java/org/apache/kafka/common/Cluster.java +++ b/clients/src/main/java/org/apache/kafka/common/Cluster.java @@ -19,6 +19,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -50,7 +51,7 @@ public Cluster(Collection nodes, this.nodes = Collections.unmodifiableList(copy); this.nodesById = new HashMap<>(); - for (Node node: nodes) + for (Node node : nodes) this.nodesById.put(node.id(), node); // index the partitions by topic/partition for quick lookup @@ -118,39 +119,12 @@ public static Cluster bootstrap(List addresses) { } /** - * Update the cluster information for specific topic with new partition information + * Return a copy of this cluster combined with `partitions`. */ - public Cluster update(String topic, Collection partitions) { - - // re-index the partitions by topic/partition for quick lookup - for (PartitionInfo p : partitions) - this.partitionsByTopicPartition.put(new TopicPartition(p.topic(), p.partition()), p); - - // re-index the partitions by topic and node respectively - this.partitionsByTopic.put(topic, Collections.unmodifiableList(new ArrayList<>(partitions))); - - List availablePartitions = new ArrayList<>(); - for (PartitionInfo part : partitions) { - if (part.leader() != null) - availablePartitions.add(part); - } - this.availablePartitionsByTopic.put(topic, Collections.unmodifiableList(availablePartitions)); - - HashMap> partsForNode = new HashMap<>(); - for (Node n : this.nodes) { - partsForNode.put(n.id(), new ArrayList()); - } - for (PartitionInfo p : partitions) { - if (p.leader() != null) { - List psNode = Utils.notNull(partsForNode.get(p.leader().id())); - psNode.add(p); - } - } - - for (Map.Entry> entry : partsForNode.entrySet()) - this.partitionsByNode.put(entry.getKey(), Collections.unmodifiableList(entry.getValue())); - - return this; + public Cluster withPartitions(Map partitions) { + Map combinedPartitions = new HashMap<>(this.partitionsByTopicPartition); + combinedPartitions.putAll(partitions); + return new Cluster(this.nodes, combinedPartitions.values(), new HashSet<>(this.unauthorizedTopics)); } /** diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java index a6b82af02e28..1dd082d8cf5c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java @@ -226,6 +226,8 @@ 
public Map assign(Cluster metadata, Map internalPartitionInfos = new HashMap<>(); + // if ZK is specified, prepare the internal source topic before calling partition grouper if (internalTopicManager != null) { log.debug("Starting to validate internal source topics in partition assignor."); @@ -247,15 +249,21 @@ public Map assign(Cluster metadata, Map> partitionsForTask = streamThread.partitionGrouper.partitionGroups(sourceTopicGroups, metadata); + Map> partitionsForTask = streamThread.partitionGrouper.partitionGroups( + sourceTopicGroups, metadataWithInternalTopics); // add tasks to state change log topic subscribers stateChangelogTopicToTaskIds = new HashMap<>(); From b31a2f06c80c2c2cacf34d75998d401bddcca374 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Wed, 23 Mar 2016 14:25:08 -0700 Subject: [PATCH 019/267] HOTFIX: fix NPE in changelogger Fix NPE in StoreChangeLogger caused by a record out of window retention period. guozhangwang Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1124 from ymatsuda/logger_npe (cherry picked from commit 80d78f81470f109dc6d221f755b039c7332bb93b) Signed-off-by: Guozhang Wang --- .../kafka/streams/state/internals/RocksDBWindowStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStore.java index 4c6a2296b632..9851c0489b88 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStore.java @@ -245,7 +245,7 @@ public void close() { public void put(K key, V value) { byte[] rawKey = putAndReturnInternalKey(key, value, USE_CURRENT_TIMESTAMP); - if (loggingEnabled) { + if (rawKey != null && loggingEnabled) { changeLogger.add(rawKey); changeLogger.maybeLogChange(this.getter); } @@ -255,7 +255,7 @@ public void put(K key, V value) { public void put(K key, V value, long timestamp) { byte[] rawKey = putAndReturnInternalKey(key, value, timestamp); - if (loggingEnabled) { + if (rawKey != null && loggingEnabled) { changeLogger.add(rawKey); changeLogger.maybeLogChange(this.getter); } From 8525d8279b7798db90f752c329aa3b5936cb5f7f Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Wed, 23 Mar 2016 14:57:03 -0700 Subject: [PATCH 020/267] MINOR: remove streams-smoke-test.sh guozhangwang Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1125 from ymatsuda/remove_smoketest_shell_script (cherry picked from commit de062443381df84ee0d65acc20e44ffca2b2552b) Signed-off-by: Guozhang Wang --- bin/streams-smoke-test.sh | 23 ----------------------- tests/kafkatest/services/streams.py | 3 ++- 2 files changed, 2 insertions(+), 24 deletions(-) delete mode 100755 bin/streams-smoke-test.sh diff --git a/bin/streams-smoke-test.sh b/bin/streams-smoke-test.sh deleted file mode 100755 index 196990ef41c6..000000000000 --- a/bin/streams-smoke-test.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -base_dir=$(dirname $0) - -if [ "x$KAFKA_LOG4J_OPTS" = "x" ]; then - export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$base_dir/../config/tools-log4j.properties" -fi - -exec $(dirname $0)/kafka-run-class.sh org.apache.kafka.streams.smoketest.StreamsSmokeTest "$@" diff --git a/tests/kafkatest/services/streams.py b/tests/kafkatest/services/streams.py index 192a8d9fcfa8..dcbcc696b8b3 100644 --- a/tests/kafkatest/services/streams.py +++ b/tests/kafkatest/services/streams.py @@ -107,7 +107,8 @@ def start_cmd(self, node): args['kafka_dir'] = kafka_dir(node) cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \ - "/opt/%(kafka_dir)s/bin/streams-smoke-test.sh %(command)s %(kafka)s %(zk)s %(state_dir)s " \ + "/opt/%(kafka_dir)s/bin/kafka-run-class.sh org.apache.kafka.streams.smoketest.StreamsSmokeTest " \ + " %(command)s %(kafka)s %(zk)s %(state_dir)s " \ " & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args return cmd From 0cc997ae76010cf85b55d057dea9996fa1931e5b Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Wed, 23 Mar 2016 15:41:07 -0700 Subject: [PATCH 021/267] MINOR: Revert 0.10.0 branch to SNAPSHOT per change in release process Author: Gwen Shapira Reviewers: Ewen Cheslack-Postava Closes #1126 from gwenshap/minor-release-version --- gradle.properties | 2 +- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gradle.properties b/gradle.properties index 7f30b4d4d7ed..b058e58cbe30 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. 
-version=0.10.0.0 +version=0.10.0.0-SNAPSHOT scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index 2345dbbd4516..e12410511b65 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.1") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.0") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index c509eff0cb2b..df1a6129dbc8 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.0' +__version__ = '0.10.0.0.dev0' From 4e557f8ef60d46a8870704655c9a35092f74d125 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Wed, 23 Mar 2016 22:36:19 -0700 Subject: [PATCH 022/267] KAFKA-3434; add old constructor to ConsumerRecord Author: Jason Gustafson Reviewers: Grant Henke , Ismael Juma , Ewen Cheslack-Postava Closes #1123 from hachikuji/KAFKA-3434 (cherry picked from commit cb78223bf90aca4f75699f36c1a82db7661a62f3) Signed-off-by: Ewen Cheslack-Postava --- .../clients/consumer/ConsumerRecord.java | 29 +++++++++++ .../clients/consumer/internals/Fetcher.java | 4 +- .../clients/consumer/ConsumerRecordTest.java | 48 +++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerRecordTest.java diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java index 4165534456de..586156e07461 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java @@ -12,6 +12,7 @@ */ package org.apache.kafka.clients.consumer; +import org.apache.kafka.common.record.Record; import org.apache.kafka.common.record.TimestampType; /** @@ -19,6 +20,10 @@ * record is being received and an offset that points to the record in a Kafka partition. */ public final class ConsumerRecord { + public static final long NO_TIMESTAMP = Record.NO_TIMESTAMP; + public static final int NULL_SIZE = -1; + public static final int NULL_CHECKSUM = -1; + private final String topic; private final int partition; private final long offset; @@ -30,6 +35,27 @@ public final class ConsumerRecord { private final K key; private final V value; + /** + * Creates a record to be received from a specified topic and partition (provided for + * compatibility with Kafka 0.9 before the message format supported timestamps and before + * serialized metadata were exposed). 
+ * + * @param topic The topic this record is received from + * @param partition The partition of the topic this record is received from + * @param offset The offset of this record in the corresponding Kafka partition + * @param key The key of the record, if one exists (null is allowed) + * @param value The record contents + */ + public ConsumerRecord(String topic, + int partition, + long offset, + K key, + V value) { + this(topic, partition, offset, NO_TIMESTAMP, TimestampType.NO_TIMESTAMP_TYPE, + NULL_CHECKSUM, NULL_SIZE, NULL_SIZE, key, value); + } + + /** * Creates a record to be received from a specified topic and partition * @@ -38,6 +64,9 @@ public final class ConsumerRecord { * @param offset The offset of this record in the corresponding Kafka partition * @param timestamp The timestamp of the record. * @param timestampType The timestamp type + * @param checksum The checksum (CRC32) of the full record + * @param serializedKeySize The length of the serialized key + * @param serializedValueSize The length of the serialized value * @param key The key of the record, if one exists (null is allowed) * @param value The record contents */ diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index 802a2f0b63e7..9a26551a1e7a 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -653,8 +653,8 @@ private ConsumerRecord parseRecord(TopicPartition partition, LogEntry logE return new ConsumerRecord<>(partition.topic(), partition.partition(), offset, timestamp, timestampType, logEntry.record().checksum(), - keyByteArray == null ? -1 : keyByteArray.length, - valueByteArray == null ? -1 : valueByteArray.length, + keyByteArray == null ? ConsumerRecord.NULL_SIZE : keyByteArray.length, + valueByteArray == null ? ConsumerRecord.NULL_SIZE : valueByteArray.length, key, value); } catch (KafkaException e) { throw e; diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerRecordTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerRecordTest.java new file mode 100644 index 000000000000..d1d3b24afeff --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerRecordTest.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.clients.consumer; + +import org.apache.kafka.common.record.TimestampType; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class ConsumerRecordTest { + + @Test + public void testOldConstructor() { + String topic = "topic"; + int partition = 0; + long offset = 23; + String key = "key"; + String value = "value"; + + ConsumerRecord record = new ConsumerRecord(topic, partition, offset, key, value); + assertEquals(topic, record.topic()); + assertEquals(partition, record.partition()); + assertEquals(offset, record.offset()); + assertEquals(key, record.key()); + assertEquals(value, record.value()); + assertEquals(TimestampType.NO_TIMESTAMP_TYPE, record.timestampType()); + assertEquals(ConsumerRecord.NO_TIMESTAMP, record.timestamp()); + assertEquals(ConsumerRecord.NULL_CHECKSUM, record.checksum()); + assertEquals(ConsumerRecord.NULL_SIZE, record.serializedKeySize()); + assertEquals(ConsumerRecord.NULL_SIZE, record.serializedValueSize()); + } + + +} From 4e5653d77359214452b94a65c9db29eaa57a3c31 Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Fri, 25 Mar 2016 10:07:05 -0700 Subject: [PATCH 023/267] KAFKA-3460: Remove old 0.7 KafkaMigrationTool Author: Grant Henke Reviewers: Gwen Shapira Closes #1136 from granthenke/remove-07-migration (cherry picked from commit d3a66a65365579e9320347663969b30c1148e497) Signed-off-by: Gwen Shapira --- .../scala/kafka/tools/KafkaMigrationTool.java | 487 ------------------ docs/upgrade.html | 1 + 2 files changed, 1 insertion(+), 487 deletions(-) delete mode 100755 core/src/main/scala/kafka/tools/KafkaMigrationTool.java diff --git a/core/src/main/scala/kafka/tools/KafkaMigrationTool.java b/core/src/main/scala/kafka/tools/KafkaMigrationTool.java deleted file mode 100755 index 0b94902b8806..000000000000 --- a/core/src/main/scala/kafka/tools/KafkaMigrationTool.java +++ /dev/null @@ -1,487 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package kafka.tools; - -import joptsimple.ArgumentAcceptingOptionSpec; -import joptsimple.OptionParser; -import joptsimple.OptionSet; -import joptsimple.OptionSpec; -import joptsimple.OptionSpecBuilder; -import kafka.javaapi.producer.Producer; -import kafka.producer.KeyedMessage; -import kafka.producer.ProducerConfig; -import org.apache.kafka.common.utils.Utils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.net.URL; -import java.net.URLClassLoader; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * This is a kafka 0.7 to 0.8 online migration tool used for migrating data from 0.7 to 0.8 cluster. Internally, - * it's composed of a kafka 0.7 consumer and kafka 0.8 producer. The kafka 0.7 consumer consumes data from the - * 0.7 cluster, and the kafka 0.8 producer produces data to the 0.8 cluster. - * - * The 0.7 consumer is loaded from kafka 0.7 jar using a "parent last, child first" java class loader. - * Ordinary class loader is "parent first, child last", and kafka 0.8 and 0.7 both have classes for a lot of - * class names like "kafka.consumer.Consumer", etc., so ordinary java URLClassLoader with kafka 0.7 jar will - * will still load the 0.8 version class. - * - * As kafka 0.7 and kafka 0.8 used different version of zkClient, the zkClient jar used by kafka 0.7 should - * also be used by the class loader. - * - * The user need to provide the configuration file for 0.7 consumer and 0.8 producer. For 0.8 producer, - * the "serializer.class" config is set to "kafka.serializer.DefaultEncoder" by the code. 
- */ -@SuppressWarnings({"unchecked", "rawtypes", "deprecation"}) -public class KafkaMigrationTool { - private static final org.apache.log4j.Logger log = org.apache.log4j.Logger.getLogger(KafkaMigrationTool.class.getName()); - private static final String KAFKA_07_STATIC_CONSUMER_CLASS_NAME = "kafka.consumer.Consumer"; - private static final String KAFKA_07_CONSUMER_CONFIG_CLASS_NAME = "kafka.consumer.ConsumerConfig"; - private static final String KAFKA_07_CONSUMER_STREAM_CLASS_NAME = "kafka.consumer.KafkaStream"; - private static final String KAFKA_07_CONSUMER_ITERATOR_CLASS_NAME = "kafka.consumer.ConsumerIterator"; - private static final String KAFKA_07_CONSUMER_CONNECTOR_CLASS_NAME = "kafka.javaapi.consumer.ConsumerConnector"; - private static final String KAFKA_07_MESSAGE_AND_METADATA_CLASS_NAME = "kafka.message.MessageAndMetadata"; - private static final String KAFKA_07_MESSAGE_CLASS_NAME = "kafka.message.Message"; - private static final String KAFKA_07_WHITE_LIST_CLASS_NAME = "kafka.consumer.Whitelist"; - private static final String KAFKA_07_TOPIC_FILTER_CLASS_NAME = "kafka.consumer.TopicFilter"; - private static final String KAFKA_07_BLACK_LIST_CLASS_NAME = "kafka.consumer.Blacklist"; - - private static Class kafkaStaticConsumer07 = null; - private static Class consumerConfig07 = null; - private static Class consumerConnector07 = null; - private static Class kafkaStream07 = null; - private static Class topicFilter07 = null; - private static Class whiteList07 = null; - private static Class blackList07 = null; - private static Class kafkaConsumerIteratorClass07 = null; - private static Class kafkaMessageAndMetaDataClass07 = null; - private static Class kafkaMessageClass07 = null; - - public static void main(String[] args) throws InterruptedException, IOException { - OptionParser parser = new OptionParser(); - ArgumentAcceptingOptionSpec consumerConfigOpt - = parser.accepts("consumer.config", "Kafka 0.7 consumer config to consume from the source 0.7 cluster. 
" + "You man specify multiple of these.") - .withRequiredArg() - .describedAs("config file") - .ofType(String.class); - - ArgumentAcceptingOptionSpec producerConfigOpt - = parser.accepts("producer.config", "Producer config.") - .withRequiredArg() - .describedAs("config file") - .ofType(String.class); - - ArgumentAcceptingOptionSpec numProducersOpt - = parser.accepts("num.producers", "Number of producer instances") - .withRequiredArg() - .describedAs("Number of producers") - .ofType(Integer.class) - .defaultsTo(1); - - ArgumentAcceptingOptionSpec zkClient01JarOpt - = parser.accepts("zkclient.01.jar", "zkClient 0.1 jar file") - .withRequiredArg() - .describedAs("zkClient 0.1 jar file required by Kafka 0.7") - .ofType(String.class); - - ArgumentAcceptingOptionSpec kafka07JarOpt - = parser.accepts("kafka.07.jar", "Kafka 0.7 jar file") - .withRequiredArg() - .describedAs("kafka 0.7 jar") - .ofType(String.class); - - ArgumentAcceptingOptionSpec numStreamsOpt - = parser.accepts("num.streams", "Number of consumer streams") - .withRequiredArg() - .describedAs("Number of consumer threads") - .ofType(Integer.class) - .defaultsTo(1); - - ArgumentAcceptingOptionSpec whitelistOpt - = parser.accepts("whitelist", "Whitelist of topics to migrate from the 0.7 cluster") - .withRequiredArg() - .describedAs("Java regex (String)") - .ofType(String.class); - - ArgumentAcceptingOptionSpec blacklistOpt - = parser.accepts("blacklist", "Blacklist of topics to migrate from the 0.7 cluster") - .withRequiredArg() - .describedAs("Java regex (String)") - .ofType(String.class); - - ArgumentAcceptingOptionSpec queueSizeOpt - = parser.accepts("queue.size", "Number of messages that are buffered between the 0.7 consumer and 0.8 producer") - .withRequiredArg() - .describedAs("Queue size in terms of number of messages") - .ofType(Integer.class) - .defaultsTo(10000); - - OptionSpecBuilder helpOpt - = parser.accepts("help", "Print this message."); - - OptionSet options = parser.parse(args); - - if (options.has(helpOpt)) { - parser.printHelpOn(System.out); - System.exit(0); - } - - checkRequiredArgs(parser, options, new OptionSpec[]{consumerConfigOpt, producerConfigOpt, zkClient01JarOpt, kafka07JarOpt}); - int whiteListCount = options.has(whitelistOpt) ? 1 : 0; - int blackListCount = options.has(blacklistOpt) ? 
1 : 0; - if (whiteListCount + blackListCount != 1) { - System.err.println("Exactly one of whitelist or blacklist is required."); - System.exit(1); - } - - String kafkaJarFile07 = options.valueOf(kafka07JarOpt); - String zkClientJarFile = options.valueOf(zkClient01JarOpt); - String consumerConfigFile07 = options.valueOf(consumerConfigOpt); - int numConsumers = options.valueOf(numStreamsOpt); - String producerConfigFile08 = options.valueOf(producerConfigOpt); - int numProducers = options.valueOf(numProducersOpt); - final List migrationThreads = new ArrayList(numConsumers); - final List producerThreads = new ArrayList(numProducers); - - try { - File kafkaJar07 = new File(kafkaJarFile07); - File zkClientJar = new File(zkClientJarFile); - ParentLastURLClassLoader c1 = new ParentLastURLClassLoader(new URL[]{ - kafkaJar07.toURI().toURL(), - zkClientJar.toURI().toURL() - }); - - /** Construct the 07 consumer config **/ - consumerConfig07 = c1.loadClass(KAFKA_07_CONSUMER_CONFIG_CLASS_NAME); - kafkaStaticConsumer07 = c1.loadClass(KAFKA_07_STATIC_CONSUMER_CLASS_NAME); - consumerConnector07 = c1.loadClass(KAFKA_07_CONSUMER_CONNECTOR_CLASS_NAME); - kafkaStream07 = c1.loadClass(KAFKA_07_CONSUMER_STREAM_CLASS_NAME); - topicFilter07 = c1.loadClass(KAFKA_07_TOPIC_FILTER_CLASS_NAME); - whiteList07 = c1.loadClass(KAFKA_07_WHITE_LIST_CLASS_NAME); - blackList07 = c1.loadClass(KAFKA_07_BLACK_LIST_CLASS_NAME); - kafkaMessageClass07 = c1.loadClass(KAFKA_07_MESSAGE_CLASS_NAME); - kafkaConsumerIteratorClass07 = c1.loadClass(KAFKA_07_CONSUMER_ITERATOR_CLASS_NAME); - kafkaMessageAndMetaDataClass07 = c1.loadClass(KAFKA_07_MESSAGE_AND_METADATA_CLASS_NAME); - - Constructor consumerConfigConstructor07 = consumerConfig07.getConstructor(Properties.class); - Properties kafkaConsumerProperties07 = new Properties(); - kafkaConsumerProperties07.load(new FileInputStream(consumerConfigFile07)); - /** Disable shallow iteration because the message format is different between 07 and 08, we have to get each individual message **/ - if (kafkaConsumerProperties07.getProperty("shallow.iterator.enable", "").equals("true")) { - log.warn("Shallow iterator should not be used in the migration tool"); - kafkaConsumerProperties07.setProperty("shallow.iterator.enable", "false"); - } - Object consumerConfig07 = consumerConfigConstructor07.newInstance(kafkaConsumerProperties07); - - /** Construct the 07 consumer connector **/ - Method consumerConnectorCreationMethod07 = kafkaStaticConsumer07.getMethod("createJavaConsumerConnector", KafkaMigrationTool.consumerConfig07); - final Object consumerConnector07 = consumerConnectorCreationMethod07.invoke(null, consumerConfig07); - Method consumerConnectorCreateMessageStreamsMethod07 = KafkaMigrationTool.consumerConnector07.getMethod( - "createMessageStreamsByFilter", - topicFilter07, int.class); - final Method consumerConnectorShutdownMethod07 = KafkaMigrationTool.consumerConnector07.getMethod("shutdown"); - Constructor whiteListConstructor07 = whiteList07.getConstructor(String.class); - Constructor blackListConstructor07 = blackList07.getConstructor(String.class); - Object filterSpec = null; - if (options.has(whitelistOpt)) - filterSpec = whiteListConstructor07.newInstance(options.valueOf(whitelistOpt)); - else - filterSpec = blackListConstructor07.newInstance(options.valueOf(blacklistOpt)); - - Object retKafkaStreams = consumerConnectorCreateMessageStreamsMethod07.invoke(consumerConnector07, filterSpec, numConsumers); - - Properties kafkaProducerProperties08 = new Properties(); - 
kafkaProducerProperties08.load(new FileInputStream(producerConfigFile08)); - kafkaProducerProperties08.setProperty("serializer.class", "kafka.serializer.DefaultEncoder"); - // create a producer channel instead - int queueSize = options.valueOf(queueSizeOpt); - ProducerDataChannel> producerDataChannel = new ProducerDataChannel>(queueSize); - int threadId = 0; - - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - try { - consumerConnectorShutdownMethod07.invoke(consumerConnector07); - } catch (Exception e) { - log.error("Error while shutting down Kafka consumer", e); - } - for (MigrationThread migrationThread : migrationThreads) { - migrationThread.shutdown(); - } - for (ProducerThread producerThread : producerThreads) { - producerThread.shutdown(); - } - for (ProducerThread producerThread : producerThreads) { - producerThread.awaitShutdown(); - } - log.info("Kafka migration tool shutdown successfully"); - } - }); - - // start consumer threads - for (Object stream : (List) retKafkaStreams) { - MigrationThread thread = new MigrationThread(stream, producerDataChannel, threadId); - threadId++; - thread.start(); - migrationThreads.add(thread); - } - - String clientId = kafkaProducerProperties08.getProperty("client.id"); - // start producer threads - for (int i = 0; i < numProducers; i++) { - kafkaProducerProperties08.put("client.id", clientId + "-" + i); - ProducerConfig producerConfig08 = new ProducerConfig(kafkaProducerProperties08); - Producer producer = new Producer(producerConfig08); - ProducerThread producerThread = new ProducerThread(producerDataChannel, producer, i); - producerThread.start(); - producerThreads.add(producerThread); - } - } catch (Throwable e) { - System.out.println("Kafka migration tool failed due to: " + Utils.stackTrace(e)); - log.error("Kafka migration tool failed: ", e); - } - } - - private static void checkRequiredArgs(OptionParser parser, OptionSet options, OptionSpec[] required) throws IOException { - for (OptionSpec arg : required) { - if (!options.has(arg)) { - System.err.println("Missing required argument \"" + arg + "\""); - parser.printHelpOn(System.err); - System.exit(1); - } - } - } - - static class ProducerDataChannel { - private final int producerQueueSize; - private final BlockingQueue producerRequestQueue; - - public ProducerDataChannel(int queueSize) { - producerQueueSize = queueSize; - producerRequestQueue = new ArrayBlockingQueue(producerQueueSize); - } - - public void sendRequest(T data) throws InterruptedException { - producerRequestQueue.put(data); - } - - public T receiveRequest() throws InterruptedException { - return producerRequestQueue.take(); - } - } - - private static class MigrationThread extends Thread { - private final Object stream; - private final ProducerDataChannel> producerDataChannel; - private final int threadId; - private final String threadName; - private final org.apache.log4j.Logger logger; - private CountDownLatch shutdownComplete = new CountDownLatch(1); - private final AtomicBoolean isRunning = new AtomicBoolean(true); - - MigrationThread(Object stream, ProducerDataChannel> producerDataChannel, int threadId) { - this.stream = stream; - this.producerDataChannel = producerDataChannel; - this.threadId = threadId; - threadName = "MigrationThread-" + threadId; - logger = org.apache.log4j.Logger.getLogger(MigrationThread.class.getName()); - this.setName(threadName); - } - - public void run() { - try { - Method messageGetPayloadMethod07 = kafkaMessageClass07.getMethod("payload"); - Method 
kafkaGetMessageMethod07 = kafkaMessageAndMetaDataClass07.getMethod("message"); - Method kafkaGetTopicMethod07 = kafkaMessageAndMetaDataClass07.getMethod("topic"); - Method consumerIteratorMethod = kafkaStream07.getMethod("iterator"); - Method kafkaStreamHasNextMethod07 = kafkaConsumerIteratorClass07.getMethod("hasNext"); - Method kafkaStreamNextMethod07 = kafkaConsumerIteratorClass07.getMethod("next"); - Object iterator = consumerIteratorMethod.invoke(stream); - - while (((Boolean) kafkaStreamHasNextMethod07.invoke(iterator)).booleanValue()) { - Object messageAndMetaData07 = kafkaStreamNextMethod07.invoke(iterator); - Object message07 = kafkaGetMessageMethod07.invoke(messageAndMetaData07); - Object topic = kafkaGetTopicMethod07.invoke(messageAndMetaData07); - Object payload07 = messageGetPayloadMethod07.invoke(message07); - int size = ((ByteBuffer) payload07).remaining(); - byte[] bytes = new byte[size]; - ((ByteBuffer) payload07).get(bytes); - if (logger.isDebugEnabled()) - logger.debug("Migration thread " + threadId + " sending message of size " + bytes.length + " to topic " + topic); - KeyedMessage producerData = new KeyedMessage((String) topic, null, bytes); - producerDataChannel.sendRequest(producerData); - } - logger.info("Migration thread " + threadName + " finished running"); - } catch (InvocationTargetException t) { - logger.fatal("Migration thread failure due to root cause ", t.getCause()); - } catch (Throwable t) { - logger.fatal("Migration thread failure due to ", t); - } finally { - shutdownComplete.countDown(); - } - } - - public void shutdown() { - logger.info("Migration thread " + threadName + " shutting down"); - isRunning.set(false); - interrupt(); - try { - shutdownComplete.await(); - } catch (InterruptedException ie) { - logger.warn("Interrupt during shutdown of MigrationThread", ie); - } - logger.info("Migration thread " + threadName + " shutdown complete"); - } - } - - static class ProducerThread extends Thread { - private final ProducerDataChannel> producerDataChannel; - private final Producer producer; - private final int threadId; - private String threadName; - private org.apache.log4j.Logger logger; - private CountDownLatch shutdownComplete = new CountDownLatch(1); - private KeyedMessage shutdownMessage = new KeyedMessage("shutdown", null, null); - - public ProducerThread(ProducerDataChannel> producerDataChannel, - Producer producer, - int threadId) { - this.producerDataChannel = producerDataChannel; - this.producer = producer; - this.threadId = threadId; - threadName = "ProducerThread-" + threadId; - logger = org.apache.log4j.Logger.getLogger(ProducerThread.class.getName()); - this.setName(threadName); - } - - public void run() { - try { - while (true) { - KeyedMessage data = producerDataChannel.receiveRequest(); - if (!data.equals(shutdownMessage)) { - producer.send(data); - if (logger.isDebugEnabled()) - logger.debug(String.format("Sending message %s", new String(data.message()))); - } else - break; - } - logger.info("Producer thread " + threadName + " finished running"); - } catch (Throwable t) { - logger.fatal("Producer thread failure due to ", t); - } finally { - shutdownComplete.countDown(); - } - } - - public void shutdown() { - try { - logger.info("Producer thread " + threadName + " shutting down"); - producerDataChannel.sendRequest(shutdownMessage); - } catch (InterruptedException ie) { - logger.warn("Interrupt during shutdown of ProducerThread", ie); - } - } - - public void awaitShutdown() { - try { - shutdownComplete.await(); - producer.close(); - 
logger.info("Producer thread " + threadName + " shutdown complete"); - } catch (InterruptedException ie) { - logger.warn("Interrupt during shutdown of ProducerThread", ie); - } - } - } - - /** - * A parent-last class loader that will try the child class loader first and then the parent. - * This takes a fair bit of doing because java really prefers parent-first. - */ - private static class ParentLastURLClassLoader extends ClassLoader { - private ChildURLClassLoader childClassLoader; - - /** - * This class allows me to call findClass on a class loader - */ - private static class FindClassClassLoader extends ClassLoader { - public FindClassClassLoader(ClassLoader parent) { - super(parent); - } - - @Override - public Class findClass(String name) throws ClassNotFoundException { - return super.findClass(name); - } - } - - /** - * This class delegates (child then parent) for the findClass method for a URLClassLoader. - * We need this because findClass is protected in URLClassLoader - */ - private static class ChildURLClassLoader extends URLClassLoader { - private FindClassClassLoader realParent; - - public ChildURLClassLoader(URL[] urls, FindClassClassLoader realParent) { - super(urls, null); - this.realParent = realParent; - } - - @Override - public Class findClass(String name) throws ClassNotFoundException { - try { - // first try to use the URLClassLoader findClass - return super.findClass(name); - } catch (ClassNotFoundException e) { - // if that fails, we ask our real parent class loader to load the class (we give up) - return realParent.loadClass(name); - } - } - } - - public ParentLastURLClassLoader(URL[] urls) { - super(Thread.currentThread().getContextClassLoader()); - childClassLoader = new ChildURLClassLoader(urls, new FindClassClassLoader(this.getParent())); - } - - @Override - protected synchronized Class loadClass(String name, boolean resolve) throws ClassNotFoundException { - try { - // first we try to find a class inside the child class loader - return childClassLoader.findClass(name); - } catch (ClassNotFoundException e) { - // didn't find it, try the parent - return super.loadClass(name, resolve); - } - } - } -} - diff --git a/docs/upgrade.html b/docs/upgrade.html index ba3d0248718a..f1e1e40e05e6 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -78,6 +78,7 @@

potential breaking changes in 0.10.0.0
  • MessageFormatter's package was changed from kafka.tools to kafka.common
  • MessageReader's package was changed from kafka.tools to kafka.common
  • MirrorMakerMessageHandler no longer exposes the handle(record: MessageAndMetadata[Array[Byte], Array[Byte]]) method as it was never called.
  • The 0.7 KafkaMigrationTool is no longer packaged with Kafka. If you need to migrate from 0.7 to 0.10.0, please migrate to 0.8 first and then follow the documented upgrade process to upgrade from 0.8 to 0.10.0.

Upgrading from 0.8.0, 0.8.1.X or 0.8.2.X to 0.9.0.0

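The patch that follows (KAFKA-3463) raises the new consumer's default receive.buffer.bytes from 32K to 64K. The setting remains overridable per client; a minimal sketch of setting it explicitly (not part of the patch): ConsumerConfig.RECEIVE_BUFFER_CONFIG comes from the diff below, while the bootstrap address, group id, deserializer choice and the 128K value are illustrative assumptions.

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

import java.util.Properties;

public class ReceiveBufferSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());

        // Explicit socket receive buffer; after this patch the default is 64 * 1024,
        // so this line is only needed when a different value is wanted.
        props.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG, 128 * 1024);

        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
        consumer.close();
    }
}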
From 3f930cd84c1a889f0df2bc5057e073e683860171 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Fri, 25 Mar 2016 12:51:54 -0700 Subject: [PATCH 024/267] KAFKA-3463: change default receive buffer size for consumer to 64K Author: Jason Gustafson Reviewers: Gwen Shapira Closes #1140 from hachikuji/KAFKA-3463 (cherry picked from commit d691faf98cb573c4e92748d95d5c8afc492db806) Signed-off-by: Gwen Shapira --- .../org/apache/kafka/clients/consumer/ConsumerConfig.java | 2 +- docs/upgrade.html | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java index c97c8fb47024..69c4a3620925 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java @@ -243,7 +243,7 @@ public class ConsumerConfig extends AbstractConfig { CommonClientConfigs.SEND_BUFFER_DOC) .define(RECEIVE_BUFFER_CONFIG, Type.INT, - 32 * 1024, + 64 * 1024, atLeast(0), Importance.MEDIUM, CommonClientConfigs.RECEIVE_BUFFER_DOC) diff --git a/docs/upgrade.html b/docs/upgrade.html index f1e1e40e05e6..060c3deb33dd 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -64,7 +64,7 @@
is set to 0.10.0, one should not change it back to an earlier format as it may break consumers on versions before 0.10.0.0.

-
potential breaking changes in 0.10.0.0
+
Potential breaking changes in 0.10.0.0
+
Notable changes in 0.10.0.0
+ +
    +
  • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer
  • +
+

Upgrading from 0.8.0, 0.8.1.X or 0.8.2.X to 0.9.0.0

0.9.0.0 has potential breaking changes (please review before upgrading) and an inter-broker protocol change from previous versions. This means that upgraded brokers and clients may not be compatible with older versions. It is important that you upgrade your Kafka cluster before upgrading your clients. If you are using MirrorMaker downstream clusters should be upgraded first as well. From 1b1b949b7fbd10fe4b08e97769ab024fad8459fb Mon Sep 17 00:00:00 2001 From: Andrea Cosentino Date: Fri, 25 Mar 2016 15:00:45 -0700 Subject: [PATCH 025/267] KAFKA-3449: Rename filterOut() to filterNot() to achieve better terminology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …nology Hi all, This is my first contribution and I hope it will be good. The PR is related to this issue: https://issues.apache.org/jira/browse/KAFKA-3449 Thanks a lot, Andrea Author: Andrea Cosentino Reviewers: Yasuhiro Matsuda, Guozhang Wang Closes #1134 from oscerd/KAFKA-3449 (cherry picked from commit c1d8c38345e0a1e04ced143ed07e63fe02ceb8b0) Signed-off-by: Guozhang Wang --- .../java/org/apache/kafka/streams/kstream/KStream.java | 2 +- .../java/org/apache/kafka/streams/kstream/KTable.java | 2 +- .../kafka/streams/kstream/internals/KStreamFilter.java | 8 ++++---- .../kafka/streams/kstream/internals/KStreamImpl.java | 2 +- .../kafka/streams/kstream/internals/KTableFilter.java | 8 ++++---- .../kafka/streams/kstream/internals/KTableImpl.java | 2 +- .../streams/kstream/internals/KStreamFilterTest.java | 4 ++-- .../kafka/streams/kstream/internals/KStreamImplTest.java | 2 +- .../kafka/streams/kstream/internals/KTableFilterTest.java | 4 ++-- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java index c4188de44c50..2313b8bf749c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java @@ -43,7 +43,7 @@ public interface KStream { * * @param predicate the instance of {@link Predicate} */ - KStream filterOut(Predicate predicate); + KStream filterNot(Predicate predicate); /** * Create a new instance of {@link KStream} by transforming each element in this stream into a different element in the new stream. diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java index 9a2a8a8b7e8a..30ea88256379 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java @@ -42,7 +42,7 @@ public interface KTable { * * @param predicate the instance of {@link Predicate} */ - KTable filterOut(Predicate predicate); + KTable filterNot(Predicate predicate); /** * Create a new instance of {@link KTable} by transforming the value of each element in this stream into a new value in the new stream. 
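The two hunks above rename the public operator on KStream and KTable; a usage sketch of the renamed method follows (not part of the patch). The topic names, serdes and predicate are chosen for illustration; builder.stream(keySerde, valueSerde, topic) and Predicate.test(key, value) appear in the test diffs further down, and the single-argument to(topic) sink is assumed from the same 0.10.0 DSL.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KStreamBuilder;
import org.apache.kafka.streams.kstream.Predicate;

public class FilterNotSketch {
    public static void main(String[] args) {
        KStreamBuilder builder = new KStreamBuilder();
        KStream<String, Long> counts = builder.stream(Serdes.String(), Serdes.Long(), "counts");

        // filterNot() keeps the records for which the predicate returns false,
        // i.e. the complement of filter(); before this patch it was called filterOut().
        KStream<String, Long> nonZero = counts.filterNot(new Predicate<String, Long>() {
            @Override
            public boolean test(String key, Long value) {
                return value != null && value == 0L;
            }
        });

        nonZero.to("non-zero-counts");
    }
}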
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java index 0b1f1e05a4a4..f5c2fbc5c369 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java @@ -25,11 +25,11 @@ class KStreamFilter implements ProcessorSupplier { private final Predicate predicate; - private final boolean filterOut; + private final boolean filterNot; - public KStreamFilter(Predicate predicate, boolean filterOut) { + public KStreamFilter(Predicate predicate, boolean filterNot) { this.predicate = predicate; - this.filterOut = filterOut; + this.filterNot = filterNot; } @Override @@ -40,7 +40,7 @@ public Processor get() { private class KStreamFilterProcessor extends AbstractProcessor { @Override public void process(K key, V value) { - if (filterOut ^ predicate.test(key, value)) { + if (filterNot ^ predicate.test(key, value)) { context().forward(key, value); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java index 567b06c61ddc..5889e078c330 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java @@ -106,7 +106,7 @@ public KStream filter(Predicate predicate) { } @Override - public KStream filterOut(final Predicate predicate) { + public KStream filterNot(final Predicate predicate) { String name = topology.newName(FILTER_NAME); topology.addProcessor(name, new KStreamFilter<>(predicate, true), this.name); diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java index 72f1d88e5ce9..080fd9d52a11 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java @@ -26,14 +26,14 @@ class KTableFilter implements KTableProcessorSupplier { private final KTableImpl parent; private final Predicate predicate; - private final boolean filterOut; + private final boolean filterNot; private boolean sendOldValues = false; - public KTableFilter(KTableImpl parent, Predicate predicate, boolean filterOut) { + public KTableFilter(KTableImpl parent, Predicate predicate, boolean filterNot) { this.parent = parent; this.predicate = predicate; - this.filterOut = filterOut; + this.filterNot = filterNot; } @Override @@ -64,7 +64,7 @@ public void enableSendingOldValues() { private V computeValue(K key, V value) { V newValue = null; - if (value != null && (filterOut ^ predicate.test(key, value))) + if (value != null && (filterNot ^ predicate.test(key, value))) newValue = value; return newValue; diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java index ca1e65911465..fd464a08d7e9 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java @@ -111,7 +111,7 @@ public KTable filter(Predicate predicate) { } @Override - public KTable filterOut(final Predicate predicate) { + 
public KTable filterNot(final Predicate predicate) { String name = topology.newName(FILTER_NAME); KTableProcessorSupplier processorSupplier = new KTableFilter<>(this, predicate, true); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamFilterTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamFilterTest.java index ecf11153dd01..75465c85bb8c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamFilterTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamFilterTest.java @@ -60,7 +60,7 @@ public void testFilter() { } @Test - public void testFilterOut() { + public void testFilterNot() { KStreamBuilder builder = new KStreamBuilder(); final int[] expectedKeys = new int[]{1, 2, 3, 4, 5, 6, 7}; @@ -69,7 +69,7 @@ public void testFilterOut() { processor = new MockProcessorSupplier<>(); stream = builder.stream(Serdes.Integer(), Serdes.String(), topicName); - stream.filterOut(isMultipleOfThree).process(processor); + stream.filterNot(isMultipleOfThree).process(processor); KStreamTestDriver driver = new KStreamTestDriver(builder); for (int i = 0; i < expectedKeys.length; i++) { diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java index 38182bc355fa..b5c3d47a80b1 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java @@ -52,7 +52,7 @@ public void testNumProcesses() { public boolean test(String key, String value) { return true; } - }).filterOut(new Predicate() { + }).filterNot(new Predicate() { @Override public boolean test(String key, String value) { return false; diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java index 5491ea316e8c..78d274eb695b 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java @@ -53,7 +53,7 @@ public boolean test(String key, Integer value) { return (value % 2) == 0; } }); - KTable table3 = table1.filterOut(new Predicate() { + KTable table3 = table1.filterNot(new Predicate() { @Override public boolean test(String key, Integer value) { return (value % 2) == 0; @@ -95,7 +95,7 @@ public boolean test(String key, Integer value) { return (value % 2) == 0; } }); - KTableImpl table3 = (KTableImpl) table1.filterOut( + KTableImpl table3 = (KTableImpl) table1.filterNot( new Predicate() { @Override public boolean test(String key, Integer value) { From 496bd3fd41c0272b7ef77652b85cbaae748e100e Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 25 Mar 2016 16:04:58 -0700 Subject: [PATCH 026/267] KAFKA-3454: add Kafka Streams web docs Author: Guozhang Wang Reviewers: Gwen Shapira Closes #1127 from guozhangwang/KStreamsDocs (cherry picked from commit 23b50093f4100ce7fbff325cdc92ee6cf3c54102) Signed-off-by: Gwen Shapira --- build.gradle | 123 ++++--- docs/configuration.html | 5 + docs/documentation.html | 15 + docs/quickstart.html | 109 ++++++ docs/streams.html | 341 ++++++++++++++++++ .../examples/pageview/PageViewTypedDemo.java | 3 +- .../pageview/PageViewUntypedDemo.java | 3 +- .../kafka/streams/examples/pipe/PipeDemo.java | 3 
+- .../examples/wordcount/WordCountDemo.java | 3 +- .../wordcount/WordCountProcessorDemo.java | 3 +- .../apache/kafka/streams/StreamsConfig.java | 12 +- 11 files changed, 546 insertions(+), 74 deletions(-) create mode 100644 docs/streams.html diff --git a/build.gradle b/build.gradle index c29ad5a8eb83..13a8b4e5ac18 100644 --- a/build.gradle +++ b/build.gradle @@ -413,7 +413,7 @@ project(':core') { task siteDocsTar(dependsOn: ['genProtocolErrorDocs', 'genProtocolApiKeyDocs', 'genProtocolMessageDocs', 'genProducerConfigDocs', 'genConsumerConfigDocs', 'genKafkaConfigDocs', - ':connect:runtime:genConnectConfigDocs'], type: Tar) { + ':connect:runtime:genConnectConfigDocs', ':streams:genStreamsConfigDocs'], type: Tar) { classifier = 'site-docs' compression = Compression.GZIP from project.file("../docs") @@ -552,77 +552,84 @@ project(':clients') { } project(':tools') { - archivesBaseName = "kafka-tools" + archivesBaseName = "kafka-tools" - dependencies { - compile project(':clients') - compile project(':log4j-appender') - compile libs.argparse4j - compile libs.jacksonDatabind - compile libs.slf4jlog4j + dependencies { + compile project(':clients') + compile project(':log4j-appender') + compile libs.argparse4j + compile libs.jacksonDatabind + compile libs.slf4jlog4j - testCompile project(':clients') - testCompile libs.junit - } + testCompile project(':clients') + testCompile libs.junit + } - javadoc { - include "**/org/apache/kafka/tools/*" - } + javadoc { + include "**/org/apache/kafka/tools/*" + } - tasks.create(name: "copyDependantLibs", type: Copy) { - from (configurations.testRuntime) { - include('slf4j-log4j12*') - } - from (configurations.runtime) { - exclude('kafka-clients*') - } - into "$buildDir/dependant-libs-${versions.scala}" - duplicatesStrategy 'exclude' + tasks.create(name: "copyDependantLibs", type: Copy) { + from (configurations.testRuntime) { + include('slf4j-log4j12*') } - - jar { - dependsOn 'copyDependantLibs' + from (configurations.runtime) { + exclude('kafka-clients*') } + into "$buildDir/dependant-libs-${versions.scala}" + duplicatesStrategy 'exclude' + } + + jar { + dependsOn 'copyDependantLibs' + } } project(':streams') { - archivesBaseName = "kafka-streams" - - dependencies { - compile project(':clients') - compile project(':connect:json') // this dependency should be removed after we unify data API - compile libs.slf4jlog4j - compile libs.rocksDBJni - compile libs.zkclient // this dependency should be removed after KIP-4 - compile libs.jacksonDatabind // this dependency should be removed after KIP-4 - - testCompile project(':clients').sourceSets.test.output - testCompile libs.junit - } + archivesBaseName = "kafka-streams" - javadoc { - include "**/org/apache/kafka/streams/**" - exclude "**/internals/**" - } + dependencies { + compile project(':clients') + compile project(':connect:json') // this dependency should be removed after we unify data API + compile libs.slf4jlog4j + compile libs.rocksDBJni + compile libs.zkclient // this dependency should be removed after KIP-4 + compile libs.jacksonDatabind // this dependency should be removed after KIP-4 - tasks.create(name: "copyDependantLibs", type: Copy) { - from (configurations.testRuntime) { - include('slf4j-log4j12*') - } - from (configurations.runtime) { - exclude('kafka-clients*') - } - into "$buildDir/dependant-libs-${versions.scala}" - duplicatesStrategy 'exclude' - } + testCompile project(':clients').sourceSets.test.output + testCompile libs.junit + } - jar { - dependsOn 'copyDependantLibs' - } + javadoc { + 
include "**/org/apache/kafka/streams/**" + exclude "**/internals/**" + } - systemTestLibs { - dependsOn testJar + tasks.create(name: "copyDependantLibs", type: Copy) { + from (configurations.testRuntime) { + include('slf4j-log4j12*') } + from (configurations.runtime) { + exclude('kafka-clients*') + } + into "$buildDir/dependant-libs-${versions.scala}" + duplicatesStrategy 'exclude' + } + + jar { + dependsOn 'copyDependantLibs' + } + + systemTestLibs { + dependsOn testJar + } + + task genStreamsConfigDocs(type: JavaExec) { + classpath = sourceSets.main.runtimeClasspath + main = 'org.apache.kafka.streams.StreamsConfig' + if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() } + standardOutput = new File(generatedDocsDir, "streams_config.html").newOutputStream() + } } project(':streams:examples') { diff --git a/docs/configuration.html b/docs/configuration.html index a89778d3270f..e5280a5744c9 100644 --- a/docs/configuration.html +++ b/docs/configuration.html @@ -333,4 +333,9 @@

3.3.2 New Consumer Con

3.4 Kafka Connect Configs

+Below is the configuration of the Kafka Connect framework. + +

3.5 Kafka Streams Configs

+Below is the configuration of the Kafka Streams client library. + diff --git a/docs/documentation.html b/docs/documentation.html index 67a2954acbf1..70002ab8ec4e 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -52,6 +52,7 @@

Kafka 0.10.0 Documentation

  • 3.3.2 New Consumer Configs
  • 3.4 Kafka Connect Configs +
  • 3.5 Kafka Streams Configs
  • 4. Design @@ -136,6 +137,17 @@

    Kafka 0.10.0 Documentation

  • 8.3 Connector Development Guide
  • +
  • 9. Kafka Streams + +
  • 1. Getting Started

    @@ -171,4 +183,7 @@

    7. Security

    8. Kafka Connect

    +

    9. Kafka Streams

    + + diff --git a/docs/quickstart.html b/docs/quickstart.html index 1e7b62ce3c0a..7a923c69fc00 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -249,3 +249,112 @@

    Step 7: Use You should see the line appear in the console consumer output and in the sink file. + +

    Step 8: Use Kafka Streams to process data

    + +

+Kafka Streams is a client library of Kafka for real-time processing and analysis of data stored in Kafka brokers. +This quickstart example will demonstrate how to run a streaming application coded with this library. Here is the gist +of the WordCountDemo example code (converted to use Java 8 lambda expressions for easy reading). +

    +
+KStream<String, Long> wordCounts = textLines
    +// Split each text line, by whitespace, into words.
    +.flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
    +// Ensure the words are available as message keys for the next aggregate operation.
    +.map((key, value) -> new KeyValue<>(value, value))
    +// Count the occurrences of each word (message key).
    +.countByKey(stringSerializer, longSerializer, stringDeserializer, longDeserializer, "Counts")
+// Convert the resulting aggregate table into another stream.
    +.toStream();
    +
    + +

    +It implements the WordCount +algorithm, which computes a word occurrence histogram from the input text. However, unlike other WordCount examples +you might have seen before that operate on bounded data, the WordCount demo application behaves slightly differently because it is +designed to operate on an infinite, unbounded stream of data. Similar to the bounded variant, it is a stateful algorithm that +tracks and updates the counts of words. However, since it must assume potentially +unbounded input data, it will periodically output its current state and results while continuing to process more data +because it cannot know when it has processed "all" the input data. +

    +

+We will now prepare input data for a Kafka topic, which will subsequently be processed by a Kafka Streams application. +

    + + + +
    +> echo -e "all streams lead to kafka\nhello kafka streams\njoin kafka summit" > file-input.txt
    +
    + +

    +Next, we send this input data to the input topic named streams-file-input using the console producer (in practice, +stream data will likely be flowing continuously into Kafka where the application will be up and running): +

    + +
+> cat file-input.txt | ./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic streams-file-input
    +
    + +

    +We can now run the WordCount demo application to process the input data: +

    + +
+> ./bin/kafka-run-class.sh org.apache.kafka.streams.examples.wordcount.WordCountDemo
    +
    + +

    +There won't be any STDOUT output except log entries as the results are continuously written back into another topic named streams-wordcount-output in Kafka. +The demo will run for a few seconds and then, unlike typical stream processing applications, terminate automatically. +

    +

    +We can now inspect the output of the WordCount demo application by reading from its output topic: +

    + +
+> ./bin/kafka-console-consumer.sh --zookeeper localhost:2181 \
    +            --topic streams-wordcount-output \
    +            --from-beginning \
    +            --formatter kafka.tools.DefaultMessageFormatter \
    +            --property print.key=true \
+            --property print.value=true \
    +            --property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer \
    +            --property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer
    +
    + +

    +with the following output data being printed to the console (You can stop the console consumer via Ctrl-C): +

    + +
    +all     1
    +streams 1
    +lead    1
    +to      1
    +kafka   1
    +hello   1
    +kafka   2
    +streams 2
    +join    1
    +kafka   3
    +summit  1
    +^C
    +
    + +

+Here, the first column is the Kafka message key, and the second column is the message value, both in java.lang.String format. +Note that the output is actually a continuous stream of updates, where each data record (i.e. each line in the original output above) is +an updated count of a single word (i.e. the record key), such as "kafka". For multiple records with the same key, each later record is an update of the previous one. +

    \ No newline at end of file diff --git a/docs/streams.html b/docs/streams.html new file mode 100644 index 000000000000..9b94bb32c06d --- /dev/null +++ b/docs/streams.html @@ -0,0 +1,341 @@ + + +

    9.1 Overview

    + +

+Kafka Streams is a client library for processing and analyzing data stored in Kafka that either writes the resulting data back to Kafka or sends the final output to an external system. It builds upon important stream processing concepts such as properly distinguishing between event time and processing time, windowing support, and simple yet efficient management of application state. +Kafka Streams has a low barrier to entry: you can quickly write and run a small-scale proof-of-concept on a single machine, and you only need to run additional instances of your application on multiple machines to scale up to high-volume production workloads. Kafka Streams transparently handles the load balancing of multiple instances of the same application by leveraging Kafka's parallelism model. +

    +

    +Some highlights of Kafka Streams: +

    + +
      +
    • Designed as a simple and lightweight client library, which can be easily embedded in any Java application and integrated with any existing packaging, deployment and operational tools that users have for their streaming applications.
    • +
    • Has no external dependencies on systems other than Apache Kafka itself as the internal messaging layer; notably, it uses Kafka's partitioning model to horizontally scale processing while maintaining strong ordering guarantees.
    • +
    • Supports fault-tolerant local state, which enables very fast and efficient stateful operations like joins and windowed aggregations.
    • +
    • Employs one-record-at-a-time processing to achieve low processing latency, and supports event-time based windowing operations.
    • +
    • Offers necessary stream processing primitives, along with a high-level Streams DSL and a low-level Processor API.
    • + +
    + +

    9.2 Developer Guide

    + +

+There is a quickstart example that shows how to run a stream processing program coded in the Kafka Streams library. +This section focuses on how to write, configure, and execute a Kafka Streams application. +

    + +

    Core Concepts

    + +

    +We first summarize the key concepts of Kafka Streams. +

    + +
    Stream Processing Topology
    + +
      +
• A stream is the most important abstraction provided by Kafka Streams: it represents an unbounded, continuously updating data set. A stream is an ordered, replayable, and fault-tolerant sequence of immutable data records, where a data record is defined as a key-value pair.
    • +
    • A stream processing application written in Kafka Streams defines its computational logic through one or more processor topologies, where a processor topology is a graph of stream processors (nodes) that are connected by streams (edges).
    • +
• A stream processor is a node in the processor topology; it represents a processing step that transforms data in streams: it receives one input record at a time from its upstream processors in the topology, applies its operation to it, and may subsequently produce one or more output records to its downstream processors.
    • +
    + +

+Kafka Streams offers two ways to define the stream processing topology: the Kafka Streams DSL provides +the most common data transformation operations such as map and filter; the lower-level Processor API allows +developers to define and connect custom processors as well as to interact with state stores. +

    + +
    Time
    + +

+A critical aspect in stream processing is the notion of time, and how it is modeled and integrated. +For example, some operations such as windowing are defined based on time boundaries. +

    +

    +Common notions of time in streams are: +

    + +
      +
    • Event time - The point in time when an event or data record occurred, i.e. was originally created "at the source".
    • +
    • Processing time - The point in time when the event or data record happens to be processed by the stream processing application, i.e. when the record is being consumed. The processing time may be milliseconds, hours, or days etc. later than the original event time.
    • +
    + +

    +Kafka Streams assigns a timestamp to every data record +via the TimestampExtractor interface. +Concrete implementations of this interface may retrieve or compute timestamps based on the actual contents of data records such as an embedded timestamp field +to provide event-time semantics, or use any other approach such as returning the current wall-clock time at the time of processing, +thereby yielding processing-time semantics to stream processing applications. +Developers can thus enforce different notions of time depending on their business needs. For example, +per-record timestamps describe the progress of a stream with regards to time (although records may be out-of-order within the stream) and +are leveraged by time-dependent operations such as joins. +
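As a rough illustration, a sketch of a custom extractor against the 0.10.0 interface; the assumption that some record values carry an epoch-millisecond Long is purely hypothetical:

    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.streams.processor.TimestampExtractor;

    public class PayloadTimestampExtractor implements TimestampExtractor {
        @Override
        public long extract(ConsumerRecord<Object, Object> record) {
            // Event-time semantics: prefer a timestamp embedded in the value,
            // falling back to the timestamp carried by the record itself.
            if (record.value() instanceof Long)
                return (Long) record.value();       // hypothetical payload layout
            return record.timestamp();
        }
    }

Such an extractor would then be registered through the timestamp.extractor configuration (StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG), as the demo applications further down in this patch series do.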

    + +
    States
    + +

+Some stream processing applications don't require state, which means the processing of a message is independent of +the processing of all other messages. +However, being able to maintain state opens up many possibilities for sophisticated stream processing applications: you +can join input streams, or group and aggregate data records. Many such stateful operators are provided by the Kafka Streams DSL. +

    +

    +Kafka Streams provides so-called state stores, which can be used by stream processing applications to store and query data. +This is an important capability when implementing stateful operations. +Every task in Kafka Streams embeds one or more state stores that can be accessed via APIs to store and query data required for processing. +These state stores can either be a persistent key-value store, an in-memory hashmap, or another convenient data structure. +Kafka Streams offers fault-tolerance and automatic recovery for local state stores. +

    +
    +

    +As we have mentioned above, the computational logic of a Kafka Streams application is defined as a processor topology. +Currently Kafka Streams provides two sets of APIs to define the processor topology, which will be described in the subsequent sections. +

    + +

    Low-Level Processor API

    + +
    Processor
    + +

+Developers can define their customized processing logic by implementing the Processor interface, which +provides the process and punctuate methods. The process method is invoked on each +received record, and the punctuate method is invoked periodically based on elapsed time. +In addition, the processor can maintain the current ProcessorContext instance variable initialized in the +init method, and use the context to schedule the punctuation period (context().schedule), to +forward the modified / new key-value pair to downstream processors (context().forward), to commit the current +processing progress (context().commit), etc. +

    + +
+    public class MyProcessor implements Processor<String, String> {
    +        private ProcessorContext context;
+        private KeyValueStore<String, Integer> kvStore;
    +
    +        @Override
    +        @SuppressWarnings("unchecked")
    +        public void init(ProcessorContext context) {
    +            this.context = context;
    +            this.context.schedule(1000);
+            this.kvStore = (KeyValueStore<String, Integer>) context.getStateStore("Counts");
    +        }
    +
    +        @Override
    +        public void process(String dummy, String line) {
    +            String[] words = line.toLowerCase().split(" ");
    +
    +            for (String word : words) {
    +                Integer oldValue = this.kvStore.get(word);
    +
    +                if (oldValue == null) {
    +                    this.kvStore.put(word, 1);
    +                } else {
    +                    this.kvStore.put(word, oldValue + 1);
    +                }
    +            }
    +        }
    +
    +        @Override
    +        public void punctuate(long timestamp) {
+            KeyValueIterator<String, Integer> iter = this.kvStore.all();
    +
    +            while (iter.hasNext()) {
+                KeyValue<String, Integer> entry = iter.next();
    +                context.forward(entry.key, entry.value.toString());
    +            }
    +
    +            iter.close();
    +            context.commit();
    +        }
    +
    +        @Override
    +        public void close() {
    +            this.kvStore.close();
    +        }
    +    };
    +
    + +

    +In the above implementation, the following actions are performed: + +

      +
    • In the init method, schedule the punctuation every 1 second and retrieve the local state store by its name "Counts".
    • +
    • In the process method, upon each received record, split the value string into words, and update their counts into the state store (we will talk about this feature later in the section).
    • +
    • In the punctuate method, iterate the local state store and send the aggregated counts to the downstream processor, and commit the current stream state.
    • +
    +

    + +
    Processor Topology
    + +

    +With the customized processors defined in the Processor API, developers can use the TopologyBuilder to build a processor topology +by connecting these processors together: + +

    +    TopologyBuilder builder = new TopologyBuilder();
    +
    +    builder.addSource("SOURCE", "src-topic")
    +
    +        .addProcessor("PROCESS1", MyProcessor1::new /* the ProcessorSupplier that can generate MyProcessor1 */, "SOURCE")
    +        .addProcessor("PROCESS2", MyProcessor2::new /* the ProcessorSupplier that can generate MyProcessor2 */, "PROCESS1")
    +        .addProcessor("PROCESS3", MyProcessor3::new /* the ProcessorSupplier that can generate MyProcessor3 */, "PROCESS1")
    +
    +        .addSink("SINK1", "sink-topic1", "PROCESS1")
    +        .addSink("SINK2", "sink-topic2", "PROCESS2")
    +        .addSink("SINK3", "sink-topic3", "PROCESS3");
    +
    + +There are several steps in the above code to build the topology, and here is a quick walk through: + +
      +
    • First of all a source node named "SOURCE" is added to the topology using the addSource method, with one Kafka topic "src-topic" fed to it.
    • +
    • Three processor nodes are then added using the addProcessor method; here the first processor is a child of the "SOURCE" node, but is the parent of the other two processors.
    • +
    • Finally three sink nodes are added to complete the topology using the addSink method, each piping from a different parent processor node and writing to a separate topic.
    • +
    +

    + +
    Local State Store
    + +

+Note that the Processor API is not limited to accessing only the current records as they arrive, but can also maintain local state stores +that keep recently arrived records for use in stateful processing operations such as aggregation or windowed joins. +To take advantage of these local state stores, developers can use the TopologyBuilder.addStateStore method when building the +processor topology to create the local state and associate it with the processor nodes that need to access it; or they can connect a created +local state store with existing processor nodes through TopologyBuilder.connectProcessorAndStateStores. + +

    +    TopologyBuilder builder = new TopologyBuilder();
    +
    +    builder.addSource("SOURCE", "src-topic")
    +
    +        .addProcessor("PROCESS1", MyProcessor1::new, "SOURCE")
    +        // create the in-memory state store "COUNTS" associated with processor "PROCESS1"
    +        .addStateStore(Stores.create("COUNTS").withStringKeys().withStringValues().inMemory().build(), "PROCESS1")
    +        .addProcessor("PROCESS2", MyProcessor3::new /* the ProcessorSupplier that can generate MyProcessor3 */, "PROCESS1")
    +        .addProcessor("PROCESS3", MyProcessor3::new /* the ProcessorSupplier that can generate MyProcessor3 */, "PROCESS1")
    +
    +        // connect the state store "COUNTS" with processor "PROCESS2"
+        .connectProcessorAndStateStores("PROCESS2", "COUNTS")
    +
    +        .addSink("SINK1", "sink-topic1", "PROCESS1")
    +        .addSink("SINK2", "sink-topic2", "PROCESS2")
    +        .addSink("SINK3", "sink-topic3", "PROCESS3");
    +
    + +

    + +In the next section we present another way to build the processor topology: the Kafka Streams DSL. + +

    High-Level Streams DSL

+ +To build a processor topology using the Streams DSL, developers can apply the KStreamBuilder class, which extends the TopologyBuilder. +A simple example is included with the source code for Kafka in the streams/examples package. The rest of this section will walk +through some code to demonstrate the key steps in creating a topology using the Streams DSL, but we recommend developers read the full example source +code for details. +
    Create Source Streams from Kafka
    + +

    +Either a record stream (defined as KStream) or a changelog stream (defined as KTable) +can be created as a source stream from one or more Kafka topics (for KTable you can only create the source stream +from a single topic). +

    + +
    +    KStreamBuilder builder = new KStreamBuilder();
    +
    +    KStream source1 = builder.stream("topic1", "topic2");
    +    KTable source2 = builder.table("topic3");
    +
    + +
    Transform a stream
    + +

+There is a list of transformation operations provided for KStream and KTable respectively. +Each of these operations may generate one or more KStream and KTable objects and +can be translated into one or more connected processors in the underlying processor topology. +All these transformation methods can be chained together to compose a complex processor topology. +Since KStream and KTable are strongly typed, all these transformation operations are defined as +generic functions where users can specify the input and output data types. +

    + +

+Among these transformations, filter, map, mapValues, etc., are stateless +transformation operations and can be applied to both KStream and KTable, +where users can usually pass a customized function to these operations as a parameter, such as Predicate for filter or +KeyValueMapper for map: + +

    + +
    +    // written in Java 8+, using lambda expressions
+    KStream mapped = source1.mapValues(record -> record.get("category"));
    +
    + +

+Stateless transformations, by definition, do not depend on any state for processing, and hence implementation-wise +they do not require a state store associated with the stream processor; stateful transformations, on the other hand, +require access to an associated state for processing and producing outputs. +For example, in join and aggregate operations, a windowing state is usually used to store all the records received so far +within the defined window boundary. The operators can then access these accumulated records in the store and compute +based on them. +

    + +
    +    // written in Java 8+, using lambda expressions
+    KTable<Windowed<String>, Long> counts = source1.aggregateByKey(
    +        () -> 0L,  // initial value
    +        (aggKey, value, aggregate) -> aggregate + 1L,   // aggregating value
+        HoppingWindows.of("counts").with(5000L).every(1000L) // intervals in milliseconds
    +    );
    +
    +    KStream joined = source1.leftJoin(source2,
+        (record1, record2) -> record1.get("user") + "-" + record2.get("region")
    +    );
    +
    + +
    Write streams back to Kafka
    + +

+At the end of the processing, users can choose to (continuously) write the final resulting streams back to a Kafka topic through +KStream.to and KTable.to. +

    + +
    +    joined.to("topic4");
    +
    + +If your application needs to continue reading and processing the records after they have been materialized +to a topic via to above, one option is to construct a new stream that reads from the output topic; +Kafka Streams provides a convenience method called through: + +
    +    // equivalent to
    +    //
    +    // joined.to("topic4");
    +    // materialized = builder.stream("topic4");
    +    KStream materialized = joined.through("topic4");
    +
    + + +
    +

+Besides defining the topology, developers also need to configure their applications +via StreamsConfig before running them. A complete list of +Kafka Streams configs can be found here. +
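As a rough sketch of that last step (the application id, bootstrap servers and topics are placeholders, and the one-line topology merely stands in for whichever topology was built above):

    import java.util.Properties;

    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.KafkaStreams;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.kstream.KStreamBuilder;

    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-streams-app");        // placeholder
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // placeholder
    props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");     // still required in 0.10.0
    props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

    KStreamBuilder builder = new KStreamBuilder();
    builder.stream("topic1").to("topic4");   // stand-in for the topology defined earlier

    KafkaStreams streams = new KafkaStreams(builder, props);
    streams.start();
    // ... and eventually, on shutdown:
    streams.close();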

    \ No newline at end of file diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewTypedDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewTypedDemo.java index 0385bdeaceb1..4124b32c301d 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewTypedDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewTypedDemo.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.examples.pageview; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.common.serialization.Serdes; @@ -86,7 +87,7 @@ public static void main(String[] args) throws Exception { props.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data - props.put(StreamsConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); KStreamBuilder builder = new KStreamBuilder(); diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewUntypedDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewUntypedDemo.java index 6f5cdf29701c..e61842ffe327 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewUntypedDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pageview/PageViewUntypedDemo.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.common.serialization.Serdes; @@ -62,7 +63,7 @@ public static void main(String[] args) throws Exception { props.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data - props.put(StreamsConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); KStreamBuilder builder = new KStreamBuilder(); diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pipe/PipeDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pipe/PipeDemo.java index 619f33ddd931..3c1bd8c049a6 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/pipe/PipeDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/pipe/PipeDemo.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.examples.pipe; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.streams.kstream.KStreamBuilder; import org.apache.kafka.streams.KafkaStreams; @@ -44,7 +45,7 @@ public static void main(String[] args) throws Exception { props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass()); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data - props.put(StreamsConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + 
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); KStreamBuilder builder = new KStreamBuilder(); diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountDemo.java index e892abb0798c..c12977f8d43f 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountDemo.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.examples.wordcount; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.streams.KafkaStreams; import org.apache.kafka.streams.KeyValue; @@ -52,7 +53,7 @@ public static void main(String[] args) throws Exception { props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data - props.put(StreamsConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); KStreamBuilder builder = new KStreamBuilder(); diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java index 8457415f1118..a5cddfd005e7 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.examples.wordcount; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.StreamsConfig; @@ -111,7 +112,7 @@ public static void main(String[] args) throws Exception { props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass()); // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data - props.put(StreamsConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); TopologyBuilder builder = new TopologyBuilder(); diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java index d4efbee89105..3e0f9550a546 100644 --- a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java +++ b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java @@ -34,7 +34,6 @@ import java.util.Map; import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; -import static org.apache.kafka.common.config.ConfigDef.ValidString.in; /** * Configuration for Kafka Streams. 
Documentation for these configurations can be found in the client.id */ public static final String CLIENT_ID_CONFIG = CommonClientConfigs.CLIENT_ID_CONFIG; - /** auto.offset.reset */ - public static final String AUTO_OFFSET_RESET_CONFIG = ConsumerConfig.AUTO_OFFSET_RESET_CONFIG; - static { CONFIG = new ConfigDef().define(APPLICATION_ID_CONFIG, // required with no default value Type.STRING, @@ -197,12 +193,6 @@ public class StreamsConfig extends AbstractConfig { 60000, Importance.LOW, STATE_CLEANUP_DELAY_MS_DOC) - .define(AUTO_OFFSET_RESET_CONFIG, - Type.STRING, - "latest", - in("latest", "earliest", "none"), - Importance.MEDIUM, - ConsumerConfig.AUTO_OFFSET_RESET_DOC) .define(METRIC_REPORTER_CLASSES_CONFIG, Type.LIST, "", @@ -277,7 +267,7 @@ public Map getProducerConfigs(String clientId) { Map props = this.originals(); // remove consumer properties that are not required for producers - props.remove(StreamsConfig.AUTO_OFFSET_RESET_CONFIG); + props.remove(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG); // remove streams properties removeStreamsSpecificConfigs(props); From 206757eeb18589952291ce1a5578c66de3669f4c Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Fri, 25 Mar 2016 16:46:53 -0700 Subject: [PATCH 027/267] KAFKA-3316: Add REST API for listing connector plugins Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1090 from Ishiihara/kafka-3316 (cherry picked from commit 78fa20eb58a948abd9ad4e44acfed606400a47f3) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/AbstractHerder.java | 32 ++++++++++- .../rest/entities/ConnectorPluginInfo.java | 54 +++++++++++++++++++ .../resources/ConnectorPluginsResource.java | 10 ++++ .../connect/runtime/AbstractHerderTest.java | 1 - .../ConnectorPluginsResourceTest.java | 22 +++++++- 5 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/entities/ConnectorPluginInfo.java diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java index 8d83644d6fbc..a97c4db8faff 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java @@ -26,20 +26,29 @@ import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos; import org.apache.kafka.connect.runtime.rest.entities.ConfigKeyInfo; import org.apache.kafka.connect.runtime.rest.entities.ConfigValueInfo; +import org.apache.kafka.connect.runtime.rest.entities.ConnectorPluginInfo; import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo; import org.apache.kafka.connect.storage.StatusBackingStore; +import org.apache.kafka.connect.tools.VerifiableSinkConnector; +import org.apache.kafka.connect.tools.VerifiableSourceConnector; import org.apache.kafka.connect.util.ConnectorTaskId; +import org.reflections.Reflections; +import org.reflections.util.ClasspathHelper; +import org.reflections.util.ConfigurationBuilder; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Modifier; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; /** @@ -69,7 +78,9 @@ public 
abstract class AbstractHerder implements Herder, TaskStatus.Listener, Con protected final StatusBackingStore statusBackingStore; private final String workerId; - protected Map tempConnectors = new ConcurrentHashMap<>(); + private Map tempConnectors = new ConcurrentHashMap<>(); + private static final List> SKIPPED_CONNECTORS = Arrays.>asList(VerifiableSourceConnector.class, VerifiableSinkConnector.class); + private static List validConnectorPlugins; public AbstractHerder(Worker worker, StatusBackingStore statusBackingStore, String workerId) { this.worker = worker; @@ -189,6 +200,25 @@ public ConfigInfos validateConfigs(String connType, Map connecto return generateResult(connType, resultConfigKeys, configValues, allGroups); } + public static List connectorPlugins() { + if (validConnectorPlugins != null) { + return validConnectorPlugins; + } + + Reflections reflections = new Reflections(new ConfigurationBuilder().setUrls(ClasspathHelper.forJavaClassPath())); + Set> connectorClasses = reflections.getSubTypesOf(Connector.class); + connectorClasses.removeAll(SKIPPED_CONNECTORS); + List connectorPlugins = new LinkedList<>(); + for (Class connectorClass: connectorClasses) { + int mod = connectorClass.getModifiers(); + if (!Modifier.isAbstract(mod) && !Modifier.isInterface(mod)) { + connectorPlugins.add(new ConnectorPluginInfo(connectorClass.getCanonicalName())); + } + } + validConnectorPlugins = connectorPlugins; + return connectorPlugins; + } + // public for testing public static ConfigInfos generateResult(String connType, Map configKeys, List configValues, List groups) { int errorCount = 0; diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/entities/ConnectorPluginInfo.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/entities/ConnectorPluginInfo.java new file mode 100644 index 000000000000..097142e945f0 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/entities/ConnectorPluginInfo.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

    Unless required by + * applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See + * the License for the specific language governing permissions and limitations under the License. + **/ + +package org.apache.kafka.connect.runtime.rest.entities; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +public class ConnectorPluginInfo { + + private String clazz; + + @JsonCreator + public ConnectorPluginInfo(@JsonProperty("class") String clazz) { + this.clazz = clazz; + } + + @JsonProperty("class") + public String clazz() { + return clazz; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ConnectorPluginInfo that = (ConnectorPluginInfo) o; + return Objects.equals(clazz, that.clazz); + } + + @Override + public int hashCode() { + return Objects.hash(clazz); + } + + @Override + public String toString() { + return clazz; + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java index 84397078033a..9e87d0c4aa2e 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java @@ -17,12 +17,16 @@ package org.apache.kafka.connect.runtime.rest.resources; +import org.apache.kafka.connect.runtime.AbstractHerder; import org.apache.kafka.connect.runtime.Herder; import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos; +import org.apache.kafka.connect.runtime.rest.entities.ConnectorPluginInfo; +import java.util.List; import java.util.Map; import javax.ws.rs.Consumes; +import javax.ws.rs.GET; import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.PathParam; @@ -46,4 +50,10 @@ public ConfigInfos validateConfigs(final @PathParam("connectorType") String conn final Map connectorConfig) throws Throwable { return herder.validateConfigs(connType, connectorConfig); } + + @GET + @Path("/") + public List listConnectorPlugins() { + return AbstractHerder.connectorPlugins(); + } } diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java index 1dc57846213f..e4084a809ae5 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java @@ -114,5 +114,4 @@ public TaskStatus answer() throws Throwable { verifyAll(); } - } diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java index 625c91fa0edd..1049e7e0dd78 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java @@ -21,8 +21,8 @@ import 
org.apache.kafka.common.config.Config; import org.apache.kafka.common.config.ConfigDef; -import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.common.config.ConfigDef.Importance; +import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.connect.connector.Connector; import org.apache.kafka.connect.connector.Task; import org.apache.kafka.connect.runtime.AbstractHerder; @@ -32,6 +32,11 @@ import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos; import org.apache.kafka.connect.runtime.rest.entities.ConfigKeyInfo; import org.apache.kafka.connect.runtime.rest.entities.ConfigValueInfo; +import org.apache.kafka.connect.runtime.rest.entities.ConnectorPluginInfo; +import org.apache.kafka.connect.sink.SinkConnector; +import org.apache.kafka.connect.source.SourceConnector; +import org.apache.kafka.connect.tools.VerifiableSinkConnector; +import org.apache.kafka.connect.tools.VerifiableSourceConnector; import org.easymock.EasyMock; import org.easymock.IAnswer; import org.junit.Before; @@ -49,8 +54,11 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; @RunWith(PowerMockRunner.class) @PrepareForTest(RestServer.class) @@ -64,6 +72,7 @@ public class ConnectorPluginsResourceTest { } private static final ConfigInfos CONFIG_INFOS; + static { List configs = new LinkedList<>(); @@ -120,6 +129,17 @@ public ConfigInfos answer() { PowerMock.verifyAll(); } + @Test + public void testListConnectorPlugins() { + Set connectorPlugins = new HashSet<>(connectorPluginsResource.listConnectorPlugins()); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(Connector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(SourceConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(SinkConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(VerifiableSourceConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(VerifiableSinkConnector.class.getCanonicalName()))); + assertTrue(connectorPlugins.contains(new ConnectorPluginInfo(ConnectorPluginsResourceTestConnector.class.getCanonicalName()))); + } + /* Name here needs to be unique as we are testing the aliasing mechanism */ public static class ConnectorPluginsResourceTestConnector extends Connector { From b45fe77791560a2171e0bbd7df09f648a01b016f Mon Sep 17 00:00:00 2001 From: Jiangjie Qin Date: Sat, 26 Mar 2016 09:22:59 -0700 Subject: [PATCH 028/267] KAFKA-3388; Fix expiration of batches sitting in the accumulator Author: Jiangjie Qin Reviewers: Ismael Juma , Jun Rao Closes #1056 from becketqin/KAFKA-3388 (cherry picked from commit 1fbe445dde71df0023a978c5e54dd229d3d23e1b) Signed-off-by: Jun Rao --- .../producer/internals/RecordAccumulator.java | 35 ++++++---- .../producer/internals/RecordBatch.java | 19 ++++-- .../internals/RecordAccumulatorTest.java | 67 ++++++++++++++++--- 3 files changed, 94 insertions(+), 27 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java index beaa832d3889..915c4d3e9d93 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java +++ 
b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java @@ -13,6 +13,7 @@ package org.apache.kafka.clients.producer.internals; import java.util.Iterator; + import org.apache.kafka.clients.producer.Callback; import org.apache.kafka.common.Cluster; import org.apache.kafka.common.MetricName; @@ -217,19 +218,27 @@ public List abortExpiredBatches(int requestTimeout, Cluster cluster int count = 0; for (Map.Entry> entry : this.batches.entrySet()) { Deque dq = entry.getValue(); - synchronized (dq) { - // iterate over the batches and expire them if they have stayed in accumulator for more than requestTimeOut - Iterator batchIterator = dq.iterator(); - while (batchIterator.hasNext()) { - RecordBatch batch = batchIterator.next(); - // check if the batch is expired - if (batch.maybeExpire(requestTimeout, now, this.lingerMs)) { - expiredBatches.add(batch); - count++; - batchIterator.remove(); - deallocate(batch); - } else { - if (!batch.inRetry()) { + // We only check if the batch should be expired if the partition does not have a batch in flight. + // This is to avoid the later batches get expired when an earlier batch is still in progress. + // This protection only takes effect when user sets max.in.flight.request.per.connection=1. + // Otherwise the expiration order is not guranteed. + TopicPartition tp = entry.getKey(); + if (!muted.contains(tp)) { + synchronized (dq) { + // iterate over the batches and expire them if they have stayed in accumulator for more than requestTimeOut + RecordBatch lastBatch = dq.peekLast(); + Iterator batchIterator = dq.iterator(); + while (batchIterator.hasNext()) { + RecordBatch batch = batchIterator.next(); + boolean isFull = batch != lastBatch || batch.records.isFull(); + // check if the batch is expired + if (batch.maybeExpire(requestTimeout, retryBackoffMs, now, this.lingerMs, isFull)) { + expiredBatches.add(batch); + count++; + batchIterator.remove(); + deallocate(batch); + } else { + // Stop at the first batch that has not expired. break; } } diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java index eb7bbb3cefba..e6cd68fd065e 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordBatch.java @@ -134,14 +134,23 @@ public String toString() { } /** - * Expire the batch that is ready but is sitting in accumulator for more than requestTimeout due to metadata being unavailable. - * We need to explicitly check if the record is full or linger time is met because the accumulator's partition may not be ready - * if the leader is unavailable. + * A batch whose metadata is not available should be expired if one of the following is true: + *

      + *
    1. the batch is not in retry AND request timeout has elapsed after it is ready (full or linger.ms has reached). + *
    2. the batch is in retry AND request timeout has elapsed after the backoff period ended. + *
    */ - public boolean maybeExpire(int requestTimeout, long now, long lingerMs) { + public boolean maybeExpire(int requestTimeoutMs, long retryBackoffMs, long now, long lingerMs, boolean isFull) { boolean expire = false; - if ((this.records.isFull() && requestTimeout < (now - this.lastAppendTime)) || requestTimeout < (now - (this.lastAttemptMs + lingerMs))) { + + if (!this.inRetry() && isFull && requestTimeoutMs < (now - this.lastAppendTime)) + expire = true; + else if (!this.inRetry() && requestTimeoutMs < (now - (this.createdMs + lingerMs))) expire = true; + else if (this.inRetry() && requestTimeoutMs < (now - (this.lastAttemptMs + retryBackoffMs))) + expire = true; + + if (expire) { this.records.close(); this.done(-1L, Record.NO_TIMESTAMP, new TimeoutException("Batch containing " + recordCount + " record(s) expired due to timeout while requesting metadata from brokers for " + topicPartition)); } diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java index 366027286181..904aa73bd33c 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java @@ -297,22 +297,71 @@ public void onCompletion(RecordMetadata metadata, Exception exception) { @Test public void testExpiredBatches() throws InterruptedException { - long now = time.milliseconds(); - RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, CompressionType.NONE, 10, 100L, metrics, time); + long retryBackoffMs = 100L; + long lingerMs = 3000L; + int requestTimeout = 60; + + RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, CompressionType.NONE, lingerMs, retryBackoffMs, metrics, time); int appends = 1024 / msgSize; + + // Test batches not in retry for (int i = 0; i < appends; i++) { accum.append(tp1, 0L, key, value, null, maxBlockTimeMs); - assertEquals("No partitions should be ready.", 0, accum.ready(cluster, now).readyNodes.size()); + assertEquals("No partitions should be ready.", 0, accum.ready(cluster, time.milliseconds()).readyNodes.size()); } - time.sleep(2000); - accum.ready(cluster, now); + // Make the batches ready due to batch full accum.append(tp1, 0L, key, value, null, 0); Set readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes; assertEquals("Our partition's leader should be ready", Collections.singleton(node1), readyNodes); - Cluster cluster = new Cluster(new ArrayList(), new ArrayList(), Collections.emptySet()); - now = time.milliseconds(); - List expiredBatches = accum.abortExpiredBatches(60, cluster, now); - assertEquals(1, expiredBatches.size()); + // Advance the clock to expire the batch. 
+ time.sleep(requestTimeout + 1); + accum.mutePartition(tp1); + List expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should not be expired when the partition is muted", 0, expiredBatches.size()); + + accum.unmutePartition(tp1); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should be expired", 1, expiredBatches.size()); + assertEquals("No partitions should be ready.", 0, accum.ready(cluster, time.milliseconds()).readyNodes.size()); + + // Advance the clock to make the next batch ready due to linger.ms + time.sleep(lingerMs); + assertEquals("Our partition's leader should be ready", Collections.singleton(node1), readyNodes); + time.sleep(requestTimeout + 1); + + accum.mutePartition(tp1); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should not be expired when metadata is still available and partition is muted", 0, expiredBatches.size()); + + accum.unmutePartition(tp1); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should be expired when the partition is not muted", 1, expiredBatches.size()); + assertEquals("No partitions should be ready.", 0, accum.ready(cluster, time.milliseconds()).readyNodes.size()); + + // Test batches in retry. + // Create a retried batch + accum.append(tp1, 0L, key, value, null, 0); + time.sleep(lingerMs); + readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes; + assertEquals("Our partition's leader should be ready", Collections.singleton(node1), readyNodes); + Map> drained = accum.drain(cluster, readyNodes, Integer.MAX_VALUE, time.milliseconds()); + assertEquals("There should be only one batch.", drained.get(node1.id()).size(), 1); + time.sleep(1000L); + accum.reenqueue(drained.get(node1.id()).get(0), time.milliseconds()); + + // test expiration. 
+ time.sleep(requestTimeout + retryBackoffMs); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should not be expired.", 0, expiredBatches.size()); + time.sleep(1L); + + accum.mutePartition(tp1); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should not be expired when the partition is muted", 0, expiredBatches.size()); + + accum.unmutePartition(tp1); + expiredBatches = accum.abortExpiredBatches(requestTimeout, cluster, time.milliseconds()); + assertEquals("The batch should be expired when the partition is not muted.", 1, expiredBatches.size()); } @Test From f37b550502f6e3429cb3582db7e1c93614e84c58 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 28 Mar 2016 09:00:03 -0700 Subject: [PATCH 029/267] MINOR: Fix typo and tweak wording in `RecordAccumulator` comments This was recently introduced in: https://github.com/apache/kafka/commit/1fbe445dde71df0023a978c5e54dd229d3d23e1b Author: Ismael Juma Reviewers: Jun Rao Closes #1152 from ijuma/fix-typos-in-record-accumulator (cherry picked from commit 4c0660bf3da9879cb405a0f85cf1524511e091e8) Signed-off-by: Jun Rao --- .../clients/producer/internals/RecordAccumulator.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java index 915c4d3e9d93..7f5b16f244ec 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java @@ -218,14 +218,14 @@ public List abortExpiredBatches(int requestTimeout, Cluster cluster int count = 0; for (Map.Entry> entry : this.batches.entrySet()) { Deque dq = entry.getValue(); - // We only check if the batch should be expired if the partition does not have a batch in flight. - // This is to avoid the later batches get expired when an earlier batch is still in progress. - // This protection only takes effect when user sets max.in.flight.request.per.connection=1. - // Otherwise the expiration order is not guranteed. TopicPartition tp = entry.getKey(); + // We only check if the batch should be expired if the partition does not have a batch in flight. + // This is to prevent later batches from being expired while an earlier batch is still in progress. + // Note that `muted` is only ever populated if `max.in.flight.request.per.connection=1` so this protection + // is only active in this case. Otherwise the expiration order is not guaranteed. 
if (!muted.contains(tp)) { synchronized (dq) { - // iterate over the batches and expire them if they have stayed in accumulator for more than requestTimeOut + // iterate over the batches and expire them if they have been in the accumulator for more than requestTimeOut RecordBatch lastBatch = dq.peekLast(); Iterator batchIterator = dq.iterator(); while (batchIterator.hasNext()) { From afe82512ba5ec3c22ce0e99acfea9ce4d5b2bb2e Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 28 Mar 2016 14:35:31 -0700 Subject: [PATCH 030/267] MINOR: Remove a couple of redundant `CoreUtils.rm` methods Also: * Rename remaining `CoreUtils.rm` to `delete` for consistency * Use `try with resources` in `Utils` to simplify code * Silence compiler warning due to exception catch clause in `TestUtils` Author: Ismael Juma Reviewers: Guozhang Wang Closes #1153 from ijuma/remove-redundant-core-utils-rm (cherry picked from commit 43d5078e981bbb25fd81cdc8ba4c339cd2d3f3d2) Signed-off-by: Guozhang Wang --- .../org/apache/kafka/common/utils/Utils.java | 18 ++++--------- core/src/main/scala/kafka/log/Log.scala | 3 ++- .../metrics/KafkaCSVMetricsReporter.scala | 7 ++--- .../main/scala/kafka/utils/CoreUtils.scala | 27 +------------------ .../kafka/api/ProducerCompressionTest.scala | 2 +- .../scala/other/kafka/StressTestLog.scala | 5 ++-- .../other/kafka/TestLinearWriteSpeed.scala | 7 +++-- .../unit/kafka/admin/AddPartitionsTest.scala | 2 +- .../scala/unit/kafka/admin/AdminTest.scala | 12 +++++---- .../integration/KafkaServerTestHarness.scala | 2 +- .../kafka/integration/RollingBounceTest.scala | 2 +- .../UncleanLeaderElectionTest.scala | 2 +- .../kafka/log/BrokerCompressionTest.scala | 9 +++---- .../scala/unit/kafka/log/CleanerTest.scala | 2 +- .../kafka/log/LogCleanerIntegrationTest.scala | 5 ++-- .../scala/unit/kafka/log/LogManagerTest.scala | 5 ++-- .../test/scala/unit/kafka/log/LogTest.scala | 6 ++--- .../unit/kafka/producer/ProducerTest.scala | 4 +-- .../kafka/server/AdvertiseBrokerTest.scala | 4 +-- .../server/HighwatermarkPersistenceTest.scala | 7 +++-- .../kafka/server/LeaderElectionTest.scala | 2 +- .../unit/kafka/server/LogOffsetTest.scala | 3 ++- .../unit/kafka/server/LogRecoveryTest.scala | 3 ++- .../unit/kafka/server/OffsetCommitTest.scala | 9 +++---- .../server/ServerGenerateBrokerIdTest.scala | 18 ++++++------- .../kafka/server/ServerShutdownTest.scala | 6 ++--- .../unit/kafka/server/ServerStartupTest.scala | 6 ++--- .../scala/unit/kafka/utils/TestUtils.scala | 10 +++---- .../unit/kafka/zk/EmbeddedZookeeper.scala | 9 +++---- 29 files changed, 86 insertions(+), 111 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java index 4c4225bdcf89..01675487ff99 100755 --- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java @@ -442,13 +442,8 @@ public static String join(Collection list, String seperator) { */ public static Properties loadProps(String filename) throws IOException, FileNotFoundException { Properties props = new Properties(); - InputStream propStream = null; - try { - propStream = new FileInputStream(filename); + try (InputStream propStream = new FileInputStream(filename)) { props.load(propStream); - } finally { - if (propStream != null) - propStream.close(); } return props; } @@ -540,16 +535,13 @@ public static byte[] readBytes(ByteBuffer buffer) { */ public static String readFileAsString(String path, Charset charset) throws 
IOException { if (charset == null) charset = Charset.defaultCharset(); - FileInputStream stream = new FileInputStream(new File(path)); - String result = new String(); - try { + + try (FileInputStream stream = new FileInputStream(new File(path))) { FileChannel fc = stream.getChannel(); MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size()); - result = charset.decode(bb).toString(); - } finally { - stream.close(); + return charset.decode(bb).toString(); } - return result; + } public static String readFileAsString(String path) throws IOException { diff --git a/core/src/main/scala/kafka/log/Log.scala b/core/src/main/scala/kafka/log/Log.scala index 8c956f7378f5..81c19fae9318 100644 --- a/core/src/main/scala/kafka/log/Log.scala +++ b/core/src/main/scala/kafka/log/Log.scala @@ -32,6 +32,7 @@ import org.apache.kafka.common.record.TimestampType import scala.collection.JavaConversions import com.yammer.metrics.core.Gauge +import org.apache.kafka.common.utils.Utils object LogAppendInfo { val UnknownLogAppendInfo = LogAppendInfo(-1, -1, Message.NoTimestamp, NoCompressionCodec, NoCompressionCodec, -1, -1, false) @@ -714,7 +715,7 @@ class Log(val dir: File, removeLogMetrics() logSegments.foreach(_.delete()) segments.clear() - CoreUtils.rm(dir) + Utils.delete(dir) } } diff --git a/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala b/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala index cc0da9f7b89c..686f692548d9 100755 --- a/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala +++ b/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala @@ -22,14 +22,15 @@ package kafka.metrics import com.yammer.metrics.Metrics import java.io.File + import com.yammer.metrics.reporting.CsvReporter import java.util.concurrent.TimeUnit -import kafka.utils.{CoreUtils, VerifiableProperties, Logging} +import kafka.utils.{Logging, VerifiableProperties} +import org.apache.kafka.common.utils.Utils private trait KafkaCSVMetricsReporterMBean extends KafkaMetricsReporterMBean - private class KafkaCSVMetricsReporter extends KafkaMetricsReporter with KafkaCSVMetricsReporterMBean with Logging { @@ -48,7 +49,7 @@ private class KafkaCSVMetricsReporter extends KafkaMetricsReporter if (!initialized) { val metricsConfig = new KafkaMetricsConfig(props) csvDir = new File(props.getString("kafka.csv.metrics.dir", "kafka_metrics")) - CoreUtils.rm(csvDir) + Utils.delete(csvDir) csvDir.mkdirs() underlying = new CsvReporter(Metrics.defaultRegistry(), csvDir) if (props.getBoolean("kafka.csv.metrics.reporter.enabled", default = false)) { diff --git a/core/src/main/scala/kafka/utils/CoreUtils.scala b/core/src/main/scala/kafka/utils/CoreUtils.scala index b01f5cc413ab..fe2bebf195cf 100755 --- a/core/src/main/scala/kafka/utils/CoreUtils.scala +++ b/core/src/main/scala/kafka/utils/CoreUtils.scala @@ -78,36 +78,11 @@ object CoreUtils extends Logging { } } - /** - * Recursively delete the given file/directory and any subfiles (if any exist) - * @param file The root file at which to begin deleting - */ - def rm(file: String): Unit = rm(new File(file)) - /** * Recursively delete the list of files/directories and any subfiles (if any exist) * @param files sequence of files to be deleted */ - def rm(files: Seq[String]): Unit = files.foreach(f => rm(new File(f))) - - /** - * Recursively delete the given file/directory and any subfiles (if any exist) - * @param file The root file at which to begin deleting - */ - def rm(file: File) { - if(file == null) { - return - } else if(file.isDirectory) { - 
val files = file.listFiles() - if(files != null) { - for(f <- files) - rm(f) - } - file.delete() - } else { - file.delete() - } - } + def delete(files: Seq[String]): Unit = files.foreach(f => Utils.delete(new File(f))) /** * Register the given mbean with the platform mbean server, diff --git a/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala index c4a2bd788956..fc1ceec731c0 100755 --- a/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala +++ b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala @@ -55,7 +55,7 @@ class ProducerCompressionTest(compression: String) extends ZooKeeperTestHarness @After override def tearDown() { server.shutdown - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) super.tearDown() } diff --git a/core/src/test/scala/other/kafka/StressTestLog.scala b/core/src/test/scala/other/kafka/StressTestLog.scala index dead0ebb743f..8adc7e2b8824 100755 --- a/core/src/test/scala/other/kafka/StressTestLog.scala +++ b/core/src/test/scala/other/kafka/StressTestLog.scala @@ -19,11 +19,12 @@ package kafka import java.util.Properties import java.util.concurrent.atomic._ -import kafka.common._ + import kafka.message._ import kafka.log._ import kafka.utils._ import org.apache.kafka.clients.consumer.OffsetOutOfRangeException +import org.apache.kafka.common.utils.Utils /** * A stress test that instantiates a log and then runs continual appends against it from one thread and continual reads against it @@ -55,7 +56,7 @@ object StressTestLog { running.set(false) writer.join() reader.join() - CoreUtils.rm(dir) + Utils.delete(dir) } }) diff --git a/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala b/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala index 236d8579d9bc..db281bfa543a 100755 --- a/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala +++ b/core/src/test/scala/other/kafka/TestLinearWriteSpeed.scala @@ -21,11 +21,14 @@ import java.io._ import java.nio._ import java.nio.channels._ import java.util.{Properties, Random} + import kafka.log._ import kafka.utils._ import kafka.message._ + import scala.math._ import joptsimple._ +import org.apache.kafka.common.utils.Utils /** * This test does linear writes using either a kafka log or a file and measures throughput and latency. 
@@ -196,7 +199,7 @@ object TestLinearWriteSpeed { } class LogWritable(val dir: File, config: LogConfig, scheduler: Scheduler, val messages: ByteBufferMessageSet) extends Writable { - CoreUtils.rm(dir) + Utils.delete(dir) val log = new Log(dir, config, 0L, scheduler, SystemTime) def write(): Int = { log.append(messages, true) @@ -204,7 +207,7 @@ object TestLinearWriteSpeed { } def close() { log.close() - CoreUtils.rm(log.dir) + Utils.delete(log.dir) } } diff --git a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala index b9bbaceb1557..ab8d36314477 100755 --- a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala +++ b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala @@ -59,7 +59,7 @@ class AddPartitionsTest extends ZooKeeperTestHarness { @After override def tearDown() { servers.foreach(_.shutdown()) - servers.foreach(server => CoreUtils.rm(server.config.logDirs)) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/admin/AdminTest.scala b/core/src/test/scala/unit/kafka/admin/AdminTest.scala index 8910e096d849..21bb6ab4631b 100755 --- a/core/src/test/scala/unit/kafka/admin/AdminTest.scala +++ b/core/src/test/scala/unit/kafka/admin/AdminTest.scala @@ -22,13 +22,15 @@ import org.apache.kafka.common.protocol.ApiKeys import org.junit.Assert._ import org.junit.Test import java.util.Properties + import kafka.utils._ import kafka.log._ import kafka.zk.ZooKeeperTestHarness -import kafka.utils.{Logging, ZkUtils, TestUtils} -import kafka.common.{TopicExistsException, TopicAndPartition} -import kafka.server.{ConfigType, KafkaServer, KafkaConfig} +import kafka.utils.{Logging, TestUtils, ZkUtils} +import kafka.common.{TopicAndPartition, TopicExistsException} +import kafka.server.{ConfigType, KafkaConfig, KafkaServer} import java.io.File + import TestUtils._ import scala.collection.{Map, immutable} @@ -418,7 +420,7 @@ class AdminTest extends ZooKeeperTestHarness with Logging with RackAwareTest { assertEquals(newConfig, configInZk) } finally { server.shutdown() - server.config.logDirs.foreach(CoreUtils.rm(_)) + CoreUtils.delete(server.config.logDirs) } } @@ -449,7 +451,7 @@ class AdminTest extends ZooKeeperTestHarness with Logging with RackAwareTest { assertEquals(new Quota(2000, true), server.apis.quotaManagers(ApiKeys.FETCH.id).quota(clientId)) } finally { server.shutdown() - server.config.logDirs.foreach(CoreUtils.rm(_)) + CoreUtils.delete(server.config.logDirs) } } diff --git a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala index 870b9ad5423d..676772f33118 100755 --- a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala +++ b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala @@ -87,7 +87,7 @@ trait KafkaServerTestHarness extends ZooKeeperTestHarness { @After override def tearDown() { servers.foreach(_.shutdown()) - servers.foreach(_.config.logDirs.foreach(CoreUtils.rm(_))) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) super.tearDown } diff --git a/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala b/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala index b9315684481b..5221855603a3 100755 --- a/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala +++ b/core/src/test/scala/unit/kafka/integration/RollingBounceTest.scala @@ -43,7 
+43,7 @@ class RollingBounceTest extends ZooKeeperTestHarness { @After override def tearDown() { servers.foreach(_.shutdown()) - servers.foreach(server => CoreUtils.rm(server.config.logDirs)) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala b/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala index b725d8b59ee2..a8ba283d9d19 100755 --- a/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala +++ b/core/src/test/scala/unit/kafka/integration/UncleanLeaderElectionTest.scala @@ -82,7 +82,7 @@ class UncleanLeaderElectionTest extends ZooKeeperTestHarness { @After override def tearDown() { servers.foreach(server => shutdownServer(server)) - servers.foreach(server => CoreUtils.rm(server.config.logDirs)) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) // restore log levels kafkaApisLogger.setLevel(Level.ERROR) diff --git a/core/src/test/scala/unit/kafka/log/BrokerCompressionTest.scala b/core/src/test/scala/unit/kafka/log/BrokerCompressionTest.scala index d0cb4a1dc319..7487bc57e8fc 100755 --- a/core/src/test/scala/unit/kafka/log/BrokerCompressionTest.scala +++ b/core/src/test/scala/unit/kafka/log/BrokerCompressionTest.scala @@ -17,7 +17,6 @@ package kafka.log -import java.io.File import kafka.utils._ import kafka.message._ import org.scalatest.junit.JUnitSuite @@ -26,9 +25,9 @@ import org.junit.Assert._ import org.junit.runner.RunWith import org.junit.runners.Parameterized import org.junit.runners.Parameterized.Parameters -import java.util.{Properties, Collection, ArrayList} -import kafka.server.KafkaConfig import org.apache.kafka.common.record.CompressionType +import org.apache.kafka.common.utils.Utils +import java.util.{Collection, Properties} import scala.collection.JavaConversions._ @RunWith(value = classOf[Parameterized]) @@ -41,7 +40,7 @@ class BrokerCompressionTest(messageCompression: String, brokerCompression: Strin @After def tearDown() { - CoreUtils.rm(tmpDir) + Utils.delete(tmpDir) } /** @@ -78,4 +77,4 @@ object BrokerCompressionTest { messageCompression <- CompressionType.values ) yield Array(messageCompression.name, brokerCompression) } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/log/CleanerTest.scala b/core/src/test/scala/unit/kafka/log/CleanerTest.scala index 377323318aae..b6849f0dcdc7 100755 --- a/core/src/test/scala/unit/kafka/log/CleanerTest.scala +++ b/core/src/test/scala/unit/kafka/log/CleanerTest.scala @@ -49,7 +49,7 @@ class CleanerTest extends JUnitSuite { @After def teardown() { - CoreUtils.rm(tmpdir) + Utils.delete(tmpdir) } /** diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala index 6b91611f1e9c..cc9873c862a6 100755 --- a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala @@ -25,6 +25,7 @@ import kafka.message._ import kafka.server.OffsetCheckpoint import kafka.utils._ import org.apache.kafka.common.record.CompressionType +import org.apache.kafka.common.utils.Utils import org.junit.Assert._ import org.junit._ import org.junit.runner.RunWith @@ -119,7 +120,7 @@ class LogCleanerIntegrationTest(compressionCodec: String) { @After def teardown() { time.scheduler.shutdown() - CoreUtils.rm(logDir) + Utils.delete(logDir) } /* create a cleaner instance and logs with the given 
parameters */ @@ -165,4 +166,4 @@ object LogCleanerIntegrationTest { list.add(Array(codec.name)) list } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala index 46bfbed476e4..f290d54702f9 100755 --- a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala @@ -24,6 +24,7 @@ import kafka.common._ import kafka.server.OffsetCheckpoint import kafka.utils._ import org.apache.kafka.common.errors.OffsetOutOfRangeException +import org.apache.kafka.common.utils.Utils import org.junit.Assert._ import org.junit.{After, Before, Test} @@ -54,8 +55,8 @@ class LogManagerTest { def tearDown() { if(logManager != null) logManager.shutdown() - CoreUtils.rm(logDir) - logManager.logDirs.foreach(CoreUtils.rm(_)) + Utils.delete(logDir) + logManager.logDirs.foreach(Utils.delete) } /** diff --git a/core/src/test/scala/unit/kafka/log/LogTest.scala b/core/src/test/scala/unit/kafka/log/LogTest.scala index c2eb8179fbe2..4d75d53c254a 100755 --- a/core/src/test/scala/unit/kafka/log/LogTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogTest.scala @@ -19,7 +19,6 @@ package kafka.log import java.io._ import java.util.Properties -import java.util.concurrent.atomic._ import org.apache.kafka.common.errors.{CorruptRecordException, OffsetOutOfRangeException, RecordBatchTooLargeException, RecordTooLargeException} import kafka.api.ApiVersion @@ -30,6 +29,7 @@ import org.junit.{After, Before, Test} import kafka.message._ import kafka.utils._ import kafka.server.KafkaConfig +import org.apache.kafka.common.utils.Utils class LogTest extends JUnitSuite { @@ -47,7 +47,7 @@ class LogTest extends JUnitSuite { @After def tearDown() { - CoreUtils.rm(tmpDir) + Utils.delete(tmpDir) } def createEmptyLogs(dir: File, offsets: Int*) { @@ -810,7 +810,7 @@ class LogTest extends JUnitSuite { log = new Log(logDir, config, recoveryPoint, time.scheduler, time) assertEquals(numMessages, log.logEndOffset) assertEquals("Messages in the log after recovery should be the same.", messages, log.logSegments.flatMap(_.log.iterator.toList)) - CoreUtils.rm(logDir) + Utils.delete(logDir) } } diff --git a/core/src/test/scala/unit/kafka/producer/ProducerTest.scala b/core/src/test/scala/unit/kafka/producer/ProducerTest.scala index 4a1ad5abcfdc..cf25cdbf1c63 100755 --- a/core/src/test/scala/unit/kafka/producer/ProducerTest.scala +++ b/core/src/test/scala/unit/kafka/producer/ProducerTest.scala @@ -96,8 +96,8 @@ class ProducerTest extends ZooKeeperTestHarness with Logging{ server1.shutdown server2.shutdown - CoreUtils.rm(server1.config.logDirs) - CoreUtils.rm(server2.config.logDirs) + CoreUtils.delete(server1.config.logDirs) + CoreUtils.delete(server2.config.logDirs) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/server/AdvertiseBrokerTest.scala b/core/src/test/scala/unit/kafka/server/AdvertiseBrokerTest.scala index 75fa664725e7..dc17aa434f98 100755 --- a/core/src/test/scala/unit/kafka/server/AdvertiseBrokerTest.scala +++ b/core/src/test/scala/unit/kafka/server/AdvertiseBrokerTest.scala @@ -43,7 +43,7 @@ class AdvertiseBrokerTest extends ZooKeeperTestHarness { @After override def tearDown() { server.shutdown() - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) super.tearDown() } @@ -55,4 +55,4 @@ class AdvertiseBrokerTest extends ZooKeeperTestHarness { assertEquals(advertisedPort, endpoint.port) } -} \ No newline at end of file +} diff --git 
a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala index 2e6660195ada..26e28171a409 100755 --- a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala +++ b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala @@ -18,16 +18,15 @@ package kafka.server import kafka.log._ import java.io.File -import org.I0Itec.zkclient.ZkClient import org.apache.kafka.common.metrics.Metrics +import org.apache.kafka.common.utils.{Utils, MockTime => JMockTime} import org.easymock.EasyMock import org.junit._ import org.junit.Assert._ import kafka.common._ import kafka.cluster.Replica -import kafka.utils.{ZkUtils, SystemTime, KafkaScheduler, TestUtils, MockTime, CoreUtils} +import kafka.utils.{KafkaScheduler, MockTime, SystemTime, TestUtils, ZkUtils} import java.util.concurrent.atomic.AtomicBoolean -import org.apache.kafka.common.utils.{MockTime => JMockTime} class HighwatermarkPersistenceTest { @@ -42,7 +41,7 @@ class HighwatermarkPersistenceTest { @After def teardown() { for(manager <- logManagers; dir <- manager.logDirs) - CoreUtils.rm(dir) + Utils.delete(dir) } @Test diff --git a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala index e84780aaa2a1..72589800e111 100755 --- a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala +++ b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala @@ -58,7 +58,7 @@ class LeaderElectionTest extends ZooKeeperTestHarness { @After override def tearDown() { servers.foreach(_.shutdown()) - servers.foreach(server => CoreUtils.rm(server.config.logDirs)) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala index 8c86a7b5956a..d5c696ee9ff1 100755 --- a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala @@ -30,6 +30,7 @@ import kafka.utils._ import kafka.zk.ZooKeeperTestHarness import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.protocol.Errors +import org.apache.kafka.common.utils.Utils import org.junit.Assert._ import org.junit.{After, Before, Test} @@ -57,7 +58,7 @@ class LogOffsetTest extends ZooKeeperTestHarness { override def tearDown() { simpleConsumer.close server.shutdown - CoreUtils.rm(logDir) + Utils.delete(logDir) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala index e13bfd96342d..d37de761f138 100755 --- a/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogRecoveryTest.scala @@ -26,6 +26,7 @@ import java.io.File import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.serialization.{IntegerSerializer, StringSerializer} +import org.apache.kafka.common.utils.Utils import org.junit.{After, Before, Test} import org.junit.Assert._ @@ -94,7 +95,7 @@ class LogRecoveryTest extends ZooKeeperTestHarness { producer.close() for (server <- servers) { server.shutdown() - CoreUtils.rm(server.config.logDirs(0)) + Utils.delete(new File(server.config.logDirs(0))) } super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala 
b/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala index 1d5148b0e158..29eaf2da4893 100755 --- a/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala +++ b/core/src/test/scala/unit/kafka/server/OffsetCommitTest.scala @@ -19,13 +19,14 @@ package kafka.server import kafka.api.{GroupCoordinatorRequest, OffsetCommitRequest, OffsetFetchRequest} import kafka.consumer.SimpleConsumer -import kafka.common.{OffsetMetadata, OffsetMetadataAndError, OffsetAndMetadata, TopicAndPartition} +import kafka.common.{OffsetAndMetadata, OffsetMetadata, OffsetMetadataAndError, TopicAndPartition} import kafka.utils._ import kafka.utils.TestUtils._ import kafka.zk.ZooKeeperTestHarness import org.apache.kafka.common.protocol.Errors - +import org.apache.kafka.common.utils.Utils import org.junit.{After, Before, Test} +import org.junit.Assert._ import java.util.Properties import java.io.File @@ -33,8 +34,6 @@ import java.io.File import scala.util.Random import scala.collection._ -import org.junit.Assert._ - class OffsetCommitTest extends ZooKeeperTestHarness { val random: Random = new Random() val group = "test-group" @@ -71,7 +70,7 @@ class OffsetCommitTest extends ZooKeeperTestHarness { override def tearDown() { simpleConsumer.close server.shutdown - CoreUtils.rm(logDir) + Utils.delete(logDir) super.tearDown() } diff --git a/core/src/test/scala/unit/kafka/server/ServerGenerateBrokerIdTest.scala b/core/src/test/scala/unit/kafka/server/ServerGenerateBrokerIdTest.scala index c26ff133a3f2..8e25366aed07 100755 --- a/core/src/test/scala/unit/kafka/server/ServerGenerateBrokerIdTest.scala +++ b/core/src/test/scala/unit/kafka/server/ServerGenerateBrokerIdTest.scala @@ -51,7 +51,7 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { server1.startup() assertEquals(server1.config.brokerId, 1001) server1.shutdown() - CoreUtils.rm(server1.config.logDirs) + CoreUtils.delete(server1.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } @@ -75,9 +75,9 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { assertTrue(verifyBrokerMetadata(server1.config.logDirs,1001)) assertTrue(verifyBrokerMetadata(server2.config.logDirs,0)) assertTrue(verifyBrokerMetadata(server3.config.logDirs,1002)) - CoreUtils.rm(server1.config.logDirs) - CoreUtils.rm(server2.config.logDirs) - CoreUtils.rm(server3.config.logDirs) + CoreUtils.delete(server1.config.logDirs) + CoreUtils.delete(server2.config.logDirs) + CoreUtils.delete(server3.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } @@ -93,7 +93,7 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { assertEquals(server3.config.brokerId,3) server3.shutdown() assertTrue(verifyBrokerMetadata(server3.config.logDirs,3)) - CoreUtils.rm(server3.config.logDirs) + CoreUtils.delete(server3.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } @@ -116,7 +116,7 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { server1.startup() server1.shutdown() assertTrue(verifyBrokerMetadata(config1.logDirs, 1001)) - CoreUtils.rm(server1.config.logDirs) + CoreUtils.delete(server1.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } @@ -133,7 +133,7 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { case e: kafka.common.InconsistentBrokerIdException => //success } server1.shutdown() - CoreUtils.rm(server1.config.logDirs) + CoreUtils.delete(server1.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } @@ -170,8 
+170,8 @@ class ServerGenerateBrokerIdTest extends ZooKeeperTestHarness { // verify correct broker metadata was written assertTrue(verifyBrokerMetadata(serverA.config.logDirs,1)) assertTrue(verifyBrokerMetadata(newServerB.config.logDirs,2)) - CoreUtils.rm(serverA.config.logDirs) - CoreUtils.rm(newServerB.config.logDirs) + CoreUtils.delete(serverA.config.logDirs) + CoreUtils.delete(newServerB.config.logDirs) TestUtils.verifyNonDaemonThreadsStatus(this.getClass.getName) } diff --git a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala index 67f62d9cf756..bc71edd5df60 100755 --- a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala +++ b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala @@ -104,7 +104,7 @@ class ServerShutdownTest extends ZooKeeperTestHarness { consumer.close() producer.close() server.shutdown() - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) verifyNonDaemonThreadsStatus } @@ -117,7 +117,7 @@ class ServerShutdownTest extends ZooKeeperTestHarness { server.startup() server.shutdown() server.awaitShutdown() - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) verifyNonDaemonThreadsStatus } @@ -145,7 +145,7 @@ class ServerShutdownTest extends ZooKeeperTestHarness { server.shutdown() server.awaitShutdown() } - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) verifyNonDaemonThreadsStatus } diff --git a/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala b/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala index b321a0266bf9..9b49365c7706 100755 --- a/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala +++ b/core/src/test/scala/unit/kafka/server/ServerStartupTest.scala @@ -40,7 +40,7 @@ class ServerStartupTest extends ZooKeeperTestHarness { assertTrue(pathExists) server.shutdown() - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) } @Test @@ -66,7 +66,7 @@ class ServerStartupTest extends ZooKeeperTestHarness { assertEquals(brokerRegistration, zkUtils.readData(ZkUtils.BrokerIdsPath + "/" + brokerId)._1) server1.shutdown() - CoreUtils.rm(server1.config.logDirs) + CoreUtils.delete(server1.config.logDirs) } @Test @@ -80,6 +80,6 @@ class ServerStartupTest extends ZooKeeperTestHarness { assertEquals(brokerId, server.metadataCache.getAliveBrokers.head.id) server.shutdown() - CoreUtils.rm(server.config.logDirs) + CoreUtils.delete(server.config.logDirs) } } diff --git a/core/src/test/scala/unit/kafka/utils/TestUtils.scala b/core/src/test/scala/unit/kafka/utils/TestUtils.scala index 0730468a496b..a1e79128dda9 100755 --- a/core/src/test/scala/unit/kafka/utils/TestUtils.scala +++ b/core/src/test/scala/unit/kafka/utils/TestUtils.scala @@ -21,9 +21,8 @@ import java.io._ import java.nio._ import java.nio.file.Files import java.nio.channels._ -import java.util -import java.util.concurrent.{Callable, TimeUnit, Executors} -import java.util.{Collections, Random, Properties} +import java.util.concurrent.{Callable, Executors, TimeUnit} +import java.util.{Collections, Properties, Random} import java.security.cert.X509Certificate import javax.net.ssl.X509TrustManager import charset.Charset @@ -52,6 +51,7 @@ import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.common.network.Mode import org.apache.kafka.common.record.CompressionType import org.apache.kafka.common.serialization.{ByteArraySerializer, Serializer} +import 
org.apache.kafka.common.utils.Utils import scala.collection.Map import scala.collection.JavaConversions._ @@ -100,7 +100,7 @@ object TestUtils extends Logging { Runtime.getRuntime().addShutdownHook(new Thread() { override def run() = { - CoreUtils.rm(f) + Utils.delete(f) } }) f @@ -1115,7 +1115,7 @@ object TestUtils extends Logging { } } catch { case ie: InterruptedException => failWithTimeout() - case e => exceptions += e + case e: Throwable => exceptions += e } finally { threadPool.shutdownNow() } diff --git a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala index 5fa2f6519d8f..1030c46d93bd 100755 --- a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala +++ b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala @@ -21,10 +21,9 @@ import org.apache.zookeeper.server.ZooKeeperServer import org.apache.zookeeper.server.NIOServerCnxnFactory import kafka.utils.TestUtils import java.net.InetSocketAddress -import javax.security.auth.login.Configuration + import kafka.utils.CoreUtils -import org.apache.kafka.common.security.JaasUtils -import org.apache.kafka.common.utils.Utils.getPort +import org.apache.kafka.common.utils.Utils class EmbeddedZookeeper() { val snapshotDir = TestUtils.tempDir() @@ -40,8 +39,8 @@ class EmbeddedZookeeper() { def shutdown() { CoreUtils.swallow(zookeeper.shutdown()) CoreUtils.swallow(factory.shutdown()) - CoreUtils.rm(logDir) - CoreUtils.rm(snapshotDir) + Utils.delete(logDir) + Utils.delete(snapshotDir) } } From 6878df3cc85ac6efc99ae9f723e7664aa919f5d9 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Tue, 29 Mar 2016 13:30:56 -0700 Subject: [PATCH 031/267] HOTFIX: RocksDBStore must clear dirty flags after flush guozhangwang Without clearing the dirty flags, RocksDBStore will perform flush for every new record. This bug made the store performance painfully slower. 
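To make the dirty-flag contract behind this hotfix concrete, the sketch below shows the general write-back-cache pattern the fix restores: entries are marked dirty on write, and a flush persists the dirty entries and then clears their flags so unchanged entries are not re-flushed on every subsequent write. This is a minimal, hypothetical illustration; the class and method names are not the RocksDBStore API.

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Minimal write-back cache sketch (hypothetical; not the RocksDBStore API).
    class DirtyTrackingCache<K, V> {

        private static final class Entry<V> {
            V value;
            boolean dirty;
            Entry(V value, boolean dirty) { this.value = value; this.dirty = dirty; }
        }

        private final Map<K, Entry<V>> cache = new LinkedHashMap<>();

        void put(K key, V value) {
            cache.put(key, new Entry<>(value, true)); // new writes start out dirty
        }

        // Persist dirty entries to the backing store, then mark them clean.
        void flush(Map<K, V> backingStore) {
            for (Map.Entry<K, Entry<V>> e : cache.entrySet()) {
                Entry<V> entry = e.getValue();
                if (entry.dirty) {
                    backingStore.put(e.getKey(), entry.value);
                    entry.dirty = false; // clearing the flag is what prevents a flush per new record
                }
            }
        }
    }
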
Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1163 from ymatsuda/clear_dirty_flag (cherry picked from commit 5089f547d5d64a0235e1b4adc327a0cb05eb4ca8) Signed-off-by: Guozhang Wang --- .../streams/state/internals/RocksDBStore.java | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index b206f37962af..fe327f6af8a2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -165,7 +165,7 @@ public void init(ProcessorContext context, StateStore root) { public void apply(K key, RocksDBCacheEntry entry) { // flush all the dirty entries to RocksDB if this evicted entry is dirty if (entry.isDirty) { - flush(); + flushCache(); } } }); @@ -226,7 +226,6 @@ public V get(K key) { RocksDBCacheEntry entry = cache.get(key); if (entry == null) { - byte[] rawKey = serdes.rawKey(key); V value = serdes.valueFrom(getInternal(serdes.rawKey(key))); cache.put(key, new RocksDBCacheEntry(value)); @@ -251,8 +250,8 @@ private byte[] getInternal(byte[] rawKey) { @Override public void put(K key, V value) { if (cache != null) { - cache.put(key, new RocksDBCacheEntry(value, true)); cacheDirtyKeys.add(key); + cache.put(key, new RocksDBCacheEntry(value, true)); } else { byte[] rawKey = serdes.rawKey(key); byte[] rawValue = serdes.rawValue(value); @@ -298,7 +297,7 @@ public void putAll(List> entries) { put(entry.key, entry.value); } - // this function is only called in flush() + // this function is only called in flushCache() private void putAllInternal(List> entries) { WriteBatch batch = new WriteBatch(); @@ -324,7 +323,7 @@ public V delete(K key) { public KeyValueIterator range(K from, K to) { // we need to flush the cache if necessary before returning the iterator if (cache != null) - flush(); + flushCache(); return new RocksDBRangeIterator(db.newIterator(), serdes, from, to); } @@ -333,15 +332,14 @@ public KeyValueIterator range(K from, K to) { public KeyValueIterator all() { // we need to flush the cache if necessary before returning the iterator if (cache != null) - flush(); + flushCache(); RocksIterator innerIter = db.newIterator(); innerIter.seekToFirst(); return new RocksDbIterator(innerIter, serdes); } - @Override - public void flush() { + private void flushCache() { // flush of the cache entries if necessary if (cache != null) { List> putBatch = new ArrayList<>(cache.keys.size()); @@ -350,7 +348,7 @@ public void flush() { for (K key : cacheDirtyKeys) { RocksDBCacheEntry entry = cache.get(key); - assert entry.isDirty; + entry.isDirty = false; byte[] rawKey = serdes.rawKey(key); @@ -386,12 +384,19 @@ public void flush() { cacheDirtyKeys.clear(); } - flushInternal(); - if (loggingEnabled) changeLogger.logChange(getter); } + @Override + public void flush() { + // flush of the cache entries if necessary + flushCache(); + + // flush RocksDB + flushInternal(); + } + public void flushInternal() { try { db.flush(fOptions); From b642284dfef0b77ccd6b0e34ccf40ab19877f008 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Tue, 29 Mar 2016 14:18:57 -0700 Subject: [PATCH 032/267] KAFKA-3425: add missing upgrade notes Author: Jason Gustafson Reviewers: Grant Henke, Ashish Singh, Ismael Juma, Guozhang Wang Closes #1159 from hachikuji/KAFKA-3425 (cherry picked from commit 
27583953528138ff3503fb8f300c97971718e48a) Signed-off-by: Guozhang Wang --- docs/upgrade.html | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/upgrade.html b/docs/upgrade.html index 060c3deb33dd..b9c4bec012c1 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -79,12 +79,16 @@
    Potential breaking changes in 0.10.0.0
      • MessageReader's package was changed from kafka.tools to kafka.common
      • MirrorMakerMessageHandler no longer exposes the handle(record: MessageAndMetadata[Array[Byte], Array[Byte]]) method as it was never called.
      • The 0.7 KafkaMigrationTool is no longer packaged with Kafka. If you need to migrate from 0.7 to 0.10.0, please migrate to 0.8 first and then follow the documented upgrade process to upgrade from 0.8 to 0.10.0.
    + • The new consumer has standardized its APIs to accept java.util.Collection as the sequence type for method parameters. Existing code may have to be updated to work with the 0.10.0 client library.

    Notable changes in 0.10.0.0

    - • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer
    + • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer.
    + • The new consumer now exposes the configuration parameter exclude.internal.topics to restrict internal topics (such as the consumer offsets topic) from accidentally being included in regular expression subscriptions. By default, it is enabled.
    + • The old Scala producer has been deprecated. Users should migrate their code to the Java producer included in the kafka-clients JAR as soon as possible.
    + • The new consumer API has been marked stable.

    Upgrading from 0.8.0, 0.8.1.X or 0.8.2.X to 0.9.0.0
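The java.util.Collection change called out in these upgrade notes is usually a small caller-side adjustment. As a rough, hypothetical example (the topic name, group id, and bootstrap address below are placeholders, not part of the patch), code that previously built a List for subscribe() can pass any Collection to the 0.10.0 consumer:

    import java.util.Collections;
    import java.util.Properties;

    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;

    public class CollectionApiExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("bootstrap.servers", "localhost:9092"); // placeholder
            props.put("group.id", "upgrade-example");         // placeholder
            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
                // subscribe(...) now accepts java.util.Collection<String>, so any Collection works
                consumer.subscribe(Collections.singletonList("upgrade-example-topic")); // placeholder topic
                ConsumerRecords<String, String> records = consumer.poll(1000);
                for (ConsumerRecord<String, String> record : records)
                    System.out.println(record.key() + " -> " + record.value());
            }
        }
    }
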

    From 4726adba534cf8d91c24a179d66e135804e216a2 Mon Sep 17 00:00:00 2001 From: Geoff Anderson Date: Wed, 30 Mar 2016 00:50:29 -0700 Subject: [PATCH 033/267] MINOR: Advance system test ducktape dependency from 0.3.10 to 0.4.0 Previous version of ducktape was found to have a memory leak which caused occasional failures in nightly runs. Author: Geoff Anderson Reviewers: Ewen Cheslack-Postava Closes #1165 from granders/minor-advance-ducktape-to-0.4.0 (cherry picked from commit aaafd23df800d41e4b16fd399015991b2e426dc5) Signed-off-by: Ewen Cheslack-Postava --- tests/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/setup.py b/tests/setup.py index 865c3160f4de..de3ea62a9d8e 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -30,5 +30,5 @@ license="apache2.0", packages=find_packages(), include_package_data=True, - install_requires=["ducktape==0.3.10", "requests>=2.5.0"] + install_requires=["ducktape==0.4.0", "requests>=2.5.0"] ) From 9ef243310d6d16a5d508286f8a1c5b5f70d87680 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Wed, 30 Mar 2016 14:26:01 -0700 Subject: [PATCH 034/267] MINOR: a simple benchmark for Streams guozhangwang miguno Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1164 from ymatsuda/perf (cherry picked from commit 2788f2dc73dd62c35c87842d02d23a0ed6a62e97) Signed-off-by: Guozhang Wang --- .../processor/internals/StreamTask.java | 4 +- .../kafka/streams/perf/SimpleBenchmark.java | 413 ++++++++++++++++++ 2 files changed, 415 insertions(+), 2 deletions(-) create mode 100644 streams/src/test/java/org/apache/kafka/streams/perf/SimpleBenchmark.java diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index c4cc2baf0e02..afa303ca1514 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -167,11 +167,11 @@ public int process() { this.currNode = recordInfo.node(); TopicPartition partition = recordInfo.partition(); - log.debug("Start processing one record [" + currRecord + "]"); + log.debug("Start processing one record [{}]", currRecord); this.currNode.process(currRecord.key(), currRecord.value()); - log.debug("Completed processing one record [" + currRecord + "]"); + log.debug("Completed processing one record [{}]", currRecord); // update the consumed offset map after processing is done consumedOffsets.put(partition, currRecord.offset()); diff --git a/streams/src/test/java/org/apache/kafka/streams/perf/SimpleBenchmark.java b/streams/src/test/java/org/apache/kafka/streams/perf/SimpleBenchmark.java new file mode 100644 index 000000000000..c88309046d69 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/perf/SimpleBenchmark.java @@ -0,0 +1,413 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.perf; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.common.serialization.LongDeserializer; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.processor.Processor; +import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.state.KeyValueStore; +import org.apache.kafka.streams.state.Stores; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.Properties; + +public class SimpleBenchmark { + + private final String kafka; + private final String zookeeper; + private final File stateDir; + + private static final String SOURCE_TOPIC = "simpleBenchmarkSourceTopic"; + private static final String SINK_TOPIC = "simpleBenchmarkSinkTopic"; + + private static final long NUM_RECORDS = 10000000L; + private static final Long END_KEY = NUM_RECORDS - 1; + private static final int KEY_SIZE = 8; + private static final int VALUE_SIZE = 100; + private static final int RECORD_SIZE = KEY_SIZE + VALUE_SIZE; + + private static final Serde BYTE_SERDE = Serdes.ByteArray(); + private static final Serde LONG_SERDE = Serdes.Long(); + + public SimpleBenchmark(File stateDir, String kafka, String zookeeper) { + super(); + this.stateDir = stateDir; + this.kafka = kafka; + this.zookeeper = zookeeper; + } + + public static void main(String[] args) throws Exception { + final File stateDir = new File("/tmp/kafka-streams-simple-benchmark"); + stateDir.mkdir(); + + final File rocksdbDir = new File(stateDir, "rocksdb-test"); + rocksdbDir.mkdir(); + + + final String kafka = "localhost:9092"; + final String zookeeper = "localhost:2181"; + + SimpleBenchmark benchmark = new SimpleBenchmark(stateDir, kafka, zookeeper); + + // producer performance + benchmark.produce(); + // consumer performance + benchmark.consume(); + // simple stream performance source->process + benchmark.processStream(); + // simple stream performance source->sink + benchmark.processStreamWithSink(); + // simple stream performance source->store + 
benchmark.processStreamWithStateStore(); + } + + public void processStream() { + CountDownLatch latch = new CountDownLatch(1); + + final KafkaStreams streams = createKafkaStreams(stateDir, kafka, zookeeper, latch); + + Thread thread = new Thread() { + public void run() { + streams.start(); + } + }; + thread.start(); + + long startTime = System.currentTimeMillis(); + + while (latch.getCount() > 0) { + try { + latch.await(); + } catch (InterruptedException ex) { + Thread.interrupted(); + } + } + + long endTime = System.currentTimeMillis(); + + System.out.println("Streams Performance [MB/sec read]: " + megaBytePerSec(endTime - startTime)); + + streams.close(); + try { + thread.join(); + } catch (Exception ex) { + // ignore + } + } + + public void processStreamWithSink() { + CountDownLatch latch = new CountDownLatch(1); + + final KafkaStreams streams = createKafkaStreamsWithSink(stateDir, kafka, zookeeper, latch); + + Thread thread = new Thread() { + public void run() { + streams.start(); + } + }; + thread.start(); + + long startTime = System.currentTimeMillis(); + + while (latch.getCount() > 0) { + try { + latch.await(); + } catch (InterruptedException ex) { + Thread.interrupted(); + } + } + + long endTime = System.currentTimeMillis(); + + System.out.println("Streams Performance [MB/sec read+write]: " + megaBytePerSec(endTime - startTime)); + + streams.close(); + try { + thread.join(); + } catch (Exception ex) { + // ignore + } + } + + public void processStreamWithStateStore() { + CountDownLatch latch = new CountDownLatch(1); + + final KafkaStreams streams = createKafkaStreamsWithStateStore(stateDir, kafka, zookeeper, latch); + + Thread thread = new Thread() { + public void run() { + streams.start(); + } + }; + thread.start(); + + long startTime = System.currentTimeMillis(); + + while (latch.getCount() > 0) { + try { + latch.await(); + } catch (InterruptedException ex) { + Thread.interrupted(); + } + } + + long endTime = System.currentTimeMillis(); + + System.out.println("Streams Performance [MB/sec read+store]: " + megaBytePerSec(endTime - startTime)); + + streams.close(); + try { + thread.join(); + } catch (Exception ex) { + // ignore + } + } + + public void produce() { + Properties props = new Properties(); + props.put(ProducerConfig.CLIENT_ID_CONFIG, "simple-benchmark-produce"); + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, LongSerializer.class); + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + + KafkaProducer producer = new KafkaProducer<>(props); + + byte[] value = new byte[VALUE_SIZE]; + long startTime = System.currentTimeMillis(); + + for (int i = 0; i < NUM_RECORDS; i++) { + producer.send(new ProducerRecord<>(SOURCE_TOPIC, (long) i, value)); + } + producer.close(); + + long endTime = System.currentTimeMillis(); + + System.out.println("Producer Performance [MB/sec write]: " + megaBytePerSec(endTime - startTime)); + } + + public void consume() { + Properties props = new Properties(); + props.put(ConsumerConfig.CLIENT_ID_CONFIG, "simple-benchmark-consumer"); + props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); + + KafkaConsumer consumer = new KafkaConsumer<>(props); + + List partitions = getAllPartitions(consumer, SOURCE_TOPIC); + consumer.assign(partitions); + consumer.seekToBeginning(partitions); + 
+ Long key = null; + + long startTime = System.currentTimeMillis(); + + while (true) { + ConsumerRecords records = consumer.poll(500); + if (records.isEmpty()) { + if (END_KEY.equals(key)) + break; + } else { + for (ConsumerRecord record : records) { + key = record.key(); + } + } + } + + long endTime = System.currentTimeMillis(); + + System.out.println("Consumer Performance [MB/sec read]: " + megaBytePerSec(endTime - startTime)); + } + + private KafkaStreams createKafkaStreams(File stateDir, String kafka, String zookeeper, final CountDownLatch latch) { + Properties props = new Properties(); + props.put(StreamsConfig.APPLICATION_ID_CONFIG, "simple-benchmark-streams"); + props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir.toString()); + props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeper); + props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + + KStreamBuilder builder = new KStreamBuilder(); + + KStream source = builder.stream(LONG_SERDE, BYTE_SERDE, SOURCE_TOPIC); + + source.process(new ProcessorSupplier() { + @Override + public Processor get() { + return new Processor() { + + @Override + public void init(ProcessorContext context) { + } + + @Override + public void process(Long key, byte[] value) { + if (END_KEY.equals(key)) { + latch.countDown(); + } + } + + @Override + public void punctuate(long timestamp) { + } + + @Override + public void close() { + } + }; + } + }); + + return new KafkaStreams(builder, props); + } + + private KafkaStreams createKafkaStreamsWithSink(File stateDir, String kafka, String zookeeper, final CountDownLatch latch) { + Properties props = new Properties(); + props.put(StreamsConfig.APPLICATION_ID_CONFIG, "simple-benchmark-streams-with-sink"); + props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir.toString()); + props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeper); + props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + + KStreamBuilder builder = new KStreamBuilder(); + + KStream source = builder.stream(LONG_SERDE, BYTE_SERDE, SOURCE_TOPIC); + + source.to(LONG_SERDE, BYTE_SERDE, SINK_TOPIC); + source.process(new ProcessorSupplier() { + @Override + public Processor get() { + return new Processor() { + + @Override + public void init(ProcessorContext context) { + } + + @Override + public void process(Long key, byte[] value) { + if (END_KEY.equals(key)) { + latch.countDown(); + } + } + + @Override + public void punctuate(long timestamp) { + } + + @Override + public void close() { + } + }; + } + }); + + return new KafkaStreams(builder, props); + } + + + private KafkaStreams createKafkaStreamsWithStateStore(File stateDir, String kafka, String zookeeper, final CountDownLatch latch) { + Properties props = new Properties(); + props.put(StreamsConfig.APPLICATION_ID_CONFIG, "simple-benchmark-streams-with-store"); + props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir.toString()); + props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka); + props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeper); + props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1); + props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + + KStreamBuilder builder = new KStreamBuilder(); + + builder.addStateStore(Stores.create("store").withLongKeys().withByteArrayValues().persistent().build()); + + KStream source = 
builder.stream(LONG_SERDE, BYTE_SERDE, SOURCE_TOPIC); + + source.process(new ProcessorSupplier() { + @Override + public Processor get() { + return new Processor() { + + KeyValueStore store; + + @SuppressWarnings("unchecked") + @Override + public void init(ProcessorContext context) { + store = (KeyValueStore) context.getStateStore("store"); + } + + @Override + public void process(Long key, byte[] value) { + store.put(key, value); + + if (END_KEY.equals(key)) { + latch.countDown(); + } + } + + @Override + public void punctuate(long timestamp) { + } + + @Override + public void close() { + } + }; + } + }, "store"); + + return new KafkaStreams(builder, props); + } + + private double megaBytePerSec(long time) { + return (double) (RECORD_SIZE * NUM_RECORDS / 1024 / 1024) / ((double) time / 1000); + } + + private List getAllPartitions(KafkaConsumer consumer, String... topics) { + ArrayList partitions = new ArrayList<>(); + + for (String topic : topics) { + for (PartitionInfo info : consumer.partitionsFor(topic)) { + partitions.add(new TopicPartition(info.topic(), info.partition())); + } + } + return partitions; + } + +} From ef676c15c54840d4b8a8ecb54a879b86b2547ea1 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Wed, 30 Mar 2016 19:30:34 -0700 Subject: [PATCH 035/267] KAFKA-3475; Introduce our own `MiniKdc` This also fixes KAFKA-3453 and KAFKA-2866. Author: Ismael Juma Reviewers: Gwen Shapira Closes #1155 from ijuma/kafka-3475-introduce-our-minikdc (cherry picked from commit 78d91dcd8805d850038df52718380a6f956abad7) Signed-off-by: Gwen Shapira --- build.gradle | 15 +- .../org/apache/kafka/common/utils/Utils.java | 2 +- .../main/scala/kafka/utils/CoreUtils.scala | 15 +- core/src/test/resources/minikdc-krb5.conf | 25 + core/src/test/resources/minikdc.ldiff | 47 ++ .../integration/kafka/api/SaslSetup.scala | 10 +- .../kafka/security/minikdc/MiniKdc.scala | 433 ++++++++++++++++++ .../integration/KafkaServerTestHarness.scala | 12 +- gradle/dependencies.gradle | 12 +- tests/kafkatest/services/security/minikdc.py | 12 +- 10 files changed, 557 insertions(+), 26 deletions(-) create mode 100644 core/src/test/resources/minikdc-krb5.conf create mode 100644 core/src/test/resources/minikdc.ldiff create mode 100644 core/src/test/scala/kafka/security/minikdc/MiniKdc.scala diff --git a/build.gradle b/build.gradle index 13a8b4e5ac18..d6f82a4f9129 100644 --- a/build.gradle +++ b/build.gradle @@ -338,7 +338,17 @@ project(':core') { testCompile project(':clients').sourceSets.test.output testCompile libs.bcpkix testCompile libs.easymock - testCompile libs.hadoopMiniKdc + testCompile(libs.apacheda) { + exclude group: 'xml-apis', module: 'xml-apis' + } + testCompile libs.apachedsCoreApi + testCompile libs.apachedsInterceptorKerberos + testCompile libs.apachedsProtocolShared + testCompile libs.apachedsProtocolKerberos + testCompile libs.apachedsProtocolLdap + testCompile libs.apachedsLdifPartition + testCompile libs.apachedsMavibotPartition + testCompile libs.apachedsJdbmPartition testCompile libs.junit testCompile libs.scalaTest } @@ -368,6 +378,9 @@ project(':core') { duplicatesStrategy 'exclude' } + systemTestLibs { + dependsOn testJar + } task genProtocolErrorDocs(type: JavaExec) { classpath = sourceSets.main.runtimeClasspath diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java index 01675487ff99..2a988222d403 100755 --- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java +++ 
b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java @@ -476,7 +476,7 @@ public static String stackTrace(Throwable e) { * @param daemon Should the thread block JVM shutdown? * @return The unstarted thread */ - public static Thread newThread(String name, Runnable runnable, Boolean daemon) { + public static Thread newThread(String name, Runnable runnable, boolean daemon) { Thread thread = new Thread(runnable, name); thread.setDaemon(daemon); thread.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { diff --git a/core/src/main/scala/kafka/utils/CoreUtils.scala b/core/src/main/scala/kafka/utils/CoreUtils.scala index fe2bebf195cf..5b6c59f48f32 100755 --- a/core/src/main/scala/kafka/utils/CoreUtils.scala +++ b/core/src/main/scala/kafka/utils/CoreUtils.scala @@ -57,13 +57,14 @@ object CoreUtils extends Logging { } /** - * Create a daemon thread - * @param name The name of the thread - * @param fun The function to execute in the thread - * @return The unstarted thread - */ - def daemonThread(name: String, fun: => Unit): Thread = - Utils.daemonThread(name, runnable(fun)) + * Create a thread + * @param name The name of the thread + * @param daemon Whether the thread should block JVM shutdown + * @param fun The function to execute in the thread + * @return The unstarted thread + */ + def newThread(name: String, daemon: Boolean)(fun: => Unit): Thread = + Utils.newThread(name, runnable(fun), daemon) /** * Do the given action and log any exceptions thrown without rethrowing them diff --git a/core/src/test/resources/minikdc-krb5.conf b/core/src/test/resources/minikdc-krb5.conf new file mode 100644 index 000000000000..060340487552 --- /dev/null +++ b/core/src/test/resources/minikdc-krb5.conf @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[libdefaults] +default_realm = {0} +udp_preference_limit = 1 + +[realms] +{0} = '{' + kdc = {1}:{2} +'}' diff --git a/core/src/test/resources/minikdc.ldiff b/core/src/test/resources/minikdc.ldiff new file mode 100644 index 000000000000..75e4dfdadf2e --- /dev/null +++ b/core/src/test/resources/minikdc.ldiff @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +dn: ou=users,dc=${0},dc=${1} +objectClass: organizationalUnit +objectClass: top +ou: users + +dn: uid=krbtgt,ou=users,dc=${0},dc=${1} +objectClass: top +objectClass: person +objectClass: inetOrgPerson +objectClass: krb5principal +objectClass: krb5kdcentry +cn: KDC Service +sn: Service +uid: krbtgt +userPassword: secret +krb5PrincipalName: krbtgt/${2}.${3}@${2}.${3} +krb5KeyVersionNumber: 0 + +dn: uid=ldap,ou=users,dc=${0},dc=${1} +objectClass: top +objectClass: person +objectClass: inetOrgPerson +objectClass: krb5principal +objectClass: krb5kdcentry +cn: LDAP +sn: Service +uid: ldap +userPassword: secret +krb5PrincipalName: ldap/${4}@${2}.${3} +krb5KeyVersionNumber: 0 diff --git a/core/src/test/scala/integration/kafka/api/SaslSetup.scala b/core/src/test/scala/integration/kafka/api/SaslSetup.scala index c36b28802b4f..8255e6a696fd 100644 --- a/core/src/test/scala/integration/kafka/api/SaslSetup.scala +++ b/core/src/test/scala/integration/kafka/api/SaslSetup.scala @@ -17,11 +17,11 @@ package kafka.api -import java.io.{File} +import java.io.File import javax.security.auth.login.Configuration -import kafka.utils.{JaasTestUtils,TestUtils} -import org.apache.hadoop.minikdc.MiniKdc +import kafka.security.minikdc.MiniKdc +import kafka.utils.{JaasTestUtils, TestUtils} import org.apache.kafka.common.security.JaasUtils import org.apache.kafka.common.security.kerberos.LoginManager @@ -39,8 +39,8 @@ case object Both extends SaslSetupMode * currently to setup a keytab and jaas files. */ trait SaslSetup { - private val workDir = new File(System.getProperty("test.dir", "build/tmp/test-workDir")) - private val kdcConf = MiniKdc.createConf() + private val workDir = TestUtils.tempDir() + private val kdcConf = MiniKdc.createConfig private val kdc = new MiniKdc(kdcConf, workDir) def startSasl(mode: SaslSetupMode = Both) { diff --git a/core/src/test/scala/kafka/security/minikdc/MiniKdc.scala b/core/src/test/scala/kafka/security/minikdc/MiniKdc.scala new file mode 100644 index 000000000000..14807bcabf81 --- /dev/null +++ b/core/src/test/scala/kafka/security/minikdc/MiniKdc.scala @@ -0,0 +1,433 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.security.minikdc + +import java.io._ +import java.net.InetSocketAddress +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.text.MessageFormat +import java.util.{Locale, Properties, UUID} + +import kafka.utils.{CoreUtils, Logging} + +import scala.collection.JavaConverters._ +import org.apache.commons.io.IOUtils +import org.apache.commons.lang.text.StrSubstitutor +import org.apache.directory.api.ldap.model.entry.{DefaultEntry, Entry} +import org.apache.directory.api.ldap.model.ldif.LdifReader +import org.apache.directory.api.ldap.model.name.Dn +import org.apache.directory.api.ldap.schema.extractor.impl.DefaultSchemaLdifExtractor +import org.apache.directory.api.ldap.schema.loader.LdifSchemaLoader +import org.apache.directory.api.ldap.schema.manager.impl.DefaultSchemaManager +import org.apache.directory.server.constants.ServerDNConstants +import org.apache.directory.server.core.DefaultDirectoryService +import org.apache.directory.server.core.api.{CacheService, DirectoryService, InstanceLayout} +import org.apache.directory.server.core.api.schema.SchemaPartition +import org.apache.directory.server.core.kerberos.KeyDerivationInterceptor +import org.apache.directory.server.core.partition.impl.btree.jdbm.{JdbmIndex, JdbmPartition} +import org.apache.directory.server.core.partition.ldif.LdifPartition +import org.apache.directory.server.kerberos.KerberosConfig +import org.apache.directory.server.kerberos.kdc.KdcServer +import org.apache.directory.server.kerberos.shared.crypto.encryption.KerberosKeyFactory +import org.apache.directory.server.kerberos.shared.keytab.{Keytab, KeytabEntry} +import org.apache.directory.server.protocol.shared.transport.{TcpTransport, UdpTransport} +import org.apache.directory.server.xdbm.Index +import org.apache.directory.shared.kerberos.KerberosTime +import org.apache.kafka.common.utils.Utils + +/** + * Mini KDC based on Apache Directory Server that can be embedded in tests or used from command line as a standalone + * KDC. + * + * MiniKdc sets 2 System properties when started and unsets them when stopped: + * + * - java.security.krb5.conf: set to the MiniKDC real/host/port + * - sun.security.krb5.debug: set to the debug value provided in the configuration + * + * As a result of this, multiple MiniKdc instances should not be started concurrently in the same JVM. + * + * MiniKdc default configuration values are: + * + * - org.name=EXAMPLE (used to create the REALM) + * - org.domain=COM (used to create the REALM) + * - kdc.bind.address=localhost + * - kdc.port=0 (ephemeral port) + * - instance=DefaultKrbServer + * - max.ticket.lifetime=86400000 (1 day) + * - max.renewable.lifetime604800000 (7 days) + * - transport=TCP + * - debug=false + * + * The generated krb5.conf forces TCP connections. + * + * Acknowledgements: this class is derived from the MiniKdc class in the hadoop-minikdc project (git commit + * d8d8ed35f00b15ee0f2f8aaf3fe7f7b42141286b). + * + * @constructor creates a new MiniKdc instance. + * @param config the MiniKdc configuration + * @param workDir the working directory which will contain krb5.conf, Apache DS files and any other files needed by + * MiniKdc. + * @throws Exception thrown if the MiniKdc could not be created. 
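+ *
+ * For illustration, typical embedded usage from a test looks roughly like the following sketch
+ * (the work directory helper, keytab location and principal names are examples only, not part of
+ * this class; it simply exercises the public methods defined below):
+ * {{{
+ * val workDir = TestUtils.tempDir()
+ * val kdc = new MiniKdc(MiniKdc.createConfig, workDir)
+ * kdc.start()
+ * kdc.createPrincipal(new File(workDir, "example.keytab"), "kafka/localhost", "client")
+ * // ... run Kerberos-dependent tests against the generated keytab and krb5.conf ...
+ * kdc.stop()
+ * }}}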
+ */ +class MiniKdc(config: Properties, workDir: File) extends Logging { + + if (!config.keySet.containsAll(MiniKdc.RequiredProperties.asJava)) { + val missingProperties = MiniKdc.RequiredProperties.filterNot(config.keySet.asScala) + throw new IllegalArgumentException(s"Missing configuration properties: $missingProperties") + } + + info("Configuration:") + info("---------------------------------------------------------------") + config.asScala.foreach { case (key, value) => + info(s"\t$key: $value") + } + info("---------------------------------------------------------------") + + private val orgName = config.getProperty(MiniKdc.OrgName) + private val orgDomain = config.getProperty(MiniKdc.OrgDomain) + private val dnString = s"dc=$orgName,dc=$orgDomain" + private val realm = s"${orgName.toUpperCase(Locale.ENGLISH)}.${orgDomain.toUpperCase(Locale.ENGLISH)}" + private val krb5conf = new File(workDir, "krb5.conf") + + private var _port = config.getProperty(MiniKdc.KdcPort).toInt + private var ds: DirectoryService = null + private var kdc: KdcServer = null + + def port: Int = _port + + def host: String = config.getProperty(MiniKdc.KdcBindAddress) + + def start() { + if (kdc != null) + throw new RuntimeException("KDC already started") + initDirectoryService() + initKdcServer() + initJvmKerberosConfig() + } + + private def initDirectoryService() { + ds = new DefaultDirectoryService + ds.setInstanceLayout(new InstanceLayout(workDir)) + ds.setCacheService(new CacheService) + + // first load the schema + val instanceLayout = ds.getInstanceLayout + val schemaPartitionDirectory = new File(instanceLayout.getPartitionsDirectory, "schema") + val extractor = new DefaultSchemaLdifExtractor(instanceLayout.getPartitionsDirectory) + extractor.extractOrCopy + + val loader = new LdifSchemaLoader(schemaPartitionDirectory) + val schemaManager = new DefaultSchemaManager(loader) + schemaManager.loadAllEnabled() + ds.setSchemaManager(schemaManager) + // Init the LdifPartition with schema + val schemaLdifPartition = new LdifPartition(schemaManager, ds.getDnFactory) + schemaLdifPartition.setPartitionPath(schemaPartitionDirectory.toURI) + + // The schema partition + val schemaPartition = new SchemaPartition(schemaManager) + schemaPartition.setWrappedPartition(schemaLdifPartition) + ds.setSchemaPartition(schemaPartition) + + val systemPartition = new JdbmPartition(ds.getSchemaManager, ds.getDnFactory) + systemPartition.setId("system") + systemPartition.setPartitionPath(new File(ds.getInstanceLayout.getPartitionsDirectory, systemPartition.getId).toURI) + systemPartition.setSuffixDn(new Dn(ServerDNConstants.SYSTEM_DN)) + systemPartition.setSchemaManager(ds.getSchemaManager) + ds.setSystemPartition(systemPartition) + + ds.getChangeLog.setEnabled(false) + ds.setDenormalizeOpAttrsEnabled(true) + ds.addLast(new KeyDerivationInterceptor) + + // create one partition + val orgName = config.getProperty(MiniKdc.OrgName).toLowerCase(Locale.ENGLISH) + val orgDomain = config.getProperty(MiniKdc.OrgDomain).toLowerCase(Locale.ENGLISH) + val partition = new JdbmPartition(ds.getSchemaManager, ds.getDnFactory) + partition.setId(orgName) + partition.setPartitionPath(new File(ds.getInstanceLayout.getPartitionsDirectory, orgName).toURI) + val dn = new Dn(dnString) + partition.setSuffixDn(dn) + ds.addPartition(partition) + + // indexes + val indexedAttributes = Set[Index[_, String]]( + new JdbmIndex[Entry]("objectClass", false), + new JdbmIndex[Entry]("dc", false), + new JdbmIndex[Entry]("ou", false) + ).asJava + 
partition.setIndexedAttributes(indexedAttributes) + + // And start the ds + ds.setInstanceId(config.getProperty(MiniKdc.Instance)) + ds.startup() + + // context entry, after ds.startup() + val entry = ds.newEntry(dn) + entry.add("objectClass", "top", "domain") + entry.add("dc", orgName) + ds.getAdminSession.add(entry) + } + + private def initKdcServer() { + + def addInitialEntriesToDirectoryService(bindAddress: String) { + val map = Map ( + "0" -> orgName.toLowerCase(Locale.ENGLISH), + "1" -> orgDomain.toLowerCase(Locale.ENGLISH), + "2" -> orgName.toUpperCase(Locale.ENGLISH), + "3" -> orgDomain.toUpperCase(Locale.ENGLISH), + "4" -> bindAddress + ) + val inputStream = MiniKdc.getResourceAsStream("minikdc.ldiff") + try addEntriesToDirectoryService(StrSubstitutor.replace(IOUtils.toString(inputStream), map.asJava)) + finally CoreUtils.swallow(inputStream.close()) + } + + val bindAddress = config.getProperty(MiniKdc.KdcBindAddress) + addInitialEntriesToDirectoryService(bindAddress) + + val kerberosConfig = new KerberosConfig + kerberosConfig.setMaximumRenewableLifetime(config.getProperty(MiniKdc.MaxRenewableLifetime).toLong) + kerberosConfig.setMaximumTicketLifetime(config.getProperty(MiniKdc.MaxTicketLifetime).toLong) + kerberosConfig.setSearchBaseDn(dnString) + kerberosConfig.setPaEncTimestampRequired(false) + kdc = new KdcServer(kerberosConfig) + kdc.setDirectoryService(ds) + + // transport + val transport = config.getProperty(MiniKdc.Transport) + val absTransport = transport.trim match { + case "TCP" => new TcpTransport(bindAddress, port, 3, 50) + case "UDP" => new UdpTransport(port) + case _ => throw new IllegalArgumentException(s"Invalid transport: $transport") + } + kdc.addTransports(absTransport) + kdc.setServiceName(config.getProperty(MiniKdc.Instance)) + kdc.start() + + // if using ephemeral port, update port number for binding + if (port == 0) + _port = absTransport.getAcceptor.getLocalAddress.asInstanceOf[InetSocketAddress].getPort + + info(s"MiniKdc listening at port: $port") + } + + private def initJvmKerberosConfig(): Unit = { + writeKrb5Conf() + System.setProperty(MiniKdc.JavaSecurityKrb5Conf, krb5conf.getAbsolutePath) + System.setProperty(MiniKdc.SunSecurityKrb5Debug, config.getProperty(MiniKdc.Debug, "false")) + info(s"MiniKdc setting JVM krb5.conf to: ${krb5conf.getAbsolutePath}") + refreshJvmKerberosConfig() + } + + private def writeKrb5Conf() { + val stringBuilder = new StringBuilder + val reader = new BufferedReader( + new InputStreamReader(MiniKdc.getResourceAsStream("minikdc-krb5.conf"), StandardCharsets.UTF_8)) + try { + var line: String = null + while ({line = reader.readLine(); line != null}) { + stringBuilder.append(line).append("{3}") + } + } finally CoreUtils.swallow(reader.close()) + val output = MessageFormat.format(stringBuilder.toString, realm, host, port.toString, System.lineSeparator()) + Files.write(krb5conf.toPath, output.getBytes(StandardCharsets.UTF_8)) + } + + private def refreshJvmKerberosConfig(): Unit = { + val klass = + if (System.getProperty("java.vendor").contains("IBM")) + Class.forName("com.ibm.security.krb5.internal.Config") + else + Class.forName("sun.security.krb5.Config") + klass.getMethod("refresh").invoke(klass) + } + + def stop() { + if (kdc != null) { + System.clearProperty(MiniKdc.JavaSecurityKrb5Conf) + System.clearProperty(MiniKdc.SunSecurityKrb5Debug) + kdc.stop() + try ds.shutdown() + catch { + case ex: Exception => error("Could not shutdown ApacheDS properly", ex) + } + } + } + + /** + * Creates a principal in the KDC with the 
specified user and password. + * + * An exception will be thrown if the principal cannot be created. + * + * @param principal principal name, do not include the domain. + * @param password password. + */ + private def createPrincipal(principal: String, password: String) { + val ldifContent = s""" + |dn: uid=$principal,ou=users,dc=${orgName.toLowerCase(Locale.ENGLISH)},dc=${orgDomain.toLowerCase(Locale.ENGLISH)} + |objectClass: top + |objectClass: person + |objectClass: inetOrgPerson + |objectClass: krb5principal + |objectClass: krb5kdcentry + |cn: $principal + |sn: $principal + |uid: $principal + |userPassword: $password + |krb5PrincipalName: ${principal}@${realm} + |krb5KeyVersionNumber: 0""".stripMargin + addEntriesToDirectoryService(ldifContent) + } + + /** + * Creates multiple principals in the KDC and adds them to a keytab file. + * + * An exception will be thrown if the principal cannot be created. + * + * @param keytabFile keytab file to add the created principals + * @param principals principals to add to the KDC, do not include the domain. + */ + def createPrincipal(keytabFile: File, principals: String*) { + val generatedPassword = UUID.randomUUID.toString + val keytab = new Keytab + val entries = principals.flatMap { principal => + createPrincipal(principal, generatedPassword) + val principalWithRealm = s"${principal}@${realm}" + val timestamp = new KerberosTime + KerberosKeyFactory.getKerberosKeys(principalWithRealm, generatedPassword).asScala.values.map { encryptionKey => + val keyVersion = encryptionKey.getKeyVersion.toByte + new KeytabEntry(principalWithRealm, 1, timestamp, keyVersion, encryptionKey) + } + } + keytab.setEntries(entries.asJava) + keytab.write(keytabFile) + } + + private def addEntriesToDirectoryService(ldifContent: String): Unit = { + val reader = new LdifReader(new StringReader(ldifContent)) + try { + for (ldifEntry <- reader.asScala) + ds.getAdminSession.add(new DefaultEntry(ds.getSchemaManager, ldifEntry.getEntry)) + } finally CoreUtils.swallow(reader.close()) + } + +} + +object MiniKdc { + + val JavaSecurityKrb5Conf = "java.security.krb5.conf" + val SunSecurityKrb5Debug = "sun.security.krb5.debug" + + def main(args: Array[String]) { + args match { + case Array(workDirPath, configPath, keytabPath, principals@ _*) if principals.nonEmpty => + val workDir = new File(workDirPath) + if (!workDir.exists) + throw new RuntimeException(s"Specified work directory does not exist: ${workDir.getAbsolutePath}") + val config = createConfig + val configFile = new File(configPath) + if (!configFile.exists) + throw new RuntimeException(s"Specified configuration does not exist: ${configFile.getAbsolutePath}") + + val userConfig = Utils.loadProps(configFile.getAbsolutePath) + userConfig.asScala.foreach { case (key, value) => + config.put(key, value) + } + val keytabFile = new File(keytabPath).getAbsoluteFile + start(workDir, config, keytabFile, principals) + case _ => + println("Arguments: []+") + sys.exit(1) + } + } + + private def start(workDir: File, config: Properties, keytabFile: File, principals: Seq[String]) { + val miniKdc = new MiniKdc(config, workDir) + miniKdc.start() + miniKdc.createPrincipal(keytabFile, principals: _*) + val infoMessage = s""" + | + |Standalone MiniKdc Running + |--------------------------------------------------- + | Realm : ${miniKdc.realm} + | Running at : ${miniKdc.host}:${miniKdc.port} + | krb5conf : ${miniKdc.krb5conf} + | + | created keytab : $keytabFile + | with principals : ${principals.mkString(", ")} + | + |Hit or kill to stop it + 
|--------------------------------------------------- + | + """.stripMargin + println(infoMessage) + Runtime.getRuntime.addShutdownHook(CoreUtils.newThread("minikdc-shutdown-hook", daemon = false) { + miniKdc.stop() + }) + } + + val OrgName = "org.name" + val OrgDomain = "org.domain" + val KdcBindAddress = "kdc.bind.address" + val KdcPort = "kdc.port" + val Instance = "instance" + val MaxTicketLifetime = "max.ticket.lifetime" + val MaxRenewableLifetime = "max.renewable.lifetime" + val Transport = "transport" + val Debug = "debug" + + private val RequiredProperties = Set(OrgName, OrgDomain, KdcBindAddress, KdcPort, Instance, Transport, + MaxTicketLifetime, MaxRenewableLifetime) + + private val DefaultConfig = Map( + KdcBindAddress -> "localhost", + KdcPort -> "0", + Instance -> "DefaultKrbServer", + OrgName -> "Example", + OrgDomain -> "COM", + Transport -> "TCP", + MaxTicketLifetime -> "86400000", + MaxRenewableLifetime -> "604800000", + Debug -> "false" + ) + + /** + * Convenience method that returns MiniKdc default configuration. + * + * The returned configuration is a copy, it can be customized before using + * it to create a MiniKdc. + */ + def createConfig: Properties = { + val properties = new Properties + DefaultConfig.foreach { case (k, v) => properties.setProperty(k, v) } + properties + } + + @throws[IOException] + def getResourceAsStream(resourceName: String): InputStream = { + val cl = Option(Thread.currentThread.getContextClassLoader).getOrElse(classOf[MiniKdc].getClassLoader) + Option(cl.getResourceAsStream(resourceName)).getOrElse { + throw new IOException(s"Can not read resource file `$resourceName`") + } + } + +} diff --git a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala index 676772f33118..2ca64f2fc114 100755 --- a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala +++ b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala @@ -77,17 +77,15 @@ trait KafkaServerTestHarness extends ZooKeeperTestHarness { // The following method does nothing by default, but // if the test case requires setting up a cluster ACL, // then it needs to be implemented. 
- setClusterAcl match { - case Some(f) => - f() - case None => // Nothing to do - } + setClusterAcl.foreach(_.apply) } @After override def tearDown() { - servers.foreach(_.shutdown()) - servers.foreach(server => CoreUtils.delete(server.config.logDirs)) + if (servers != null) { + servers.foreach(_.shutdown()) + servers.foreach(server => CoreUtils.delete(server.config.logDirs)) + } super.tearDown } diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 47158d64bf91..6ed317a216bc 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -23,6 +23,8 @@ ext { } versions += [ + apacheda: "1.0.0-M33", + apacheds: "2.0.0-M21", argparse4j: "0.5.0", bcpkix: "1.54", hadoop: "2.7.2", @@ -65,8 +67,16 @@ versions["baseScala"] = versions.scala.substring(0, versions.scala.lastIndexOf(" libs += [ argparse4j: "net.sourceforge.argparse4j:argparse4j:$versions.argparse4j", + apacheda: "org.apache.directory.api:api-all:$versions.apacheda", + apachedsCoreApi: "org.apache.directory.server:apacheds-core-api:$versions.apacheds", + apachedsInterceptorKerberos: "org.apache.directory.server:apacheds-interceptor-kerberos:$versions.apacheds", + apachedsProtocolShared: "org.apache.directory.server:apacheds-protocol-shared:$versions.apacheds", + apachedsProtocolKerberos: "org.apache.directory.server:apacheds-protocol-kerberos:$versions.apacheds", + apachedsProtocolLdap: "org.apache.directory.server:apacheds-protocol-ldap:$versions.apacheds", + apachedsLdifPartition: "org.apache.directory.server:apacheds-ldif-partition:$versions.apacheds", + apachedsMavibotPartition: "org.apache.directory.server:apacheds-mavibot-partition:$versions.apacheds", + apachedsJdbmPartition: "org.apache.directory.server:apacheds-jdbm-partition:$versions.apacheds", bcpkix: "org.bouncycastle:bcpkix-jdk15on:$versions.bcpkix", - hadoopMiniKdc: "org.apache.hadoop:hadoop-minikdc:$versions.hadoop", easymock: "org.easymock:easymock:$versions.easymock", jacksonDatabind: "com.fasterxml.jackson.core:jackson-databind:$versions.jackson", jacksonJaxrsJsonProvider: "com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:$versions.jackson", diff --git a/tests/kafkatest/services/security/minikdc.py b/tests/kafkatest/services/security/minikdc.py index 3b3a5f165602..b376e268f7f8 100644 --- a/tests/kafkatest/services/security/minikdc.py +++ b/tests/kafkatest/services/security/minikdc.py @@ -67,10 +67,9 @@ def start_node(self, node): principals = 'client ' + kafka_principals + self.extra_principals self.logger.info("Starting MiniKdc with principals " + principals) - lib_dir = "/opt/%s/core/build/dependant-testlibs" % kafka_dir(node) - kdc_jars = node.account.ssh_capture("ls " + lib_dir) - classpath = ":".join([os.path.join(lib_dir, jar.strip()) for jar in kdc_jars]) - cmd = "CLASSPATH=%s /opt/%s/bin/kafka-run-class.sh org.apache.hadoop.minikdc.MiniKdc %s %s %s %s 1>> %s 2>> %s &" % (classpath, kafka_dir(node), MiniKdc.WORK_DIR, MiniKdc.PROPS_FILE, MiniKdc.KEYTAB_FILE, principals, MiniKdc.LOG_FILE, MiniKdc.LOG_FILE) + jar_paths = self.core_jar_paths(node, "dependant-testlibs") + self.core_jar_paths(node, "libs") + classpath = ":".join(jar_paths) + cmd = "CLASSPATH=%s /opt/%s/bin/kafka-run-class.sh kafka.security.minikdc.MiniKdc %s %s %s %s 1>> %s 2>> %s &" % (classpath, kafka_dir(node), MiniKdc.WORK_DIR, MiniKdc.PROPS_FILE, MiniKdc.KEYTAB_FILE, principals, MiniKdc.LOG_FILE, MiniKdc.LOG_FILE) self.logger.debug("Attempting to start MiniKdc on %s with command: %s" % (str(node.account), cmd)) with 
node.account.monitor_log(MiniKdc.LOG_FILE) as monitor: node.account.ssh(cmd) @@ -82,6 +81,11 @@ def start_node(self, node): #KDC is set to bind openly (via 0.0.0.0). Change krb5.conf to hold the specific KDC address self.replace_in_file(MiniKdc.LOCAL_KRB5CONF_FILE, '0.0.0.0', node.account.hostname) + def core_jar_paths(self, node, lib_dir_name): + lib_dir = "/opt/%s/core/build/%s" % (kafka_dir(node), lib_dir_name) + jars = node.account.ssh_capture("ls " + lib_dir) + return [os.path.join(lib_dir, jar.strip()) for jar in jars] + def stop_node(self, node): self.logger.info("Stopping %s on %s" % (type(self).__name__, node.account.hostname)) node.account.kill_process("apacheds", allow_fail=False) From be822510c63d720dedf286c95588fc0c25a6a49c Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Wed, 30 Mar 2016 19:33:37 -0700 Subject: [PATCH 036/267] KAFKA-3381: Add system test for SimpleConsumerShell Author: Ashish Singh Reviewers: Geoff Anderson Closes #1053 from SinghAsDev/KAFKA-3381 (cherry picked from commit 9f6a6f97134a1d4969c91c4b4e9037b376e03440) Signed-off-by: Gwen Shapira --- .../services/simple_consumer_shell.py | 69 +++++++++++++++++ .../tests/simple_consumer_shell_test.py | 75 +++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 tests/kafkatest/services/simple_consumer_shell.py create mode 100644 tests/kafkatest/tests/simple_consumer_shell_test.py diff --git a/tests/kafkatest/services/simple_consumer_shell.py b/tests/kafkatest/services/simple_consumer_shell.py new file mode 100644 index 000000000000..8deee85d0d27 --- /dev/null +++ b/tests/kafkatest/services/simple_consumer_shell.py @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ducktape.services.background_thread import BackgroundThreadService + +from kafkatest.services.kafka.directory import kafka_dir + + +class SimpleConsumerShell(BackgroundThreadService): + + logs = { + "simple_consumer_shell_log": { + "path": "/mnt/simple_consumer_shell.log", + "collect_default": False} + } + + def __init__(self, context, num_nodes, kafka, topic, partition=0): + super(SimpleConsumerShell, self).__init__(context, num_nodes) + + self.kafka = kafka + self.topic = topic + self.partition = partition + self.output = "" + + def _worker(self, idx, node): + cmd = self.start_cmd(node) + self.logger.debug("SimpleConsumerShell %d command: %s" % (idx, cmd)) + self.output = "" + self.logger.debug(cmd) + for line in node.account.ssh_capture(cmd): + self.output += line + self.logger.debug(self.output) + + def start_cmd(self, node): + cmd = "/opt/%s/bin/" % kafka_dir(node) + cmd += "kafka-run-class.sh kafka.tools.SimpleConsumerShell" + cmd += " --topic %s --broker-list %s --partition %s --no-wait-at-logend" % (self.topic, self.kafka.bootstrap_servers(), self.partition) + + cmd += " 2>> /mnt/get_simple_consumer_shell.log | tee -a /mnt/get_simple_consumer_shell.log &" + return cmd + + def get_output(self): + return self.output + + def stop_node(self, node): + node.account.kill_process("SimpleConsumerShell", allow_fail=False) + if self.worker_threads is None: + return + + # block until the corresponding thread exits + if len(self.worker_threads) >= self.idx(node): + # Need to guard this because stop is preemptively called before the worker threads are added and started + self.worker_threads[self.idx(node) - 1].join() + + def clean_node(self, node): + node.account.kill_process("SimpleConsumerShell", clean_shutdown=False, allow_fail=False) + node.account.ssh("rm -rf /mnt/simple_consumer_shell.log", allow_fail=False) diff --git a/tests/kafkatest/tests/simple_consumer_shell_test.py b/tests/kafkatest/tests/simple_consumer_shell_test.py new file mode 100644 index 000000000000..74a7eeb911e3 --- /dev/null +++ b/tests/kafkatest/tests/simple_consumer_shell_test.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from ducktape.utils.util import wait_until +from ducktape.tests.test import Test +from kafkatest.services.simple_consumer_shell import SimpleConsumerShell +from kafkatest.services.verifiable_producer import VerifiableProducer + +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.services.kafka import KafkaService +TOPIC = "topic-simple-consumer-shell" +MAX_MESSAGES = 100 +NUM_PARTITIONS = 1 +REPLICATION_FACTOR = 1 + +class SimpleConsumerShellTest(Test): + """ + Tests SimpleConsumerShell tool + """ + def __init__(self, test_context): + super(SimpleConsumerShellTest, self).__init__(test_context) + self.num_zk = 1 + self.num_brokers = 1 + self.messages_received_count = 0 + self.topics = { + TOPIC: {'partitions': NUM_PARTITIONS, 'replication-factor': REPLICATION_FACTOR} + } + + self.zk = ZookeeperService(test_context, self.num_zk) + + def setUp(self): + self.zk.start() + + def start_kafka(self): + self.kafka = KafkaService( + self.test_context, self.num_brokers, + self.zk, topics=self.topics) + self.kafka.start() + + def run_producer(self): + # This will produce to kafka cluster + self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES) + self.producer.start() + wait_until(lambda: self.producer.num_acked == MAX_MESSAGES, timeout_sec=10, + err_msg="Timeout awaiting messages to be produced and acked") + + def start_simple_consumer_shell(self): + self.simple_consumer_shell = SimpleConsumerShell(self.test_context, 1, self.kafka, TOPIC) + self.simple_consumer_shell.start() + + def test_simple_consumer_shell(self): + """ + Tests if SimpleConsumerShell is fetching expected records + :return: None + """ + self.start_kafka() + self.run_producer() + self.start_simple_consumer_shell() + + # Assert that SimpleConsumerShell is fetching expected number of messages + wait_until(lambda: self.simple_consumer_shell.get_output().count("\n") == (MAX_MESSAGES + 1), timeout_sec=10, + err_msg="Timed out waiting to receive expected number of messages.") \ No newline at end of file From 36beebc755bc19a48e43d7dd99e9b5eed9b7c419 Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Thu, 31 Mar 2016 10:07:54 -0700 Subject: [PATCH 037/267] KAFKA-3451: Add basic HTML coverage report generation to gradle Author: Grant Henke Reviewers: Gwen Shapira, Ismael Juma, Ewen Cheslack-Postava Closes #1121 from granthenke/coverage (cherry picked from commit 623ab1e7c6497c000bc9c9978637f20542a3191c) Signed-off-by: Gwen Shapira --- README.md | 3 + build.gradle | 59 +++++++++++++++++++ core/src/main/scala/kafka/utils/ZkUtils.scala | 11 ++-- gradle/dependencies.gradle | 3 + 4 files changed, 71 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bc2ca3afb07e..97c5fef392e2 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,9 @@ Change the log4j setting in either `clients/src/test/resources/log4j.properties` ./gradlew -i -Dtest.single=RequestResponseSerializationTest core:test +### Generating test coverage reports ### + ./gradlew reportCoverage + ### Building a binary release gzipped tar ball ### ./gradlew clean ./gradlew releaseTarGz diff --git a/build.gradle b/build.gradle index d6f82a4f9129..da0152b990f9 100644 --- a/build.gradle +++ b/build.gradle @@ -26,12 +26,15 @@ buildscript { // For Apache Rat plugin to ignore non-Git files classpath "org.ajoberstar:grgit:1.5.0" classpath 'com.github.ben-manes:gradle-versions-plugin:0.12.0' + classpath 'org.scoverage:gradle-scoverage:2.0.1' } } allprojects { apply plugin: 
'idea' apply plugin: 'eclipse' + apply plugin: "jacoco" + repositories { mavenCentral() } @@ -249,8 +252,50 @@ subprojects { configProperties = [importControlFile: "$rootDir/checkstyle/import-control.xml"] } test.dependsOn('checkstyleMain', 'checkstyleTest') + + // Ignore core since its a scala project + if (it.path != ':core') { + // NOTE: Gradles Jacoco plugin does not support "offline instrumentation" this means that classes mocked by PowerMock + // may report 0 coverage, since the source was modified after initial instrumentation. + // See https://github.com/jacoco/jacoco/issues/51 + jacocoTestReport { + dependsOn tasks.test + sourceSets sourceSets.main + reports { + html.enabled = true + xml.enabled = true + csv.enabled = false + } + } + } +} + +// Aggregates all jacoco results into the root project directory +task jacocoRootReport(type: org.gradle.testing.jacoco.tasks.JacocoReport) { + def javaProjects = subprojects.findAll { it.path != ':core' } + + description = 'Generates an aggregate report from all subprojects' + dependsOn(javaProjects.test) + + additionalSourceDirs = files(javaProjects.sourceSets.main.allSource.srcDirs) + sourceDirectories = files(javaProjects.sourceSets.main.allSource.srcDirs) + classDirectories = files(javaProjects.sourceSets.main.output) + executionData = files(javaProjects.jacocoTestReport.executionData) + + reports { + html.enabled = true + xml.enabled = true + } + + // workaround to ignore projects that don't have any tests at all + onlyIf = { true } + doFirst { + executionData = files(executionData.findAll { it.exists() }) + } } +task reportCoverage(dependsOn: ['jacocoRootReport', 'core:reportScoverage']) + for ( sv in ['2_10', '2_11'] ) { String svInDot = sv.replaceAll( "_", ".") @@ -320,6 +365,7 @@ project(':core') { println "Building project 'core' with Scala version ${versions.scala}" apply plugin: 'scala' + apply plugin: "org.scoverage" archivesBaseName = "kafka_${versions.baseScala}" dependencies { @@ -351,7 +397,20 @@ project(':core') { testCompile libs.apachedsJdbmPartition testCompile libs.junit testCompile libs.scalaTest + + scoverage libs.scoveragePlugin + scoverage libs.scoverageRuntime + } + + jacocoTestReport.enabled = false + scoverage { + reportDir = file("${rootProject.buildDir}/scoverage") + highlighting = false + } + checkScoverage { + minimumRate = 0.0 } + checkScoverage.shouldRunAfter('test') configurations { // manually excludes some unnecessary dependencies diff --git a/core/src/main/scala/kafka/utils/ZkUtils.scala b/core/src/main/scala/kafka/utils/ZkUtils.scala index 49d3cfaaf8bf..bd8ec7ec8776 100644 --- a/core/src/main/scala/kafka/utils/ZkUtils.scala +++ b/core/src/main/scala/kafka/utils/ZkUtils.scala @@ -452,12 +452,13 @@ class ZkUtils(val zkClient: ZkClient, } catch { case e1: ZkBadVersionException => optionalChecker match { - case Some(checker) => return checker(this, path, data) - case _ => debug("Checker method is not passed skipping zkData match") + case Some(checker) => checker(this, path, data) + case _ => + debug("Checker method is not passed skipping zkData match") + warn("Conditional update of path %s with data %s and expected version %d failed due to %s" + .format(path, data,expectVersion, e1.getMessage)) + (false, -1) } - warn("Conditional update of path %s with data %s and expected version %d failed due to %s".format(path, data, - expectVersion, e1.getMessage)) - (false, -1) case e2: Exception => warn("Conditional update of path %s with data %s and expected version %d failed due to %s".format(path, data, 
expectVersion, e2.getMessage)) diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 6ed317a216bc..f0358385e93a 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -41,6 +41,7 @@ versions += [ rocksDB: "4.1.0", scalaTest: "2.2.6", scalaParserCombinators: "1.0.4", + scoverage: "1.1.1", slf4j: "1.7.18", snappy: "1.1.2.1", zkclient: "0.8", @@ -96,6 +97,8 @@ libs += [ scalaCompiler: "org.scala-lang:scala-compiler:$versions.scala", scalaTest: "org.scalatest:scalatest_$versions.baseScala:$versions.scalaTest", scalaParserCombinators: "org.scala-lang.modules:scala-parser-combinators_$versions.baseScala:$versions.scalaParserCombinators", + scoveragePlugin: "org.scoverage:scalac-scoverage-plugin_$versions.baseScala:$versions.scoverage", + scoverageRuntime: "org.scoverage:scalac-scoverage-runtime_$versions.baseScala:$versions.scoverage", slf4jApi: "org.slf4j:slf4j-api:$versions.slf4j", slf4jlog4j: "org.slf4j:slf4j-log4j12:$versions.slf4j", snappy: "org.xerial.snappy:snappy-java:$versions.snappy", From 6badd89ad3ef86111d99df3d345b7fff06b294c9 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Fri, 1 Apr 2016 10:52:26 -0700 Subject: [PATCH 038/267] KAFKA-2910: Close Zookeeper clients in unit tests Zookeeper clients that are not closed after the server is shutdown keep trying to reconnect, reloading JAAS configuration. This impacts subsequent tests which rely on JAAS config to be reset. Author: Rajini Sivaram Reviewers: Flavio Junqueira , Ewen Cheslack-Postava Closes #1171 from rajinisivaram/KAFKA-2910 (cherry picked from commit 77142f6baeb35e6de9608a7372113f72ea330936) Signed-off-by: Ewen Cheslack-Postava --- core/src/test/scala/unit/kafka/utils/TestUtils.scala | 1 + core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala | 4 +++- core/src/test/scala/unit/kafka/zk/ZKPathTest.scala | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/unit/kafka/utils/TestUtils.scala b/core/src/test/scala/unit/kafka/utils/TestUtils.scala index a1e79128dda9..be7741df5cba 100755 --- a/core/src/test/scala/unit/kafka/utils/TestUtils.scala +++ b/core/src/test/scala/unit/kafka/utils/TestUtils.scala @@ -585,6 +585,7 @@ object TestUtils extends Logging { def updateConsumerOffset(config : ConsumerConfig, path : String, offset : Long) = { val zkUtils = ZkUtils(config.zkConnect, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) zkUtils.updatePersistentPath(path, offset.toString) + zkUtils.close() } diff --git a/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala b/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala index fb53d775cf55..32c7a5dc5911 100644 --- a/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala +++ b/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala @@ -91,6 +91,7 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, JaasUtils.isZkSecurityEnabled()) val nodeExists = zkUtils.pathExists("/tmp/zktest") Assert.assertFalse(nodeExists) + zkUtils.close() } /***** @@ -137,7 +138,7 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { val zk1 = zkUtils.zkConnection.getZookeeper //Creates a second session - val (_, zkConnection2) = ZkUtils.createZkClientAndConnection(zkConnect, zkSessionTimeoutMs, zkConnectionTimeout) + val (zkClient2, zkConnection2) = ZkUtils.createZkClientAndConnection(zkConnect, zkSessionTimeoutMs, zkConnectionTimeout) val zk2 = zkConnection2.getZookeeper var zwe = 
new ZKCheckedEphemeral(path, "", zk2, JaasUtils.isZkSecurityEnabled()) @@ -153,6 +154,7 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { gotException = true } Assert.assertTrue(gotException) + zkClient2.close() } /** diff --git a/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala b/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala index 65dd5899c85f..92fae022a728 100644 --- a/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala +++ b/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala @@ -43,6 +43,7 @@ class ZKPathTest extends ZooKeeperTestHarness { case configException: ConfigException => case exception: Throwable => fail("Should have thrown ConfigException") } + zkUtils.close() } @Test @@ -57,6 +58,7 @@ class ZKPathTest extends ZooKeeperTestHarness { } assertTrue("Failed to create persistent path", zkUtils.pathExists(path)) + zkUtils.close() } @Test @@ -73,6 +75,7 @@ class ZKPathTest extends ZooKeeperTestHarness { case configException: ConfigException => case exception: Throwable => fail("Should have thrown ConfigException") } + zkUtils.close() } @Test @@ -87,6 +90,7 @@ class ZKPathTest extends ZooKeeperTestHarness { } assertTrue("Failed to create persistent path", zkUtils.pathExists(path)) + zkUtils.close() } @Test @@ -103,6 +107,7 @@ class ZKPathTest extends ZooKeeperTestHarness { case configException: ConfigException => case exception: Throwable => fail("Should have thrown ConfigException") } + zkUtils.close() } @Test @@ -117,6 +122,7 @@ class ZKPathTest extends ZooKeeperTestHarness { } assertTrue("Failed to create ephemeral path", zkUtils.pathExists(path)) + zkUtils.close() } @Test @@ -133,6 +139,7 @@ class ZKPathTest extends ZooKeeperTestHarness { case configException: ConfigException => case exception: Throwable => fail("Should have thrown ConfigException") } + zkUtils.close() } @Test @@ -149,5 +156,6 @@ class ZKPathTest extends ZooKeeperTestHarness { } assertTrue("Failed to create persistent path", zkUtils.pathExists(actualPath)) + zkUtils.close() } } From fd07af3fda837a79c5c754575edb7ad25c36666d Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 1 Apr 2016 13:14:47 -0700 Subject: [PATCH 039/267] MINOR: add null check for aggregate and reduce operators Author: Guozhang Wang Reviewers: Yasuhiro Matsuda, Gwen Shapira Closes #1175 from guozhangwang/KSNullPointerException (cherry picked from commit ae939467e8aec38f47e2474e74e7ab7ea29c2840) Signed-off-by: Gwen Shapira --- .../kafka/streams/kstream/internals/KStreamAggregate.java | 5 +++++ .../kafka/streams/kstream/internals/KStreamReduce.java | 5 +++++ .../kafka/streams/kstream/internals/KTableAggregate.java | 5 +++++ .../apache/kafka/streams/kstream/internals/KTableReduce.java | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamAggregate.java index f41bfa6ac653..871a12d42dfb 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamAggregate.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamAggregate.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.kstream.internals; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.kstream.Aggregator; import org.apache.kafka.streams.kstream.Initializer; import org.apache.kafka.streams.processor.AbstractProcessor; @@ -62,6 +63,10 @@ public void init(ProcessorContext context) { @Override public void 
process(K key, V value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for KStream aggregate operator with state " + storeName + " should not be null."); + T oldAgg = store.get(key); if (oldAgg == null) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamReduce.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamReduce.java index 0ec046533b40..e37fe341289c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamReduce.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamReduce.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.kstream.internals; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.kstream.Reducer; import org.apache.kafka.streams.processor.AbstractProcessor; import org.apache.kafka.streams.processor.Processor; @@ -59,6 +60,10 @@ public void init(ProcessorContext context) { @Override public void process(K key, V value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for KStream reduce operator with state " + storeName + " should not be null."); + V oldAgg = store.get(key); V newAgg = oldAgg; diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableAggregate.java index 6ce776a78671..806c6e95000c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableAggregate.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableAggregate.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.kstream.internals; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.kstream.Aggregator; import org.apache.kafka.streams.kstream.Initializer; import org.apache.kafka.streams.processor.AbstractProcessor; @@ -64,6 +65,10 @@ public void init(ProcessorContext context) { @Override public void process(K key, Change value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for KTable aggregate operator with state " + storeName + " should not be null."); + T oldAgg = store.get(key); if (oldAgg == null) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableReduce.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableReduce.java index 0d1b55a8a82c..d56b3aec1ebb 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableReduce.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableReduce.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.kstream.internals; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.kstream.Reducer; import org.apache.kafka.streams.processor.AbstractProcessor; import org.apache.kafka.streams.processor.Processor; @@ -61,6 +62,10 @@ public void init(ProcessorContext context) { @Override public void process(K key, Change value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for KTable reduce operator with state " + storeName + " should not be null."); + V oldAgg = store.get(key); V newAgg = oldAgg; From e32aeec97b472dfa7c5e089407b778225d65cdf7 Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Fri, 1 Apr 2016 13:56:11 -0700 Subject: [PATCH 040/267] MINOR: Fix BNF output for protocol arrays 
conataining primitives in docs Before this patch arrays containing primitive types were not output: ``` Metadata Request (Version: 0) => [topics] ``` After this patch the type is listed: ``` Metadata Request (Version: 0) => [topics] topics => STRING ``` Author: Grant Henke Reviewers: Ashish Singh, Gwen Shapira Closes #1174 from granthenke/protocol-arrays (cherry picked from commit b865413cd8b71f905ad4ee695f193dcf91ee0595) Signed-off-by: Gwen Shapira --- .../java/org/apache/kafka/common/protocol/Protocol.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java index 43110b515289..475a4f2c362e 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java @@ -792,13 +792,13 @@ private static void schemaToBnfHtml(Schema schema, StringBuilder b, int indentSi b.append(field.name); b.append("] "); Type innerType = ((ArrayOf) field.type).type(); - if (innerType instanceof Schema && !subTypes.containsKey(field.name)) - subTypes.put(field.name, (Schema) innerType); + if (!subTypes.containsKey(field.name)) + subTypes.put(field.name, innerType); } else if (field.type instanceof Schema) { b.append(field.name); b.append(" "); if (!subTypes.containsKey(field.name)) - subTypes.put(field.name, (Schema) field.type); + subTypes.put(field.name, field.type); } else { b.append(field.name); b.append(" "); From ed60ecffae0d34c90984ee8ec3c147012b3ecb26 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Sat, 2 Apr 2016 23:02:19 -0700 Subject: [PATCH 041/267] KAFKA-3486: fix autocommit when partitions assigned manually Author: Jason Gustafson Reviewers: Ewen Cheslack-Postava Closes #1169 from hachikuji/KAFKA-3486 (cherry picked from commit dd5480a47eb0f45214c179b7f14ffaf493164222) Signed-off-by: Ewen Cheslack-Postava --- .../internals/ConsumerCoordinator.java | 54 +++----- .../internals/ConsumerCoordinatorTest.java | 122 ++++++++++++++++-- 2 files changed, 134 insertions(+), 42 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index e582ce3fc71b..a3649878bfe2 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -110,7 +110,13 @@ public ConsumerCoordinator(ConsumerNetworkClient client, addMetadataListener(); - this.autoCommitTask = autoCommitEnabled ? 
new AutoCommitTask(autoCommitIntervalMs) : null; + if (autoCommitEnabled) { + this.autoCommitTask = new AutoCommitTask(autoCommitIntervalMs); + this.autoCommitTask.reschedule(); + } else { + this.autoCommitTask = null; + } + this.sensors = new ConsumerCoordinatorMetrics(metrics, metricGrpPrefix); this.interceptors = interceptors; this.excludeInternalTopics = excludeInternalTopics; @@ -187,9 +193,9 @@ protected void onJoinComplete(int generation, // give the assignor a chance to update internal state based on the received assignment assignor.onAssignment(assignment); - // restart the autocommit task if needed + // reschedule the auto commit starting from now if (autoCommitEnabled) - autoCommitTask.enable(); + autoCommitTask.reschedule(); // execute the user's callback after rebalance ConsumerRebalanceListener listener = subscriptions.listener(); @@ -384,52 +390,36 @@ public void commitOffsetsSync(Map offsets) { private class AutoCommitTask implements DelayedTask { private final long interval; - private boolean enabled = false; - private boolean requestInFlight = false; public AutoCommitTask(long interval) { this.interval = interval; } - public void enable() { - if (!enabled) { - // there shouldn't be any instances scheduled, but call unschedule anyway to ensure - // that this task is only ever scheduled once - client.unschedule(this); - this.enabled = true; - - if (!requestInFlight) { - long now = time.milliseconds(); - client.schedule(this, interval + now); - } - } - } - - public void disable() { - this.enabled = false; - client.unschedule(this); + private void reschedule() { + client.schedule(this, time.milliseconds() + interval); } private void reschedule(long at) { - if (enabled) - client.schedule(this, at); + client.schedule(this, at); } public void run(final long now) { - if (!enabled) - return; - if (coordinatorUnknown()) { log.debug("Cannot auto-commit offsets for group {} since the coordinator is unknown", groupId); - client.schedule(this, now + retryBackoffMs); + reschedule(now + retryBackoffMs); + return; + } + + if (needRejoin()) { + // skip the commit when we're rejoining since we'll commit offsets synchronously + // before the revocation callback is invoked + reschedule(now + interval); return; } - requestInFlight = true; commitOffsetsAsync(subscriptions.allConsumed(), new OffsetCommitCallback() { @Override public void onComplete(Map offsets, Exception exception) { - requestInFlight = false; if (exception == null) { reschedule(now + interval); } else if (exception instanceof SendFailedException) { @@ -446,10 +436,6 @@ public void onComplete(Map offsets, Exception private void maybeAutoCommitOffsetsSync() { if (autoCommitEnabled) { - // disable periodic commits prior to committing synchronously. 
note that they will - // be re-enabled after a rebalance completes - autoCommitTask.disable(); - try { commitOffsetsSync(subscriptions.allConsumed()); } catch (WakeupException e) { diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java index 8844adc80b96..623e5ef93c1e 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java @@ -77,11 +77,11 @@ public class ConsumerCoordinatorTest { private String topicName = "test"; private String groupId = "test-group"; private TopicPartition tp = new TopicPartition(topicName, 0); - private int sessionTimeoutMs = 10; - private int heartbeatIntervalMs = 2; + private int sessionTimeoutMs = 10000; + private int heartbeatIntervalMs = 5000; private long retryBackoffMs = 100; private boolean autoCommitEnabled = false; - private long autoCommitIntervalMs = 5000; + private long autoCommitIntervalMs = 2000; private MockPartitionAssignor partitionAssignor = new MockPartitionAssignor(); private List assignors = Arrays.asList(partitionAssignor); private MockTime time; @@ -110,7 +110,7 @@ public void setup() { this.partitionAssignor.clear(); client.setNode(node); - this.coordinator = buildCoordinator(metrics, assignors, ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT); + this.coordinator = buildCoordinator(metrics, assignors, ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, autoCommitEnabled); } @After @@ -546,7 +546,7 @@ public void testExcludeInternalTopicsConfigOption() { @Test public void testIncludeInternalTopicsConfigOption() { - coordinator = buildCoordinator(new Metrics(), assignors, false); + coordinator = buildCoordinator(new Metrics(), assignors, false, false); subscriptions.subscribe(Pattern.compile(".*"), rebalanceListener); metadata.update(TestUtils.singletonCluster(TopicConstants.GROUP_METADATA_TOPIC_NAME, 2), time.milliseconds()); @@ -632,6 +632,107 @@ public void testCommitOffsetOnly() { assertEquals(100L, subscriptions.committed(tp).offset()); } + @Test + public void testAutoCommitDynamicAssignment() { + final String consumerId = "consumer"; + + ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + + subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); + subscriptions.needReassignment(); + + client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); + coordinator.ensureCoordinatorKnown(); + + client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); + client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); + coordinator.ensurePartitionAssignment(); + + subscriptions.seek(tp, 100); + + client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); + time.sleep(autoCommitIntervalMs); + consumerClient.poll(0); + + assertEquals(100L, subscriptions.committed(tp).offset()); + } + + @Test + public void testAutoCommitDynamicAssignmentRebalance() { + final String consumerId = "consumer"; + + ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + + subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); + subscriptions.needReassignment(); + + 
client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); + coordinator.ensureCoordinatorKnown(); + + // haven't joined, so should not cause a commit + time.sleep(autoCommitIntervalMs); + consumerClient.poll(0); + + client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); + client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); + coordinator.ensurePartitionAssignment(); + + subscriptions.seek(tp, 100); + + client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); + time.sleep(autoCommitIntervalMs); + consumerClient.poll(0); + + assertEquals(100L, subscriptions.committed(tp).offset()); + } + + @Test + public void testAutoCommitManualAssignment() { + ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + + subscriptions.assignFromUser(Arrays.asList(tp)); + subscriptions.seek(tp, 100); + + client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); + coordinator.ensureCoordinatorKnown(); + + client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); + time.sleep(autoCommitIntervalMs); + consumerClient.poll(0); + + assertEquals(100L, subscriptions.committed(tp).offset()); + } + + @Test + public void testAutoCommitManualAssignmentCoordinatorUnknown() { + ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + + subscriptions.assignFromUser(Arrays.asList(tp)); + subscriptions.seek(tp, 100); + + // no commit initially since coordinator is unknown + consumerClient.poll(0); + time.sleep(autoCommitIntervalMs); + consumerClient.poll(0); + + assertNull(subscriptions.committed(tp)); + + // now find the coordinator + client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); + coordinator.ensureCoordinatorKnown(); + + // sleep only for the retry backoff + time.sleep(retryBackoffMs); + client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); + consumerClient.poll(0); + + assertEquals(100L, subscriptions.committed(tp).offset()); + } + @Test public void testCommitOffsetMetadata() { subscriptions.assignFromUser(Arrays.asList(tp)); @@ -896,7 +997,8 @@ public void testProtocolMetadataOrder() { RangeAssignor range = new RangeAssignor(); try (Metrics metrics = new Metrics(time)) { - ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(roundRobin, range), ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT); + ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(roundRobin, range), + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, false); List metadata = coordinator.metadata(); assertEquals(2, metadata.size()); assertEquals(roundRobin.name(), metadata.get(0).name()); @@ -904,7 +1006,8 @@ public void testProtocolMetadataOrder() { } try (Metrics metrics = new Metrics(time)) { - ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(range, roundRobin), ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT); + ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(range, roundRobin), + ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, false); List metadata = coordinator.metadata(); assertEquals(2, metadata.size()); assertEquals(range.name(), metadata.get(0).name()); @@ -912,7 +1015,10 @@ public void testProtocolMetadataOrder() { } } - private ConsumerCoordinator 
buildCoordinator(Metrics metrics, List assignors, boolean excludeInternalTopics) { + private ConsumerCoordinator buildCoordinator(Metrics metrics, + List assignors, + boolean excludeInternalTopics, + boolean autoCommitEnabled) { return new ConsumerCoordinator( consumerClient, groupId, From 6d2d9f6702861f0c575b0b58e233c80907ada700 Mon Sep 17 00:00:00 2001 From: Paul Cavallaro Date: Sun, 3 Apr 2016 18:14:53 -0700 Subject: [PATCH 042/267] MINOR: Fix small typo in design section Sentence was missing "as", minor grammar clean up. Author: Paul Cavallaro Reviewers: Ewen Cheslack-Postava Closes #1151 from paulcavallaro/docs-fix (cherry picked from commit 3a20ba305517ace78ea5f54554bd3b333f0b7d3d) Signed-off-by: Ewen Cheslack-Postava --- docs/design.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design.html b/docs/design.html index ad40431eeb18..a97a0adecc0b 100644 --- a/docs/design.html +++ b/docs/design.html @@ -300,7 +300,7 @@

    4.8 Log Compaction

    This retention policy can be set per-topic, so a single cluster can have some topics where retention is enforced by size or time and other topics where retention is enforced by compaction.

    -This functionality is inspired by one of LinkedIn's oldest and most successful pieces of infrastructure—a database changelog caching service called Databus. Unlike most log-structured storage systems Kafka is built for subscription and organizes data for fast linear reads and writes. Unlike Databus, Kafka acts a source-of-truth store so it is useful even in situations where the upstream data source would not otherwise be replayable. +This functionality is inspired by one of LinkedIn's oldest and most successful pieces of infrastructure—a database changelog caching service called Databus. Unlike most log-structured storage systems Kafka is built for subscription and organizes data for fast linear reads and writes. Unlike Databus, Kafka acts as a source-of-truth store so it is useful even in situations where the upstream data source would not otherwise be replayable.

    Log Compaction Basics

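As a concrete companion to the per-topic retention note in the hunk above, here is a minimal sketch using the stock command-line tools. The topic names ("app-state", "app-events") and the ZooKeeper address are placeholders, not values taken from this patch; one topic opts into compaction while another keeps time-based retention via topic-level overrides.

    # create a compacted topic (placeholder names and addresses)
    bin/kafka-topics.sh --zookeeper localhost:2181 --create --topic app-state \
        --partitions 1 --replication-factor 1 --config cleanup.policy=compact

    # keep another topic on time-based retention of seven days
    bin/kafka-configs.sh --zookeeper localhost:2181 --alter --entity-type topics \
        --entity-name app-events --add-config cleanup.policy=delete,retention.ms=604800000

Both commands set only topic-level overrides, so the broker-wide log.cleanup.policy default stays in effect for every other topic.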
    From 99ab50ddb2bc52d3278955046617bd918db2463c Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Sun, 3 Apr 2016 19:04:48 -0700 Subject: [PATCH 043/267] MINOR: Clean up of SourceTaskOffsetCommiter Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1170 from Ishiihara/minor-cleanup (cherry picked from commit 83cf38545be4614bd1f6b1759ada851fb38d63b0) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/SourceTaskOffsetCommitter.java | 9 +++------ .../java/org/apache/kafka/connect/runtime/Worker.java | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceTaskOffsetCommitter.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceTaskOffsetCommitter.java index bee24e7b1fca..c7f869eb3a5d 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceTaskOffsetCommitter.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceTaskOffsetCommitter.java @@ -17,7 +17,6 @@ package org.apache.kafka.connect.runtime; -import org.apache.kafka.common.utils.Time; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.util.ConnectorTaskId; import org.slf4j.Logger; @@ -45,13 +44,11 @@ class SourceTaskOffsetCommitter { private static final Logger log = LoggerFactory.getLogger(SourceTaskOffsetCommitter.class); - private Time time; private WorkerConfig config; private ScheduledExecutorService commitExecutorService = null; - private HashMap committers = new HashMap<>(); + private final HashMap committers = new HashMap<>(); - SourceTaskOffsetCommitter(Time time, WorkerConfig config) { - this.time = time; + SourceTaskOffsetCommitter(WorkerConfig config) { this.config = config; commitExecutorService = Executors.newSingleThreadScheduledExecutor(); } @@ -96,7 +93,7 @@ public void remove(ConnectorTaskId id) { } } - public void commit(ConnectorTaskId id, WorkerSourceTask workerTask) { + private void commit(ConnectorTaskId id, WorkerSourceTask workerTask) { final ScheduledCommitTask task; synchronized (committers) { task = committers.get(id); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java index 1a9ff110db58..e1a806a2c303 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java @@ -120,7 +120,7 @@ public void start() { producer = new KafkaProducer<>(producerProps); offsetBackingStore.start(); - sourceTaskOffsetCommitter = new SourceTaskOffsetCommitter(time, config); + sourceTaskOffsetCommitter = new SourceTaskOffsetCommitter(config); log.info("Worker started"); } From e0ac36f05fb5dfecf69f5746b8bd8680ca251f50 Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Sun, 3 Apr 2016 20:04:36 -0700 Subject: [PATCH 044/267] KAFKA-3483: Restructure ducktape tests to simplify running subsets of tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … tests Author: Grant Henke Reviewers: Geoff Anderson , Ewen Cheslack-Postava Closes #1162 from granthenke/ducktape-structure (cherry picked from commit 45c585b4f7e3d5e5dd5297b4d121badbd2052922) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/benchmarks/__init__.py | 14 ++++++++++++++ tests/kafkatest/benchmarks/core/__init__.py | 15 +++++++++++++++ .../{tests => benchmarks/core}/benchmark_test.py | 0 
tests/kafkatest/tests/client/__init__.py | 15 +++++++++++++++ .../tests/{ => client}/compression_test.py | 0 .../{ => client}/consumer_rolling_upgrade_test.py | 0 .../kafkatest/tests/{ => client}/consumer_test.py | 0 .../{ => client}/message_format_change_test.py | 0 tests/kafkatest/tests/{ => client}/quota_test.py | 0 tests/kafkatest/tests/connect/__init__.py | 15 +++++++++++++++ .../{ => connect}/connect_distributed_test.py | 0 .../tests/{ => connect}/connect_rest_test.py | 0 .../kafkatest/tests/{ => connect}/connect_test.py | 0 .../templates/connect-distributed.properties | 0 .../templates/connect-file-sink.properties | 0 .../templates/connect-file-source.properties | 0 .../templates/connect-standalone.properties | 0 tests/kafkatest/tests/core/__init__.py | 15 +++++++++++++++ .../compatibility_test_new_broker_test.py | 0 .../{ => core}/consumer_group_command_test.py | 0 .../tests/{ => core}/get_offset_shell_test.py | 0 .../tests/{ => core}/mirror_maker_test.py | 0 .../tests/{ => core}/reassign_partitions_test.py | 0 .../tests/{ => core}/replication_test.py | 0 .../{ => core}/security_rolling_upgrade_test.py | 0 .../{ => core}/simple_consumer_shell_test.py | 0 tests/kafkatest/tests/{ => core}/upgrade_test.py | 0 .../{ => core}/zookeeper_security_upgrade_test.py | 0 tests/kafkatest/tests/streams/__init__.py | 15 +++++++++++++++ .../tests/{ => streams}/streams_bounce_test.py | 0 .../tests/{ => streams}/streams_smoke_test.py | 0 tests/kafkatest/tests/tools/__init__.py | 15 +++++++++++++++ .../tests/{ => tools}/log4j_appender_test.py | 0 33 files changed, 104 insertions(+) create mode 100644 tests/kafkatest/benchmarks/__init__.py create mode 100644 tests/kafkatest/benchmarks/core/__init__.py rename tests/kafkatest/{tests => benchmarks/core}/benchmark_test.py (100%) create mode 100644 tests/kafkatest/tests/client/__init__.py rename tests/kafkatest/tests/{ => client}/compression_test.py (100%) rename tests/kafkatest/tests/{ => client}/consumer_rolling_upgrade_test.py (100%) rename tests/kafkatest/tests/{ => client}/consumer_test.py (100%) rename tests/kafkatest/tests/{ => client}/message_format_change_test.py (100%) rename tests/kafkatest/tests/{ => client}/quota_test.py (100%) create mode 100644 tests/kafkatest/tests/connect/__init__.py rename tests/kafkatest/tests/{ => connect}/connect_distributed_test.py (100%) rename tests/kafkatest/tests/{ => connect}/connect_rest_test.py (100%) rename tests/kafkatest/tests/{ => connect}/connect_test.py (100%) rename tests/kafkatest/tests/{ => connect}/templates/connect-distributed.properties (100%) rename tests/kafkatest/tests/{ => connect}/templates/connect-file-sink.properties (100%) rename tests/kafkatest/tests/{ => connect}/templates/connect-file-source.properties (100%) rename tests/kafkatest/tests/{ => connect}/templates/connect-standalone.properties (100%) create mode 100644 tests/kafkatest/tests/core/__init__.py rename tests/kafkatest/tests/{ => core}/compatibility_test_new_broker_test.py (100%) rename tests/kafkatest/tests/{ => core}/consumer_group_command_test.py (100%) rename tests/kafkatest/tests/{ => core}/get_offset_shell_test.py (100%) rename tests/kafkatest/tests/{ => core}/mirror_maker_test.py (100%) rename tests/kafkatest/tests/{ => core}/reassign_partitions_test.py (100%) rename tests/kafkatest/tests/{ => core}/replication_test.py (100%) rename tests/kafkatest/tests/{ => core}/security_rolling_upgrade_test.py (100%) rename tests/kafkatest/tests/{ => core}/simple_consumer_shell_test.py (100%) rename tests/kafkatest/tests/{ => 
core}/upgrade_test.py (100%) rename tests/kafkatest/tests/{ => core}/zookeeper_security_upgrade_test.py (100%) create mode 100644 tests/kafkatest/tests/streams/__init__.py rename tests/kafkatest/tests/{ => streams}/streams_bounce_test.py (100%) rename tests/kafkatest/tests/{ => streams}/streams_smoke_test.py (100%) create mode 100644 tests/kafkatest/tests/tools/__init__.py rename tests/kafkatest/tests/{ => tools}/log4j_appender_test.py (100%) diff --git a/tests/kafkatest/benchmarks/__init__.py b/tests/kafkatest/benchmarks/__init__.py new file mode 100644 index 000000000000..ec2014340d78 --- /dev/null +++ b/tests/kafkatest/benchmarks/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/kafkatest/benchmarks/core/__init__.py b/tests/kafkatest/benchmarks/core/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/benchmarks/core/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/benchmark_test.py b/tests/kafkatest/benchmarks/core/benchmark_test.py similarity index 100% rename from tests/kafkatest/tests/benchmark_test.py rename to tests/kafkatest/benchmarks/core/benchmark_test.py diff --git a/tests/kafkatest/tests/client/__init__.py b/tests/kafkatest/tests/client/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/tests/client/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/compression_test.py b/tests/kafkatest/tests/client/compression_test.py similarity index 100% rename from tests/kafkatest/tests/compression_test.py rename to tests/kafkatest/tests/client/compression_test.py diff --git a/tests/kafkatest/tests/consumer_rolling_upgrade_test.py b/tests/kafkatest/tests/client/consumer_rolling_upgrade_test.py similarity index 100% rename from tests/kafkatest/tests/consumer_rolling_upgrade_test.py rename to tests/kafkatest/tests/client/consumer_rolling_upgrade_test.py diff --git a/tests/kafkatest/tests/consumer_test.py b/tests/kafkatest/tests/client/consumer_test.py similarity index 100% rename from tests/kafkatest/tests/consumer_test.py rename to tests/kafkatest/tests/client/consumer_test.py diff --git a/tests/kafkatest/tests/message_format_change_test.py b/tests/kafkatest/tests/client/message_format_change_test.py similarity index 100% rename from tests/kafkatest/tests/message_format_change_test.py rename to tests/kafkatest/tests/client/message_format_change_test.py diff --git a/tests/kafkatest/tests/quota_test.py b/tests/kafkatest/tests/client/quota_test.py similarity index 100% rename from tests/kafkatest/tests/quota_test.py rename to tests/kafkatest/tests/client/quota_test.py diff --git a/tests/kafkatest/tests/connect/__init__.py b/tests/kafkatest/tests/connect/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/tests/connect/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py similarity index 100% rename from tests/kafkatest/tests/connect_distributed_test.py rename to tests/kafkatest/tests/connect/connect_distributed_test.py diff --git a/tests/kafkatest/tests/connect_rest_test.py b/tests/kafkatest/tests/connect/connect_rest_test.py similarity index 100% rename from tests/kafkatest/tests/connect_rest_test.py rename to tests/kafkatest/tests/connect/connect_rest_test.py diff --git a/tests/kafkatest/tests/connect_test.py b/tests/kafkatest/tests/connect/connect_test.py similarity index 100% rename from tests/kafkatest/tests/connect_test.py rename to tests/kafkatest/tests/connect/connect_test.py diff --git a/tests/kafkatest/tests/templates/connect-distributed.properties b/tests/kafkatest/tests/connect/templates/connect-distributed.properties similarity index 100% rename from tests/kafkatest/tests/templates/connect-distributed.properties rename to tests/kafkatest/tests/connect/templates/connect-distributed.properties diff --git a/tests/kafkatest/tests/templates/connect-file-sink.properties b/tests/kafkatest/tests/connect/templates/connect-file-sink.properties similarity index 100% rename from tests/kafkatest/tests/templates/connect-file-sink.properties rename to tests/kafkatest/tests/connect/templates/connect-file-sink.properties diff --git a/tests/kafkatest/tests/templates/connect-file-source.properties b/tests/kafkatest/tests/connect/templates/connect-file-source.properties similarity index 100% rename from tests/kafkatest/tests/templates/connect-file-source.properties rename to tests/kafkatest/tests/connect/templates/connect-file-source.properties diff --git a/tests/kafkatest/tests/templates/connect-standalone.properties b/tests/kafkatest/tests/connect/templates/connect-standalone.properties similarity index 100% rename from tests/kafkatest/tests/templates/connect-standalone.properties rename to tests/kafkatest/tests/connect/templates/connect-standalone.properties diff --git a/tests/kafkatest/tests/core/__init__.py b/tests/kafkatest/tests/core/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/tests/core/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/compatibility_test_new_broker_test.py b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py similarity index 100% rename from tests/kafkatest/tests/compatibility_test_new_broker_test.py rename to tests/kafkatest/tests/core/compatibility_test_new_broker_test.py diff --git a/tests/kafkatest/tests/consumer_group_command_test.py b/tests/kafkatest/tests/core/consumer_group_command_test.py similarity index 100% rename from tests/kafkatest/tests/consumer_group_command_test.py rename to tests/kafkatest/tests/core/consumer_group_command_test.py diff --git a/tests/kafkatest/tests/get_offset_shell_test.py b/tests/kafkatest/tests/core/get_offset_shell_test.py similarity index 100% rename from tests/kafkatest/tests/get_offset_shell_test.py rename to tests/kafkatest/tests/core/get_offset_shell_test.py diff --git a/tests/kafkatest/tests/mirror_maker_test.py b/tests/kafkatest/tests/core/mirror_maker_test.py similarity index 100% rename from tests/kafkatest/tests/mirror_maker_test.py rename to tests/kafkatest/tests/core/mirror_maker_test.py diff --git a/tests/kafkatest/tests/reassign_partitions_test.py b/tests/kafkatest/tests/core/reassign_partitions_test.py similarity index 100% rename from tests/kafkatest/tests/reassign_partitions_test.py rename to tests/kafkatest/tests/core/reassign_partitions_test.py diff --git a/tests/kafkatest/tests/replication_test.py b/tests/kafkatest/tests/core/replication_test.py similarity index 100% rename from tests/kafkatest/tests/replication_test.py rename to tests/kafkatest/tests/core/replication_test.py diff --git a/tests/kafkatest/tests/security_rolling_upgrade_test.py b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py similarity index 100% rename from tests/kafkatest/tests/security_rolling_upgrade_test.py rename to tests/kafkatest/tests/core/security_rolling_upgrade_test.py diff --git a/tests/kafkatest/tests/simple_consumer_shell_test.py b/tests/kafkatest/tests/core/simple_consumer_shell_test.py similarity index 100% rename from tests/kafkatest/tests/simple_consumer_shell_test.py rename to tests/kafkatest/tests/core/simple_consumer_shell_test.py diff --git a/tests/kafkatest/tests/upgrade_test.py b/tests/kafkatest/tests/core/upgrade_test.py similarity index 100% rename from tests/kafkatest/tests/upgrade_test.py rename to tests/kafkatest/tests/core/upgrade_test.py diff --git a/tests/kafkatest/tests/zookeeper_security_upgrade_test.py b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py similarity index 100% rename from tests/kafkatest/tests/zookeeper_security_upgrade_test.py rename to tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py diff --git a/tests/kafkatest/tests/streams/__init__.py b/tests/kafkatest/tests/streams/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/tests/streams/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/streams_bounce_test.py b/tests/kafkatest/tests/streams/streams_bounce_test.py similarity index 100% rename from tests/kafkatest/tests/streams_bounce_test.py rename to tests/kafkatest/tests/streams/streams_bounce_test.py diff --git a/tests/kafkatest/tests/streams_smoke_test.py b/tests/kafkatest/tests/streams/streams_smoke_test.py similarity index 100% rename from tests/kafkatest/tests/streams_smoke_test.py rename to tests/kafkatest/tests/streams/streams_smoke_test.py diff --git a/tests/kafkatest/tests/tools/__init__.py b/tests/kafkatest/tests/tools/__init__.py new file mode 100644 index 000000000000..ebc9bb3a9a03 --- /dev/null +++ b/tests/kafkatest/tests/tools/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults diff --git a/tests/kafkatest/tests/log4j_appender_test.py b/tests/kafkatest/tests/tools/log4j_appender_test.py similarity index 100% rename from tests/kafkatest/tests/log4j_appender_test.py rename to tests/kafkatest/tests/tools/log4j_appender_test.py From bc47e2306d9558a0f0976b15a554fc2d84ade5fc Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 09:53:17 -0700 Subject: [PATCH 045/267] Changing version to 0.10.1.0-SNAPSHOT --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index b058e58cbe30..0a612f6c8710 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. -version=0.10.0.0-SNAPSHOT +version=0.10.1.0-SNAPSHOT scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m From 9c5af253b170e2570ba2e17b4155520cea474e81 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 13:40:59 -0700 Subject: [PATCH 046/267] MINOR: update new version in additional places Note: This goes only to trunk. 0.10.0 branch will need a separate PR with different versions. 
Author: Gwen Shapira Reviewers: Ismael Juma , Ewen Cheslack-Postava Closes #1109 from gwenshap/minor-fix-version-trunk --- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index e12410511b65..f26a0a9c8197 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.0") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.1.0") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index df1a6129dbc8..10163a02b5a8 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.0.dev0' +__version__ = '0.10.1.0.dev0' From e8593d1b4529a7b9f3471ac8c1411dba336d6708 Mon Sep 17 00:00:00 2001 From: Ryan P Date: Thu, 24 Mar 2016 10:12:19 -0700 Subject: [PATCH 047/267] KAFKA-3445: Validate TASKS_MAX_CONFIG's lower bound Currently the property TASKS_MAX_CONFIG is not validated against nonsensical values such as 0. This patch leverages the Range.atLeast() method to ensure value is at least 1. Author: Ryan P Reviewers: Ewen Cheslack-Postava Closes #1132 from rnpridgeon/KAFKA-3445 --- .../org/apache/kafka/connect/runtime/ConnectorConfig.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java index e21faf6cbf16..e4395523263f 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java @@ -22,6 +22,8 @@ import org.apache.kafka.common.config.ConfigDef.Importance; import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.common.config.ConfigDef.Width; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; + import java.util.HashMap; import java.util.Map; @@ -54,6 +56,8 @@ public class ConnectorConfig extends AbstractConfig { public static final String TASKS_MAX_CONFIG = "tasks.max"; private static final String TASKS_MAX_DOC = "Maximum number of tasks to use for this connector."; public static final int TASKS_MAX_DEFAULT = 1; + private static final int TASKS_MIN_CONFIG = 1; + private static final String TASK_MAX_DISPLAY = "Tasks max"; public static final String TOPICS_CONFIG = "topics"; @@ -67,7 +71,7 @@ public class ConnectorConfig extends AbstractConfig { config = new ConfigDef() .define(NAME_CONFIG, Type.STRING, Importance.HIGH, NAME_DOC, COMMON_GROUP, 1, Width.MEDIUM, NAME_DISPLAY) .define(CONNECTOR_CLASS_CONFIG, Type.STRING, Importance.HIGH, CONNECTOR_CLASS_DOC, COMMON_GROUP, 2, Width.LONG, CONNECTOR_CLASS_DISPLAY) - .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY) + .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, atLeast(TASKS_MIN_CONFIG), Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY) .define(TOPICS_CONFIG, Type.LIST, TOPICS_DEFAULT, Importance.HIGH, TOPICS_DOC, COMMON_GROUP, 4, Width.LONG, TOPICS_DISPLAY); } From 
ae0a5a0dfdb6f5d69322ce0fa4da2c6e5e0daeb6 Mon Sep 17 00:00:00 2001 From: Jeremy Custenborder Date: Thu, 24 Mar 2016 11:03:30 -0700 Subject: [PATCH 048/267] KAFKA-3407 - ErrorLoggingCallback trims helpful diagnostic information. This should help when diagnosing issues with the console producer. This allows the logger to use `exception` rather than `exception.getMessage()`. Author: Jeremy Custenborder Reviewers: Ewen Cheslack-Postava Closes #1079 from jcustenborder/KAFKA-3407 --- .../clients/producer/internals/ErrorLoggingCallback.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java index 747e29fadbcf..18088c1c0429 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/ErrorLoggingCallback.java @@ -44,8 +44,8 @@ public void onCompletion(RecordMetadata metadata, Exception e) { logAsString ? new String(key) : key.length + " bytes"; String valueString = (valueLength == -1) ? "null" : logAsString ? new String(value) : valueLength + " bytes"; - log.error("Error when sending message to topic {} with key: {}, value: {} with error: {}", - topic, keyString, valueString, e.getMessage()); + log.error("Error when sending message to topic {} with key: {}, value: {} with error:", + topic, keyString, valueString, e); } } } From b5de41227f11a495d5dd7e1cf785220365d84534 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Fri, 1 Apr 2016 14:12:49 -0700 Subject: [PATCH 049/267] MINOR: Add check for empty topics iterator in ReplicaVerificationTool. Author: Ashish Singh Reviewers: Guozhang Wang, Gwen Shapira Closes #1167 from SinghAsDev/minorFixRelicaLagTool --- .../main/scala/kafka/tools/ReplicaVerificationTool.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala b/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala index fe4968d8cf00..71bf0c0407d4 100644 --- a/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala +++ b/core/src/main/scala/kafka/tools/ReplicaVerificationTool.scala @@ -128,6 +128,12 @@ object ReplicaVerificationTool extends Logging { else false ) + + if (filteredTopicMetadata.isEmpty) { + error("No topics found. 
" + topicWhiteListOpt + ", if specified, is either filtering out all topics or there is no topic.") + System.exit(1) + } + val topicPartitionReplicaList: Seq[TopicPartitionReplica] = filteredTopicMetadata.flatMap( topicMetadataResponse => topicMetadataResponse.partitionsMetadata.flatMap( From c588a72ad21f313d0c0ced11f083eca18fab84a1 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Fri, 1 Apr 2016 15:25:35 -0700 Subject: [PATCH 050/267] KAFKA-2844; Separate keytabs for sasl tests Use a different keytab for server and client in SASL tests Also: * Improve approach used to build the JAAS files programmatically * Delete stale `kafka_jaas.conf` file * Move `FourLetterWords` to its own file, add `Zk` prefix and clean-up its usage Author: Ismael Juma Reviewers: Harsha Chintalapani, Gwen Shapira Closes #533 from ijuma/separate-keytabs-for-sasl-tests --- core/src/test/resources/kafka_jaas.conf | 29 ---- .../integration/kafka/api/SaslSetup.scala | 39 ++--- .../security/auth/ZkAuthorizationTest.scala | 47 ++---- .../unit/kafka/utils/JaasTestUtils.scala | 156 +++++++++++------- .../scala/unit/kafka/zk/ZKEphemeralTest.scala | 66 ++++---- .../unit/kafka/zk/ZkFourLetterWords.scala | 47 ++++++ .../unit/kafka/zk/ZooKeeperTestHarness.scala | 48 ++---- 7 files changed, 217 insertions(+), 215 deletions(-) delete mode 100644 core/src/test/resources/kafka_jaas.conf create mode 100644 core/src/test/scala/unit/kafka/zk/ZkFourLetterWords.scala diff --git a/core/src/test/resources/kafka_jaas.conf b/core/src/test/resources/kafka_jaas.conf deleted file mode 100644 index b097e260b7a8..000000000000 --- a/core/src/test/resources/kafka_jaas.conf +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE - * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file - * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - */ -KafkaClient { - com.sun.security.auth.module.Krb5LoginModule required debug=true - useKeyTab=true - storeKey=true - serviceName="kafka" - keyTab="$keytab-location" - principal="client@EXAMPLE.COM"; -}; - -KafkaServer { - com.sun.security.auth.module.Krb5LoginModule required debug=true - useKeyTab=true - storeKey=true - serviceName="kafka" - keyTab="$keytab-location" - principal="kafka/localhost@EXAMPLE.COM"; -}; diff --git a/core/src/test/scala/integration/kafka/api/SaslSetup.scala b/core/src/test/scala/integration/kafka/api/SaslSetup.scala index 8255e6a696fd..967cae1ea563 100644 --- a/core/src/test/scala/integration/kafka/api/SaslSetup.scala +++ b/core/src/test/scala/integration/kafka/api/SaslSetup.scala @@ -35,8 +35,7 @@ case object KafkaSasl extends SaslSetupMode case object Both extends SaslSetupMode /* - * Trait used in SaslTestHarness and EndToEndAuthorizationTest - * currently to setup a keytab and jaas files. + * Trait used in SaslTestHarness and EndToEndAuthorizationTest to setup keytab and jaas files. 
*/ trait SaslSetup { private val workDir = TestUtils.tempDir() @@ -46,34 +45,26 @@ trait SaslSetup { def startSasl(mode: SaslSetupMode = Both) { // Important if tests leak consumers, producers or brokers LoginManager.closeAll() - val keytabFile = createKeytabAndSetConfiguration(mode) + val (serverKeytabFile, clientKeytabFile) = createKeytabsAndSetConfiguration(mode) kdc.start() - kdc.createPrincipal(keytabFile, "client", "kafka/localhost") + kdc.createPrincipal(serverKeytabFile, "kafka/localhost") + kdc.createPrincipal(clientKeytabFile, "client") if (mode == Both || mode == ZkSasl) System.setProperty("zookeeper.authProvider.1", "org.apache.zookeeper.server.auth.SASLAuthenticationProvider") } - protected def createKeytabAndSetConfiguration(mode: SaslSetupMode): File = { - val (keytabFile, jaasFile) = createKeytabAndJaasFiles(mode) + protected def createKeytabsAndSetConfiguration(mode: SaslSetupMode): (File, File) = { + val serverKeytabFile = TestUtils.tempFile() + val clientKeytabFile = TestUtils.tempFile() + val jaasFile = mode match { + case ZkSasl => JaasTestUtils.writeZkFile() + case KafkaSasl => JaasTestUtils.writeKafkaFile(serverKeytabFile, clientKeytabFile) + case Both => JaasTestUtils.writeZkAndKafkaFiles(serverKeytabFile, clientKeytabFile) + } // This will cause a reload of the Configuration singleton when `getConfiguration` is called Configuration.setConfiguration(null) - System.setProperty(JaasUtils.JAVA_LOGIN_CONFIG_PARAM, jaasFile.getAbsolutePath) - keytabFile - } - - private def createKeytabAndJaasFiles(mode: SaslSetupMode): (File, File) = { - val keytabFile = TestUtils.tempFile() - val jaasFileName: String = mode match { - case ZkSasl => - JaasTestUtils.genZkFile - case KafkaSasl => - JaasTestUtils.genKafkaFile(keytabFile.getAbsolutePath) - case _ => - JaasTestUtils.genZkAndKafkaFile(keytabFile.getAbsolutePath) - } - val jaasFile = new File(jaasFileName) - - (keytabFile, jaasFile) + System.setProperty(JaasUtils.JAVA_LOGIN_CONFIG_PARAM, jaasFile) + (serverKeytabFile, clientKeytabFile) } def closeSasl() { @@ -81,7 +72,7 @@ trait SaslSetup { // Important if tests leak consumers, producers or brokers LoginManager.closeAll() System.clearProperty(JaasUtils.JAVA_LOGIN_CONFIG_PARAM) - System.clearProperty("zookeeper.authProvider.1"); + System.clearProperty("zookeeper.authProvider.1") Configuration.setConfiguration(null) } } diff --git a/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala b/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala index 6a533b3fdbc5..ab5324c9c370 100644 --- a/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala +++ b/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala @@ -22,17 +22,17 @@ import kafka.utils.{Logging, ZkUtils} import kafka.zk.ZooKeeperTestHarness import org.apache.kafka.common.KafkaException import org.apache.kafka.common.security.JaasUtils -import org.apache.zookeeper.data.{ACL, Stat} +import org.apache.zookeeper.data.{ACL} import org.junit.Assert._ -import org.junit.{After, Before, BeforeClass, Test} +import org.junit.{After, Before, Test} import scala.collection.JavaConverters._ import scala.util.{Try, Success, Failure} import javax.security.auth.login.Configuration +class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging { + val jaasFile = kafka.utils.JaasTestUtils.writeZkFile + val authProvider = "zookeeper.authProvider.1" -class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ - val jaasFile: String = kafka.utils.JaasTestUtils.genZkFile 
- val authProvider: String = "zookeeper.authProvider.1" @Before override def setUp() { Configuration.setConfiguration(null) @@ -65,12 +65,7 @@ class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ JaasUtils.isZkSecurityEnabled() fail("Should have thrown an exception") } catch { - case e: KafkaException => { - // Expected - } - case e: Exception => { - fail(e.toString) - } + case e: KafkaException => // Expected } } @@ -241,10 +236,10 @@ class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ case false => list.size == 1 } isListSizeCorrect && list.asScala.forall( - secure match { - case true => isAclSecure - case false => isAclUnsecure - }) + secure match { + case true => isAclSecure + case false => isAclUnsecure + }) } /** @@ -255,15 +250,9 @@ class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ private def isAclSecure(acl: ACL): Boolean = { info(s"ACL $acl") acl.getPerms match { - case 1 => { - acl.getId.getScheme.equals("world") - } - case 31 => { - acl.getId.getScheme.equals("sasl") - } - case _: Int => { - false - } + case 1 => acl.getId.getScheme.equals("world") + case 31 => acl.getId.getScheme.equals("sasl") + case _ => false } } @@ -273,12 +262,8 @@ class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ private def isAclUnsecure(acl: ACL): Boolean = { info(s"ACL $acl") acl.getPerms match { - case 31 => { - acl.getId.getScheme.equals("world") - } - case _: Int => { - false - } + case 31 => acl.getId.getScheme.equals("world") + case _ => false } } @@ -323,7 +308,7 @@ class ZkAuthorizationTest extends ZooKeeperTestHarness with Logging{ case "/" => result // For all other paths, try to delete it case path => - try{ + try { zkUtils.deletePath(path) Failure(new Exception(s"Have been able to delete $path")) } catch { diff --git a/core/src/test/scala/unit/kafka/utils/JaasTestUtils.scala b/core/src/test/scala/unit/kafka/utils/JaasTestUtils.scala index cf088302dc85..a14cd3f94cc6 100644 --- a/core/src/test/scala/unit/kafka/utils/JaasTestUtils.scala +++ b/core/src/test/scala/unit/kafka/utils/JaasTestUtils.scala @@ -16,72 +16,110 @@ */ package kafka.utils +import java.io.{File, BufferedWriter, FileWriter} object JaasTestUtils { - // ZooKeeper vals - val zkServerContextName = "Server" - val zkClientContextName = "Client" - val userSuperPasswd = "adminpasswd" - val user = "fpj" - val userPasswd = "fpjsecret" - val zkModule = "org.apache.zookeeper.server.auth.DigestLoginModule" - //Kafka vals - val kafkaServerContextName = "KafkaServer" - val kafkaClientContextName = "KafkaClient" - val kafkaServerPrincipal = "client@EXAMPLE.COM" - val kafkaClientPrincipal = "kafka/localhost@EXAMPLE.COM" - val kafkaModule = "com.sun.security.auth.module.Krb5LoginModule" - - def genZkFile: String = { - val jaasFile = java.io.File.createTempFile("jaas", ".conf") - val jaasOutputStream = new java.io.FileOutputStream(jaasFile) - writeZkToOutputStream(jaasOutputStream) - jaasOutputStream.close() - jaasFile.deleteOnExit() + + case class Krb5LoginModule(contextName: String, + useKeyTab: Boolean, + storeKey: Boolean, + keyTab: String, + principal: String, + debug: Boolean, + serviceName: Option[String]) { + def toJaasSection: JaasSection = { + JaasSection( + contextName, + "com.sun.security.auth.module.Krb5LoginModule", + debug = debug, + entries = Map( + "useKeyTab" -> useKeyTab.toString, + "storeKey" -> storeKey.toString, + "keyTab" -> keyTab, + "principal" -> principal + ) ++ serviceName.map(s => Map("serviceName" -> s)).getOrElse(Map.empty) + ) + } + } + + case 
class JaasSection(contextName: String, + moduleName: String, + debug: Boolean, + entries: Map[String, String]) { + override def toString: String = { + s"""|$contextName { + | $moduleName required + | debug=$debug + | ${entries.map { case (k, v) => s"""$k="$v"""" }.mkString("", "\n| ", ";")} + |}; + |""".stripMargin + } + } + + private val ZkServerContextName = "Server" + private val ZkClientContextName = "Client" + private val ZkUserSuperPasswd = "adminpasswd" + private val ZkUser = "fpj" + private val ZkUserPassword = "fpjsecret" + private val ZkModule = "org.apache.zookeeper.server.auth.DigestLoginModule" + + private val KafkaServerContextName = "KafkaServer" + private val KafkaServerPrincipal = "kafka/localhost@EXAMPLE.COM" + private val KafkaClientContextName = "KafkaClient" + private val KafkaClientPrincipal = "client@EXAMPLE.COM" + + def writeZkFile(): String = { + val jaasFile = TestUtils.tempFile() + writeToFile(jaasFile, zkSections) jaasFile.getCanonicalPath } - - def genKafkaFile(keytabLocation: String): String = { - val jaasFile = java.io.File.createTempFile("jaas", ".conf") - val jaasOutputStream = new java.io.FileOutputStream(jaasFile) - writeKafkaToOutputStream(jaasOutputStream, keytabLocation) - jaasOutputStream.close() - jaasFile.deleteOnExit() + + def writeKafkaFile(serverKeyTabLocation: File, clientKeyTabLocation: File): String = { + val jaasFile = TestUtils.tempFile() + writeToFile(jaasFile, kafkaSections(serverKeyTabLocation, clientKeyTabLocation)) jaasFile.getCanonicalPath } - - def genZkAndKafkaFile(keytabLocation: String): String = { - val jaasFile = java.io.File.createTempFile("jaas", ".conf") - val jaasOutputStream = new java.io.FileOutputStream(jaasFile) - writeKafkaToOutputStream(jaasOutputStream, keytabLocation) - jaasOutputStream.write("\n\n".getBytes) - writeZkToOutputStream(jaasOutputStream) - jaasOutputStream.close() - jaasFile.deleteOnExit() + + def writeZkAndKafkaFiles(serverKeyTabLocation: File, clientKeyTabLocation: File): String = { + val jaasFile = TestUtils.tempFile() + writeToFile(jaasFile, kafkaSections(serverKeyTabLocation, clientKeyTabLocation) ++ zkSections) jaasFile.getCanonicalPath } - - private def writeZkToOutputStream(jaasOutputStream: java.io.FileOutputStream) { - jaasOutputStream.write(s"$zkServerContextName {\n\t$zkModule required\n".getBytes) - jaasOutputStream.write(s"""\tuser_super="$userSuperPasswd"\n""".getBytes) - jaasOutputStream.write(s"""\tuser_$user="$userPasswd";\n};\n\n""".getBytes) - jaasOutputStream.write(s"""$zkClientContextName {\n\t$zkModule required\n""".getBytes) - jaasOutputStream.write(s"""\tusername="$user"\n""".getBytes) - jaasOutputStream.write(s"""\tpassword="$userPasswd";\n};""".getBytes) + + private def zkSections: Seq[JaasSection] = Seq( + JaasSection(ZkServerContextName, ZkModule, false, Map("user_super" -> ZkUserSuperPasswd, s"user_$ZkUser" -> ZkUserPassword)), + JaasSection(ZkClientContextName, ZkModule, false, Map("username" -> ZkUser, "password" -> ZkUserPassword)) + ) + + private def kafkaSections(serverKeytabLocation: File, clientKeytabLocation: File): Seq[JaasSection] = { + Seq( + Krb5LoginModule( + KafkaServerContextName, + useKeyTab = true, + storeKey = true, + keyTab = serverKeytabLocation.getAbsolutePath, + principal = KafkaServerPrincipal, + debug = true, + serviceName = Some("kafka")), + Krb5LoginModule( + KafkaClientContextName, + useKeyTab = true, + storeKey = true, + keyTab = clientKeytabLocation.getAbsolutePath, + principal = KafkaClientPrincipal, + debug = true, + serviceName = Some("kafka") 
+ ) + ).map(_.toJaasSection) } - - private def writeKafkaToOutputStream(jaasOutputStream: java.io.FileOutputStream, keytabLocation: String) { - jaasOutputStream.write(s"$kafkaClientContextName {\n\t$kafkaModule required debug=true\n".getBytes) - jaasOutputStream.write(s"\tuseKeyTab=true\n".getBytes) - jaasOutputStream.write(s"\tstoreKey=true\n".getBytes) - jaasOutputStream.write(s"""\tserviceName="kafka"\n""".getBytes) - jaasOutputStream.write(s"""\tkeyTab="$keytabLocation"\n""".getBytes) - jaasOutputStream.write(s"""\tprincipal="$kafkaServerPrincipal";\n};\n\n""".getBytes) - jaasOutputStream.write(s"""$kafkaServerContextName {\n\t$kafkaModule required debug=true\n""".getBytes) - jaasOutputStream.write(s"\tuseKeyTab=true\n".getBytes) - jaasOutputStream.write(s"\tstoreKey=true\n".getBytes) - jaasOutputStream.write(s"""\tserviceName="kafka"\n""".getBytes) - jaasOutputStream.write(s"""\tkeyTab="$keytabLocation"\n""".getBytes) - jaasOutputStream.write(s"""\tprincipal="$kafkaClientPrincipal";\n};""".getBytes) + + private def jaasSectionsToString(jaasSections: Seq[JaasSection]): String = + jaasSections.mkString + + private def writeToFile(file: File, jaasSections: Seq[JaasSection]) { + val writer = new BufferedWriter(new FileWriter(file)) + try writer.write(jaasSectionsToString(jaasSections)) + finally writer.close() } -} \ No newline at end of file + +} diff --git a/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala b/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala index 32c7a5dc5911..c2c25ed4a8bf 100644 --- a/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala +++ b/core/src/test/scala/unit/kafka/zk/ZKEphemeralTest.scala @@ -17,10 +17,11 @@ package kafka.zk -import java.util.ArrayList -import java.util.Collection +import java.lang.Iterable import javax.security.auth.login.Configuration +import scala.collection.JavaConverters._ + import kafka.consumer.ConsumerConfig import kafka.utils.ZkUtils import kafka.utils.ZKCheckedEphemeral @@ -30,26 +31,24 @@ import org.apache.zookeeper.CreateMode import org.apache.zookeeper.WatchedEvent import org.apache.zookeeper.Watcher import org.apache.zookeeper.ZooDefs.Ids -import org.I0Itec.zkclient.exception.{ZkException,ZkNodeExistsException} +import org.I0Itec.zkclient.exception.ZkNodeExistsException import org.junit.{After, Before, Test, Assert} -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; -import org.junit.runner.RunWith; +import org.junit.runners.Parameterized +import org.junit.runners.Parameterized.Parameters +import org.junit.runner.RunWith object ZKEphemeralTest { + @Parameters - def enableSecurityOptions: Collection[Array[java.lang.Boolean]] = { - val list = new ArrayList[Array[java.lang.Boolean]]() - list.add(Array(true)) - list.add(Array(false)) - list - } + def enableSecurityOptions: Iterable[Array[java.lang.Boolean]] = + Seq[Array[java.lang.Boolean]](Array(true), Array(false)).asJava + } @RunWith(value = classOf[Parameterized]) class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { - val jaasFile: String = kafka.utils.JaasTestUtils.genZkFile - val authProvider: String = "zookeeper.authProvider.1" + val jaasFile = kafka.utils.JaasTestUtils.writeZkFile() + val authProvider = "zookeeper.authProvider.1" var zkSessionTimeoutMs = 1000 @Before @@ -103,17 +102,14 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { */ @Test def testZkWatchedEphemeral = { - var path = "/zwe-test" - testCreation(path) - path = "/zwe-test-parent/zwe-test" - 
testCreation(path) + testCreation("/zwe-test") + testCreation("/zwe-test-parent/zwe-test") } private def testCreation(path: String) { val zk = zkUtils.zkConnection.getZookeeper val zwe = new ZKCheckedEphemeral(path, "", zk, JaasUtils.isZkSecurityEnabled()) var created = false - var counter = 10 zk.exists(path, new Watcher() { def process(event: WatchedEvent) { @@ -140,19 +136,19 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { //Creates a second session val (zkClient2, zkConnection2) = ZkUtils.createZkClientAndConnection(zkConnect, zkSessionTimeoutMs, zkConnectionTimeout) val zk2 = zkConnection2.getZookeeper - var zwe = new ZKCheckedEphemeral(path, "", zk2, JaasUtils.isZkSecurityEnabled()) + val zwe = new ZKCheckedEphemeral(path, "", zk2, JaasUtils.isZkSecurityEnabled()) // Creates znode for path in the first session zk1.create(path, Array[Byte](), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) //Bootstraps the ZKWatchedEphemeral object - var gotException = false; - try { - zwe.create() - } catch { - case e: ZkNodeExistsException => - gotException = true - } + val gotException = + try { + zwe.create() + false + } catch { + case e: ZkNodeExistsException => true + } Assert.assertTrue(gotException) zkClient2.close() } @@ -168,15 +164,15 @@ class ZKEphemeralTest(val secure: Boolean) extends ZooKeeperTestHarness { // Creates znode for path in the first session zk.create(path, Array[Byte](), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) - var zwe = new ZKCheckedEphemeral(path, "", zk, JaasUtils.isZkSecurityEnabled()) + val zwe = new ZKCheckedEphemeral(path, "", zk, JaasUtils.isZkSecurityEnabled()) //Bootstraps the ZKWatchedEphemeral object - var gotException = false; - try { - zwe.create() - } catch { - case e: ZkNodeExistsException => - gotException = true - } + val gotException = + try { + zwe.create() + false + } catch { + case e: ZkNodeExistsException => true + } Assert.assertFalse(gotException) } } diff --git a/core/src/test/scala/unit/kafka/zk/ZkFourLetterWords.scala b/core/src/test/scala/unit/kafka/zk/ZkFourLetterWords.scala new file mode 100644 index 000000000000..6eaee704b525 --- /dev/null +++ b/core/src/test/scala/unit/kafka/zk/ZkFourLetterWords.scala @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.zk + +import java.io.IOException +import java.net.{SocketTimeoutException, Socket, InetAddress, InetSocketAddress} + +/** + * ZooKeeper responds to a small set of commands. Each command is composed of four letters. You issue the commands to + * ZooKeeper via telnet or nc, at the client port. 
+ * + * Three of the more interesting commands: "stat" gives some general information about the server and connected + * clients, while "srvr" and "cons" give extended details on server and connections respectively. + */ +object ZkFourLetterWords { + def sendStat(host: String, port: Int, timeout: Int) { + val hostAddress = + if (host != null) new InetSocketAddress(host, port) + else new InetSocketAddress(InetAddress.getByName(null), port) + val sock = new Socket() + try { + sock.connect(hostAddress, timeout) + val outStream = sock.getOutputStream + outStream.write("stat".getBytes) + outStream.flush() + } catch { + case e: SocketTimeoutException => throw new IOException("Exception while sending 4lw") + } finally { + sock.close + } + } +} diff --git a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala index d618ba621f6b..95f4e350954c 100755 --- a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala +++ b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala @@ -17,40 +17,12 @@ package kafka.zk -import java.io._ -import java.net._ import javax.security.auth.login.Configuration -import org.I0Itec.zkclient.{ZkClient, ZkConnection} import kafka.utils.{ZkUtils, Logging, CoreUtils} import org.junit.{After, Before} import org.scalatest.junit.JUnitSuite import org.apache.kafka.common.security.JaasUtils -object FourLetterWords { - def sendStat(host: String, port: Int, timeout: Int) { - val hostAddress = if (host != null) - new InetSocketAddress(host, port) - else - new InetSocketAddress(InetAddress.getByName(null), port) - val sock = new Socket() - var reader: BufferedReader = null - sock.connect(hostAddress, timeout) - try { - val outstream = sock.getOutputStream - outstream.write("stat".getBytes) - outstream.flush - } catch { - case e: SocketTimeoutException => { - throw new IOException("Exception while sending 4lw") - } - } finally { - sock.close - if (reader != null) - reader.close - } - } -} - trait ZooKeeperTestHarness extends JUnitSuite with Logging { var zookeeper: EmbeddedZookeeper = null var zkPort: Int = -1 @@ -73,18 +45,20 @@ trait ZooKeeperTestHarness extends JUnitSuite with Logging { CoreUtils.swallow(zkUtils.close()) if (zookeeper != null) CoreUtils.swallow(zookeeper.shutdown()) - - var isDown = false - while(!isDown) { + + def isDown(): Boolean = { try { - FourLetterWords.sendStat("127.0.0.1", zkPort, 3000) - } catch { - case _: Throwable => { - info("Server is down") - isDown = true - } + ZkFourLetterWords.sendStat("127.0.0.1", zkPort, 3000) + false + } catch { case _: Throwable => + debug("Server is down") + true } } + + Iterator.continually(isDown()).exists(identity) + Configuration.setConfiguration(null) } + } From c216f8a8e8ff6a3c140b9e0678c4362d4b035982 Mon Sep 17 00:00:00 2001 From: Flavio Junqueira Date: Fri, 1 Apr 2016 15:57:39 -0700 Subject: [PATCH 051/267] KAFKA-2930: Update references to ZooKeeper in the docs. Author: Flavio Junqueira Reviewers: Ismael Juma, Gwen Shapira Closes #615 from fpj/KAFKA-2930 --- docs/ops.html | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/ops.html b/docs/ops.html index 541a01ddcaf8..b239a0eda550 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -934,17 +934,17 @@

    Audit

    6.7 ZooKeeper

    Stable version

    -At LinkedIn, we are running ZooKeeper 3.3.*. Version 3.3.3 has known serious issues regarding ephemeral node deletion and session expirations. After running into those issues in production, we upgraded to 3.3.4 and have been running that smoothly for over a year now. +The current stable branch is 3.4 and the latest release of that branch is 3.4.6, which is the one ZkClient 0.7 uses. ZkClient is the client layer Kafka uses to interact with ZooKeeper.

    Operationalizing ZooKeeper

    Operationally, we do the following for a healthy ZooKeeper installation:
      -
    • Redundancy in the physical/hardware/network layout: try not to put them all in the same rack, decent (but don't go nuts) hardware, try to keep redundant power and network paths, etc.
    • -
    • I/O segregation: if you do a lot of write type traffic you'll almost definitely want the transaction logs on a different disk group than application logs and snapshots (the write to the ZooKeeper service has a synchronous write to disk, which can be slow).
    • +
    • Redundancy in the physical/hardware/network layout: try not to put them all in the same rack, decent (but don't go nuts) hardware, try to keep redundant power and network paths, etc. A typical ZooKeeper ensemble has 5 or 7 servers, which tolerates 2 and 3 servers down, respectively. If you have a small deployment, then using 3 servers is acceptable, but keep in mind that you'll only be able to tolerate 1 server down in this case.
    • +
• I/O segregation: if you do a lot of write type traffic you'll almost definitely want the transaction logs on a dedicated disk group. Writes to the transaction log are synchronous (but batched for performance), and consequently, concurrent writes can significantly affect performance. ZooKeeper snapshots can be one such source of concurrent writes, and ideally should be written on a disk group separate from the transaction log. Snapshots are written to disk asynchronously, so it is typically ok to share with the operating system and message log files. You can configure a server to use a separate disk group with the dataLogDir parameter (see the illustrative configuration sketch after this list).
    • Application segregation: Unless you really understand the application patterns of other apps that you want to install on the same box, it can be a good idea to run ZooKeeper in isolation (though this can be a balancing act with the capabilities of the hardware).
    • Use care with virtualization: It can work, depending on your cluster layout and read/write patterns and SLAs, but the tiny overheads introduced by the virtualization layer can add up and throw off ZooKeeper, as it can be very time sensitive
    • -
    • ZooKeeper configuration and monitoring: It's java, make sure you give it 'enough' heap space (We usually run them with 3-5G, but that's mostly due to the data set size we have here). Unfortunately we don't have a good formula for it. As far as monitoring, both JMX and the 4 letter words (4lw) commands are very useful, they do overlap in some cases (and in those cases we prefer the 4 letter commands, they seem more predictable, or at the very least, they work better with the LI monitoring infrastructure)
    • -
    • Don't overbuild the cluster: large clusters, especially in a write heavy usage pattern, means a lot of intracluster communication (quorums on the writes and subsequent cluster member updates), but don't underbuild it (and risk swamping the cluster).
    • -
    • Try to run on a 3-5 node cluster: ZooKeeper writes use quorums and inherently that means having an odd number of machines in a cluster. Remember that a 5 node cluster will cause writes to slow down compared to a 3 node cluster, but will allow more fault tolerance.
    • +
• ZooKeeper configuration: It's Java, so make sure you give it 'enough' heap space (we usually run them with 3-5G, but that's mostly due to the data set size we have here). Unfortunately we don't have a good formula for it, but keep in mind that allowing for more ZooKeeper state means that snapshots can become large, and large snapshots affect recovery time. In fact, if the snapshot becomes too large (a few gigabytes), then you may need to increase the initLimit parameter to give enough time for servers to recover and join the ensemble.
    • +
• Monitoring: Both JMX and the 4 letter words (4lw) commands are very useful; they do overlap in some cases (and in those cases we prefer the 4 letter commands, since they seem more predictable, or at the very least, work better with the LI monitoring infrastructure)
    • +
• Don't overbuild the cluster: large clusters, especially in a write heavy usage pattern, mean a lot of intracluster communication (quorums on the writes and subsequent cluster member updates), but don't underbuild it (and risk swamping the cluster). Having more servers adds to your read capacity.
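To make the disk layout and recovery settings above concrete, here is a minimal, illustrative zoo.cfg sketch (editorial addition, not part of this patch). The paths, host names, and numeric values are placeholders rather than recommendations; the only parameters taken from the discussion above are dataLogDir and initLimit.

    # Illustrative ZooKeeper server configuration (placeholder values)
    tickTime=2000
    # Snapshots (written asynchronously) can share a disk group with OS/application logs
    dataDir=/var/lib/zookeeper/data
    # Transaction log on a dedicated disk group, as recommended above
    dataLogDir=/zk-txn-log
    # Allow followers extra ticks to load a large snapshot and rejoin the ensemble
    initLimit=10
    syncLimit=5
    clientPort=2181
    # A five-server ensemble tolerates two failed servers
    server.1=zk1.example.com:2888:3888
    server.2=zk2.example.com:2888:3888
    server.3=zk3.example.com:2888:3888
    server.4=zk4.example.com:2888:3888
    server.5=zk5.example.com:2888:3888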
    Overall, we try to keep the ZooKeeper system as small as will handle the load (plus standard growth capacity planning) and as simple as possible. We try not to do anything fancy with the configuration or application layout as compared to the official release as well as keep it as self contained as possible. For these reasons, we tend to skip the OS packaged versions, since it has a tendency to try to put things in the OS standard hierarchy, which can be 'messy', for want of a better way to word it. From 09f4a7fdc923b03a2f2ea29ecb0659ca450e8149 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Fri, 1 Apr 2016 17:14:29 -0700 Subject: [PATCH 052/267] MINOR: small code optimizations in streams guozhangwang Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1176 from ymatsuda/optimize --- .../processor/internals/PartitionGroup.java | 6 +++- .../processor/internals/StreamTask.java | 12 +++---- .../processor/internals/StreamThread.java | 35 +++++++------------ 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java index b487ff5b36ee..3d8f792c7c8e 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java @@ -49,6 +49,10 @@ public ProcessorNode node() { public TopicPartition partition() { return queue.partition(); } + + public RecordQueue queue() { + return queue; + } } // since task is thread-safe, we do not need to synchronize on local variables @@ -88,7 +92,7 @@ public StampedRecord nextRecord(RecordInfo info) { // get the first record from this queue. 
record = queue.poll(); - if (queue.size() > 0) { + if (!queue.isEmpty()) { queuesByTime.offer(queue); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index afa303ca1514..61aeced9624a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -179,7 +179,7 @@ public int process() { // after processing this record, if its partition queue's buffered size has been // decreased to the threshold, we can then resume the consumption on this partition - if (partitionGroup.numBuffered(partition) == this.maxBufferedSize) { + if (recordInfo.queue().size() == this.maxBufferedSize) { consumer.resume(singleton(partition)); requiresPoll = true; } @@ -320,13 +320,13 @@ private RecordQueue createRecordQueue(TopicPartition partition, SourceNode sourc @SuppressWarnings("unchecked") public void forward(K key, V value) { ProcessorNode thisNode = currNode; - for (ProcessorNode childNode : (List>) thisNode.children()) { - currNode = childNode; - try { + try { + for (ProcessorNode childNode : (List>) thisNode.children()) { + currNode = childNode; childNode.process(key, value); - } finally { - currNode = thisNode; } + } finally { + currNode = thisNode; } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 7d6b98f9b868..c2a8e06e9b11 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -350,9 +350,12 @@ private void runLoop() { requiresPoll = requiresPoll || task.requiresPoll(); sensors.processTimeSensor.record(time.milliseconds() - startProcess); - } - maybePunctuate(); + maybePunctuate(task); + + if (task.commitNeeded()) + commitOne(task, time.milliseconds()); + } // if pollTimeMs has passed since the last poll, we poll to respond to a possible rebalance // even when we paused all partitions. 
@@ -424,18 +427,16 @@ private boolean stillRunning() { return true; } - private void maybePunctuate() { - for (StreamTask task : activeTasks.values()) { - try { - long now = time.milliseconds(); + private void maybePunctuate(StreamTask task) { + try { + long now = time.milliseconds(); - if (task.maybePunctuate(now)) - sensors.punctuateTimeSensor.record(time.milliseconds() - now); + if (task.maybePunctuate(now)) + sensors.punctuateTimeSensor.record(time.milliseconds() - now); - } catch (KafkaException e) { - log.error("Failed to punctuate active task #" + task.id() + " in thread [" + this.getName() + "]: ", e); - throw e; - } + } catch (KafkaException e) { + log.error("Failed to punctuate active task #" + task.id() + " in thread [" + this.getName() + "]: ", e); + throw e; } } @@ -449,16 +450,6 @@ protected void maybeCommit() { lastCommit = now; processStandbyRecords = true; - } else { - for (StreamTask task : activeTasks.values()) { - try { - if (task.commitNeeded()) - commitOne(task, time.milliseconds()); - } catch (KafkaException e) { - log.error("Failed to commit active task #" + task.id() + " in thread [" + this.getName() + "]: ", e); - throw e; - } - } } } From eb08e493228e2e34eae361922796dcffb920e78d Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Sun, 3 Apr 2016 13:44:05 -0700 Subject: [PATCH 053/267] KAFKA-3419: clarify difference between topic subscription and partition assignment Author: Jason Gustafson Reviewers: Ashish Singh, Ismael Juma, Guozhang Wang Closes #1158 from hachikuji/KAFKA-3419 --- .../kafka/clients/consumer/KafkaConsumer.java | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index b15d07f80969..c457c83692a5 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -125,8 +125,9 @@ * commits (note that offsets are always committed for a given consumer group), etc. * See Storing Offsets Outside Kafka for more details *

    - * It is also possible for the consumer to manually specify the partitions that are assigned to it through {@link #assign(Collection)}, - * which disables this dynamic partition assignment. + * It is also possible for the consumer to manually assign specific partitions + * (similar to the older "simple" consumer) using {@link #assign(Collection)}. In this case, dynamic partition + * assignment and consumer group coordination will be disabled. * *

    Usage Examples

    * The consumer APIs offer flexibility to cover a variety of consumption use cases. Here are some examples to @@ -242,27 +243,23 @@ * Note: The committed offset should always be the offset of the next message that your application will read. * Thus, when calling {@link #commitSync(Map) commitSync(offsets)} you should add one to the offset of the last message processed. * - *
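The off-by-one rule above can be illustrated with a short sketch (editorial addition, not part of the patch). It assumes an already-configured consumer instance, the usual java.util and Kafka client imports, and a hypothetical process() helper standing in for application logic:

    // Commit the offset of the next record to read, i.e. the last processed offset + 1
    ConsumerRecords<String, String> records = consumer.poll(100);
    for (ConsumerRecord<String, String> record : records) {
        process(record); // hypothetical application logic
        TopicPartition tp = new TopicPartition(record.topic(), record.partition());
        // Committing per record is only for illustration; batching per partition is more typical
        consumer.commitSync(Collections.singletonMap(tp, new OffsetAndMetadata(record.offset() + 1)));
    }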

    Subscribing To Specific Partitions

    + *

    Manual Partition Assignment

    * - * In the previous examples we subscribed to the topics we were interested in and let Kafka give our particular process - * a fair share of the partitions for those topics. This provides a simple load balancing mechanism so multiple - * instances of our program can divided up the work of processing records. + * In the previous examples, we subscribed to the topics we were interested in and let Kafka dynamically assign a + * fair share of the partitions for those topics based on the active consumers in the group. However, in + * some cases you may need finer control over the specific partitions that are assigned. For example: *

    - * In this mode the consumer will just get the partitions it subscribes to and if the consumer instance fails no attempt - * will be made to rebalance partitions to other instances. - *

    - * There are several cases where this makes sense: *

      - *
    • The first case is if the process is maintaining some kind of local state associated with that partition (like a - * local on-disk key-value store) and hence it should only get records for the partition it is maintaining on disk. - *
    • Another case is if the process itself is highly available and will be restarted if it fails (perhaps using a + *
    • If the process is maintaining some kind of local state associated with that partition (like a + * local on-disk key-value store), then it should only get records for the partition it is maintaining on disk. + *
    • If the process itself is highly available and will be restarted if it fails (perhaps using a * cluster management framework like YARN, Mesos, or AWS facilities, or as part of a stream processing framework). In - * this case there is no need for Kafka to detect the failure and reassign the partition, rather the consuming process + * this case there is no need for Kafka to detect the failure and reassign the partition since the consuming process * will be restarted on another machine. *
    *

    - * This mode is easy to specify, rather than subscribing to the topic, the consumer just subscribes to particular - * partitions: + * To use this mode, instead of subscribing to the topic using {@link #subscribe(Collection) subscribe}, you just call + * {@link #assign(Collection)} with the full list of partitions that you want to consume. * *

      *     String topic = "foo";
    @@ -271,11 +268,15 @@
      *     consumer.assign(Arrays.asList(partition0, partition1));
      * 
    * - * The group that the consumer specifies is still used for committing offsets, but now the set of partitions will only - * be changed if the consumer specifies new partitions, and no attempt at failure detection will be made. + * Once assigned, you can call {@link #poll(long) poll} in a loop, just as in the preceding examples to consume + * records. The group that the consumer specifies is still used for committing offsets, but now the set of partitions + * will only change with another call to {@link #assign(Collection) assign}. Manual partition assignment does + * not use group coordination, so consumer failures will not cause assigned partitions to be rebalanced. Each consumer + * acts independently even if it shares a groupId with another consumer. To avoid offset commit conflicts, you should + * usually ensure that the groupId is unique for each consumer instance. *
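A minimal sketch of the assign-then-poll pattern described above (editorial illustration, not part of the patch; the bootstrap address, group id, topic name, and poll timeout are placeholders, and error handling plus consumer.close() are omitted):

    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");   // placeholder address
    props.put("group.id", "my-assigned-consumer");      // used only for committing offsets here
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    // Manually assign the partitions; no group rebalancing will occur
    consumer.assign(Arrays.asList(new TopicPartition("foo", 0), new TopicPartition("foo", 1)));
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        for (ConsumerRecord<String, String> record : records)
            System.out.printf("partition = %d, offset = %d, value = %s%n",
                              record.partition(), record.offset(), record.value());
    }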

    - * It isn't possible to mix both subscription to specific partitions (with no load balancing) and to topics (with load - * balancing) using the same consumer instance. + * Note that it isn't possible to mix manual partition assignment (i.e. using {@link #assign(Collection) assign}) + * with dynamic partition assignment through topic subscription (i.e. using {@link #subscribe(Collection) subscribe}). * *

    Storing Offsets Outside Kafka

    * From 625c516e0e4096c6bccde998d5973525b8be196d Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Sun, 3 Apr 2016 16:34:46 -0700 Subject: [PATCH 054/267] KAFKA-3495; NetworkClient.blockingSendAndReceive` should rely on requestTimeout Also removed the code for handling negative timeouts in `blockingReady` as `Selector.poll` has not supported that for a while. Author: Ismael Juma Reviewers: Jun Rao Closes #1177 from ijuma/kafka-3495-blocking-send-and-receive-request-timeout --- .../apache/kafka/clients/NetworkClient.java | 2 +- .../controller/ControllerChannelManager.scala | 4 +- .../main/scala/kafka/server/KafkaServer.scala | 9 +-- .../kafka/server/ReplicaFetcherThread.scala | 4 +- .../utils/NetworkClientBlockingOps.scala | 66 ++++++++++--------- 5 files changed, 40 insertions(+), 45 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java index 4d01cdeb2e27..d22b508cd878 100644 --- a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java @@ -390,7 +390,7 @@ private void processDisconnection(List responses, String nodeId, } /** - * Iterate over all the inflight requests and expire any requests that have exceeded the configured the requestTimeout. + * Iterate over all the inflight requests and expire any requests that have exceeded the configured requestTimeout. * The connection to the node associated with the request will be terminated and will be treated as a disconnection. * * @param responses The list of responses to update diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala index b376d15e4eb1..e9731fd4e091 100755 --- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala +++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala @@ -178,9 +178,7 @@ class RequestSendThread(val controllerId: Int, val requestHeader = apiVersion.fold(networkClient.nextRequestHeader(apiKey))(networkClient.nextRequestHeader(apiKey, _)) val send = new RequestSend(brokerNode.idString, requestHeader, request.toStruct) val clientRequest = new ClientRequest(time.milliseconds(), true, send, null) - clientResponse = networkClient.blockingSendAndReceive(clientRequest, socketTimeoutMs)(time).getOrElse { - throw new SocketTimeoutException(s"No response received within $socketTimeoutMs ms") - } + clientResponse = networkClient.blockingSendAndReceive(clientRequest)(time) isSendSuccessful = true } } catch { diff --git a/core/src/main/scala/kafka/server/KafkaServer.scala b/core/src/main/scala/kafka/server/KafkaServer.scala index e29494baa1d9..f998d82104d3 100755 --- a/core/src/main/scala/kafka/server/KafkaServer.scala +++ b/core/src/main/scala/kafka/server/KafkaServer.scala @@ -320,9 +320,6 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr val socketTimeoutMs = config.controllerSocketTimeoutMs - def socketTimeoutException: Throwable = - new SocketTimeoutException(s"Did not receive response within $socketTimeoutMs") - def networkClientControlledShutdown(retries: Int): Boolean = { val metadataUpdater = new ManualMetadataUpdater() val networkClient = { @@ -388,16 +385,14 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr try { if (!networkClient.blockingReady(node(prevController), socketTimeoutMs)) - throw socketTimeoutException + throw new 
SocketTimeoutException(s"Failed to connect within $socketTimeoutMs ms") // send the controlled shutdown request val requestHeader = networkClient.nextRequestHeader(ApiKeys.CONTROLLED_SHUTDOWN_KEY) val send = new RequestSend(node(prevController).idString, requestHeader, new ControlledShutdownRequest(config.brokerId).toStruct) val request = new ClientRequest(kafkaMetricsTime.milliseconds(), true, send, null) - val clientResponse = networkClient.blockingSendAndReceive(request, socketTimeoutMs).getOrElse { - throw socketTimeoutException - } + val clientResponse = networkClient.blockingSendAndReceive(request) val shutdownResponse = new ControlledShutdownResponse(clientResponse.responseBody) if (shutdownResponse.errorCode == Errors.NONE.code && shutdownResponse.partitionsRemaining.isEmpty) { diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala index de7269f83323..26838cac96db 100644 --- a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala +++ b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala @@ -233,9 +233,7 @@ class ReplicaFetcherThread(name: String, else { val send = new RequestSend(sourceBroker.id.toString, header, request.toStruct) val clientRequest = new ClientRequest(time.milliseconds(), true, send, null) - networkClient.blockingSendAndReceive(clientRequest, socketTimeout)(time).getOrElse { - throw new SocketTimeoutException(s"No response received within $socketTimeout ms") - } + networkClient.blockingSendAndReceive(clientRequest)(time) } } catch { diff --git a/core/src/main/scala/kafka/utils/NetworkClientBlockingOps.scala b/core/src/main/scala/kafka/utils/NetworkClientBlockingOps.scala index 9ed9d29a2932..fd4af6e949b6 100644 --- a/core/src/main/scala/kafka/utils/NetworkClientBlockingOps.scala +++ b/core/src/main/scala/kafka/utils/NetworkClientBlockingOps.scala @@ -55,6 +55,7 @@ class NetworkClientBlockingOps(val client: NetworkClient) extends AnyVal { * care. */ def blockingReady(node: Node, timeout: Long)(implicit time: JTime): Boolean = { + require(timeout >=0, "timeout should be >= 0") client.ready(node, time.milliseconds()) || pollUntil(timeout) { (_, now) => if (client.isReady(node, now)) true @@ -65,19 +66,18 @@ class NetworkClientBlockingOps(val client: NetworkClient) extends AnyVal { } /** - * Invokes `client.send` followed by 1 or more `client.poll` invocations until a response is received, - * the timeout expires or a disconnection happens. + * Invokes `client.send` followed by 1 or more `client.poll` invocations until a response is received or a + * disconnection happens (which can happen for a number of reasons including a request timeout). * - * It returns `true` if the call completes normally or `false` if the timeout expires. In the case of a disconnection, - * an `IOException` is thrown instead. + * In case of a disconnection, an `IOException` is thrown. * * This method is useful for implementing blocking behaviour on top of the non-blocking `NetworkClient`, use it with * care. 
*/ - def blockingSendAndReceive(request: ClientRequest, timeout: Long)(implicit time: JTime): Option[ClientResponse] = { + def blockingSendAndReceive(request: ClientRequest)(implicit time: JTime): ClientResponse = { client.send(request, time.milliseconds()) - pollUntilFound(timeout) { case (responses, _) => + pollContinuously { responses => val response = responses.find { response => response.request.request.header.correlationId == request.request.header.correlationId } @@ -102,41 +102,45 @@ class NetworkClientBlockingOps(val client: NetworkClient) extends AnyVal { * care. */ private def pollUntil(timeout: Long)(predicate: (Seq[ClientResponse], Long) => Boolean)(implicit time: JTime): Boolean = { - pollUntilFound(timeout) { (responses, now) => - if (predicate(responses, now)) Some(true) - else None - }.fold(false)(_ => true) - } - - /** - * Invokes `client.poll` until `collect` returns `Some` or the timeout expires. - * - * It returns the result of `collect` if the call completes normally or `None` if the timeout expires. Exceptions - * thrown via `collect` are not handled and will bubble up. - * - * This method is useful for implementing blocking behaviour on top of the non-blocking `NetworkClient`, use it with - * care. - */ - private def pollUntilFound[T](timeout: Long)(collect: (Seq[ClientResponse], Long) => Option[T])(implicit time: JTime): Option[T] = { - val methodStartTime = time.milliseconds() val timeoutExpiryTime = methodStartTime + timeout @tailrec - def recurse(iterationStartTime: Long): Option[T] = { - val pollTimeout = if (timeout < 0) timeout else timeoutExpiryTime - iterationStartTime + def recursivePoll(iterationStartTime: Long): Boolean = { + val pollTimeout = timeoutExpiryTime - iterationStartTime val responses = client.poll(pollTimeout, iterationStartTime).asScala - val result = collect(responses, iterationStartTime) - if (result.isDefined) result + if (predicate(responses, iterationStartTime)) true else { val afterPollTime = time.milliseconds() - if (timeout < 0 || afterPollTime < timeoutExpiryTime) - recurse(afterPollTime) - else None + if (afterPollTime < timeoutExpiryTime) recursivePoll(afterPollTime) + else false + } + } + + recursivePoll(methodStartTime) + } + + /** + * Invokes `client.poll` until `collect` returns `Some`. The value inside `Some` is returned. + * + * Exceptions thrown via `collect` are not handled and will bubble up. + * + * This method is useful for implementing blocking behaviour on top of the non-blocking `NetworkClient`, use it with + * care. + */ + private def pollContinuously[T](collect: Seq[ClientResponse] => Option[T])(implicit time: JTime): T = { + + @tailrec + def recursivePoll: T = { + // rely on request timeout to ensure we don't block forever + val responses = client.poll(Long.MaxValue, time.milliseconds()).asScala + collect(responses) match { + case Some(result) => result + case None => recursivePoll } } - recurse(methodStartTime) + recursivePoll } } From 80ba01e16ba17c48058987ee3a1384f1e23df343 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsuda Date: Mon, 4 Apr 2016 14:57:15 -0700 Subject: [PATCH 055/267] HOTFIX: set timestamp in SinkNode guozhangwang Setting the timestamp in produced records in SinkNode. This forces the producer record's timestamp same as the context's timestamp. 
Author: Yasuhiro Matsuda Reviewers: Guozhang Wang Closes #1137 from ymatsuda/set_timestamp_in_sinknode --- .../org/apache/kafka/streams/processor/internals/SinkNode.java | 2 +- .../src/test/java/org/apache/kafka/test/KStreamTestDriver.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java index ffc72fd92875..31a558bf5104 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java @@ -57,7 +57,7 @@ public void init(ProcessorContext context) { public void process(K key, V value) { // send to all the registered topics RecordCollector collector = ((RecordCollector.Supplier) context).recordCollector(); - collector.send(new ProducerRecord<>(topic, key, value), keySerializer, valSerializer, partitioner); + collector.send(new ProducerRecord<>(topic, null, context.timestamp(), key, value), keySerializer, valSerializer, partitioner); } @Override diff --git a/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java b/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java index 05713c19c3ec..0c56c26e1765 100644 --- a/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java +++ b/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java @@ -57,6 +57,7 @@ public KStreamTestDriver(KStreamBuilder builder, this.topology = builder.build("X", null); this.stateDir = stateDir; this.context = new MockProcessorContext(this, stateDir, keySerde, valSerde, new MockRecordCollector()); + this.context.setTime(0L); for (StateStoreSupplier stateStoreSupplier : topology.stateStoreSuppliers()) { StateStore store = stateStoreSupplier.get(); From 9897813957c1b5cd70a0dd02094f184f73e06979 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Mon, 4 Apr 2016 18:49:29 -0700 Subject: [PATCH 056/267] KAFKA-3464: Add system tests for Connect with Kafka security enabled Author: Ewen Cheslack-Postava Reviewers: Ismael Juma, Gwen Shapira Closes #1141 from ewencp/kafka-3464-connect-security-system-tests --- tests/kafkatest/services/connect.py | 6 +++ tests/kafkatest/services/mirror_maker.py | 1 - .../services/security/security_config.py | 24 +++++++----- .../tests/connect/connect_distributed_test.py | 36 ++++++++++++++---- tests/kafkatest/tests/connect/connect_test.py | 37 ++++++++++++++----- .../templates/connect-distributed.properties | 7 +++- .../templates/connect-standalone.properties | 4 +- 7 files changed, 86 insertions(+), 29 deletions(-) diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index 76336e125741..51dade3471b5 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -48,6 +48,7 @@ class ConnectServiceBase(Service): def __init__(self, context, num_nodes, kafka, files): super(ConnectServiceBase, self).__init__(context, num_nodes) self.kafka = kafka + self.security_config = kafka.security_config.client_config() self.files = files def pids(self, node): @@ -89,6 +90,7 @@ def restart(self): def clean_node(self, node): node.account.kill_process("connect", clean_shutdown=False, allow_fail=True) + self.security_config.clean_node(node) node.account.ssh("rm -rf " + " ".join([self.CONFIG_FILE, self.LOG4J_CONFIG_FILE, self.PID_FILE, self.LOG_FILE, self.STDOUT_FILE, self.STDERR_FILE] + self.config_filenames() + 
self.files), allow_fail=False) def config_filenames(self): @@ -153,6 +155,7 @@ def node(self): def start_cmd(self, node, connector_configs): cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE + cmd += "export KAFKA_OPTS=%s; " % self.security_config.kafka_opts cmd += "/opt/%s/bin/connect-standalone.sh %s " % (kafka_dir(node), self.CONFIG_FILE) cmd += " ".join(connector_configs) cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE) @@ -161,6 +164,7 @@ def start_cmd(self, node, connector_configs): def start_node(self, node): node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False) + self.security_config.setup_node(node) node.account.create_file(self.CONFIG_FILE, self.config_template_func(node)) node.account.create_file(self.LOG4J_CONFIG_FILE, self.render('connect_log4j.properties', log_file=self.LOG_FILE)) remote_connector_configs = [] @@ -190,6 +194,7 @@ def __init__(self, context, num_nodes, kafka, files, offsets_topic="connect-offs def start_cmd(self, node): cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE + cmd += "export KAFKA_OPTS=%s; " % self.security_config.kafka_opts cmd += "/opt/%s/bin/connect-distributed.sh %s " % (kafka_dir(node), self.CONFIG_FILE) cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE) return cmd @@ -197,6 +202,7 @@ def start_cmd(self, node): def start_node(self, node): node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False) + self.security_config.setup_node(node) node.account.create_file(self.CONFIG_FILE, self.config_template_func(node)) node.account.create_file(self.LOG4J_CONFIG_FILE, self.render('connect_log4j.properties', log_file=self.LOG_FILE)) if self.connector_config_templates: diff --git a/tests/kafkatest/services/mirror_maker.py b/tests/kafkatest/services/mirror_maker.py index 4386788d5f5a..cb4b2c1ac9a0 100644 --- a/tests/kafkatest/services/mirror_maker.py +++ b/tests/kafkatest/services/mirror_maker.py @@ -1,4 +1,3 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. diff --git a/tests/kafkatest/services/security/security_config.py b/tests/kafkatest/services/security/security_config.py index b5efba81e4f8..1bbabd2359b0 100644 --- a/tests/kafkatest/services/security/security_config.py +++ b/tests/kafkatest/services/security/security_config.py @@ -17,6 +17,7 @@ import subprocess from ducktape.template import TemplateRenderer from kafkatest.services.security.minikdc import MiniKdc +import itertools class Keytool(object): @@ -172,17 +173,22 @@ def kafka_opts(self): else: return "" - def __str__(self): + def props(self, prefix=''): """ - Return properties as string with line separators. + Return properties as string with line separators, optionally with a prefix. This is used to append security config properties to a properties file. 
+ :param prefix: prefix to add to each property + :return: a string containing line-separated properties """ + if self.security_protocol == SecurityConfig.PLAINTEXT: + return "" + config_lines = (prefix + key + "=" + value for key, value in self.properties.iteritems()) + # Extra blank lines ensure this can be appended/prepended safely + return "\n".join(itertools.chain([""], config_lines, [""])) - prop_str = "" - if self.security_protocol != SecurityConfig.PLAINTEXT: - for key, value in self.properties.items(): - prop_str += ("\n" + key + "=" + value) - prop_str += "\n" - return prop_str - + def __str__(self): + """ + Return properties as a string with line separators. + """ + return self.props() diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py index 9aa16abfbd7a..698a827b1712 100644 --- a/tests/kafkatest/tests/connect/connect_distributed_test.py +++ b/tests/kafkatest/tests/connect/connect_distributed_test.py @@ -13,15 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kafkatest.tests.kafka_test import KafkaTest +from ducktape.tests.test import Test + +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.services.kafka import KafkaService from kafkatest.services.connect import ConnectDistributedService, VerifiableSource, VerifiableSink from kafkatest.services.console_consumer import ConsoleConsumer +from kafkatest.services.security.security_config import SecurityConfig from ducktape.utils.util import wait_until from ducktape.mark import matrix import subprocess, itertools, time from collections import Counter -class ConnectDistributedTest(KafkaTest): +class ConnectDistributedTest(Test): """ Simple test of Kafka Connect in distributed mode, producing data from files on one cluster and consuming it on another, validating the total output is identical to the input. @@ -45,22 +49,39 @@ class ConnectDistributedTest(KafkaTest): SCHEMA = { "type": "string", "optional": False } def __init__(self, test_context): - super(ConnectDistributedTest, self).__init__(test_context, num_zk=1, num_brokers=1, topics={ + super(ConnectDistributedTest, self).__init__(test_context) + self.num_zk = 1 + self.num_brokers = 1 + self.topics = { 'test' : { 'partitions': 1, 'replication-factor': 1 } - }) + } + + self.zk = ZookeeperService(test_context, self.num_zk) - self.cc = ConnectDistributedService(test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE]) - self.cc.log_level = "DEBUG" self.key_converter = "org.apache.kafka.connect.json.JsonConverter" self.value_converter = "org.apache.kafka.connect.json.JsonConverter" self.schemas = True - def test_file_source_and_sink(self): + def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT): + self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, + security_protocol=security_protocol, interbroker_security_protocol=security_protocol, + topics=self.topics) + + self.cc = ConnectDistributedService(self.test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE]) + self.cc.log_level = "DEBUG" + + self.zk.start() + self.kafka.start() + + + @matrix(security_protocol=[SecurityConfig.PLAINTEXT, SecurityConfig.SASL_SSL]) + def test_file_source_and_sink(self, security_protocol): """ Tests that a basic file connector works across clean rolling bounces. 
This validates that the connector is correctly created, tasks instantiated, and as nodes restart the work is rebalanced across nodes. """ + self.setup_services(security_protocol=security_protocol) self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) self.cc.start() @@ -94,6 +115,7 @@ def test_bounce(self, clean): """ num_tasks = 3 + self.setup_services() self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) self.cc.start() diff --git a/tests/kafkatest/tests/connect/connect_test.py b/tests/kafkatest/tests/connect/connect_test.py index 90f219a24271..7b57402bf7ce 100644 --- a/tests/kafkatest/tests/connect/connect_test.py +++ b/tests/kafkatest/tests/connect/connect_test.py @@ -13,14 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kafkatest.tests.kafka_test import KafkaTest +from ducktape.tests.test import Test + +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.services.kafka import KafkaService from kafkatest.services.connect import ConnectStandaloneService from kafkatest.services.console_consumer import ConsoleConsumer +from kafkatest.services.security.security_config import SecurityConfig from ducktape.utils.util import wait_until -from ducktape.mark import parametrize +from ducktape.mark import parametrize, matrix import hashlib, subprocess, json -class ConnectStandaloneFileTest(KafkaTest): +class ConnectStandaloneFileTest(Test): """ Simple test of Kafka Connect that produces data from a file in one standalone process and consumes it on another, validating the output is @@ -42,24 +46,39 @@ class ConnectStandaloneFileTest(KafkaTest): SCHEMA = { "type": "string", "optional": False } def __init__(self, test_context): - super(ConnectStandaloneFileTest, self).__init__(test_context, num_zk=1, num_brokers=1, topics={ + super(ConnectStandaloneFileTest, self).__init__(test_context) + self.num_zk = 1 + self.num_brokers = 1 + self.topics = { 'test' : { 'partitions': 1, 'replication-factor': 1 } - }) + } - self.source = ConnectStandaloneService(test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE]) - self.sink = ConnectStandaloneService(test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE]) - self.consumer_validator = ConsoleConsumer(test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=1000) + self.zk = ZookeeperService(test_context, self.num_zk) @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=True) @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=False) @parametrize(converter="org.apache.kafka.connect.storage.StringConverter", schemas=None) - def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True): + @matrix(security_protocol=[SecurityConfig.PLAINTEXT, SecurityConfig.SASL_SSL]) + def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'): assert converter != None, "converter type must be set" # Template parameters self.key_converter = converter self.value_converter = converter self.schemas = schemas + self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, + security_protocol=security_protocol, interbroker_security_protocol=security_protocol, + topics=self.topics) + + self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE]) + self.sink = 
ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE]) + self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC, + consumer_timeout_ms=1000, new_consumer=True) + + + self.zk.start() + self.kafka.start() + self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")]) self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")]) diff --git a/tests/kafkatest/tests/connect/templates/connect-distributed.properties b/tests/kafkatest/tests/connect/templates/connect-distributed.properties index 7a7440a4d907..48f5f789fae1 100644 --- a/tests/kafkatest/tests/connect/templates/connect-distributed.properties +++ b/tests/kafkatest/tests/connect/templates/connect-distributed.properties @@ -13,7 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -bootstrap.servers={{ kafka.bootstrap_servers() }} +bootstrap.servers={{ kafka.bootstrap_servers(kafka.security_config.security_protocol) }} +{{ kafka.security_config.client_config().props() }} +{{ kafka.security_config.client_config().props("producer.") }} +{{ kafka.security_config.client_config().props("consumer.") }} group.id={{ group|default("connect-cluster") }} @@ -43,4 +46,4 @@ rest.advertised.host.name = {{ node.account.hostname }} # Reduce session timeouts so tests that kill workers don't need to wait as long to recover session.timeout.ms=10000 -consumer.session.timeout.ms=10000 \ No newline at end of file +consumer.session.timeout.ms=10000 diff --git a/tests/kafkatest/tests/connect/templates/connect-standalone.properties b/tests/kafkatest/tests/connect/templates/connect-standalone.properties index bf1daf7bcc07..09c648720c7f 100644 --- a/tests/kafkatest/tests/connect/templates/connect-standalone.properties +++ b/tests/kafkatest/tests/connect/templates/connect-standalone.properties @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-bootstrap.servers={{ kafka.bootstrap_servers() }} +bootstrap.servers={{ kafka.bootstrap_servers(kafka.security_config.security_protocol) }} +{{ kafka.security_config.client_config().props("producer.") }} +{{ kafka.security_config.client_config().props("consumer.") }} key.converter={{ key_converter|default("org.apache.kafka.connect.json.JsonConverter") }} value.converter={{ value_converter|default("org.apache.kafka.connect.json.JsonConverter") }} From c36268f77fbf7f6a47a1e09ec3e38c20173a06c5 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 4 Apr 2016 21:28:59 -0700 Subject: [PATCH 057/267] KAFKA-2998: log warnings when client is disconnected from bootstrap brokers Author: Jason Gustafson Reviewers: Grant Henke, Guozhang Wang Closes #769 from hachikuji/KAFKA-2998 --- .../org/apache/kafka/clients/NetworkClient.java | 8 ++++++++ .../java/org/apache/kafka/common/Cluster.java | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java index d22b508cd878..d2eaace98b83 100644 --- a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java @@ -556,6 +556,14 @@ public boolean maybeHandleDisconnection(ClientRequest request) { ApiKeys requestKey = ApiKeys.forId(request.request().header().apiKey()); if (requestKey == ApiKeys.METADATA) { + Cluster cluster = metadata.fetch(); + if (cluster.isBootstrapConfigured()) { + int nodeId = Integer.parseInt(request.request().destination()); + Node node = cluster.nodeById(nodeId); + if (node != null) + log.warn("Bootstrap broker {}:{} disconnected", node.host(), node.port()); + } + metadataFetchInProgress = false; return true; } diff --git a/clients/src/main/java/org/apache/kafka/common/Cluster.java b/clients/src/main/java/org/apache/kafka/common/Cluster.java index 8e85df8f0903..e1bf581b3e5d 100644 --- a/clients/src/main/java/org/apache/kafka/common/Cluster.java +++ b/clients/src/main/java/org/apache/kafka/common/Cluster.java @@ -29,6 +29,7 @@ */ public final class Cluster { + private final boolean isBootstrapConfigured; private final List nodes; private final Set unauthorizedTopics; private final Map partitionsByTopicPartition; @@ -45,11 +46,19 @@ public final class Cluster { public Cluster(Collection nodes, Collection partitions, Set unauthorizedTopics) { + this(false, nodes, partitions, unauthorizedTopics); + } + + private Cluster(boolean isBootstrapConfigured, + Collection nodes, + Collection partitions, + Set unauthorizedTopics) { + this.isBootstrapConfigured = isBootstrapConfigured; + // make a randomized, unmodifiable copy of the nodes List copy = new ArrayList<>(nodes); Collections.shuffle(copy); this.nodes = Collections.unmodifiableList(copy); - this.nodesById = new HashMap<>(); for (Node node : nodes) this.nodesById.put(node.id(), node); @@ -115,7 +124,7 @@ public static Cluster bootstrap(List addresses) { int nodeId = -1; for (InetSocketAddress address : addresses) nodes.add(new Node(nodeId--, address.getHostString(), address.getPort())); - return new Cluster(nodes, new ArrayList(0), Collections.emptySet()); + return new Cluster(true, nodes, new ArrayList(0), Collections.emptySet()); } /** @@ -214,6 +223,10 @@ public Set unauthorizedTopics() { return unauthorizedTopics; } + public boolean isBootstrapConfigured() { + return isBootstrapConfigured; + } + @Override public String toString() { return "Cluster(nodes 
= " + this.nodes + ", partitions = " + this.partitionsByTopicPartition.values() + ")"; From d1a5883c8ad69fbd16c3dc03ff05db887580ded5 Mon Sep 17 00:00:00 2001 From: Matt McClure Date: Mon, 4 Apr 2016 22:07:20 -0700 Subject: [PATCH 058/267] KAFKA-3384: Conform to POSIX kill usage I believe this addresses KAFKA-3384. The POSIX kill manpage is at http://pubs.opengroup.org/onlinepubs/9699919799/utilities/kill.html Author: Matt McClure Reviewers: Geoff Anderson , Ewen Cheslack-Postava Closes #1148 from matthewlmcclure/KAFKA-3384 --- bin/kafka-server-stop.sh | 2 +- bin/zookeeper-server-stop.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/kafka-server-stop.sh b/bin/kafka-server-stop.sh index f75ded78ea49..d3c660cab5a7 100755 --- a/bin/kafka-server-stop.sh +++ b/bin/kafka-server-stop.sh @@ -19,6 +19,6 @@ if [ -z "$PIDS" ]; then echo "No kafka server to stop" exit 1 else - kill -SIGTERM $PIDS + kill -s TERM $PIDS fi diff --git a/bin/zookeeper-server-stop.sh b/bin/zookeeper-server-stop.sh index 07c79102b466..f771064cb550 100755 --- a/bin/zookeeper-server-stop.sh +++ b/bin/zookeeper-server-stop.sh @@ -19,6 +19,6 @@ if [ -z "$PIDS" ]; then echo "No zookeeper server to stop" exit 1 else - kill -SIGTERM $PIDS + kill -s TERM $PIDS fi From 703014824aeb0690dfb30a98fcd0f11e9d1e68fc Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 5 Apr 2016 11:46:04 -0700 Subject: [PATCH 059/267] KAFKA-3510; OffsetIndex thread safety * Make all fields accessed outside of a lock `volatile` * Only allow mutation within the class * Remove unnecessary `AtomicInteger` since mutation always happens inside a lock Author: Ismael Juma Reviewers: Guozhang Wang Closes #1188 from ijuma/kafka-3510-offset-index-thread-safety --- core/src/main/scala/kafka/log/Log.scala | 2 +- .../src/main/scala/kafka/log/LogSegment.scala | 2 +- .../main/scala/kafka/log/OffsetIndex.scala | 173 +++++++++--------- .../scala/kafka/tools/DumpLogSegments.scala | 2 +- .../unit/kafka/log/OffsetIndexTest.scala | 10 +- 5 files changed, 98 insertions(+), 91 deletions(-) diff --git a/core/src/main/scala/kafka/log/Log.scala b/core/src/main/scala/kafka/log/Log.scala index 81c19fae9318..8465b649ac77 100644 --- a/core/src/main/scala/kafka/log/Log.scala +++ b/core/src/main/scala/kafka/log/Log.scala @@ -215,7 +215,7 @@ class Log(val dir: File, val fileName = logFile.getName val startOffset = fileName.substring(0, fileName.length - LogFileSuffix.length).toLong val indexFile = new File(CoreUtils.replaceSuffix(logFile.getPath, LogFileSuffix, IndexFileSuffix) + SwapFileSuffix) - val index = new OffsetIndex(file = indexFile, baseOffset = startOffset, maxIndexSize = config.maxIndexSize) + val index = new OffsetIndex(indexFile, baseOffset = startOffset, maxIndexSize = config.maxIndexSize) val swapSegment = new LogSegment(new FileMessageSet(file = swapFile), index = index, baseOffset = startOffset, diff --git a/core/src/main/scala/kafka/log/LogSegment.scala b/core/src/main/scala/kafka/log/LogSegment.scala index 9fc68a4a9235..3a4bbc86d6c8 100755 --- a/core/src/main/scala/kafka/log/LogSegment.scala +++ b/core/src/main/scala/kafka/log/LogSegment.scala @@ -55,7 +55,7 @@ class LogSegment(val log: FileMessageSet, def this(dir: File, startOffset: Long, indexIntervalBytes: Int, maxIndexSize: Int, rollJitterMs: Long, time: Time, fileAlreadyExists: Boolean = false, initFileSize: Int = 0, preallocate: Boolean = false) = this(new FileMessageSet(file = Log.logFilename(dir, startOffset), fileAlreadyExists = fileAlreadyExists, initFileSize = initFileSize, 
preallocate = preallocate), - new OffsetIndex(file = Log.indexFilename(dir, startOffset), baseOffset = startOffset, maxIndexSize = maxIndexSize), + new OffsetIndex(Log.indexFilename(dir, startOffset), baseOffset = startOffset, maxIndexSize = maxIndexSize), startOffset, indexIntervalBytes, rollJitterMs, diff --git a/core/src/main/scala/kafka/log/OffsetIndex.scala b/core/src/main/scala/kafka/log/OffsetIndex.scala index e95c9d139ecd..ce35d6874c7c 100755 --- a/core/src/main/scala/kafka/log/OffsetIndex.scala +++ b/core/src/main/scala/kafka/log/OffsetIndex.scala @@ -24,7 +24,6 @@ import java.io._ import java.nio._ import java.nio.channels._ import java.util.concurrent.locks._ -import java.util.concurrent.atomic._ import kafka.utils._ import kafka.utils.CoreUtils.inLock import kafka.common.InvalidOffsetException @@ -54,62 +53,70 @@ import kafka.common.InvalidOffsetException * All external APIs translate from relative offsets to full offsets, so users of this class do not interact with the internal * storage format. */ -class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSize: Int = -1) extends Logging { +class OffsetIndex(@volatile private[this] var _file: File, val baseOffset: Long, val maxIndexSize: Int = -1) extends Logging { private val lock = new ReentrantLock /* initialize the memory mapping for this index */ - private var mmap: MappedByteBuffer = - { - val newlyCreated = file.createNewFile() - val raf = new RandomAccessFile(file, "rw") - try { - /* pre-allocate the file if necessary */ - if(newlyCreated) { - if(maxIndexSize < 8) - throw new IllegalArgumentException("Invalid max index size: " + maxIndexSize) - raf.setLength(roundToExactMultiple(maxIndexSize, 8)) - } - - /* memory-map the file */ - val len = raf.length() - val idx = raf.getChannel.map(FileChannel.MapMode.READ_WRITE, 0, len) - - /* set the position in the index for the next entry */ - if(newlyCreated) - idx.position(0) - else - // if this is a pre-existing index, assume it is all valid and set position to last entry - idx.position(roundToExactMultiple(idx.limit, 8)) - idx - } finally { - CoreUtils.swallow(raf.close()) + @volatile + private[this] var mmap: MappedByteBuffer = { + val newlyCreated = _file.createNewFile() + val raf = new RandomAccessFile(_file, "rw") + try { + /* pre-allocate the file if necessary */ + if (newlyCreated) { + if (maxIndexSize < 8) + throw new IllegalArgumentException("Invalid max index size: " + maxIndexSize) + raf.setLength(roundToExactMultiple(maxIndexSize, 8)) } + + /* memory-map the file */ + val len = raf.length() + val idx = raf.getChannel.map(FileChannel.MapMode.READ_WRITE, 0, len) + + /* set the position in the index for the next entry */ + if (newlyCreated) + idx.position(0) + else + // if this is a pre-existing index, assume it is all valid and set position to last entry + idx.position(roundToExactMultiple(idx.limit, 8)) + idx + } finally { + CoreUtils.swallow(raf.close()) } - + } + /* the number of eight-byte entries currently in the index */ - private var size = new AtomicInteger(mmap.position / 8) - - /** - * The maximum number of eight-byte entries this index can hold - */ @volatile - var maxEntries = mmap.limit / 8 - - /* the last offset in the index */ - var lastOffset = readLastEntry.offset + private[this] var _entries = mmap.position / 8 + + /* The maximum number of eight-byte entries this index can hold */ + @volatile + private[this] var _maxEntries = mmap.limit / 8 + + @volatile + private[this] var _lastOffset = readLastEntry.offset debug("Loaded index 
file %s with maxEntries = %d, maxIndexSize = %d, entries = %d, lastOffset = %d, file position = %d" - .format(file.getAbsolutePath, maxEntries, maxIndexSize, entries(), lastOffset, mmap.position)) + .format(_file.getAbsolutePath, _maxEntries, maxIndexSize, _entries, _lastOffset, mmap.position)) + + /** The maximum number of entries this index can hold */ + def maxEntries: Int = _maxEntries + + /** The last offset in the index */ + def lastOffset: Long = _lastOffset + + /** The index file */ + def file: File = _file /** * The last entry in the index */ def readLastEntry(): OffsetPosition = { inLock(lock) { - size.get match { + _entries match { case 0 => OffsetPosition(baseOffset, 0) - case s => OffsetPosition(baseOffset + relativeOffset(this.mmap, s-1), physical(this.mmap, s-1)) + case s => OffsetPosition(baseOffset + relativeOffset(mmap, s - 1), physical(mmap, s - 1)) } } } @@ -149,22 +156,22 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi val relOffset = targetOffset - baseOffset // check if the index is empty - if(entries == 0) + if (_entries == 0) return -1 // check if the target offset is smaller than the least offset - if(relativeOffset(idx, 0) > relOffset) + if (relativeOffset(idx, 0) > relOffset) return -1 // binary search for the entry var lo = 0 - var hi = entries-1 - while(lo < hi) { + var hi = _entries - 1 + while (lo < hi) { val mid = ceil(hi/2.0 + lo/2.0).toInt val found = relativeOffset(idx, mid) - if(found == relOffset) + if (found == relOffset) return mid - else if(found < relOffset) + else if (found < relOffset) lo = mid else hi = mid - 1 @@ -185,8 +192,8 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi */ def entry(n: Int): OffsetPosition = { maybeLock(lock) { - if(n >= entries) - throw new IllegalArgumentException("Attempt to fetch the %dth entry from an index of size %d.".format(n, entries)) + if(n >= _entries) + throw new IllegalArgumentException("Attempt to fetch the %dth entry from an index of size %d.".format(n, _entries)) val idx = mmap.duplicate OffsetPosition(relativeOffset(idx, n), physical(idx, n)) } @@ -197,17 +204,17 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi */ def append(offset: Long, position: Int) { inLock(lock) { - require(!isFull, "Attempt to append to a full index (size = " + size + ").") - if (size.get == 0 || offset > lastOffset) { - debug("Adding index entry %d => %d to %s.".format(offset, position, file.getName)) - this.mmap.putInt((offset - baseOffset).toInt) - this.mmap.putInt(position) - this.size.incrementAndGet() - this.lastOffset = offset - require(entries * 8 == mmap.position, entries + " entries but file position in index is " + mmap.position + ".") + require(!isFull, "Attempt to append to a full index (size = " + _entries + ").") + if (_entries == 0 || offset > _lastOffset) { + debug("Adding index entry %d => %d to %s.".format(offset, position, _file.getName)) + mmap.putInt((offset - baseOffset).toInt) + mmap.putInt(position) + _entries += 1 + _lastOffset = offset + require(_entries * 8 == mmap.position, _entries + " entries but file position in index is " + mmap.position + ".") } else { throw new InvalidOffsetException("Attempt to append an offset (%d) to position %d no larger than the last offset appended (%d) to %s." 
- .format(offset, entries, lastOffset, file.getAbsolutePath)) + .format(offset, _entries, _lastOffset, _file.getAbsolutePath)) } } } @@ -215,7 +222,7 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi /** * True iff there are no more slots available in this index */ - def isFull: Boolean = entries >= this.maxEntries + def isFull: Boolean = _entries >= _maxEntries /** * Truncate the entire index, deleting all entries @@ -252,9 +259,9 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi */ private def truncateToEntries(entries: Int) { inLock(lock) { - this.size.set(entries) - mmap.position(this.size.get * 8) - this.lastOffset = readLastEntry.offset + _entries = entries + mmap.position(_entries * 8) + _lastOffset = readLastEntry.offset } } @@ -264,7 +271,7 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi */ def trimToValidSize() { inLock(lock) { - resize(entries * 8) + resize(_entries * 8) } } @@ -276,18 +283,18 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi */ def resize(newSize: Int) { inLock(lock) { - val raf = new RandomAccessFile(file, "rw") + val raf = new RandomAccessFile(_file, "rw") val roundedNewSize = roundToExactMultiple(newSize, 8) - val position = this.mmap.position + val position = mmap.position /* Windows won't let us modify the file length while the file is mmapped :-( */ - if(Os.isWindows) - forceUnmap(this.mmap) + if (Os.isWindows) + forceUnmap(mmap) try { raf.setLength(roundedNewSize) - this.mmap = raf.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, roundedNewSize) - this.maxEntries = this.mmap.limit / 8 - this.mmap.position(position) + mmap = raf.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, roundedNewSize) + _maxEntries = mmap.limit / 8 + mmap.position(position) } finally { CoreUtils.swallow(raf.close()) } @@ -319,19 +326,19 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi * Delete this index file */ def delete(): Boolean = { - info("Deleting index " + this.file.getAbsolutePath) - if(Os.isWindows) - CoreUtils.swallow(forceUnmap(this.mmap)) - this.file.delete() + info("Deleting index " + _file.getAbsolutePath) + if (Os.isWindows) + CoreUtils.swallow(forceUnmap(mmap)) + _file.delete() } /** The number of entries in this index */ - def entries() = size.get + def entries = _entries /** * The number of bytes actually used by this index */ - def sizeInBytes() = 8 * entries + def sizeInBytes() = 8 * _entries /** Close the index */ def close() { @@ -343,8 +350,8 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi * @throws IOException if rename fails */ def renameTo(f: File) { - try Utils.atomicMoveWithFallback(file.toPath, f.toPath) - finally this.file = f + try Utils.atomicMoveWithFallback(_file.toPath, f.toPath) + finally _file = f } /** @@ -352,13 +359,13 @@ class OffsetIndex(@volatile var file: File, val baseOffset: Long, val maxIndexSi * @throws IllegalArgumentException if any problems are found */ def sanityCheck() { - require(entries == 0 || lastOffset > baseOffset, + require(_entries == 0 || lastOffset > baseOffset, "Corrupt index found, index file (%s) has non-zero size but the last offset is %d and the base offset is %d" - .format(file.getAbsolutePath, lastOffset, baseOffset)) - val len = file.length() - require(len % 8 == 0, - "Index file " + file.getName + " is corrupt, found " + len + - " bytes which is not positive or not a multiple of 8.") + 
.format(_file.getAbsolutePath, lastOffset, baseOffset)) + val len = _file.length() + require(len % 8 == 0, + "Index file " + _file.getName + " is corrupt, found " + len + + " bytes which is not positive or not a multiple of 8.") } /** diff --git a/core/src/main/scala/kafka/tools/DumpLogSegments.scala b/core/src/main/scala/kafka/tools/DumpLogSegments.scala index e882a300aac9..dc99672136b2 100755 --- a/core/src/main/scala/kafka/tools/DumpLogSegments.scala +++ b/core/src/main/scala/kafka/tools/DumpLogSegments.scala @@ -124,7 +124,7 @@ object DumpLogSegments { val startOffset = file.getName().split("\\.")(0).toLong val logFile = new File(file.getAbsoluteFile.getParent, file.getName.split("\\.")(0) + Log.LogFileSuffix) val messageSet = new FileMessageSet(logFile, false) - val index = new OffsetIndex(file = file, baseOffset = startOffset) + val index = new OffsetIndex(file, baseOffset = startOffset) //Check that index passes sanityCheck, this is the check that determines if indexes will be rebuilt on startup or not. if (indexSanityOnly) { diff --git a/core/src/test/scala/unit/kafka/log/OffsetIndexTest.scala b/core/src/test/scala/unit/kafka/log/OffsetIndexTest.scala index dfd7b54fcf20..869e618b9ef5 100644 --- a/core/src/test/scala/unit/kafka/log/OffsetIndexTest.scala +++ b/core/src/test/scala/unit/kafka/log/OffsetIndexTest.scala @@ -34,7 +34,7 @@ class OffsetIndexTest extends JUnitSuite { @Before def setup() { - this.idx = new OffsetIndex(file = nonExistantTempFile(), baseOffset = 45L, maxIndexSize = 30 * 8) + this.idx = new OffsetIndex(nonExistantTempFile(), baseOffset = 45L, maxIndexSize = 30 * 8) } @After @@ -103,7 +103,7 @@ class OffsetIndexTest extends JUnitSuite { idx.append(first.offset, first.position) idx.append(sec.offset, sec.position) idx.close() - val idxRo = new OffsetIndex(file = idx.file, baseOffset = idx.baseOffset) + val idxRo = new OffsetIndex(idx.file, baseOffset = idx.baseOffset) assertEquals(first, idxRo.lookup(first.offset)) assertEquals(sec, idxRo.lookup(sec.offset)) assertEquals(sec.offset, idxRo.lastOffset) @@ -113,7 +113,7 @@ class OffsetIndexTest extends JUnitSuite { @Test def truncate() { - val idx = new OffsetIndex(file = nonExistantTempFile(), baseOffset = 0L, maxIndexSize = 10 * 8) + val idx = new OffsetIndex(nonExistantTempFile(), baseOffset = 0L, maxIndexSize = 10 * 8) idx.truncate() for(i <- 1 until 10) idx.append(i, i) @@ -140,7 +140,7 @@ class OffsetIndexTest extends JUnitSuite { idx.append(5, 5) idx.truncate() - assertEquals("Full truncation should leave no entries", 0, idx.entries()) + assertEquals("Full truncation should leave no entries", 0, idx.entries) idx.append(0, 0) } @@ -169,4 +169,4 @@ class OffsetIndexTest extends JUnitSuite { file.delete() file } -} \ No newline at end of file +} From e733d8c2fbcee19ee77c436e66abb29850a2f7c2 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 5 Apr 2016 18:16:48 -0400 Subject: [PATCH 060/267] KAFKA-3489; Update request metrics if a client closes a connection while the broker response is in flight I also fixed a few issues in `SocketServerTest` and included a few clean-ups. 
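For readers who want the gist without wading through the SocketServer.scala diff below: the fix boils down to two code paths that must still record request metrics even though no response will ever be written to the client. What follows is a condensed, self-contained Scala sketch of those paths, not the actual broker code; SketchRequest, SketchResponse, ProcessorSketch and channelIsOpen are simplified stand-ins invented for illustration, while sendResponse, processDisconnected, inflightResponses and updateRequestMetrics mirror the names used in the patch itself.

import scala.collection.mutable

// Hypothetical stand-ins for RequestChannel.Request/Response (simplified for this sketch).
final case class SketchRequest(apiName: String) {
  def updateRequestMetrics(): Unit = println(s"recording request metrics for $apiName")
}
final case class SketchResponse(connectionId: String, request: SketchRequest)

// channelIsOpen stands in for "selector.channel(id) != null" in the real Processor.
class ProcessorSketch(channelIsOpen: String => Boolean) {
  private val inflightResponses = mutable.Map[String, SketchResponse]()

  // Mirrors Processor#sendResponse: if the selector already closed the connection (for example
  // because it was idle too long), there is nothing to write to, so record the metrics now.
  def sendResponse(response: SketchResponse): Unit =
    if (!channelIsOpen(response.connectionId)) response.request.updateRequestMetrics()
    else inflightResponses += (response.connectionId -> response)

  // Mirrors Processor#processDisconnected: a client that disconnects while its response is still
  // in flight never reaches the completed-send path, so the metrics update has to happen here.
  def processDisconnected(connectionIds: Iterable[String]): Unit =
    connectionIds.foreach { id =>
      inflightResponses.remove(id).foreach(_.request.updateRequestMetrics())
    }
}

The two new SocketServerTest cases further down, testClientDisconnectionUpdatesRequestMetrics and testBrokerSendAfterChannelClosedUpdatesRequestMetrics, exercise exactly these two paths.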
Author: Ismael Juma Reviewers: Jun Rao Closes #1172 from ijuma/kafka-3489-update-request-metrics-if-client-closes --- .../apache/kafka/common/network/Selector.java | 6 +- .../scala/kafka/network/RequestChannel.scala | 53 ++--- .../scala/kafka/network/SocketServer.scala | 185 ++++++++++-------- .../unit/kafka/network/SocketServerTest.scala | 141 ++++++++++--- 4 files changed, 257 insertions(+), 128 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/network/Selector.java b/clients/src/main/java/org/apache/kafka/common/network/Selector.java index 698b99c3b838..c33374181821 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/Selector.java +++ b/clients/src/main/java/org/apache/kafka/common/network/Selector.java @@ -491,7 +491,7 @@ public boolean isChannelReady(String id) { private KafkaChannel channelOrFail(String id) { KafkaChannel channel = this.channels.get(id); if (channel == null) - throw new IllegalStateException("Attempt to retrieve channel for which there is no open connection. Connection id " + id + " existing connections " + channels.keySet().toString()); + throw new IllegalStateException("Attempt to retrieve channel for which there is no open connection. Connection id " + id + " existing connections " + channels.keySet()); return channel; } @@ -551,7 +551,7 @@ private void addToStagedReceives(KafkaChannel channel, NetworkReceive receive) { * checks if there are any staged receives and adds to completedReceives */ private void addToCompletedReceives() { - if (this.stagedReceives.size() > 0) { + if (!this.stagedReceives.isEmpty()) { Iterator>> iter = this.stagedReceives.entrySet().iterator(); while (iter.hasNext()) { Map.Entry> entry = iter.next(); @@ -561,7 +561,7 @@ private void addToCompletedReceives() { NetworkReceive networkReceive = deque.poll(); this.completedReceives.add(networkReceive); this.sensors.recordBytesReceived(channel.id(), networkReceive.payload().limit()); - if (deque.size() == 0) + if (deque.isEmpty()) iter.remove(); } } diff --git a/core/src/main/scala/kafka/network/RequestChannel.scala b/core/src/main/scala/kafka/network/RequestChannel.scala index 1105802e0890..17c5b9b3fbed 100644 --- a/core/src/main/scala/kafka/network/RequestChannel.scala +++ b/core/src/main/scala/kafka/network/RequestChannel.scala @@ -117,36 +117,39 @@ object RequestChannel extends Logging { if (apiRemoteCompleteTimeMs < 0) apiRemoteCompleteTimeMs = responseCompleteTimeMs - val requestQueueTime = (requestDequeueTimeMs - startTimeMs).max(0L) - val apiLocalTime = (apiLocalCompleteTimeMs - requestDequeueTimeMs).max(0L) - val apiRemoteTime = (apiRemoteCompleteTimeMs - apiLocalCompleteTimeMs).max(0L) - val apiThrottleTime = (responseCompleteTimeMs - apiRemoteCompleteTimeMs).max(0L) - val responseQueueTime = (responseDequeueTimeMs - responseCompleteTimeMs).max(0L) - val responseSendTime = (endTimeMs - responseDequeueTimeMs).max(0L) + val requestQueueTime = math.max(requestDequeueTimeMs - startTimeMs, 0) + val apiLocalTime = math.max(apiLocalCompleteTimeMs - requestDequeueTimeMs, 0) + val apiRemoteTime = math.max(apiRemoteCompleteTimeMs - apiLocalCompleteTimeMs, 0) + val apiThrottleTime = math.max(responseCompleteTimeMs - apiRemoteCompleteTimeMs, 0) + val responseQueueTime = math.max(responseDequeueTimeMs - responseCompleteTimeMs, 0) + val responseSendTime = math.max(endTimeMs - responseDequeueTimeMs, 0) val totalTime = endTimeMs - startTimeMs - var metricsList = List(RequestMetrics.metricsMap(ApiKeys.forId(requestId).name)) - if (requestId == ApiKeys.FETCH.id) { 
- val isFromFollower = requestObj.asInstanceOf[FetchRequest].isFromFollower - metricsList ::= ( if (isFromFollower) - RequestMetrics.metricsMap(RequestMetrics.followFetchMetricName) - else - RequestMetrics.metricsMap(RequestMetrics.consumerFetchMetricName) ) - } - metricsList.foreach{ - m => m.requestRate.mark() - m.requestQueueTimeHist.update(requestQueueTime) - m.localTimeHist.update(apiLocalTime) - m.remoteTimeHist.update(apiRemoteTime) - m.throttleTimeHist.update(apiThrottleTime) - m.responseQueueTimeHist.update(responseQueueTime) - m.responseSendTimeHist.update(responseSendTime) - m.totalTimeHist.update(totalTime) + val fetchMetricNames = + if (requestId == ApiKeys.FETCH.id) { + val isFromFollower = requestObj.asInstanceOf[FetchRequest].isFromFollower + Seq( + if (isFromFollower) RequestMetrics.followFetchMetricName + else RequestMetrics.consumerFetchMetricName + ) + } + else Seq.empty + val metricNames = fetchMetricNames :+ ApiKeys.forId(requestId).name + metricNames.foreach { metricName => + val m = RequestMetrics.metricsMap(metricName) + m.requestRate.mark() + m.requestQueueTimeHist.update(requestQueueTime) + m.localTimeHist.update(apiLocalTime) + m.remoteTimeHist.update(apiRemoteTime) + m.throttleTimeHist.update(apiThrottleTime) + m.responseQueueTimeHist.update(responseQueueTime) + m.responseSendTimeHist.update(responseSendTime) + m.totalTimeHist.update(totalTime) } - if(requestLogger.isTraceEnabled) + if (requestLogger.isTraceEnabled) requestLogger.trace("Completed request:%s from connection %s;totalTime:%d,requestQueueTime:%d,localTime:%d,remoteTime:%d,responseQueueTime:%d,sendTime:%d,securityProtocol:%s,principal:%s" .format(requestDesc(true), connectionId, totalTime, requestQueueTime, apiLocalTime, apiRemoteTime, responseQueueTime, responseSendTime, securityProtocol, session.principal)) - else if(requestLogger.isDebugEnabled) + else if (requestLogger.isDebugEnabled) requestLogger.debug("Completed request:%s from connection %s;totalTime:%d,requestQueueTime:%d,localTime:%d,remoteTime:%d,responseQueueTime:%d,sendTime:%d,securityProtocol:%s,principal:%s" .format(requestDesc(false), connectionId, totalTime, requestQueueTime, apiLocalTime, apiRemoteTime, responseQueueTime, responseSendTime, securityProtocol, session.principal)) } diff --git a/core/src/main/scala/kafka/network/SocketServer.scala b/core/src/main/scala/kafka/network/SocketServer.scala index 5c31ac656607..f1ec2ef6c451 100644 --- a/core/src/main/scala/kafka/network/SocketServer.scala +++ b/core/src/main/scala/kafka/network/SocketServer.scala @@ -31,9 +31,8 @@ import kafka.common.KafkaException import kafka.metrics.KafkaMetricsGroup import kafka.server.KafkaConfig import kafka.utils._ -import org.apache.kafka.common.MetricName import org.apache.kafka.common.metrics._ -import org.apache.kafka.common.network.{Selector => KSelector, LoginType, Mode, ChannelBuilders} +import org.apache.kafka.common.network.{ChannelBuilders, KafkaChannel, LoginType, Mode, Selector => KSelector} import org.apache.kafka.common.security.auth.KafkaPrincipal import org.apache.kafka.common.protocol.SecurityProtocol import org.apache.kafka.common.protocol.types.SchemaException @@ -41,7 +40,7 @@ import org.apache.kafka.common.utils.{Time, Utils} import scala.collection._ import JavaConverters._ -import scala.util.control.{NonFatal, ControlThrowable} +import scala.util.control.{ControlThrowable, NonFatal} /** * An NIO socket server. 
The threading model is @@ -83,8 +82,6 @@ class SocketServer(val config: KafkaConfig, val metrics: Metrics, val time: Time val sendBufferSize = config.socketSendBufferBytes val recvBufferSize = config.socketReceiveBufferBytes - val maxRequestSize = config.socketRequestMaxBytes - val connectionsMaxIdleMs = config.connectionsMaxIdleMs val brokerId = config.brokerId var processorBeginIndex = 0 @@ -92,18 +89,8 @@ class SocketServer(val config: KafkaConfig, val metrics: Metrics, val time: Time val protocol = endpoint.protocolType val processorEndIndex = processorBeginIndex + numProcessorThreads - for (i <- processorBeginIndex until processorEndIndex) { - processors(i) = new Processor(i, - time, - maxRequestSize, - requestChannel, - connectionQuotas, - connectionsMaxIdleMs, - protocol, - config.values, - metrics - ) - } + for (i <- processorBeginIndex until processorEndIndex) + processors(i) = newProcessor(i, connectionQuotas, protocol) val acceptor = new Acceptor(endpoint, sendBufferSize, recvBufferSize, brokerId, processors.slice(processorBeginIndex, processorEndIndex), connectionQuotas) @@ -148,10 +135,27 @@ class SocketServer(val config: KafkaConfig, val metrics: Metrics, val time: Time } } + /* `protected` for test usage */ + protected[network] def newProcessor(id: Int, connectionQuotas: ConnectionQuotas, protocol: SecurityProtocol): Processor = { + new Processor(id, + time, + config.socketRequestMaxBytes, + requestChannel, + connectionQuotas, + config.connectionsMaxIdleMs, + protocol, + config.values, + metrics + ) + } + /* For test usage */ private[network] def connectionCount(address: InetAddress): Int = Option(connectionQuotas).fold(0)(_.get(address)) + /* For test usage */ + private[network] def processor(index: Int): Processor = processors(index) + } /** @@ -376,10 +380,7 @@ private[kafka] class Processor(val id: Int, private val newConnections = new ConcurrentLinkedQueue[SocketChannel]() private val inflightResponses = mutable.Map[String, RequestChannel.Response]() - private val channelBuilder = ChannelBuilders.create(protocol, Mode.SERVER, LoginType.SERVER, channelConfigs) - private val metricTags = new util.HashMap[String, String]() - metricTags.put("networkProcessor", id.toString) - + private val metricTags = Map("networkProcessor" -> id.toString).asJava newGauge("IdlePercent", new Gauge[Double] { @@ -398,65 +399,27 @@ private[kafka] class Processor(val id: Int, "socket-server", metricTags, false, - channelBuilder) + ChannelBuilders.create(protocol, Mode.SERVER, LoginType.SERVER, channelConfigs)) override def run() { startupComplete() - while(isRunning) { + while (isRunning) { try { // setup any new connections that have been queued up configureNewConnections() // register any new responses for writing processNewResponses() - - try { - selector.poll(300) - } catch { - case e @ (_: IllegalStateException | _: IOException) => - error("Closing processor %s due to illegal state or IO exception".format(id)) - swallow(closeAll()) - shutdownComplete() - throw e - } - selector.completedReceives.asScala.foreach { receive => - try { - val channel = selector.channel(receive.source) - val session = RequestChannel.Session(new KafkaPrincipal(KafkaPrincipal.USER_TYPE, channel.principal.getName), - channel.socketAddress) - val req = RequestChannel.Request(processor = id, connectionId = receive.source, session = session, buffer = receive.payload, startTimeMs = time.milliseconds, securityProtocol = protocol) - requestChannel.sendRequest(req) - selector.mute(receive.source) - } catch { - case e @ (_: 
InvalidRequestException | _: SchemaException) => - // note that even though we got an exception, we can assume that receive.source is valid. Issues with constructing a valid receive object were handled earlier - error("Closing socket for " + receive.source + " because of error", e) - close(selector, receive.source) - } - } - - selector.completedSends.asScala.foreach { send => - val resp = inflightResponses.remove(send.destination).getOrElse { - throw new IllegalStateException(s"Send for ${send.destination} completed, but not in `inflightResponses`") - } - resp.request.updateRequestMetrics() - selector.unmute(send.destination) - } - - selector.disconnected.asScala.foreach { connectionId => - val remoteHost = ConnectionId.fromString(connectionId).getOrElse { - throw new IllegalStateException(s"connectionId has unexpected format: $connectionId") - }.remoteHost - // the channel has been closed by the selector but the quotas still need to be updated - connectionQuotas.dec(InetAddress.getByName(remoteHost)) - } - + poll() + processCompletedReceives() + processCompletedSends() + processDisconnected() } catch { // We catch all the throwables here to prevent the processor thread from exiting. We do this because - // letting a processor exit might cause bigger impact on the broker. Usually the exceptions thrown would + // letting a processor exit might cause a bigger impact on the broker. Usually the exceptions thrown would // be either associated with a specific socket channel or a bad request. We just ignore the bad socket channel // or request. This behavior might need to be reviewed if we see an exception that need the entire broker to stop. - case e : ControlThrowable => throw e - case e : Throwable => + case e: ControlThrowable => throw e + case e: Throwable => error("Processor got uncaught exception.", e) } } @@ -468,7 +431,7 @@ private[kafka] class Processor(val id: Int, private def processNewResponses() { var curr = requestChannel.receiveResponse(id) - while(curr != null) { + while (curr != null) { try { curr.responseAction match { case RequestChannel.NoOpAction => @@ -478,9 +441,7 @@ private[kafka] class Processor(val id: Int, trace("Socket server received empty response to send, registering for read: " + curr) selector.unmute(curr.request.connectionId) case RequestChannel.SendAction => - trace("Socket server received response to send, registering for write and sending data: " + curr) - selector.send(curr.responseSend) - inflightResponses += (curr.request.connectionId -> curr) + sendResponse(curr) case RequestChannel.CloseConnectionAction => curr.request.updateRequestMetrics trace("Closing socket connection actively according to the response code.") @@ -492,6 +453,71 @@ private[kafka] class Processor(val id: Int, } } + /* `protected` for test usage */ + protected[network] def sendResponse(response: RequestChannel.Response) { + trace(s"Socket server received response to send, registering for write and sending data: $response") + val channel = selector.channel(response.responseSend.destination) + // `channel` can be null if the selector closed the connection because it was idle for too long + if (channel == null) { + warn(s"Attempting to send response via channel for which there is no open connection, connection id $id") + response.request.updateRequestMetrics() + } + else { + selector.send(response.responseSend) + inflightResponses += (response.request.connectionId -> response) + } + } + + private def poll() { + try selector.poll(300) + catch { + case e @ (_: IllegalStateException | _: 
IOException) => + error(s"Closing processor $id due to illegal state or IO exception") + swallow(closeAll()) + shutdownComplete() + throw e + } + } + + private def processCompletedReceives() { + selector.completedReceives.asScala.foreach { receive => + try { + val channel = selector.channel(receive.source) + val session = RequestChannel.Session(new KafkaPrincipal(KafkaPrincipal.USER_TYPE, channel.principal.getName), + channel.socketAddress) + val req = RequestChannel.Request(processor = id, connectionId = receive.source, session = session, buffer = receive.payload, startTimeMs = time.milliseconds, securityProtocol = protocol) + requestChannel.sendRequest(req) + selector.mute(receive.source) + } catch { + case e @ (_: InvalidRequestException | _: SchemaException) => + // note that even though we got an exception, we can assume that receive.source is valid. Issues with constructing a valid receive object were handled earlier + error(s"Closing socket for ${receive.source} because of error", e) + close(selector, receive.source) + } + } + } + + private def processCompletedSends() { + selector.completedSends.asScala.foreach { send => + val resp = inflightResponses.remove(send.destination).getOrElse { + throw new IllegalStateException(s"Send for ${send.destination} completed, but not in `inflightResponses`") + } + resp.request.updateRequestMetrics() + selector.unmute(send.destination) + } + } + + private def processDisconnected() { + selector.disconnected.asScala.foreach { connectionId => + val remoteHost = ConnectionId.fromString(connectionId).getOrElse { + throw new IllegalStateException(s"connectionId has unexpected format: $connectionId") + }.remoteHost + inflightResponses.remove(connectionId).foreach(_.request.updateRequestMetrics()) + // the channel has been closed by the selector but the quotas still need to be updated + connectionQuotas.dec(InetAddress.getByName(remoteHost)) + } + } + /** * Queue up a new connection for reading */ @@ -504,10 +530,10 @@ private[kafka] class Processor(val id: Int, * Register any new connections that have been queued up */ private def configureNewConnections() { - while(!newConnections.isEmpty) { + while (!newConnections.isEmpty) { val channel = newConnections.poll() try { - debug("Processor " + id + " listening to new connection from " + channel.socket.getRemoteSocketAddress) + debug(s"Processor $id listening to new connection from ${channel.socket.getRemoteSocketAddress}") val localHost = channel.socket().getLocalAddress.getHostAddress val localPort = channel.socket().getLocalPort val remoteHost = channel.socket().getInetAddress.getHostAddress @@ -515,12 +541,12 @@ private[kafka] class Processor(val id: Int, val connectionId = ConnectionId(localHost, localPort, remoteHost, remotePort).toString selector.register(connectionId, channel) } catch { - // We explicitly catch all non fatal exceptions and close the socket to avoid socket leak. The other - // throwables will be caught in processor and logged as uncaught exception. + // We explicitly catch all non fatal exceptions and close the socket to avoid a socket leak. The other + // throwables will be caught in processor and logged as uncaught exceptions. case NonFatal(e) => - // need to close the channel here to avoid socket leak. + // need to close the channel here to avoid a socket leak. 
close(channel) - error("Processor " + id + " closed connection from " + channel.getRemoteAddress, e) + error(s"Processor $id closed connection from ${channel.getRemoteAddress}", e) } } } @@ -535,6 +561,9 @@ private[kafka] class Processor(val id: Int, selector.close() } + /* For test usage */ + private[network] def channel(connectionId: String): Option[KafkaChannel] = + Option(selector.channel(connectionId)) /** * Wakeup the thread for selection. diff --git a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala index 5d28894e32bf..81e5232e85cd 100644 --- a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala +++ b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala @@ -39,7 +39,7 @@ import org.junit.Assert._ import org.junit._ import org.scalatest.junit.JUnitSuite -import scala.collection.Map +import scala.collection.mutable.ArrayBuffer class SocketServerTest extends JUnitSuite { val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 0) @@ -55,6 +55,7 @@ class SocketServerTest extends JUnitSuite { val metrics = new Metrics val server = new SocketServer(config, metrics, new SystemTime) server.startup() + val sockets = new ArrayBuffer[Socket] def sendRequest(socket: Socket, request: Array[Byte], id: Option[Short] = None) { val outgoing = new DataOutputStream(socket.getOutputStream) @@ -79,7 +80,12 @@ class SocketServerTest extends JUnitSuite { /* A simple request handler that just echos back the response */ def processRequest(channel: RequestChannel) { - val request = channel.receiveRequest + val request = channel.receiveRequest(2000) + assertNotNull("receiveRequest timed out", request) + processRequest(channel, request) + } + + def processRequest(channel: RequestChannel, request: RequestChannel.Request) { val byteBuffer = ByteBuffer.allocate(request.header.sizeOf + request.body.sizeOf) request.header.writeTo(byteBuffer) request.body.writeTo(byteBuffer) @@ -89,13 +95,18 @@ class SocketServerTest extends JUnitSuite { channel.sendResponse(new RequestChannel.Response(request.processor, request, send)) } - def connect(s: SocketServer = server, protocol: SecurityProtocol = SecurityProtocol.PLAINTEXT) = - new Socket("localhost", server.boundPort(protocol)) + def connect(s: SocketServer = server, protocol: SecurityProtocol = SecurityProtocol.PLAINTEXT) = { + val socket = new Socket("localhost", s.boundPort(protocol)) + sockets += socket + socket + } @After - def cleanup() { + def tearDown() { metrics.close() server.shutdown() + sockets.foreach(_.close()) + sockets.clear() } private def producerRequestBytes: Array[Byte] = { @@ -183,7 +194,7 @@ class SocketServerTest extends JUnitSuite { @Test def testMaxConnectionsPerIp() { - // make the maximum allowable number of connections and then leak them + // make the maximum allowable number of connections val conns = (0 until server.config.maxConnectionsPerIp).map(_ => connect()) // now try one more (should fail) val conn = connect() @@ -201,27 +212,30 @@ class SocketServerTest extends JUnitSuite { sendRequest(conn2, serializedBytes) val request = server.requestChannel.receiveRequest(2000) assertNotNull(request) - conn2.close() - conns.tail.foreach(_.close()) } @Test - def testMaxConnectionsPerIPOverrides() { - val overrideNum = 6 - val overrides = Map("localhost" -> overrideNum) + def testMaxConnectionsPerIpOverrides() { + val overrideNum = server.config.maxConnectionsPerIp + 1 val overrideProps = TestUtils.createBrokerConfig(0, 
TestUtils.MockZkConnect, port = 0) + overrideProps.put(KafkaConfig.MaxConnectionsPerIpOverridesProp, s"localhost:$overrideNum") val serverMetrics = new Metrics() - val overrideServer: SocketServer = new SocketServer(KafkaConfig.fromProps(overrideProps), serverMetrics, new SystemTime()) + val overrideServer = new SocketServer(KafkaConfig.fromProps(overrideProps), serverMetrics, new SystemTime()) try { overrideServer.startup() - // make the maximum allowable number of connections and then leak them - val conns = ((0 until overrideNum).map(i => connect(overrideServer))) + // make the maximum allowable number of connections + val conns = (0 until overrideNum).map(_ => connect(overrideServer)) + + // it should succeed + val serializedBytes = producerRequestBytes + sendRequest(conns.last, serializedBytes) + val request = overrideServer.requestChannel.receiveRequest(2000) + assertNotNull(request) + // now try one more (should fail) val conn = connect(overrideServer) conn.setSoTimeout(3000) assertEquals(-1, conn.getInputStream.read()) - conn.close() - conns.foreach(_.close()) } finally { overrideServer.shutdown() serverMetrics.close() @@ -229,16 +243,16 @@ class SocketServerTest extends JUnitSuite { } @Test - def testSslSocketServer(): Unit = { + def testSslSocketServer() { val trustStoreFile = File.createTempFile("truststore", ".jks") val overrideProps = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, interBrokerSecurityProtocol = Some(SecurityProtocol.SSL), trustStoreFile = Some(trustStoreFile)) overrideProps.put(KafkaConfig.ListenersProp, "SSL://localhost:0") val serverMetrics = new Metrics - val overrideServer: SocketServer = new SocketServer(KafkaConfig.fromProps(overrideProps), serverMetrics, new SystemTime) - overrideServer.startup() + val overrideServer = new SocketServer(KafkaConfig.fromProps(overrideProps), serverMetrics, new SystemTime) try { + overrideServer.startup() val sslContext = SSLContext.getInstance("TLSv1.2") sslContext.init(null, Array(TestUtils.trustAllCerts), new java.security.SecureRandom()) val socketFactory = sslContext.getSocketFactory @@ -271,12 +285,95 @@ class SocketServerTest extends JUnitSuite { } @Test - def testSessionPrincipal(): Unit = { + def testSessionPrincipal() { val socket = connect() val bytes = new Array[Byte](40) sendRequest(socket, bytes, Some(0)) - assertEquals(KafkaPrincipal.ANONYMOUS, server.requestChannel.receiveRequest().session.principal) - socket.close() + assertEquals(KafkaPrincipal.ANONYMOUS, server.requestChannel.receiveRequest(2000).session.principal) + } + + /* Test that we update request metrics if the client closes the connection while the broker response is in flight. 
*/ + @Test + def testClientDisconnectionUpdatesRequestMetrics() { + val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 0) + val serverMetrics = new Metrics + var conn: Socket = null + val overrideServer = new SocketServer(KafkaConfig.fromProps(props), serverMetrics, new SystemTime) { + override def newProcessor(id: Int, connectionQuotas: ConnectionQuotas, protocol: SecurityProtocol): Processor = { + new Processor(id, time, config.socketRequestMaxBytes, requestChannel, connectionQuotas, + config.connectionsMaxIdleMs, protocol, config.values, metrics) { + override protected[network] def sendResponse(response: RequestChannel.Response) { + conn.close() + super.sendResponse(response) + } + } + } + } + try { + overrideServer.startup() + conn = connect(overrideServer) + val serializedBytes = producerRequestBytes + sendRequest(conn, serializedBytes) + + val channel = overrideServer.requestChannel + val request = channel.receiveRequest(2000) + + val requestMetrics = RequestMetrics.metricsMap(ApiKeys.forId(request.requestId).name) + def totalTimeHistCount(): Long = requestMetrics.totalTimeHist.count + val expectedTotalTimeCount = totalTimeHistCount() + 1 + + // send a large buffer to ensure that the broker detects the client disconnection while writing to the socket channel. + // On Mac OS X, the initial write seems to always succeed and it is able to write up to 102400 bytes on the initial + // write. If the buffer is smaller than this, the write is considered complete and the disconnection is not + // detected. If the buffer is larger than 102400 bytes, a second write is attempted and it fails with an + // IOException. + val send = new NetworkSend(request.connectionId, ByteBuffer.allocate(550000)) + channel.sendResponse(new RequestChannel.Response(request.processor, request, send)) + TestUtils.waitUntilTrue(() => totalTimeHistCount() == expectedTotalTimeCount, + s"request metrics not updated, expected: $expectedTotalTimeCount, actual: ${totalTimeHistCount()}") + + } finally { + overrideServer.shutdown() + serverMetrics.close() + } + } + + /* + * Test that we update request metrics if the channel has been removed from the selector when the broker calls + * `selector.send` (selector closes old connections, for example). 
+ */ + @Test + def testBrokerSendAfterChannelClosedUpdatesRequestMetrics() { + val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 0) + props.setProperty(KafkaConfig.ConnectionsMaxIdleMsProp, "100") + val serverMetrics = new Metrics + var conn: Socket = null + val overrideServer = new SocketServer(KafkaConfig.fromProps(props), serverMetrics, new SystemTime) + try { + overrideServer.startup() + conn = connect(overrideServer) + val serializedBytes = producerRequestBytes + sendRequest(conn, serializedBytes) + val channel = overrideServer.requestChannel + val request = channel.receiveRequest(2000) + + TestUtils.waitUntilTrue(() => overrideServer.processor(request.processor).channel(request.connectionId).isEmpty, + s"Idle connection `${request.connectionId}` was not closed by selector") + + val requestMetrics = RequestMetrics.metricsMap(ApiKeys.forId(request.requestId).name) + def totalTimeHistCount(): Long = requestMetrics.totalTimeHist.count + val expectedTotalTimeCount = totalTimeHistCount() + 1 + + processRequest(channel, request) + + TestUtils.waitUntilTrue(() => totalTimeHistCount() == expectedTotalTimeCount, + s"request metrics not updated, expected: $expectedTotalTimeCount, actual: ${totalTimeHistCount()}") + + } finally { + overrideServer.shutdown() + serverMetrics.close() + } + } } From 35fadbf639650a14f061a97904755d12499fd7fa Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Tue, 5 Apr 2016 15:17:46 -0700 Subject: [PATCH 061/267] KAFKA-3508: Fix transient SimpleACLAuthorizerTest failures Allows the the maximum retires when writing to zookeeper to be overridden in tests and sets the value to Int.MaxValue to avoid transient failure. Author: Grant Henke Reviewers: Ismael Juma , Ewen Cheslack-Postava Closes #1156 from granthenke/transient-acl-test --- .../scala/kafka/security/auth/SimpleAclAuthorizer.scala | 2 +- .../unit/kafka/security/auth/SimpleAclAuthorizerTest.scala | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/security/auth/SimpleAclAuthorizer.scala b/core/src/main/scala/kafka/security/auth/SimpleAclAuthorizer.scala index 1a06af2ad8fd..18fff453e085 100644 --- a/core/src/main/scala/kafka/security/auth/SimpleAclAuthorizer.scala +++ b/core/src/main/scala/kafka/security/auth/SimpleAclAuthorizer.scala @@ -79,7 +79,7 @@ class SimpleAclAuthorizer extends Authorizer with Logging { // The maximum number of times we should try to update the resource acls in zookeeper before failing; // This should never occur, but is a safeguard just in case. 
- private val maxUpdateRetries = 10 + protected[auth] var maxUpdateRetries = 10 private val retryBackoffMs = 100 private val retryBackoffJitterMs = 50 diff --git a/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala b/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala index bdadb15f430d..7fcc33dd8c1b 100644 --- a/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala +++ b/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala @@ -44,6 +44,10 @@ class SimpleAclAuthorizerTest extends ZooKeeperTestHarness { override def setUp() { super.setUp() + // Increase maxUpdateRetries to avoid transient failures + simpleAclAuthorizer.maxUpdateRetries = Int.MaxValue + simpleAclAuthorizer2.maxUpdateRetries = Int.MaxValue + val props = TestUtils.createBrokerConfig(0, zkConnect) props.put(SimpleAclAuthorizer.SuperUsersProp, superUsers) @@ -307,7 +311,7 @@ class SimpleAclAuthorizerTest extends ZooKeeperTestHarness { def testHighConcurrencyModificationOfResourceAcls() { val commonResource = new Resource(Topic, "test") - val acls = (0 to 100).map { i => + val acls = (0 to 50).map { i => val useri = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, i.toString) new Acl(useri, Allow, WildCardHost, Read) } From 5c5fe7bd795f5aab5248fb718c61c8ca3f2f571a Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Tue, 5 Apr 2016 15:56:09 -0700 Subject: [PATCH 062/267] KAFKA-3477: extended KStream/KTable API to specify custom partitioner for sinks Author: mjsax Reviewers: Guozhang Wang Closes #1180 from mjsax/kafka-3477-streamPartitioner-DSL --- .../apache/kafka/streams/KafkaStreams.java | 2 +- .../apache/kafka/streams/kstream/KStream.java | 88 ++++++++++++++++--- .../apache/kafka/streams/kstream/KTable.java | 88 ++++++++++++++++--- .../kstream/internals/KStreamImpl.java | 39 +++++--- .../streams/kstream/internals/KTableImpl.java | 30 ++++++- 5 files changed, 206 insertions(+), 41 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java index 20958e4a4730..e8fda103fdc6 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java @@ -49,7 +49,7 @@ * A {@link KafkaStreams} instance can co-ordinate with any other instances with the same application ID (whether in this same process, on other processes * on this machine, or on remote machines) as a single (possibly distributed) stream processing client. These instances will divide up the work * based on the assignment of the input topic partitions so that all partitions are being - * consumed. If instances are added or failed, all instances will rebelance the partition assignment among themselves + * consumed. If instances are added or failed, all instances will rebalance the partition assignment among themselves * to balance processing load. *
<p>
    * Internally the {@link KafkaStreams} instance contains a normal {@link org.apache.kafka.clients.producer.KafkaProducer KafkaProducer} diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java index 2313b8bf749c..e4933cb08e71 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java @@ -21,6 +21,7 @@ import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.ProcessorSupplier; +import org.apache.kafka.streams.processor.StreamPartitioner; /** * KStream is an abstraction of a record stream of key-value pairs. @@ -92,44 +93,105 @@ public interface KStream { /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic - * using default serializers and deserializers. + * using default serializers and deserializers and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(String...)}. * * @param topic the topic name */ KStream through(String topic); + /** + * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic + * using default serializers and deserializers and a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * This is equivalent to calling {@link #to(StreamPartitioner, String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(String...)}. + * + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + KStream through(StreamPartitioner partitioner, String topic); + /** * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic. + * If {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} + * for the key {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used + * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. * This is equivalent to calling {@link #to(Serde, Serde, String)} and * {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(Serde, Serde, String...)}. * - * @param keySerde key serde used to send key-value pairs, - * if not specified the default key serde defined in the configuration will be used - * @param valSerde value serde used to send key-value pairs, - * if not specified the default value serde defined in the configuration will be used - * @param topic the topic name + * @param keySerde key serde used to send key-value pairs, + * if not specified the default key serde defined in the configuration will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default value serde defined in the configuration will be used + * @param topic the topic name */ KStream through(Serde keySerde, Serde valSerde, String topic); /** - * Materialize this stream to a topic using default serializers specified in the config. 
+ * Materialize this stream to a topic, also creates a new instance of {@link KStream} from the topic + * using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * This is equivalent to calling {@link #to(Serde, Serde, StreamPartitioner, String)} and + * {@link org.apache.kafka.streams.kstream.KStreamBuilder#stream(Serde, Serde, String...)}. + * + * @param keySerde key serde used to send key-value pairs, + * if not specified the default key serde defined in the configuration will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default value serde defined in the configuration will be used + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used + * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + KStream through(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); + + /** + * Materialize this stream to a topic using default serializers specified in the config + * and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. * * @param topic the topic name */ void to(String topic); /** - * Materialize this stream to a topic. + * Materialize this stream to a topic using default serializers specified in the config and a customizable + * {@link StreamPartitioner} to determine the distribution of records to partitions. * - * @param keySerde key serde used to send key-value pairs, - * if not specified the default serde defined in the configs will be used - * @param valSerde value serde used to send key-value pairs, - * if not specified the default serde defined in the configs will be used - * @param topic the topic name + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + void to(StreamPartitioner partitioner, String topic); + + /** + * Materialize this stream to a topic. If {@code keySerde} provides a + * {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used + * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. + * + * @param keySerde key serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param topic the topic name */ void to(Serde keySerde, Serde valSerde, String topic); + /** + * Materialize this stream to a topic using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. 
+ * + * @param keySerde key serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used + * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); + /** * Create a new {@link KStream} instance by applying a {@link org.apache.kafka.streams.kstream.Transformer} to all elements in this stream, one element at a time. * diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java index 30ea88256379..581ee282360e 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java @@ -20,6 +20,7 @@ import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.processor.StreamPartitioner; /** * KTable is an abstraction of a changelog stream from a primary-keyed table. @@ -54,44 +55,105 @@ public interface KTable { /** * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic - * using default serializers and deserializers. + * using default serializers and deserializers and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(String)}. * * @param topic the topic name */ KTable through(String topic); + /** + * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic using default serializers + * and deserializers and a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(String)}. + * + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + KTable through(StreamPartitioner partitioner, String topic); + /** * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic. + * If {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} + * for the key {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used + * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. * This is equivalent to calling {@link #to(Serde, Serde, String)} and * {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(Serde, Serde, String)}. 
* - * @param keySerde key serde used to send key-value pairs, - * if not specified the default key serde defined in the configuration will be used - * @param valSerde value serde used to send key-value pairs, - * if not specified the default value serde defined in the configuration will be used - * @param topic the topic name + * @param keySerde key serde used to send key-value pairs, + * if not specified the default key serde defined in the configuration will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default value serde defined in the configuration will be used + * @param topic the topic name */ KTable through(Serde keySerde, Serde valSerde, String topic); /** - * Materialize this stream to a topic using default serializers specified in the config. + * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic + * using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * This is equivalent to calling {@link #to(Serde, Serde, StreamPartitioner, String)} and + * {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(Serde, Serde, String)}. + * + * @param keySerde key serde used to send key-value pairs, + * if not specified the default key serde defined in the configuration will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default value serde defined in the configuration will be used + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used + * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + KTable through(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); + + /** + * Materialize this stream to a topic using default serializers specified in the config + * and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. * * @param topic the topic name */ void to(String topic); /** - * Materialize this stream to a topic. + * Materialize this stream to a topic using default serializers specified in the config + * and a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + void to(StreamPartitioner partitioner, String topic); + + /** + * Materialize this stream to a topic. If {@code keySerde} provides a + * {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used + * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. 
* - * @param keySerde key serde used to send key-value pairs, - * if not specified the default serde defined in the configs will be used - * @param valSerde value serde used to send key-value pairs, - * if not specified the default serde defined in the configs will be used - * @param topic the topic name + * @param keySerde key serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param topic the topic name */ void to(Serde keySerde, Serde valSerde, String topic); + /** + * Materialize this stream to a topic using a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. + * + * @param keySerde key serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param valSerde value serde used to send key-value pairs, + * if not specified the default serde defined in the configs will be used + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key + * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used + * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * @param topic the topic name + */ + void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); + /** * Convert this stream to a new instance of {@link KStream}. */ diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java index 5889e078c330..0fb3984245f1 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java @@ -194,37 +194,56 @@ public static KStream merge(KStreamBuilder topology, KStream[ } @Override - public KStream through(Serde keySerde, Serde valSerde, String topic) { - to(keySerde, valSerde, topic); + public KStream through(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic) { + to(keySerde, valSerde, partitioner, topic); return topology.stream(keySerde, valSerde, topic); } + @Override + public KStream through(Serde keySerde, Serde valSerde, String topic) { + return through(keySerde, valSerde, null, topic); + } + + @Override + public KStream through(StreamPartitioner partitioner, String topic) { + return through(null, null, partitioner, topic); + } + @Override public KStream through(String topic) { - return through(null, null, topic); + return through(null, null, null, topic); } @Override public void to(String topic) { - to(null, null, topic); + to(null, null, null, topic); + } + + @Override + public void to(StreamPartitioner partitioner, String topic) { + to(null, null, partitioner, topic); } - @SuppressWarnings("unchecked") @Override public void to(Serde keySerde, Serde valSerde, String topic) { + to(keySerde, valSerde, null, topic); + } + + @SuppressWarnings("unchecked") + @Override + public void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic) { String name = topology.newName(SINK_NAME); - StreamPartitioner streamPartitioner = null; Serializer 
keySerializer = keySerde == null ? null : keySerde.serializer(); Serializer valSerializer = keySerde == null ? null : valSerde.serializer(); - - if (keySerializer != null && keySerializer instanceof WindowedSerializer) { + + if (partitioner == null && keySerializer != null && keySerializer instanceof WindowedSerializer) { WindowedSerializer windowedSerializer = (WindowedSerializer) keySerializer; - streamPartitioner = (StreamPartitioner) new WindowedStreamPartitioner(windowedSerializer); + partitioner = (StreamPartitioner) new WindowedStreamPartitioner(windowedSerializer); } - topology.addSink(name, topic, keySerializer, valSerializer, streamPartitioner, this.name); + topology.addSink(name, topic, keySerializer, valSerializer, partitioner, this.name); } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java index fd464a08d7e9..156f2db4b18c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java @@ -32,6 +32,7 @@ import org.apache.kafka.streams.kstream.ValueMapper; import org.apache.kafka.streams.processor.ProcessorSupplier; import org.apache.kafka.streams.processor.StateStoreSupplier; +import org.apache.kafka.streams.processor.StreamPartitioner; import org.apache.kafka.streams.state.Stores; import java.util.Collections; @@ -133,25 +134,46 @@ public KTable mapValues(ValueMapper mapper) { @Override public KTable through(Serde keySerde, Serde valSerde, + StreamPartitioner partitioner, String topic) { - to(keySerde, valSerde, topic); + to(keySerde, valSerde, partitioner, topic); return topology.table(keySerde, valSerde, topic); } + @Override + public KTable through(Serde keySerde, Serde valSerde, String topic) { + return through(keySerde, valSerde, null, topic); + } + + @Override + public KTable through(StreamPartitioner partitioner, String topic) { + return through(null, null, partitioner, topic); + } + @Override public KTable through(String topic) { - return through(null, null, topic); + return through(null, null, null, topic); } @Override public void to(String topic) { - to(null, null, topic); + to(null, null, null, topic); + } + + @Override + public void to(StreamPartitioner partitioner, String topic) { + to(null, null, partitioner, topic); } @Override public void to(Serde keySerde, Serde valSerde, String topic) { - this.toStream().to(keySerde, valSerde, topic); + this.toStream().to(keySerde, valSerde, null, topic); + } + + @Override + public void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic) { + this.toStream().to(keySerde, valSerde, partitioner, topic); } @Override From 050bf60a4725b239db76732d0f7d7f1c7c41ab06 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 09:48:02 -0700 Subject: [PATCH 063/267] Changing version to 0.10.0.0 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index 0a612f6c8710..7f30b4d4d7ed 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. 
-version=0.10.1.0-SNAPSHOT +version=0.10.0.0 scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m From aa6f0d8d5f084126bbba1fb6b40e392f2b908262 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 21 Mar 2016 18:58:45 -0700 Subject: [PATCH 064/267] MINOR: update new version in additional places matching set of version fixes. ewencp junrao Author: Gwen Shapira Reviewers: Ismael Juma , Geoff Anderson , Ewen Cheslack-Postava Closes #1110 from gwenshap/minor-fix-version-010 --- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index f26a0a9c8197..2345dbbd4516 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.1.0") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.1") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index 10163a02b5a8..e1c87b7b4a55 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,5 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.1.0.dev0' +__version__ = '0.10.0.0' + From 0773bc4ba604bc8b57040583ac4c1cb6832ba188 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Wed, 23 Mar 2016 15:41:07 -0700 Subject: [PATCH 065/267] MINOR: Revert 0.10.0 branch to SNAPSHOT per change in release process Author: Gwen Shapira Reviewers: Ewen Cheslack-Postava Closes #1126 from gwenshap/minor-release-version --- gradle.properties | 2 +- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/gradle.properties b/gradle.properties index 7f30b4d4d7ed..b058e58cbe30 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. 
-version=0.10.0.0 +version=0.10.0.0-SNAPSHOT scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index 2345dbbd4516..e12410511b65 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.1") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.0") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index e1c87b7b4a55..df1a6129dbc8 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,5 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.0' - +__version__ = '0.10.0.0.dev0' From e97735f035110fe522197f125d8b6c24c157ae78 Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Mon, 11 Apr 2016 16:29:29 -0700 Subject: [PATCH 066/267] Minor: Correcting version to 0.10.0.0-SNAPSHOT again following merge with trunk --- gradle.properties | 2 +- kafka-merge-pr.py | 2 +- tests/kafkatest/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gradle.properties b/gradle.properties index 0a612f6c8710..b058e58cbe30 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. -version=0.10.1.0-SNAPSHOT +version=0.10.0.0-SNAPSHOT scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py index f26a0a9c8197..e12410511b65 100644 --- a/kafka-merge-pr.py +++ b/kafka-merge-pr.py @@ -72,7 +72,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.1.0") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "0.10.0.0") def get_json(url): try: diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index 10163a02b5a8..df1a6129dbc8 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.1.0.dev0' +__version__ = '0.10.0.0.dev0' From f38bba89e4633ae0fb6b81ce64b507bb2a2999cc Mon Sep 17 00:00:00 2001 From: Eno Thereska Date: Fri, 29 Apr 2016 15:14:36 -0700 Subject: [PATCH 067/267] HOTFIX: Fix equality semantics of KeyValue Fixes wrong KeyValue equals logic when keys not equal but values equal. Original hotfix PR at https://github.com/apache/kafka/pull/1293 (/cc enothereska) Please review: ewencp ijuma guozhangwang Author: Eno Thereska Author: Michael G. Noll Reviewers: Michael G. 
Noll , Ewen Cheslack-Postava Closes #1294 from miguno/KeyValue-equality-hotfix (cherry picked from commit 60380e31d4bf6688d8d26ec44cf514a3c32731cb) Signed-off-by: Ewen Cheslack-Postava --- .../org/apache/kafka/streams/KeyValue.java | 16 ++--- .../apache/kafka/streams/KeyValueTest.java | 65 +++++++++++++------ 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/KeyValue.java b/streams/src/main/java/org/apache/kafka/streams/KeyValue.java index 58f2083b8457..64b38cdfbf36 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KeyValue.java +++ b/streams/src/main/java/org/apache/kafka/streams/KeyValue.java @@ -63,22 +63,22 @@ public String toString() { } @Override - public boolean equals(Object other) { - if (this == other) + public boolean equals(Object obj) { + if (this == obj) return true; - if (other instanceof KeyValue) { - KeyValue otherKV = (KeyValue) other; - - return key == null ? otherKV.key == null : key.equals(otherKV.key) - && value == null ? otherKV.value == null : value.equals(otherKV.value); - } else { + if (!(obj instanceof KeyValue)) { return false; } + + KeyValue other = (KeyValue) obj; + return (this.key == null ? other.key == null : this.key.equals(other.key)) + && (this.value == null ? other.value == null : this.value.equals(other.value)); } @Override public int hashCode() { return Objects.hash(key, value); } + } diff --git a/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java b/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java index 47c8ecd1c228..805fa18369e6 100644 --- a/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java @@ -24,27 +24,50 @@ public class KeyValueTest { - private KeyValue kv1a = new KeyValue<>("key1", 1L); - private KeyValue kv1b = new KeyValue<>("key1", 1L); - private KeyValue kv2 = new KeyValue<>("key2", 2L); - private KeyValue kv3 = new KeyValue<>("key3", 3L); - @Test - public void testEquals() { - assertTrue(kv1a.equals(kv1a)); - assertTrue(kv1a.equals(kv1b)); - assertTrue(kv1b.equals(kv1a)); - assertFalse(kv1a.equals(kv2)); - assertFalse(kv1a.equals(kv3)); - assertFalse(kv2.equals(kv3)); - assertFalse(kv1a.equals(null)); - } + public void shouldHaveSaneEqualsAndHashCode() { + KeyValue kv = KeyValue.pair("key1", 1L); + KeyValue copyOfKV = KeyValue.pair(kv.key, kv.value); - @Test - public void testHashcode() { - assertTrue(kv1a.hashCode() == kv1b.hashCode()); - assertFalse(kv1a.hashCode() == kv2.hashCode()); - assertFalse(kv1a.hashCode() == kv3.hashCode()); - assertFalse(kv2.hashCode() == kv3.hashCode()); + // Reflexive + assertTrue(kv.equals(kv)); + assertTrue(kv.hashCode() == kv.hashCode()); + + // Symmetric + assertTrue(kv.equals(copyOfKV)); + assertTrue(kv.hashCode() == copyOfKV.hashCode()); + assertTrue(copyOfKV.hashCode() == kv.hashCode()); + + // Transitive + KeyValue copyOfCopyOfKV = KeyValue.pair(copyOfKV.key, copyOfKV.value); + assertTrue(copyOfKV.equals(copyOfCopyOfKV)); + assertTrue(copyOfKV.hashCode() == copyOfCopyOfKV.hashCode()); + assertTrue(kv.equals(copyOfCopyOfKV)); + assertTrue(kv.hashCode() == copyOfCopyOfKV.hashCode()); + + // Inequality scenarios + assertFalse("must be false for null", kv.equals(null)); + assertFalse("must be false if key is non-null and other key is null", kv.equals(KeyValue.pair(null, kv.value))); + assertFalse("must be false if value is non-null and other value is null", kv.equals(KeyValue.pair(kv.key, null))); + KeyValue 
differentKeyType = KeyValue.pair(1L, kv.value); + assertFalse("must be false for different key types", kv.equals(differentKeyType)); + KeyValue differentValueType = KeyValue.pair(kv.key, "anyString"); + assertFalse("must be false for different value types", kv.equals(differentValueType)); + KeyValue differentKeyValueTypes = KeyValue.pair(1L, "anyString"); + assertFalse("must be false for different key and value types", kv.equals(differentKeyValueTypes)); + assertFalse("must be false for different types of objects", kv.equals(new Object())); + + KeyValue differentKey = KeyValue.pair(kv.key + "suffix", kv.value); + assertFalse("must be false if key is different", kv.equals(differentKey)); + assertFalse("must be false if key is different", differentKey.equals(kv)); + + KeyValue differentValue = KeyValue.pair(kv.key, kv.value + 1L); + assertFalse("must be false if value is different", kv.equals(differentValue)); + assertFalse("must be false if value is different", differentValue.equals(kv)); + + KeyValue differentKeyAndValue = KeyValue.pair(kv.key + "suffix", kv.value + 1L); + assertFalse("must be false if key and value are different", kv.equals(differentKeyAndValue)); + assertFalse("must be false if key and value are different", differentKeyAndValue.equals(kv)); } -} + +} \ No newline at end of file From 548638af118b4ef36e7b6a1c5cb172604ca732fa Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 30 Apr 2016 18:53:44 -0700 Subject: [PATCH 068/267] HOTFIX: Fix main classpath libs glob for release (fixup KAFKA-3615 regression) bin/kafka-run-class.sh does not correctly setup the CLASSPATH in release rc2. Author: Dana Powers Reviewers: Ewen Cheslack-Postava Closes #1302 from dpkp/KAFKA-3615-fix (cherry picked from commit 1e4dd66b19857f4f0ce3f83fd0a808885b0a88c1) Signed-off-by: Ewen Cheslack-Postava --- bin/kafka-run-class.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh index e7f8d2ef9fec..688cced28614 100755 --- a/bin/kafka-run-class.sh +++ b/bin/kafka-run-class.sh @@ -118,7 +118,7 @@ do done # classpath addition for release -for file in $base_dir/libs; +for file in $base_dir/libs/*; do if should_include_file "$file"; then CLASSPATH=$CLASSPATH:$file From 3cfe73a2cadc2321602a6777444282180484fa9a Mon Sep 17 00:00:00 2001 From: Geoff Anderson Date: Mon, 2 May 2016 21:42:01 -0700 Subject: [PATCH 069/267] MINOR: Add version check on enable-systest-events flag Recent patch adding enable-systest-events flag without any version check breaks all uses of versioned console consumer. E.g. upgrade tests, compatibility tests etc. Added a check to only apply the flag if running 0.10.0 or greater. 
Author: Geoff Anderson Reviewers: Ewen Cheslack-Postava Closes #1298 from granders/minor-systest-fix-versioned-console-consumer (cherry picked from commit 5c47b9f80e2130ab4cc76e2645e0ac0215bf8abe) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/services/console_consumer.py | 24 ++++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/kafkatest/services/console_consumer.py b/tests/kafkatest/services/console_consumer.py index 37638e2a9812..5a33052c4472 100644 --- a/tests/kafkatest/services/console_consumer.py +++ b/tests/kafkatest/services/console_consumer.py @@ -17,9 +17,8 @@ from ducktape.services.background_thread import BackgroundThreadService from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK, LATEST_0_8_2, LATEST_0_9 +from kafkatest.services.kafka.version import TRUNK, LATEST_0_8_2, LATEST_0_9, V_0_10_0_0 from kafkatest.services.monitor.jmx import JmxMixin -from kafkatest.services.security.security_config import SecurityConfig import itertools import os @@ -91,7 +90,8 @@ class ConsoleConsumer(JmxMixin, BackgroundThreadService): def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-group", new_consumer=False, message_validator=None, from_beginning=True, consumer_timeout_ms=None, version=TRUNK, - client_id="console-consumer", print_key=False, jmx_object_names=None, jmx_attributes=[]): + client_id="console-consumer", print_key=False, jmx_object_names=None, jmx_attributes=[], + enable_systest_events=False): """ Args: context: standard context @@ -106,6 +106,8 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro waiting for the consumer to stop is a pretty good way to consume all messages in a topic. print_key if True, print each message's key in addition to its value + enable_systest_events if True, console consumer will print additional lifecycle-related information + only available in 0.10.0 and later. """ JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes) BackgroundThreadService.__init__(self, context, num_nodes) @@ -128,6 +130,11 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro self.print_key = print_key self.log_level = "TRACE" + self.enable_systest_events = enable_systest_events + if self.enable_systest_events: + # Only available in 0.10.0 and up + assert version >= V_0_10_0_0 + def prop_file(self, node): """Return a string which can be used to create a configuration file appropriate for the given node.""" # Process client configuration @@ -184,9 +191,14 @@ def start_cmd(self, node): # LoggingMessageFormatter was introduced after 0.9 if node.version > LATEST_0_9: - cmd+=" --formatter kafka.tools.LoggingMessageFormatter" + cmd += " --formatter kafka.tools.LoggingMessageFormatter" + + if self.enable_systest_events: + # enable systest events is only available in 0.10.0 and later + # check the assertion here as well, in case node.version has been modified + assert node.version >= V_0_10_0_0 + cmd += " --enable-systest-events" - cmd += " --enable-systest-events" cmd += " 2>> %(stderr)s | tee -a %(stdout)s &" % args return cmd @@ -228,7 +240,9 @@ def _worker(self, idx, node): for line in itertools.chain([first_line], consumer_output): msg = line.strip() + if msg == "shutdown_complete": + # Note that we can only rely on shutdown_complete message if running 0.10.0 or greater if node in self.clean_shutdown_nodes: raise Exception("Unexpected shutdown event from consumer, already shutdown. 
Consumer index: %d" % idx) self.clean_shutdown_nodes.add(node) From 2f39fb0d151301018339c8e6dfe61507f3f85b1b Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Tue, 3 May 2016 09:49:23 -0700 Subject: [PATCH 070/267] KAFKA-3517; Add documentation for SASL/PLAIN Documentation corresponding to KIP-43 - SASL/PLAIN and multiple mechanism support. Author: Rajini Sivaram Reviewers: Magnus Edenhill , Jun Rao Closes #1232 from rajinisivaram/KAFKA-3517 (cherry picked from commit abac51dc0bf0d5bdc1d5414787da3191e00adcab) Signed-off-by: Jun Rao --- docs/protocol.html | 15 ++ docs/security.html | 372 +++++++++++++++++++++++++++++++++------------ 2 files changed, 293 insertions(+), 94 deletions(-) diff --git a/docs/protocol.html b/docs/protocol.html index cb359f17f6d7..c26f16b464cd 100644 --- a/docs/protocol.html +++ b/docs/protocol.html @@ -114,6 +114,21 @@
    Versioning and

    Currently all versions are baselined at 0, as we evolve these APIs we will indicate the format for each version individually.

    +
    SASL Authentication Sequence
    +

    The following sequence is used for SASL authentication: +

      +
+    1. Kafka ApiVersionsRequest may be sent by the client to obtain the version ranges of requests supported by the broker. This is optional.
+    2. Kafka SaslHandshakeRequest containing the SASL mechanism for authentication is sent by the client. If the requested mechanism is not enabled in the server, the server responds with the list of supported mechanisms and closes the client connection. If the mechanism is enabled in the server, the server sends a successful response and continues with SASL authentication.
+    3. The actual SASL authentication is now performed. A series of SASL client and server tokens corresponding to the mechanism are sent as opaque packets. These packets contain a 32-bit size followed by the token as defined by the protocol for the SASL mechanism (see the framing sketch after this section).
+    4. If authentication succeeds, subsequent packets are handled as Kafka API requests. Otherwise, the client connection is closed.
    +

+    For interoperability with 0.9.0.x clients, the first packet received by the server is handled as a SASL/GSSAPI client token if it is not a valid Kafka request. SASL/GSSAPI authentication is performed starting with this packet, skipping the first two steps above.
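    To make the token framing in step 3 above concrete, here is a minimal Java sketch of the packet layout: a 32-bit size followed by the opaque SASL token. This is an illustration only, not Kafka's actual network code; the class name and the use of DataInputStream/DataOutputStream over a raw stream are assumptions for the example.

    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    // Illustrative framing for the opaque SASL tokens described above:
    // each packet is a 32-bit (big-endian) size followed by the token bytes.
    public final class SaslTokenFraming {

        // Write one SASL token, prefixed with its length.
        public static void writeToken(OutputStream out, byte[] token) throws IOException {
            DataOutputStream dos = new DataOutputStream(out);
            dos.writeInt(token.length);   // 32-bit size
            dos.write(token);             // opaque token produced by SaslClient/SaslServer
            dos.flush();
        }

        // Read one SASL token: the 32-bit size first, then exactly that many bytes.
        public static byte[] readToken(InputStream in) throws IOException {
            DataInputStream dis = new DataInputStream(in);
            int size = dis.readInt();
            byte[] token = new byte[size];
            dis.readFully(token);
            return token;
        }
    }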

    + +

    The Protocol

    Protocol Primitive Types
    diff --git a/docs/security.html b/docs/security.html index a2e781626fe5..3e5085beb58b 100644 --- a/docs/security.html +++ b/docs/security.html @@ -18,7 +18,8 @@

    7.1 Security Overview

    In release 0.9.0.0, the Kafka community added a number of features that, used either separately or together, increase security in a Kafka cluster. These features are considered to be of beta quality. The following security measures are currently supported:
    -  1. Authentication of connections to brokers from clients (producers and consumers), other brokers and tools, using either SSL or SASL (Kerberos)
    +  1. Authentication of connections to brokers from clients (producers and consumers), other brokers and tools, using either SSL or SASL (Kerberos). SASL/PLAIN can also be used from release 0.10.0.0 onwards.
       2. Authentication of connections from brokers to ZooKeeper
       3. Encryption of data transferred between brokers and clients, between brokers, or between brokers and tools using SSL (Note that there is a performance degradation when SSL is enabled, the magnitude of which depends on the CPU type and the JVM implementation.)
       4. Authorization of read / write operations by clients
    @@ -182,22 +183,95 @@

      7.2 Encryption and Authentication

      7.3 Authentication using SASL

        -
      1. Prerequisites

        +
      2. SASL configuration for Kafka brokers

          -
        1. Kerberos
          - If your organization is already using a Kerberos server (for example, by using Active Directory), there is no need to install a new server just for Kafka. Otherwise you will need to install one, your Linux vendor likely has packages for Kerberos and a short guide on how to install and configure it (Ubuntu, Redhat). Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to $JAVA_HOME/jre/lib/security.
        2. -
        3. Create Kerberos Principals
          - If you are using the organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal for each Kafka broker in your cluster and for every operating system user that will access Kafka with Kerberos authentication (via clients and tools).
          - If you have installed your own Kerberos, you will need to create these principals yourself using the following commands: +
        4. Select one or more supported mechanisms to enable in the broker. GSSAPI + and PLAIN are the mechanisms currently supported in Kafka.
        5. +
        6. Add a JAAS config file for the selected mechanisms as described in the examples + for setting up GSSAPI (Kerberos) + or PLAIN.
        7. +
        8. Pass the JAAS config file location as JVM parameter to each Kafka broker. + For example: +
              -Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf
        9. +
        10. Configure a SASL port in server.properties, by adding at least one of + SASL_PLAINTEXT or SASL_SSL to the listeners parameter, which + contains one or more comma-separated values: +
              listeners=SASL_PLAINTEXT://host.name:port
          + If SASL_SSL is used, then SSL must also be + configured. If you are only configuring a SASL port (or if you want + the Kafka brokers to authenticate each other using SASL) then make sure + you set the same SASL protocol for inter-broker communication: +
              security.inter.broker.protocol=SASL_PLAINTEXT (or SASL_SSL)
        11. +
        12. Enable one or more SASL mechanisms in server.properties: +
              sasl.enabled.mechanisms=GSSAPI (,PLAIN)
        13. +
        14. Configure the SASL mechanism for inter-broker communication in server.properties + if using SASL for inter-broker communication: +
              sasl.mechanism.inter.broker.protocol=GSSAPI (or PLAIN)
        15. +
        16. Follow the steps in GSSAPI (Kerberos) or PLAIN to configure SASL for the enabled mechanisms. To enable multiple mechanisms in the broker, follow the steps here. (A combined server.properties example is sketched after this list.)
        17. + Important notes: +
            +
          1. KafkaServer is the section name in the JAAS file used by each + KafkaServer/Broker. This section provides SASL configuration options + for the broker including any SASL client connections made by the broker + for inter-broker communication.
          2. +
          3. Client section is used to authenticate a SASL connection with + zookeeper. It also allows the brokers to set SASL ACL on zookeeper + nodes which locks these nodes down so that only the brokers can + modify it. It is necessary to have the same principal name across all + brokers. If you want to use a section name other than Client, set the + system property zookeeper.sasl.client to the appropriate + name (e.g., -Dzookeeper.sasl.client=ZkClient).
          4. +
          5. ZooKeeper uses "zookeeper" as the service name by default. If you + want to change this, set the system property + zookeeper.sasl.client.username to the appropriate name + (e.g., -Dzookeeper.sasl.client.username=zk).
          6. +
          +
        +
      3. +
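    As a recap of the broker-side steps above, the fragment below shows how the individual settings could sit together in one server.properties, along with the JVM flag for the JAAS file. This is only a sketch: host name, port, file path and the GSSAPI/PLAIN choice are placeholders, and passing the flag through the KAFKA_OPTS environment variable is just one way to set it.

    # server.properties (example combining the settings listed above)
    listeners=SASL_PLAINTEXT://host.name:port
    security.inter.broker.protocol=SASL_PLAINTEXT
    sasl.mechanism.inter.broker.protocol=GSSAPI
    sasl.enabled.mechanisms=GSSAPI,PLAIN

    # JAAS file location passed to the broker JVM, e.g. via KAFKA_OPTS:
    -Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf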
      4. SASL configuration for Kafka clients

        + SASL authentication is only supported for the new Java Kafka producer and + consumer, the older API is not supported. To configure SASL authentication + on the clients: +
          +
        1. Select a SASL mechanism for authentication.
        2. +
        3. Add a JAAS config file for the selected mechanism as described in the examples + for setting up GSSAPI (Kerberos) + or PLAIN. KafkaClient is the + section name in the JAAS file used by Kafka clients.
        4. +
        5. Pass the JAAS config file location as JVM parameter to each client JVM. For example: +
              -Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf
        6. +
        7. Configure the following properties in producer.properties or consumer.properties (a combined client example in Java is sketched after this list):
              security.protocol=SASL_PLAINTEXT (or SASL_SSL)
          +    sasl.mechanism=GSSAPI (or PLAIN)
        8. +
        9. Follow the steps in GSSAPI (Kerberos) + or PLAIN to configure SASL + for the selected mechanism.
        10. +
        +
      5. +
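    The client-side steps above can also be expressed in code. The snippet below is a minimal sketch of a SASL-enabled producer, assuming the new Java producer, a JAAS file already supplied via -Djava.security.auth.login.config, and placeholder broker address, topic and serializers; for GSSAPI you would additionally set sasl.kerberos.service.name as described in the Kerberos section.

    import java.util.Properties;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.common.serialization.StringSerializer;

    // Sketch only: a producer configured for SASL, with the JAAS config passed
    // separately as -Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf
    public class SaslProducerExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("bootstrap.servers", "broker1:9093");   // placeholder address
            props.put("security.protocol", "SASL_SSL");        // or SASL_PLAINTEXT
            props.put("sasl.mechanism", "PLAIN");              // or GSSAPI
            props.put("key.serializer", StringSerializer.class.getName());
            props.put("value.serializer", StringSerializer.class.getName());

            KafkaProducer<String, String> producer = new KafkaProducer<>(props);
            producer.send(new ProducerRecord<>("test-topic", "key", "value"));
            producer.close();
        }
    }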
      6. Authentication using SASL/Kerberos

        +
          +
        1. Prerequisites
          +
            +
          1. Kerberos
            + If your organization is already using a Kerberos server (for example, by using Active Directory), there is no need to install a new server just for Kafka. Otherwise you will need to install one, your Linux vendor likely has packages for Kerberos and a short guide on how to install and configure it (Ubuntu, Redhat). Note that if you are using Oracle Java, you will need to download JCE policy files for your Java version and copy them to $JAVA_HOME/jre/lib/security.
          2. +
          3. Create Kerberos Principals
            + If you are using the organization's Kerberos or Active Directory server, ask your Kerberos administrator for a principal for each Kafka broker in your cluster and for every operating system user that will access Kafka with Kerberos authentication (via clients and tools).
            + If you have installed your own Kerberos, you will need to create these principals yourself using the following commands:
                 sudo /usr/sbin/kadmin.local -q 'addprinc -randkey kafka/{hostname}@{REALM}'
                 sudo /usr/sbin/kadmin.local -q "ktadd -k /etc/security/keytabs/{keytabname}.keytab kafka/{hostname}@{REALM}"
          4. -
          5. Make sure all hosts can be reachable using hostnames - it is a Kerberos requirement that all your hosts can be resolved with their FQDNs.
          6. -
          -
        2. Configuring Kafka Brokers

          -
            -
          1. Add a suitably modified JAAS file similar to the one below to each Kafka broker's config directory, let's call it kafka_server_jaas.conf for this example (note that each broker should have its own keytab): -
            +          
          2. Make sure all hosts can be reachable using hostnames - it is a Kerberos requirement that all your hosts can be resolved with their FQDNs.
          3. +
          +
        3. Configuring Kafka Brokers
          +
            +
          1. Add a suitably modified JAAS file similar to the one below to each Kafka broker's config directory, let's call it kafka_server_jaas.conf for this example (note that each broker should have its own keytab): +
                 KafkaServer {
                     com.sun.security.auth.module.Krb5LoginModule required
                     useKeyTab=true
            @@ -215,40 +289,31 @@ 

            7.3 Authentication using SASL -

          2. -
          3. Pass the JAAS and optionally the krb5 file locations as JVM parameters to each Kafka broker (see here for more details): -
            -    -Djava.security.krb5.conf=/etc/kafka/krb5.conf
            +          
          4. + KafkaServer section in the JAAS file tells the broker which principal to use and the location of the keytab where this principal is stored. It + allows the broker to login using the keytab specified in this section. See notes for more details on Zookeeper SASL configuration. +
          5. Pass the JAAS and optionally the krb5 file locations as JVM parameters to each Kafka broker (see here for more details): +
                -Djava.security.krb5.conf=/etc/kafka/krb5.conf
                 -Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf
            -
          6. -
          7. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting kafka broker.
          8. -
          9. Configure a SASL port in server.properties, by adding at least one of SASL_PLAINTEXT or SASL_SSL to the listeners parameter, which contains one or more comma-separated values: -
            -    listeners=SASL_PLAINTEXT://host.name:port
            - If SASL_SSL is used, then SSL must also be configured. - If you are only configuring a SASL port (or if you want the Kafka brokers to authenticate each other using SASL) then make sure you set the same SASL protocol for inter-broker communication: -
            -    security.inter.broker.protocol=SASL_PLAINTEXT (or SASL_SSL)
          10. - - We must also configure the service name in server.properties, which should match the principal name of the kafka brokers. In the above example, principal is "kafka/kafka1.hostname.com@EXAMPLE.com", so: -
            -    sasl.kerberos.service.name=kafka
            - - Important notes: -
              -
            1. KafkaServer is a section name in JAAS file used by each KafkaServer/Broker. This section tells the broker which principal to use and the location of the keytab where this principal is stored. It allows the broker to login using the keytab specified in this section.
            2. -
            3. Client section is used to authenticate a SASL connection with zookeeper. It also allows the brokers to set SASL ACL on zookeeper nodes which locks these nodes down so that only the brokers can modify it. It is necessary to have the same principal name across all brokers. If you want to use a section name other than Client, set the system property zookeeper.sasl.client to the appropriate name (e.g., -Dzookeeper.sasl.client=ZkClient).
            4. -
            5. ZooKeeper uses "zookeeper" as the service name by default. If you want to change this, set the system property zookeeper.sasl.client.username to the appropriate name (e.g., -Dzookeeper.sasl.client.username=zk).
            6. -
            - -
          -
        4. Configuring Kafka Clients

          - SASL authentication is only supported for the new kafka producer and consumer, the older API is not supported. To configure SASL authentication on the clients: -
            -
          1. - Clients (producers, consumers, connect workers, etc) will authenticate to the cluster with their own principal (usually with the same name as the user running the client), so obtain or create these principals as needed. Then create a JAAS file for each principal. - The KafkaClient section describes how the clients like producer and consumer can connect to the Kafka Broker. The following is an example configuration for a client using a keytab (recommended for long-running processes): -
            +          
          2. +
          3. Make sure the keytabs configured in the JAAS file are readable by the operating system user who is starting kafka broker.
          4. +
          5. Configure SASL port and SASL mechanisms in server.properties as described here. For example: +
                listeners=SASL_PLAINTEXT://host.name:port
            +    security.inter.broker.protocol=SASL_PLAINTEXT
            +    sasl.mechanism.inter.broker.protocol=GSSAPI
            +    sasl.enabled.mechanisms=GSSAPI
            +          
            +
          6. We must also configure the service name in server.properties, which should match the principal name of the kafka brokers. In the above example, principal is "kafka/kafka1.hostname.com@EXAMPLE.com", so: +
                sasl.kerberos.service.name=kafka
            + +
        5. +
        6. Configuring Kafka Clients
          + To configure SASL authentication on the clients: +
            +
          1. + Clients (producers, consumers, connect workers, etc) will authenticate to the cluster with their own principal (usually with the same name as the user running the client), so obtain or create these principals as needed. Then create a JAAS file for each principal. + The KafkaClient section describes how the clients like producer and consumer can connect to the Kafka Broker. The following is an example configuration for a client using a keytab (recommended for long-running processes): +
                 KafkaClient {
                     com.sun.security.auth.module.Krb5LoginModule required
                     useKeyTab=true
            @@ -257,93 +322,212 @@ 

            7.3 Authentication using SASL - For command-line utilities like kafka-console-consumer or kafka-console-producer, kinit can be used along with "useTicketCache=true" as in: -
            +              For command-line utilities like kafka-console-consumer or kafka-console-producer, kinit can be used along with "useTicketCache=true" as in:
            +              
                 KafkaClient {
                     com.sun.security.auth.module.Krb5LoginModule required
                     useTicketCache=true;
                 };
            -

          2. -
          3. Pass the JAAS and optionally krb5 file locations as JVM parameters to each client JVM (see here for more details): -
            -    -Djava.security.krb5.conf=/etc/kafka/krb5.conf
            +              
          4. +
          5. Pass the JAAS and optionally krb5 file locations as JVM parameters to each client JVM (see here for more details): +
                -Djava.security.krb5.conf=/etc/kafka/krb5.conf
                 -Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf
          6. -
          7. Make sure the keytabs configured in the kafka_client_jaas.conf are readable by the operating system user who is starting kafka client.
          8. -
          9. Configure the following properties in producer.properties or consumer.properties: -
            -    security.protocol=SASL_PLAINTEXT (or SASL_SSL)
            -    sasl.kerberos.service.name=kafka
            -
          10. -
        7. - -
        8. Incorporating Security Features in a Running Cluster

          - You can secure a running cluster via one or more of the supported protocols discussed previously. This is done in phases: -

          +
        9. Make sure the keytabs configured in the kafka_client_jaas.conf are readable by the operating system user who is starting kafka client.
        10. +
        11. Configure the following properties in producer.properties or consumer.properties: +
              security.protocol=SASL_PLAINTEXT (or SASL_SSL)
          +    sasl.mechanism=GSSAPI
          +    sasl.kerberos.service.name=kafka
        12. +
        +
      7. +
      + + +
    8. Authentication using SASL/PLAIN

      +

      SASL/PLAIN is a simple username/password authentication mechanism that is typically used with TLS for encryption to implement secure authentication. + Kafka supports a default implementation for SASL/PLAIN which can be extended for production use as described here.

      + The username is used as the authenticated Principal for configuration of ACLs etc. +
        +
      1. Configuring Kafka Brokers
        +
          +
        1. Add a suitably modified JAAS file similar to the one below to each Kafka broker's config directory, let's call it kafka_server_jaas.conf for this example: +
          +    KafkaServer {
          +        org.apache.kafka.common.security.plain.PlainLoginModule required
          +        username="admin"
          +        password="admin-secret"
          +        user_admin="admin-secret"
          +        user_alice="alice-secret";
          +    };
          + This configuration defines two users (admin and alice). The properties username and password + in the KafkaServer section are used by the broker to initiate connections to other brokers. In this example, + admin is the user for inter-broker communication. The set of properties user_userName defines + the passwords for all users that connect to the broker and the broker validates all client connections including + those from other brokers using these properties.
        2. +
        3. Pass the JAAS config file location as JVM parameter to each Kafka broker: +
              -Djava.security.auth.login.config=/etc/kafka/kafka_server_jaas.conf
        4. +
        5. Configure SASL port and SASL mechanisms in server.properties as described here. For example: +
              listeners=SASL_SSL://host.name:port
          +    security.inter.broker.protocol=SASL_SSL
          +    sasl.mechanism.inter.broker.protocol=PLAIN
          +    sasl.enabled.mechanisms=PLAIN
        6. +
        +
      2. + +
      3. Configuring Kafka Clients
        + To configure SASL authentication on the clients: +
          +
        1. The KafkaClient section describes how the clients like producer and consumer can connect to the Kafka Broker. + The following is an example configuration for a client for the PLAIN mechanism: +
          +    KafkaClient {
          +        org.apache.kafka.common.security.plain.PlainLoginModule required
          +        username="alice"
          +        password="alice-secret";
          +    };
          + The properties username and password in the KafkaClient section are used by clients to configure + the user for client connections. In this example, clients connect to the broker as user alice. +
        2. +
        3. Pass the JAAS config file location as JVM parameter to each client JVM: +
              -Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf
        4. +
        5. Configure the following properties in producer.properties or consumer.properties: +
              security.protocol=SASL_SSL
          +    sasl.mechanism=PLAIN
        6. +
        +
      4. +
      5. Use of SASL/PLAIN in production
          -
        • Incrementally bounce the cluster nodes to open additional secured port(s).
        • -
        • Restart clients using the secured rather than PLAINTEXT port (assuming you are securing the client-broker connection).
        • -
        • Incrementally bounce the cluster again to enable broker-to-broker security (if this is required)
        • -
        • A final incremental bounce to close the PLAINTEXT port.
        • +
        • SASL/PLAIN should be used only with SSL as transport layer to ensure that clear passwords are not transmitted on the wire without encryption.
        • +
        • The default implementation of SASL/PLAIN in Kafka specifies usernames and passwords in the JAAS configuration file as shown here. To avoid storing passwords on disk, you can plug in your own implementation of javax.security.auth.spi.LoginModule that provides usernames and passwords from an external source. The login module implementation should provide the username as the public credential and the password as the private credential of the Subject. The default implementation org.apache.kafka.common.security.plain.PlainLoginModule can be used as an example; a minimal skeleton is sketched after this section.
        • +
        • In production systems, external authentication servers may implement password authentication. Kafka brokers can be integrated with these servers by adding + your own implementation of javax.security.sasl.SaslServer. The default implementation included in Kafka in the package + org.apache.kafka.common.security.plain can be used as an example to get started. +
            +
          • New providers must be installed and registered in the JVM. Providers can be installed by adding provider classes to + the normal CLASSPATH or bundled as a jar file and added to JAVA_HOME/lib/ext.
          • +
          • Providers can be registered statically by adding a provider to the security properties file + JAVA_HOME/lib/security/java.security. +
                security.provider.n=providerClassName
            + where providerClassName is the fully qualified name of the new provider and n is the preference order with + lower numbers indicating higher preference.
          • +
          • Alternatively, you can register providers dynamically at runtime by invoking Security.addProvider at the beginning of the client + application or in a static initializer in the login module. For example: +
                Security.addProvider(new PlainSaslServerProvider());
          • +
          • For more details, see JCA Reference.
          • +
          +
        -

        - The specific steps for configuring SSL and SASL are described in sections 7.2 and 7.3. - Follow these steps to enable security for your desired protocol(s). -

        - The security implementation lets you configure different protocols for both broker-client and broker-broker communication. - These must be enabled in separate bounces. A PLAINTEXT port must be left open throughout so brokers and/or clients can continue to communicate. -

        - - When performing an incremental bounce stop the brokers cleanly via a SIGTERM. It's also good practice to wait for restarted replicas to return to the ISR list before moving onto the next node. -

        - As an example, say we wish to encrypt both broker-client and broker-broker communication with SSL. In the first incremental bounce, a SSL port is opened on each node: +
      6. +
      +
    9. +
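    As referenced in the SASL/PLAIN production notes above, a custom login module mainly has to place the username and password on the Subject. The skeleton below is a hypothetical sketch, not Kafka's PlainLoginModule: the class name, the "username" option and the external password lookup are assumptions for the example.

    import java.util.Map;
    import javax.security.auth.Subject;
    import javax.security.auth.callback.CallbackHandler;
    import javax.security.auth.login.LoginException;
    import javax.security.auth.spi.LoginModule;

    // Hypothetical login module that obtains credentials from an external source
    // and exposes them as described above: username as a public credential,
    // password as a private credential of the Subject.
    public class ExternalPlainLoginModule implements LoginModule {

        private Subject subject;

        @Override
        public void initialize(Subject subject, CallbackHandler callbackHandler,
                               Map<String, ?> sharedState, Map<String, ?> options) {
            this.subject = subject;
            // Example: read the user name from the JAAS options and the password
            // from an external secret store instead of the JAAS file itself.
            String username = (String) options.get("username");   // assumed option name
            String password = fetchPasswordFromExternalStore(username);
            subject.getPublicCredentials().add(username);
            subject.getPrivateCredentials().add(password);
        }

        private String fetchPasswordFromExternalStore(String username) {
            // Placeholder: look the password up in a vault, database, etc.
            return "changeme";
        }

        @Override
        public boolean login() throws LoginException {
            return true;
        }

        @Override
        public boolean commit() throws LoginException {
            return true;
        }

        @Override
        public boolean abort() throws LoginException {
            return false;
        }

        @Override
        public boolean logout() throws LoginException {
            return true;
        }
    }

    Such a class would replace org.apache.kafka.common.security.plain.PlainLoginModule in the KafkaServer or KafkaClient JAAS sections shown earlier.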
    10. Enabling multiple SASL mechanisms in a broker

      +
        +
      1. Specify configuration for the login modules of all enabled mechanisms in the KafkaServer section of the JAAS config file. For example:
        +    KafkaServer {
        +        com.sun.security.auth.module.Krb5LoginModule required
        +        useKeyTab=true
        +        storeKey=true
        +        keyTab="/etc/security/keytabs/kafka_server.keytab"
        +        principal="kafka/kafka1.hostname.com@EXAMPLE.COM";
        +
        +        org.apache.kafka.common.security.plain.PlainLoginModule required
        +        username="admin"
        +        password="admin-secret"
        +        user_admin="admin-secret"
        +        user_alice="alice-secret";
        +    };
      2. +
      3. Enable the SASL mechanisms in server.properties:
            sasl.enabled.mechanisms=GSSAPI,PLAIN
      4. +
      5. Specify the SASL security protocol and mechanism for inter-broker communication in server.properties if required: +
            security.inter.broker.protocol=SASL_PLAINTEXT (or SASL_SSL)
        +    sasl.mechanism.inter.broker.protocol=GSSAPI (or PLAIN)
      6. +
      7. Follow the mechanism-specific steps in GSSAPI (Kerberos) + and PLAIN to configure SASL for the enabled mechanisms.
      8. +
      +
    11. +
    12. Incorporating Security Features in a Running Cluster

      + You can secure a running cluster via one or more of the supported protocols discussed previously. This is done in phases: +

      +
        +
      • Incrementally bounce the cluster nodes to open additional secured port(s).
      • +
      • Restart clients using the secured rather than PLAINTEXT port (assuming you are securing the client-broker connection).
      • +
      • Incrementally bounce the cluster again to enable broker-to-broker security (if this is required)
      • +
      • A final incremental bounce to close the PLAINTEXT port.
      • +
      +

      + The specific steps for configuring SSL and SASL are described in sections 7.2 and 7.3. + Follow these steps to enable security for your desired protocol(s). +

      + The security implementation lets you configure different protocols for both broker-client and broker-broker communication. + These must be enabled in separate bounces. A PLAINTEXT port must be left open throughout so brokers and/or clients can continue to communicate. +

      + + When performing an incremental bounce stop the brokers cleanly via a SIGTERM. It's also good practice to wait for restarted replicas to return to the ISR list before moving onto the next node. +

      + As an example, say we wish to encrypt both broker-client and broker-broker communication with SSL. In the first incremental bounce, a SSL port is opened on each node: +
                listeners=PLAINTEXT://broker1:9091,SSL://broker1:9092
      - We then restart the clients, changing their config to point at the newly opened, secured port: + We then restart the clients, changing their config to point at the newly opened, secured port: -
      +          
               bootstrap.servers = [broker1:9092,...]
               security.protocol = SSL
               ...etc
      - In the second incremental server bounce we instruct Kafka to use SSL as the broker-broker protocol (which will use the same SSL port): + In the second incremental server bounce we instruct Kafka to use SSL as the broker-broker protocol (which will use the same SSL port): -
      +          
               listeners=PLAINTEXT://broker1:9091,SSL://broker1:9092
               security.inter.broker.protocol=SSL
      - In the final bounce we secure the cluster by closing the PLAINTEXT port: + In the final bounce we secure the cluster by closing the PLAINTEXT port: -
      +          
               listeners=SSL://broker1:9092
               security.inter.broker.protocol=SSL
      - Alternatively we might choose to open multiple ports so that different protocols can be used for broker-broker and broker-client communication. Say we wished to use SSL encryption throughout (i.e. for broker-broker and broker-client communication) but we'd like to add SASL authentication to the broker-client connection also. We would achieve this by opening two additional ports during the first bounce: + Alternatively we might choose to open multiple ports so that different protocols can be used for broker-broker and broker-client communication. Say we wished to use SSL encryption throughout (i.e. for broker-broker and broker-client communication) but we'd like to add SASL authentication to the broker-client connection also. We would achieve this by opening two additional ports during the first bounce: -
      +          
               listeners=PLAINTEXT://broker1:9091,SSL://broker1:9092,SASL_SSL://broker1:9093
      - We would then restart the clients, changing their config to point at the newly opened, SASL & SSL secured port: + We would then restart the clients, changing their config to point at the newly opened, SASL & SSL secured port: -
      +          
               bootstrap.servers = [broker1:9093,...]
               security.protocol = SASL_SSL
               ...etc
      - The second server bounce would switch the cluster to use encrypted broker-broker communication via the SSL port we previously opened on port 9092: + The second server bounce would switch the cluster to use encrypted broker-broker communication via the SSL port we previously opened on port 9092: -
      +          
               listeners=PLAINTEXT://broker1:9091,SSL://broker1:9092,SASL_SSL://broker1:9093
               security.inter.broker.protocol=SSL
      - The final bounce secures the cluster by closing the PLAINTEXT port. + The final bounce secures the cluster by closing the PLAINTEXT port. -
      +          
              listeners=SSL://broker1:9092,SASL_SSL://broker1:9093
              security.inter.broker.protocol=SSL
      - ZooKeeper can be secured independently of the Kafka cluster. The steps for doing this are covered in section 7.5.2. -
    13. + ZooKeeper can be secured independently of the Kafka cluster. The steps for doing this are covered in section 7.5.2. + +
    14. Modifying SASL mechanism in a Running Cluster

      +

      SASL mechanism can be modified in a running cluster using the following sequence:

      +
        +
      1. Enable the new SASL mechanism by adding it to sasl.enabled.mechanisms in server.properties for each broker. Update the JAAS config file to include both mechanisms as described here. Incrementally bounce the cluster nodes.
      2. Restart clients using the new mechanism.
      3. To change the mechanism of inter-broker communication (if this is required), set sasl.mechanism.inter.broker.protocol in server.properties to the new mechanism and incrementally bounce the cluster again.
      4. To remove the old mechanism (if this is required), remove it from sasl.enabled.mechanisms in server.properties and remove its entries from the JAAS config file. Incrementally bounce the cluster again. (A property-level sketch of these steps follows this list.)
      +
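    A sketch of what the server.properties changes could look like across the bounces described above, assuming a migration from GSSAPI to PLAIN (the mechanism names are only an example):

    # Bounce 1: enable the new mechanism alongside the old one
    sasl.enabled.mechanisms=GSSAPI,PLAIN

    # (restart clients with sasl.mechanism=PLAIN)

    # Bounce 2, if inter-broker communication should move to the new mechanism:
    sasl.mechanism.inter.broker.protocol=PLAIN

    # Bounce 3, if the old mechanism should be removed:
    sasl.enabled.mechanisms=PLAIN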

    7.4 Authorization and ACLs

    From e6d814a3eb2998a3172404385f1a9010f42d5425 Mon Sep 17 00:00:00 2001 From: dan norwood Date: Tue, 3 May 2016 10:21:57 -0700 Subject: [PATCH 071/267] MINOR: add logfilename to error message when file missing ewencp Author: dan norwood Reviewers: Ewen Cheslack-Postava Closes #1307 from norwood/log-filename-when-missing-file (cherry picked from commit 62253539d87e1ccb353673ed96adc98fb2d854ae) Signed-off-by: Ewen Cheslack-Postava --- .../org/apache/kafka/connect/file/FileStreamSourceTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connect/file/src/main/java/org/apache/kafka/connect/file/FileStreamSourceTask.java b/connect/file/src/main/java/org/apache/kafka/connect/file/FileStreamSourceTask.java index af50d4d7bf21..14d944f60f36 100644 --- a/connect/file/src/main/java/org/apache/kafka/connect/file/FileStreamSourceTask.java +++ b/connect/file/src/main/java/org/apache/kafka/connect/file/FileStreamSourceTask.java @@ -103,7 +103,7 @@ public List poll() throws InterruptedException { reader = new BufferedReader(new InputStreamReader(stream)); log.debug("Opened {} for reading", logFilename()); } catch (FileNotFoundException e) { - log.warn("Couldn't find file for FileStreamSourceTask, sleeping to wait for it to be created"); + log.warn("Couldn't find file {} for FileStreamSourceTask, sleeping to wait for it to be created", logFilename()); synchronized (this) { this.wait(1000); } From e5363e1104dc6cdcdfb117bede6146984a4fe810 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 3 May 2016 10:55:54 -0700 Subject: [PATCH 072/267] KAFKA-3128; Add metrics for ZooKeeper events zookeeper metrics Also: * Remove redundant `time.milliseconds` call in `Sensor.record` * Clean-up a number of tests and remove a manual test that is no longer required Author: Ismael Juma Reviewers: Liquan Pei , Jun Rao Closes #1265 from ijuma/kafka-3128-zookeeper-metrics (cherry picked from commit c7425be5be8d0c2786155fbc697d83f80827d084) Signed-off-by: Jun Rao --- .../apache/kafka/common/metrics/Sensor.java | 2 +- .../scala/kafka/server/KafkaHealthcheck.scala | 57 ++++++++++------ .../main/scala/kafka/server/KafkaServer.scala | 5 +- core/src/main/scala/kafka/utils/ZkUtils.scala | 2 +- .../kafka/api/AdminClientTest.scala | 2 +- .../kafka/api/ProducerBounceTest.scala | 16 ++--- .../test/scala/other/kafka/DeleteZKPath.scala | 44 ------------ .../kafka/integration/PrimitiveApiTest.scala | 2 +- .../ZookeeperConsumerConnectorTest.scala | 2 +- .../kafka/server/ReplicaManagerTest.scala | 2 +- .../server/SessionExpireListenerTest.scala | 68 +++++++++++++++++++ .../kafka/utils/CommandLineUtilsTest.scala | 2 +- .../test/scala/unit/kafka/zk/ZKPathTest.scala | 27 +++----- .../unit/kafka/zk/ZooKeeperTestHarness.scala | 13 ++-- 14 files changed, 136 insertions(+), 108 deletions(-) delete mode 100755 core/src/test/scala/other/kafka/DeleteZKPath.scala create mode 100644 core/src/test/scala/unit/kafka/server/SessionExpireListenerTest.scala diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java index 0c5bcb74e6e4..098bfa85e5d1 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java @@ -96,7 +96,7 @@ public void record(double value) { * bound */ public void record(double value, long timeMs) { - this.lastRecordTime = time.milliseconds(); + this.lastRecordTime = timeMs; synchronized (this) { // increment all the stats for (int i = 0; i < 
this.stats.size(); i++) diff --git a/core/src/main/scala/kafka/server/KafkaHealthcheck.scala b/core/src/main/scala/kafka/server/KafkaHealthcheck.scala index 4e3fc29f9dc4..117899b9c8d2 100644 --- a/core/src/main/scala/kafka/server/KafkaHealthcheck.scala +++ b/core/src/main/scala/kafka/server/KafkaHealthcheck.scala @@ -18,15 +18,17 @@ package kafka.server import java.net.InetAddress +import java.util.Locale +import java.util.concurrent.TimeUnit import kafka.api.ApiVersion import kafka.cluster.EndPoint +import kafka.metrics.KafkaMetricsGroup import kafka.utils._ import org.I0Itec.zkclient.IZkStateListener import org.apache.kafka.common.protocol.SecurityProtocol import org.apache.zookeeper.Watcher.Event.KeeperState - /** * This class registers the broker in zookeeper to allow * other brokers and consumers to detect failures. It uses an ephemeral znode with the path: @@ -35,14 +37,14 @@ import org.apache.zookeeper.Watcher.Event.KeeperState * Right now our definition of health is fairly naive. If we register in zk we are healthy, otherwise * we are dead. */ -class KafkaHealthcheck(private val brokerId: Int, - private val advertisedEndpoints: Map[SecurityProtocol, EndPoint], - private val zkUtils: ZkUtils, - private val rack: Option[String], - private val interBrokerProtocolVersion: ApiVersion) extends Logging { +class KafkaHealthcheck(brokerId: Int, + advertisedEndpoints: Map[SecurityProtocol, EndPoint], + zkUtils: ZkUtils, + rack: Option[String], + interBrokerProtocolVersion: ApiVersion) extends Logging { - val brokerIdPath = ZkUtils.BrokerIdsPath + "/" + brokerId - val sessionExpireListener = new SessionExpireListener + private val brokerIdPath = ZkUtils.BrokerIdsPath + "/" + brokerId + private[server] val sessionExpireListener = new SessionExpireListener def startup() { zkUtils.zkClient.subscribeStateChanges(sessionExpireListener) @@ -70,31 +72,44 @@ class KafkaHealthcheck(private val brokerId: Int, } /** - * When we get a SessionExpired event, we lost all ephemeral nodes and zkclient has reestablished a - * connection for us. We need to re-register this broker in the broker registry. + * When we get a SessionExpired event, it means that we have lost all ephemeral nodes and ZKClient has re-established + * a connection for us. We need to re-register this broker in the broker registry. We rely on `handleStateChanged` + * to record ZooKeeper connection state metrics. */ - class SessionExpireListener() extends IZkStateListener { + class SessionExpireListener extends IZkStateListener with KafkaMetricsGroup { + + private[server] val stateToMeterMap = { + import KeeperState._ + val stateToEventTypeMap = Map( + Disconnected -> "Disconnects", + SyncConnected -> "SyncConnects", + AuthFailed -> "AuthFailures", + ConnectedReadOnly -> "ReadOnlyConnects", + SaslAuthenticated -> "SaslAuthentications", + Expired -> "Expires" + ) + stateToEventTypeMap.map { case (state, eventType) => + state -> newMeter(s"ZooKeeper${eventType}PerSec", eventType.toLowerCase(Locale.ROOT), TimeUnit.SECONDS) + } + } + @throws(classOf[Exception]) - def handleStateChanged(state: KeeperState) {} + override def handleStateChanged(state: KeeperState) { + stateToMeterMap.get(state).foreach(_.mark()) + } - /** - * Called after the zookeeper session has expired and a new session has been created. You would have to re-create - * any ephemeral nodes here. - * - * @throws Exception - * On any error. 
- */ @throws(classOf[Exception]) - def handleNewSession() { + override def handleNewSession() { info("re-registering broker info in ZK for broker " + brokerId) register() info("done re-registering broker") info("Subscribing to %s path to watch for new topics".format(ZkUtils.BrokerTopicsPath)) } - override def handleSessionEstablishmentError(error: Throwable): Unit = { + override def handleSessionEstablishmentError(error: Throwable) { fatal("Could not establish session with zookeeper", error) } + } } diff --git a/core/src/main/scala/kafka/server/KafkaServer.scala b/core/src/main/scala/kafka/server/KafkaServer.scala index 36b52fdc4b79..2832ebc6266e 100755 --- a/core/src/main/scala/kafka/server/KafkaServer.scala +++ b/core/src/main/scala/kafka/server/KafkaServer.scala @@ -96,7 +96,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr private var shutdownLatch = new CountDownLatch(1) private val jmxPrefix: String = "kafka.server" - private val reporters: java.util.List[MetricsReporter] = config.metricReporterClasses + private val reporters: java.util.List[MetricsReporter] = config.metricReporterClasses reporters.add(new JmxReporter(jmxPrefix)) // This exists because the Metrics package from clients has its own Time implementation. @@ -239,7 +239,8 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr else (protocol, endpoint) } - kafkaHealthcheck = new KafkaHealthcheck(config.brokerId, listeners, zkUtils, config.rack, config.interBrokerProtocolVersion) + kafkaHealthcheck = new KafkaHealthcheck(config.brokerId, listeners, zkUtils, config.rack, + config.interBrokerProtocolVersion) kafkaHealthcheck.startup() // Now that the broker id is successfully registered via KafkaHealthcheck, checkpoint it diff --git a/core/src/main/scala/kafka/utils/ZkUtils.scala b/core/src/main/scala/kafka/utils/ZkUtils.scala index 83ff51773ebb..81eb24ad105c 100644 --- a/core/src/main/scala/kafka/utils/ZkUtils.scala +++ b/core/src/main/scala/kafka/utils/ZkUtils.scala @@ -924,7 +924,7 @@ object ZkPath { isNamespacePresent = true } - def resetNamespaceCheckedState { + def resetNamespaceCheckedState() { isNamespacePresent = false } diff --git a/core/src/test/scala/integration/kafka/api/AdminClientTest.scala b/core/src/test/scala/integration/kafka/api/AdminClientTest.scala index ade1911b14b5..7fae81e8622d 100644 --- a/core/src/test/scala/integration/kafka/api/AdminClientTest.scala +++ b/core/src/test/scala/integration/kafka/api/AdminClientTest.scala @@ -84,7 +84,7 @@ class AdminClientTest extends IntegrationTestHarness with Logging { !consumers(0).assignment().isEmpty }, "Expected non-empty assignment") - val group= client.describeGroup(groupId) + val group = client.describeGroup(groupId) assertEquals("consumer", group.protocolType) assertEquals("range", group.protocol) assertEquals("Stable", group.state) diff --git a/core/src/test/scala/integration/kafka/api/ProducerBounceTest.scala b/core/src/test/scala/integration/kafka/api/ProducerBounceTest.scala index 369c3b7cd60c..5994a1d6f58d 100644 --- a/core/src/test/scala/integration/kafka/api/ProducerBounceTest.scala +++ b/core/src/test/scala/integration/kafka/api/ProducerBounceTest.scala @@ -51,16 +51,11 @@ class ProducerBounceTest extends KafkaServerTestHarness { .map(KafkaConfig.fromProps(_, overridingProps)) } - private var consumer1: SimpleConsumer = null - private var consumer2: SimpleConsumer = null - private var producer1: KafkaProducer[Array[Byte],Array[Byte]] = null private var producer2: 
KafkaProducer[Array[Byte],Array[Byte]] = null private var producer3: KafkaProducer[Array[Byte],Array[Byte]] = null - private var producer4: KafkaProducer[Array[Byte],Array[Byte]] = null private val topic1 = "topic-1" - private val topic2 = "topic-2" @Before override def setUp() { @@ -76,7 +71,6 @@ class ProducerBounceTest extends KafkaServerTestHarness { if (producer1 != null) producer1.close if (producer2 != null) producer2.close if (producer3 != null) producer3.close - if (producer4 != null) producer4.close super.tearDown() } @@ -102,9 +96,8 @@ class ProducerBounceTest extends KafkaServerTestHarness { Thread.sleep(2000) } - // Make sure the producer do not see any exception - // in returned metadata due to broker failures - assertTrue(scheduler.failed == false) + // Make sure the producer do not see any exception in returned metadata due to broker failures + assertFalse(scheduler.failed) // Make sure the leader still exists after bouncing brokers (0 until numPartitions).foreach(partition => TestUtils.waitUntilLeaderIsElectedOrChanged(zkUtils, topic1, partition)) @@ -114,7 +107,7 @@ class ProducerBounceTest extends KafkaServerTestHarness { // Make sure the producer do not see any exception // when draining the left messages on shutdown - assertTrue(scheduler.failed == false) + assertFalse(scheduler.failed) // double check that the leader info has been propagated after consecutive bounces val newLeaders = (0 until numPartitions).map(i => TestUtils.waitUntilMetadataIsPropagated(servers, topic1, i)) @@ -132,8 +125,7 @@ class ProducerBounceTest extends KafkaServerTestHarness { assertEquals("Should have fetched " + scheduler.sent + " unique messages", scheduler.sent, uniqueMessageSize) } - private class ProducerScheduler extends ShutdownableThread("daemon-producer", false) - { + private class ProducerScheduler extends ShutdownableThread("daemon-producer", false) { val numRecords = 1000 var sent = 0 var failed = false diff --git a/core/src/test/scala/other/kafka/DeleteZKPath.scala b/core/src/test/scala/other/kafka/DeleteZKPath.scala deleted file mode 100755 index 202bf4309f80..000000000000 --- a/core/src/test/scala/other/kafka/DeleteZKPath.scala +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package kafka - -import consumer.ConsumerConfig -import utils.ZkUtils -import org.I0Itec.zkclient.ZkClient -import org.apache.kafka.common.utils.Utils - -object DeleteZKPath { - def main(args: Array[String]) { - if(args.length < 2) { - println("USAGE: " + DeleteZKPath.getClass.getName + " consumer.properties zk_path") - System.exit(1) - } - - val config = new ConsumerConfig(Utils.loadProps(args(0))) - val zkPath = args(1) - val zkUtils = ZkUtils(config.zkConnect, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) - - try { - zkUtils.deletePathRecursive(zkPath); - System.out.println(zkPath + " is deleted") - } catch { - case e: Exception => System.err.println("Path not deleted " + e.printStackTrace()) - } - - } -} diff --git a/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala b/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala index beb5d0eafdb1..85e9cad3d248 100755 --- a/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala +++ b/core/src/test/scala/unit/kafka/integration/PrimitiveApiTest.scala @@ -35,7 +35,7 @@ import java.util.Properties * End to end tests of the primitive apis against a local server */ @deprecated("This test has been deprecated and it will be removed in a future release", "0.10.0.0") -class PrimitiveApiTest extends ProducerConsumerTestHarness with ZooKeeperTestHarness { +class PrimitiveApiTest extends ProducerConsumerTestHarness { val requestHandlerLogger = Logger.getLogger(classOf[KafkaRequestHandler]) def generateConfigs() = List(KafkaConfig.fromProps(TestUtils.createBrokerConfig(0, zkConnect))) diff --git a/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala b/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala index e4c46973b5d7..83cce77ce3b0 100644 --- a/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala +++ b/core/src/test/scala/unit/kafka/javaapi/consumer/ZookeeperConsumerConnectorTest.scala @@ -37,7 +37,7 @@ import org.apache.log4j.{Level, Logger} import org.junit.Assert._ @deprecated("This test has been deprecated and it will be removed in a future release", "0.10.0.0") -class ZookeeperConsumerConnectorTest extends KafkaServerTestHarness with ZooKeeperTestHarness with Logging { +class ZookeeperConsumerConnectorTest extends KafkaServerTestHarness with Logging { val numNodes = 2 val numParts = 2 val topic = "topic1" diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala index c2c670e6b69c..2cdf924ad5e7 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala @@ -58,7 +58,7 @@ class ReplicaManagerTest { @After def tearDown() { - metrics.close(); + metrics.close() } @Test diff --git a/core/src/test/scala/unit/kafka/server/SessionExpireListenerTest.scala b/core/src/test/scala/unit/kafka/server/SessionExpireListenerTest.scala new file mode 100644 index 000000000000..4ffb18936fab --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/SessionExpireListenerTest.scala @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.api.ApiVersion +import kafka.utils.ZkUtils +import org.I0Itec.zkclient.ZkClient +import org.apache.zookeeper.Watcher +import org.easymock.EasyMock +import org.junit.{Assert, Before, Test} +import Assert._ +import com.yammer.metrics.Metrics +import com.yammer.metrics.core.{Meter, Metric, MetricName} +import org.apache.kafka.common.utils.MockTime +import scala.collection.JavaConverters._ + +class SessionExpireListenerTest { + + private var time = new MockTime + private val brokerId = 1 + + @Test + def testSessionExpireListenerMetrics() { + + val metrics = Metrics.defaultRegistry + + def checkMeterCount(name: String, expected: Long) { + val meter = metrics.allMetrics.asScala.collectFirst { + case (metricName, meter: Meter) if metricName.getName == name => meter + }.getOrElse(sys.error(s"Unable to find meter with name $name")) + assertEquals("Unexpected meter count", expected, meter.count) + } + + val zkClient = EasyMock.mock(classOf[ZkClient]) + val zkUtils = ZkUtils(zkClient, isZkSecurityEnabled = false) + import Watcher._ + val healthcheck = new KafkaHealthcheck(brokerId, Map.empty, zkUtils, None, ApiVersion.latestVersion) + + val expiresPerSecName = "ZooKeeperExpiresPerSec" + val disconnectsPerSecName = "ZooKeeperDisconnectsPerSec" + checkMeterCount(expiresPerSecName, 0) + checkMeterCount(disconnectsPerSecName, 0) + + healthcheck.sessionExpireListener.handleStateChanged(Event.KeeperState.Expired) + checkMeterCount(expiresPerSecName, 1) + checkMeterCount(disconnectsPerSecName, 0) + + healthcheck.sessionExpireListener.handleStateChanged(Event.KeeperState.Disconnected) + checkMeterCount(expiresPerSecName, 1) + checkMeterCount(disconnectsPerSecName, 1) + } + +} diff --git a/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala b/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala index 6cc868dbc399..50023f804640 100644 --- a/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala +++ b/core/src/test/scala/unit/kafka/utils/CommandLineUtilsTest.scala @@ -23,7 +23,7 @@ import org.junit.Test class CommandLineUtilsTest { - @Test (expected = classOf[java.lang.IllegalArgumentException]) + @Test(expected = classOf[java.lang.IllegalArgumentException]) def testParseEmptyArg() { val argArray = Array("my.empty.property=") CommandLineUtils.parseKeyValueArgs(argArray, false) diff --git a/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala b/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala index 92fae022a728..7ef45505dfee 100644 --- a/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala +++ b/core/src/test/scala/unit/kafka/zk/ZKPathTest.scala @@ -25,7 +25,7 @@ import org.junit.Test class ZKPathTest extends ZooKeeperTestHarness { - val path: String = "/some_dir" + val path = "/some_dir" val zkSessionTimeoutMs = 1000 def zkConnectWithInvalidRoot: String = zkConnect + "/ghost" @@ -33,7 +33,7 @@ class ZKPathTest extends ZooKeeperTestHarness { def 
testCreatePersistentPathThrowsException { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, "test", "1")) - var zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, + val zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState @@ -49,7 +49,7 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testCreatePersistentPath { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, "test", "1")) - var zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) + val zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState zkUtils.createPersistentPath(path) @@ -63,10 +63,8 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testMakeSurePersistsPathExistsThrowsException { - val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, - "test", "1")) - var zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, - config.zkConnectionTimeoutMs, false) + val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, "test", "1")) + val zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState zkUtils.makeSurePersistentPathExists(path) @@ -81,7 +79,7 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testMakeSurePersistsPathExists { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, "test", "1")) - var zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) + val zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState zkUtils.makeSurePersistentPathExists(path) @@ -95,10 +93,8 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testCreateEphemeralPathThrowsException { - val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, - "test", "1")) - var zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, - config.zkConnectionTimeoutMs, false) + val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, "test", "1")) + val zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState zkUtils.createEphemeralPathExpectConflict(path, "somedata") @@ -113,7 +109,7 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testCreateEphemeralPathExists { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, "test", "1")) - var zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) + val zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState zkUtils.createEphemeralPathExpectConflict(path, "somedata") @@ -129,8 +125,7 @@ class ZKPathTest extends ZooKeeperTestHarness { def testCreatePersistentSequentialThrowsException { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnectWithInvalidRoot, "test", "1")) - var zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, - config.zkConnectionTimeoutMs, false) + val zkUtils = ZkUtils(zkConnectWithInvalidRoot, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) try { ZkPath.resetNamespaceCheckedState 
zkUtils.createSequentialPersistentPath(path) @@ -145,7 +140,7 @@ class ZKPathTest extends ZooKeeperTestHarness { @Test def testCreatePersistentSequentialExists { val config = new ConsumerConfig(TestUtils.createConsumerProperties(zkConnect, "test", "1")) - var zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) + val zkUtils = ZkUtils(zkConnect, zkSessionTimeoutMs, config.zkConnectionTimeoutMs, false) var actualPath: String = "" try { diff --git a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala index 95f4e350954c..0de11cdbe12f 100755 --- a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala +++ b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala @@ -24,18 +24,19 @@ import org.scalatest.junit.JUnitSuite import org.apache.kafka.common.security.JaasUtils trait ZooKeeperTestHarness extends JUnitSuite with Logging { - var zookeeper: EmbeddedZookeeper = null - var zkPort: Int = -1 - var zkUtils: ZkUtils = null + val zkConnectionTimeout = 6000 val zkSessionTimeout = 6000 - def zkConnect: String = "127.0.0.1:" + zkPort - def confFile: String = System.getProperty(JaasUtils.JAVA_LOGIN_CONFIG_PARAM, "") + + var zkUtils: ZkUtils = null + var zookeeper: EmbeddedZookeeper = null + + def zkPort: Int = zookeeper.port + def zkConnect: String = s"127.0.0.1:$zkPort" @Before def setUp() { zookeeper = new EmbeddedZookeeper() - zkPort = zookeeper.port zkUtils = ZkUtils(zkConnect, zkSessionTimeout, zkConnectionTimeout, JaasUtils.isZkSecurityEnabled()) } From 96f6aa8c4d28c2d4a228ffca58ef2e25a4446564 Mon Sep 17 00:00:00 2001 From: Som Sahu Date: Tue, 3 May 2016 19:24:14 +0100 Subject: [PATCH 073/267] KAFKA-3448; Support zone index in IPv6 regex When an address is written textually, the zone index is appended to the address, separated by a percent sign (%). The actual syntax of zone indices depends on the operating system. 
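For illustration, a minimal standalone sketch (not part of this patch; the class name and layout are hypothetical) of how the widened host/port pattern behaves once '%' is admitted into the host character class, mirroring the new UtilsTest cases below:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class ZoneIndexParseSketch {
        // Same regex as the updated HOST_PORT_PATTERN: '%' is added so the zone index stays in the host group.
        private static final Pattern HOST_PORT_PATTERN =
                Pattern.compile(".*?\\[?([0-9a-zA-Z\\-%.:]*)\\]?:([0-9]+)");

        public static void main(String[] args) {
            Matcher m = HOST_PORT_PATTERN.matcher("PLAINTEXT://[fe80::b1da:69ca:57f7:63d8%3]:5678");
            if (m.matches()) {
                System.out.println(m.group(1)); // fe80::b1da:69ca:57f7:63d8%3 (zone index preserved)
                System.out.println(m.group(2)); // 5678
            }
        }
    }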
Author: Som Sahu Author: Soumyajit Sahu Reviewers: Ismael Juma Closes #1305 from soumyajit-sahu/fixIPV6RegexPattern_trunk (cherry picked from commit 88e5d5a9a16f4831b8a178355d38d203febe85f9) Signed-off-by: Ismael Juma --- .../java/org/apache/kafka/common/utils/Utils.java | 2 +- .../java/org/apache/kafka/common/utils/UtilsTest.java | 2 ++ .../src/main/scala/kafka/cluster/BrokerEndPoint.scala | 2 +- core/src/main/scala/kafka/cluster/EndPoint.scala | 2 +- .../scala/unit/kafka/cluster/BrokerEndPointTest.scala | 11 +++++++++++ 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java index 2a988222d403..bd173ed63fd3 100755 --- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java @@ -50,7 +50,7 @@ public class Utils { // This matches URIs of formats: host:port and protocol:\\host:port // IPv6 is supported with [ip] pattern - private static final Pattern HOST_PORT_PATTERN = Pattern.compile(".*?\\[?([0-9a-zA-Z\\-.:]*)\\]?:([0-9]+)"); + private static final Pattern HOST_PORT_PATTERN = Pattern.compile(".*?\\[?([0-9a-zA-Z\\-%.:]*)\\]?:([0-9]+)"); public static final String NL = System.getProperty("line.separator"); diff --git a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java index 8ccc647a297a..1078578aa9ba 100755 --- a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java +++ b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java @@ -37,6 +37,7 @@ public void testGetHost() { assertEquals("::1", getHost("[::1]:1234")); assertEquals("2001:db8:85a3:8d3:1319:8a2e:370:7348", getHost("PLAINTEXT://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:5678")); assertEquals("2001:DB8:85A3:8D3:1319:8A2E:370:7348", getHost("PLAINTEXT://[2001:DB8:85A3:8D3:1319:8A2E:370:7348]:5678")); + assertEquals("fe80::b1da:69ca:57f7:63d8%3", getHost("PLAINTEXT://[fe80::b1da:69ca:57f7:63d8%3]:5678")); } @Test @@ -47,6 +48,7 @@ public void testGetPort() { assertEquals(1234, getPort("[::1]:1234").intValue()); assertEquals(5678, getPort("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:5678").intValue()); assertEquals(5678, getPort("[2001:DB8:85A3:8D3:1319:8A2E:370:7348]:5678").intValue()); + assertEquals(5678, getPort("[fe80::b1da:69ca:57f7:63d8%3]:5678").intValue()); } @Test diff --git a/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala b/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala index 9ae6fcee01a1..99cf66688b04 100644 --- a/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala +++ b/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala @@ -24,7 +24,7 @@ import org.apache.kafka.common.utils.Utils._ object BrokerEndPoint { - private val uriParseExp = """\[?([0-9a-zA-Z\-.:]*)\]?:([0-9]+)""".r + private val uriParseExp = """\[?([0-9a-zA-Z\-%.:]*)\]?:([0-9]+)""".r /** * BrokerEndPoint URI is host:port or [ipv6_host]:port diff --git a/core/src/main/scala/kafka/cluster/EndPoint.scala b/core/src/main/scala/kafka/cluster/EndPoint.scala index 3d248628ada4..eb45aae54c5c 100644 --- a/core/src/main/scala/kafka/cluster/EndPoint.scala +++ b/core/src/main/scala/kafka/cluster/EndPoint.scala @@ -26,7 +26,7 @@ import org.apache.kafka.common.utils.Utils object EndPoint { - private val uriParseExp = """^(.*)://\[?([0-9a-zA-Z\-.:]*)\]?:(-?[0-9]+)""".r + private val uriParseExp = 
"""^(.*)://\[?([0-9a-zA-Z\-%.:]*)\]?:(-?[0-9]+)""".r def readFrom(buffer: ByteBuffer): EndPoint = { val port = buffer.getInt() diff --git a/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala b/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala index 400d6d6d67a2..cec8fec1f1fd 100644 --- a/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala +++ b/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala @@ -102,6 +102,11 @@ class BrokerEndPointTest extends Logging { endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString) assert(endpoint.host == "::1") assert(endpoint.port == 9092) + // test for ipv6 with % character + connectionString = "[fe80::b1da:69ca:57f7:63d8%3]:9092" + endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString) + assert(endpoint.host == "fe80::b1da:69ca:57f7:63d8%3") + assert(endpoint.port == 9092) // add test for uppercase in hostname connectionString = "MyHostname:9092" endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString) @@ -128,6 +133,12 @@ class BrokerEndPointTest extends Logging { assert(endpoint.host == "::1") assert(endpoint.port == 9092) assert(endpoint.connectionString == "PLAINTEXT://[::1]:9092") + // test for ipv6 with % character + connectionString = "PLAINTEXT://[fe80::b1da:69ca:57f7:63d8%3]:9092" + endpoint = EndPoint.createEndPoint(connectionString) + assert(endpoint.host == "fe80::b1da:69ca:57f7:63d8%3") + assert(endpoint.port == 9092) + assert(endpoint.connectionString == "PLAINTEXT://[fe80::b1da:69ca:57f7:63d8%3]:9092") // test hostname connectionString = "PLAINTEXT://MyHostname:9092" endpoint = EndPoint.createEndPoint(connectionString) From 4de0ed7d525a03cf41a855c0ef22d8f98224914f Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Tue, 3 May 2016 23:42:03 +0100 Subject: [PATCH 074/267] MINOR: Clean up of ConsumerCoordinator and PartitionAssignor Author: Liquan Pei Reviewers: Jason Gustafson , Ismael Juma Closes #1306 from Ishiihara/minor-consumer-cleanup (cherry picked from commit af013788436ae5652b531184770f2620f8914123) Signed-off-by: Ismael Juma --- .../kafka/clients/consumer/internals/ConsumerCoordinator.java | 2 +- .../kafka/clients/consumer/internals/PartitionAssignor.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index 887f47c1cde4..7486969380dc 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -62,7 +62,7 @@ public final class ConsumerCoordinator extends AbstractCoordinator { private static final Logger log = LoggerFactory.getLogger(ConsumerCoordinator.class); private final List assignors; - private final org.apache.kafka.clients.Metadata metadata; + private final Metadata metadata; private final ConsumerCoordinatorMetrics sensors; private final SubscriptionState subscriptions; private final OffsetCommitCallback defaultOffsetCommitCallback; diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java index 46bfa758237c..df8f2f14c920 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java +++ 
b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java @@ -39,7 +39,7 @@ public interface PartitionAssignor { * Return a serializable object representing the local member's subscription. This can include * additional information as well (e.g. local host/rack information) which can be leveraged in * {@link #assign(Cluster, Map)}. - * @param topics Topics subscribed to through {@link org.apache.kafka.clients.consumer.KafkaConsumer#subscribe(List)} + * @param topics Topics subscribed to through {@link org.apache.kafka.clients.consumer.KafkaConsumer#subscribe(java.util.Collection)} * and variants * @return Non-null subscription with optional user data */ From 138b37d62b1799df9c63372519568fa302297775 Mon Sep 17 00:00:00 2001 From: Chen Zhu Date: Tue, 3 May 2016 23:59:12 +0100 Subject: [PATCH 075/267] KAFKA-3648; maxTimeToBlock in BufferPool.allocate should be enforced `maxTimeToBlock` needs to be updated in each loop iteration. Also record waitTime before throwing `TimeoutException` Author: Chen Zhu Reviewers: Dong Lin , Ismael Juma Closes #1304 from zhuchen1018/KAFKA-3648 (cherry picked from commit 94e12a2e1fbd2f43821643d67a0d91f03b3f94e5) Signed-off-by: Ismael Juma --- .../producer/internals/BufferPool.java | 22 +++++---- .../producer/internals/BufferPoolTest.java | 45 ++++++++++++++----- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java index f881e62c333c..55779711cef5 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java @@ -83,13 +83,13 @@ public BufferPool(long memory, int poolableSize, Metrics metrics, Time time, Str * is configured with blocking mode. 
* * @param size The buffer size to allocate in bytes - * @param maxTimeToBlock The maximum time in milliseconds to block for buffer memory to be available + * @param maxTimeToBlockMs The maximum time in milliseconds to block for buffer memory to be available * @return The buffer * @throws InterruptedException If the thread is interrupted while blocked * @throws IllegalArgumentException if size is larger than the total memory controlled by the pool (and hence we would block * forever) */ - public ByteBuffer allocate(int size, long maxTimeToBlock) throws InterruptedException { + public ByteBuffer allocate(int size, long maxTimeToBlockMs) throws InterruptedException { if (size > this.totalMemory) throw new IllegalArgumentException("Attempt to allocate " + size + " bytes, but there is a hard limit of " @@ -117,15 +117,21 @@ public ByteBuffer allocate(int size, long maxTimeToBlock) throws InterruptedExce int accumulated = 0; ByteBuffer buffer = null; Condition moreMemory = this.lock.newCondition(); + long remainingTimeToBlockNs = TimeUnit.MILLISECONDS.toNanos(maxTimeToBlockMs); this.waiters.addLast(moreMemory); // loop over and over until we have a buffer or have reserved // enough memory to allocate one while (accumulated < size) { - long startWait = time.nanoseconds(); - if (!moreMemory.await(maxTimeToBlock, TimeUnit.MILLISECONDS)) - throw new TimeoutException("Failed to allocate memory within the configured max blocking time"); - long endWait = time.nanoseconds(); - this.waitTime.record(endWait - startWait, time.milliseconds()); + long startWaitNs = time.nanoseconds(); + boolean waitingTimeElapsed = !moreMemory.await(remainingTimeToBlockNs, TimeUnit.NANOSECONDS); + long endWaitNs = time.nanoseconds(); + long timeNs = Math.max(0L, endWaitNs - startWaitNs); + this.waitTime.record(timeNs, time.milliseconds()); + + if (waitingTimeElapsed) + throw new TimeoutException("Failed to allocate memory within the configured max blocking time " + maxTimeToBlockMs + " ms."); + + remainingTimeToBlockNs -= timeNs; // check if we can satisfy this request from the free list, // otherwise allocate memory @@ -256,4 +262,4 @@ public int poolableSize() { public long totalMemory() { return this.totalMemory; } -} \ No newline at end of file +} diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java index 72d85a2328a0..88e894386b3c 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java @@ -19,6 +19,7 @@ import org.apache.kafka.common.errors.TimeoutException; import org.apache.kafka.common.metrics.Metrics; import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.test.TestUtils; import org.junit.After; import org.junit.Test; @@ -27,6 +28,7 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import static org.junit.Assert.assertTrue; @@ -34,10 +36,11 @@ import static org.junit.Assert.assertEquals; public class BufferPoolTest { - private MockTime time = new MockTime(); - private Metrics metrics = new Metrics(time); - private final long maxBlockTimeMs = 2000; - String metricGroup = "TestMetrics"; + private final MockTime time = new MockTime(); + private final 
SystemTime systemTime = new SystemTime(); + private final Metrics metrics = new Metrics(time); + private final long maxBlockTimeMs = 2000; + private final String metricGroup = "TestMetrics"; @After public void teardown() { @@ -96,7 +99,7 @@ public void testDelayedAllocation() throws Exception { CountDownLatch allocation = asyncAllocate(pool, 5 * 1024); assertEquals("Allocation shouldn't have happened yet, waiting on memory.", 1L, allocation.getCount()); doDealloc.countDown(); // return the memory - allocation.await(); + assertTrue("Allocation should succeed soon after de-allocation", allocation.await(1, TimeUnit.SECONDS)); } private CountDownLatch asyncDeallocate(final BufferPool pool, final ByteBuffer buffer) { @@ -115,6 +118,16 @@ public void run() { return latch; } + private void delayedDeallocate(final BufferPool pool, final ByteBuffer buffer, final long delayMs) { + Thread thread = new Thread() { + public void run() { + systemTime.sleep(delayMs); + pool.deallocate(buffer); + } + }; + thread.start(); + } + private CountDownLatch asyncAllocate(final BufferPool pool, final int size) { final CountDownLatch completed = new CountDownLatch(1); Thread thread = new Thread() { @@ -133,20 +146,32 @@ public void run() { } /** - * Test if Timeout exception is thrown when there is not enough memory to allocate and the elapsed time is greater than the max specified block time + * Test if Timeout exception is thrown when there is not enough memory to allocate and the elapsed time is greater than the max specified block time. + * And verify that the allocation should finish soon after the maxBlockTimeMs. * * @throws Exception */ @Test public void testBlockTimeout() throws Exception { - BufferPool pool = new BufferPool(2, 1, metrics, time, metricGroup); - pool.allocate(1, maxBlockTimeMs); + BufferPool pool = new BufferPool(10, 1, metrics, systemTime, metricGroup); + ByteBuffer buffer1 = pool.allocate(1, maxBlockTimeMs); + ByteBuffer buffer2 = pool.allocate(1, maxBlockTimeMs); + ByteBuffer buffer3 = pool.allocate(1, maxBlockTimeMs); + // First two buffers will be de-allocated within maxBlockTimeMs since the most recent de-allocation + delayedDeallocate(pool, buffer1, maxBlockTimeMs / 2); + delayedDeallocate(pool, buffer2, maxBlockTimeMs); + // The third buffer will be de-allocated after maxBlockTimeMs since the most recent de-allocation + delayedDeallocate(pool, buffer3, maxBlockTimeMs / 2 * 5); + + long beginTimeMs = systemTime.milliseconds(); try { - pool.allocate(2, maxBlockTimeMs); - fail("The buffer allocated more memory than its maximum value 2"); + pool.allocate(10, maxBlockTimeMs); + fail("The buffer allocated more memory than its maximum value 10"); } catch (TimeoutException e) { // this is good } + long endTimeMs = systemTime.milliseconds(); + assertTrue("Allocation should finish not much later than maxBlockTimeMs", endTimeMs - beginTimeMs < maxBlockTimeMs + 1000); } /** From 2d93696634bd2a99858b4ba2578b87dae8b86a8a Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Tue, 3 May 2016 18:05:04 -0700 Subject: [PATCH 076/267] KAFKA-3654: Config validation should validate both common and connector specific configurations Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1313 from Ishiihara/config-short-circuit (cherry picked from commit 50aacd660d4c4212ffa4a9dca7d45bcfe50af833) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/AbstractHerder.java | 5 ---- .../ConnectorPluginsResourceTest.java | 25 ++++++++++++++++--- 2 files changed, 22 insertions(+), 8 deletions(-) diff 
--git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java index 83f56e2cb7a0..ee2085987228 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java @@ -234,11 +234,6 @@ public ConnectorStateInfo.TaskState taskStatus(ConnectorTaskId id) { public ConfigInfos validateConfigs(String connType, Map connectorConfig) { ConfigDef connectorConfigDef = ConnectorConfig.configDef(); List connectorConfigValues = connectorConfigDef.validate(connectorConfig); - ConfigInfos result = generateResult(connType, connectorConfigDef.configKeys(), connectorConfigValues, Collections.emptyList()); - - if (result.errorCount() != 0) { - return result; - } Connector connector = getConnector(connType); diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java index 241d331bd228..ddf30c712a84 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java @@ -25,9 +25,11 @@ import org.apache.kafka.common.config.ConfigDef.Recommender; import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.common.config.ConfigDef.Width; +import org.apache.kafka.common.config.ConfigValue; import org.apache.kafka.connect.connector.Connector; import org.apache.kafka.connect.connector.Task; import org.apache.kafka.connect.runtime.AbstractHerder; +import org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.runtime.Herder; import org.apache.kafka.connect.runtime.rest.RestServer; import org.apache.kafka.connect.runtime.rest.entities.ConfigInfo; @@ -70,16 +72,23 @@ public class ConnectorPluginsResourceTest { private static Map props = new HashMap<>(); static { + props.put("name", "test"); props.put("test.string.config", "testString"); props.put("test.int.config", "1"); props.put("test.list.config", "a,b"); } private static final ConfigInfos CONFIG_INFOS; + private static final int ERROR_COUNT = 1; static { List configs = new LinkedList<>(); + ConfigDef connectorConfigDef = ConnectorConfig.configDef(); + List connectorConfigValues = connectorConfigDef.validate(props); + ConfigInfos result = AbstractHerder.generateResult(ConnectorPluginsResourceTestConnector.class.getName(), connectorConfigDef.configKeys(), connectorConfigValues, Collections.emptyList()); + configs.addAll(result.values()); + ConfigKeyInfo configKeyInfo = new ConfigKeyInfo("test.string.config", "STRING", true, "", "HIGH", "Test configuration for string type.", null, -1, "NONE", "test.string.config", Collections.emptyList()); ConfigValueInfo configValueInfo = new ConfigValueInfo("test.string.config", "testString", Collections.emptyList(), Collections.emptyList(), true); ConfigInfo configInfo = new ConfigInfo(configKeyInfo, configValueInfo); @@ -100,7 +109,7 @@ public class ConnectorPluginsResourceTest { configInfo = new ConfigInfo(configKeyInfo, configValueInfo); configs.add(configInfo); - CONFIG_INFOS = new ConfigInfos(ConnectorPluginsResourceTestConnector.class.getName(), 0, Collections.singletonList("Test"), configs); + CONFIG_INFOS = 
new ConfigInfos(ConnectorPluginsResourceTestConnector.class.getName(), ERROR_COUNT, Collections.singletonList("Test"), configs); } @Mock @@ -121,10 +130,20 @@ public void testValidateConfig() throws Throwable { PowerMock.expectLastCall().andAnswer(new IAnswer() { @Override public ConfigInfos answer() { - Config config = new ConnectorPluginsResourceTestConnector().validate(props); + ConfigDef connectorConfigDef = ConnectorConfig.configDef(); + List connectorConfigValues = connectorConfigDef.validate(props); + Connector connector = new ConnectorPluginsResourceTestConnector(); + Config config = connector.validate(props); ConfigDef configDef = connector.config(); - return AbstractHerder.generateResult(ConnectorPluginsResourceTestConnector.class.getName(), configDef.configKeys(), config.configValues(), configDef.groups()); + Map configKeys = configDef.configKeys(); + List configValues = config.configValues(); + + Map resultConfigKeys = new HashMap<>(configKeys); + resultConfigKeys.putAll(connectorConfigDef.configKeys()); + configValues.addAll(connectorConfigValues); + + return AbstractHerder.generateResult(ConnectorPluginsResourceTestConnector.class.getName(), resultConfigKeys, configValues, Collections.singletonList("Test")); } }); PowerMock.replayAll(); From 41a8cb2520066b138ca2d9bbccb1dbdfdf577aa6 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 4 May 2016 02:14:35 +0100 Subject: [PATCH 077/267] MINOR: Handle null values in validators Author: Ewen Cheslack-Postava Reviewers: Ismael Juma Closes #1316 from ewencp/minor-handle-null-values-validators (cherry picked from commit 03a1f7d39c553835022987826df4762cfd39a639) Signed-off-by: Ismael Juma --- .../main/java/org/apache/kafka/common/config/ConfigDef.java | 2 ++ .../java/org/apache/kafka/common/config/ConfigDefTest.java | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java index 3a562ce9288e..256f5231de8f 100644 --- a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java +++ b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java @@ -790,6 +790,8 @@ public static Range between(Number min, Number max) { } public void ensureValid(String name, Object o) { + if (o == null) + throw new ConfigException(name, o, "Value must be non-null"); Number n = (Number) o; if (min != null && n.doubleValue() < min.doubleValue()) throw new ConfigException(name, o, "Value must be at least " + min); diff --git a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java index e20e422020af..0ed0f1a85fe9 100644 --- a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java +++ b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java @@ -132,9 +132,9 @@ public void testInvalidDefaultString() { @Test public void testValidators() { - testValidators(Type.INT, Range.between(0, 10), 5, new Object[]{1, 5, 9}, new Object[]{-1, 11}); + testValidators(Type.INT, Range.between(0, 10), 5, new Object[]{1, 5, 9}, new Object[]{-1, 11, null}); testValidators(Type.STRING, ValidString.in("good", "values", "default"), "default", - new Object[]{"good", "values", "default"}, new Object[]{"bad", "inputs"}); + new Object[]{"good", "values", "default"}, new Object[]{"bad", "inputs", null}); } @Test From 21e930b4b4ae2c96b990da8e53e92231b2698430 Mon Sep 17 00:00:00 2001 From: Jason 
Gustafson Date: Wed, 4 May 2016 20:10:41 +0100 Subject: [PATCH 078/267] KAFKA-3632; remove fetcher metrics on shutdown and leader migration Author: Jason Gustafson Reviewers: Ismael Juma Closes #1312 from hachikuji/KAFKA-3632 (cherry picked from commit 7a0821d653c6c99a26ded6046ea24722c8d8ad85) Signed-off-by: Ismael Juma --- .../kafka/server/AbstractFetcherThread.scala | 59 ++++++-- .../server/AbstractFetcherThreadTest.scala | 127 ++++++++++++++++++ 2 files changed, 176 insertions(+), 10 deletions(-) create mode 100644 core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala diff --git a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala index 8b688b9885f3..4aba66714070 100755 --- a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala +++ b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala @@ -77,6 +77,10 @@ abstract class AbstractFetcherThread(name: String, partitionMapCond.signalAll() } awaitShutdown() + + // we don't need the lock since the thread has finished shutdown and metric removal is safe + fetcherStats.unregister() + fetcherLagStats.unregister() } override def doWork() { @@ -133,7 +137,7 @@ abstract class AbstractFetcherThread(name: String, case None => currentPartitionFetchState.offset } partitionMap.put(topicAndPartition, new PartitionFetchState(newOffset)) - fetcherLagStats.getFetcherLagStats(topic, partitionId).lag = Math.max(0L, partitionData.highWatermark - newOffset) + fetcherLagStats.getAndMaybePut(topic, partitionId).lag = Math.max(0L, partitionData.highWatermark - newOffset) fetcherStats.byteRate.mark(validBytes) // Once we hand off the partition data to the subclass, we can't mess with it any more in this thread processPartitionData(topicAndPartition, currentPartitionFetchState.offset, partitionData) @@ -207,8 +211,12 @@ abstract class AbstractFetcherThread(name: String, def removePartitions(topicAndPartitions: Set[TopicAndPartition]) { partitionMapLock.lockInterruptibly() - try topicAndPartitions.foreach(partitionMap.remove) - finally partitionMapLock.unlock() + try { + topicAndPartitions.foreach { topicAndPartition => + partitionMap.remove(topicAndPartition) + fetcherLagStats.unregister(topicAndPartition.topic, topicAndPartition.partition) + } + } finally partitionMapLock.unlock() } def partitionCount() = { @@ -235,15 +243,25 @@ object AbstractFetcherThread { } +object FetcherMetrics { + val ConsumerLag = "ConsumerLag" + val RequestsPerSec = "RequestsPerSec" + val BytesPerSec = "BytesPerSec" +} + class FetcherLagMetrics(metricId: ClientIdTopicPartition) extends KafkaMetricsGroup { + private[this] val lagVal = new AtomicLong(-1L) - newGauge("ConsumerLag", + private[this] val tags = Map( + "clientId" -> metricId.clientId, + "topic" -> metricId.topic, + "partition" -> metricId.partitionId.toString) + + newGauge(FetcherMetrics.ConsumerLag, new Gauge[Long] { def value = lagVal.get }, - Map("clientId" -> metricId.clientId, - "topic" -> metricId.topic, - "partition" -> metricId.partitionId.toString) + tags ) def lag_=(newLag: Long) { @@ -251,15 +269,30 @@ class FetcherLagMetrics(metricId: ClientIdTopicPartition) extends KafkaMetricsGr } def lag = lagVal.get + + def unregister() { + removeMetric(FetcherMetrics.ConsumerLag, tags) + } } class FetcherLagStats(metricId: ClientIdAndBroker) { private val valueFactory = (k: ClientIdTopicPartition) => new FetcherLagMetrics(k) val stats = new Pool[ClientIdTopicPartition, FetcherLagMetrics](Some(valueFactory)) - def 
getFetcherLagStats(topic: String, partitionId: Int): FetcherLagMetrics = { + def getAndMaybePut(topic: String, partitionId: Int): FetcherLagMetrics = { stats.getAndMaybePut(new ClientIdTopicPartition(metricId.clientId, topic, partitionId)) } + + def unregister(topic: String, partitionId: Int) { + val lagMetrics = stats.remove(new ClientIdTopicPartition(metricId.clientId, topic, partitionId)) + if (lagMetrics != null) lagMetrics.unregister() + } + + def unregister() { + stats.keys.toBuffer.foreach { key: ClientIdTopicPartition => + unregister(key.topic, key.partitionId) + } + } } class FetcherStats(metricId: ClientIdAndBroker) extends KafkaMetricsGroup { @@ -267,9 +300,15 @@ class FetcherStats(metricId: ClientIdAndBroker) extends KafkaMetricsGroup { "brokerHost" -> metricId.brokerHost, "brokerPort" -> metricId.brokerPort.toString) - val requestRate = newMeter("RequestsPerSec", "requests", TimeUnit.SECONDS, tags) + val requestRate = newMeter(FetcherMetrics.RequestsPerSec, "requests", TimeUnit.SECONDS, tags) + + val byteRate = newMeter(FetcherMetrics.BytesPerSec, "bytes", TimeUnit.SECONDS, tags) + + def unregister() { + removeMetric(FetcherMetrics.RequestsPerSec, tags) + removeMetric(FetcherMetrics.BytesPerSec, tags) + } - val byteRate = newMeter("BytesPerSec", "bytes", TimeUnit.SECONDS, tags) } case class ClientIdTopicPartition(clientId: String, topic: String, partitionId: Int) { diff --git a/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala new file mode 100644 index 000000000000..b95f2bf05f3d --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.server + +import com.yammer.metrics.Metrics +import kafka.cluster.BrokerEndPoint +import kafka.common.TopicAndPartition +import kafka.message.ByteBufferMessageSet +import kafka.server.AbstractFetcherThread.{FetchRequest, PartitionData} +import kafka.utils.TestUtils +import org.apache.kafka.common.protocol.Errors +import org.junit.Assert.{assertFalse, assertTrue} +import org.junit.{Before, Test} + +import scala.collection.JavaConverters._ + +class AbstractFetcherThreadTest { + + @Before + def cleanMetricRegistry(): Unit = { + for (metricName <- Metrics.defaultRegistry().allMetrics().keySet().asScala) + Metrics.defaultRegistry().removeMetric(metricName) + } + + @Test + def testMetricsRemovedOnShutdown() { + val partition = new TopicAndPartition("topic", 0) + val fetcherThread = new DummyFetcherThread("dummy", "client", new BrokerEndPoint(0, "localhost", 9092)) + + fetcherThread.start() + + // add one partition to create the consumer lag metric + fetcherThread.addPartitions(Map(partition -> 0L)) + + // wait until all fetcher metrics are present + TestUtils.waitUntilTrue(() => + allMetricsNames == Set(FetcherMetrics.BytesPerSec, FetcherMetrics.RequestsPerSec, FetcherMetrics.ConsumerLag), + "Failed waiting for all fetcher metrics to be registered") + + fetcherThread.shutdown() + + // after shutdown, they should be gone + assertTrue(Metrics.defaultRegistry().allMetrics().isEmpty) + } + + @Test + def testConsumerLagRemovedWithPartition() { + val partition = new TopicAndPartition("topic", 0) + val fetcherThread = new DummyFetcherThread("dummy", "client", new BrokerEndPoint(0, "localhost", 9092)) + + fetcherThread.start() + + // add one partition to create the consumer lag metric + fetcherThread.addPartitions(Map(partition -> 0L)) + + // wait until lag metric is present + TestUtils.waitUntilTrue(() => allMetricsNames(FetcherMetrics.ConsumerLag), + "Failed waiting for consumer lag metric") + + // remove the partition to simulate leader migration + fetcherThread.removePartitions(Set(partition)) + + // the lag metric should now be gone + assertFalse(allMetricsNames(FetcherMetrics.ConsumerLag)) + + fetcherThread.shutdown() + } + + private def allMetricsNames = Metrics.defaultRegistry().allMetrics().asScala.keySet.map(_.getName) + + class DummyFetchRequest(val offsets: collection.Map[TopicAndPartition, Long]) extends FetchRequest { + override def isEmpty: Boolean = offsets.isEmpty + + override def offset(topicAndPartition: TopicAndPartition): Long = offsets(topicAndPartition) + } + + class DummyPartitionData extends PartitionData { + override def errorCode: Short = Errors.NONE.code + + override def toByteBufferMessageSet: ByteBufferMessageSet = new ByteBufferMessageSet() + + override def highWatermark: Long = 0L + + override def exception: Option[Throwable] = None + } + + class DummyFetcherThread(name: String, + clientId: String, + sourceBroker: BrokerEndPoint) + extends AbstractFetcherThread(name, clientId, sourceBroker, 0) { + + type REQ = DummyFetchRequest + type PD = PartitionData + + override def processPartitionData(topicAndPartition: TopicAndPartition, + fetchOffset: Long, + partitionData: PartitionData): Unit = {} + + override def handleOffsetOutOfRange(topicAndPartition: TopicAndPartition): Long = 0L + + override def handlePartitionsWithErrors(partitions: Iterable[TopicAndPartition]): Unit = {} + + override protected def fetch(fetchRequest: DummyFetchRequest): collection.Map[TopicAndPartition, DummyPartitionData] = { + fetchRequest.offsets.mapValues(_ => new 
DummyPartitionData) + } + + override protected def buildFetchRequest(partitionMap: collection.Map[TopicAndPartition, PartitionFetchState]): DummyFetchRequest = { + new DummyFetchRequest(partitionMap.mapValues(_.offset)) + } + } + +} From 9b6761ccd6413228efb8849818e8b7a8ffceaaad Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Wed, 4 May 2016 11:25:26 -0700 Subject: [PATCH 079/267] MINOR: Modify checkstyle to allow import classes only used in javadoc Author: Guozhang Wang Reviewers: Gwen Shapira, Ismael Juma Closes #1317 from guozhangwang/KJavaDocImport --- checkstyle/checkstyle.xml | 4 +++- .../apache/kafka/streams/kstream/KTable.java | 18 +++++++++--------- .../streams/processor/StreamPartitioner.java | 4 +++- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/checkstyle/checkstyle.xml b/checkstyle/checkstyle.xml index 3adc446d2070..25d6f2fd0785 100644 --- a/checkstyle/checkstyle.xml +++ b/checkstyle/checkstyle.xml @@ -31,7 +31,9 @@ - + + + diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java index cc5a52180ab1..50d0595aed96 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java @@ -21,6 +21,7 @@ import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.StreamPartitioner; +import org.apache.kafka.clients.producer.internals.DefaultPartitioner; /** * {@link KTable} is an abstraction of a changelog stream from a primary-keyed table. @@ -114,7 +115,7 @@ public interface KTable { /** * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic - * using default serializers and deserializers and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. + * using default serializers and deserializers and producer's {@link DefaultPartitioner}. * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(String)}. * * @param topic the topic name @@ -129,7 +130,7 @@ public interface KTable { * This is equivalent to calling {@link #to(String)} and {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(String)}. * * @param partitioner the function used to determine how records are distributed among partitions of the topic, - * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * if not specified producer's {@link DefaultPartitioner} will be used * @param topic the topic name * * @return a new {@link KTable} that contains the exact same records as this {@link KTable} @@ -140,7 +141,7 @@ public interface KTable { * Materialize this stream to a topic, also creates a new instance of {@link KTable} from the topic. * If {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} * for the key {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used - * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. + * — otherwise producer's {@link DefaultPartitioner} is used. * This is equivalent to calling {@link #to(Serde, Serde, String)} and * {@link org.apache.kafka.streams.kstream.KStreamBuilder#table(Serde, Serde, String)}. 
* @@ -167,7 +168,7 @@ public interface KTable { * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used - * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * — otherwise {@link DefaultPartitioner} will be used * @param topic the topic name * * @return a new {@link KTable} that contains the exact same records as this {@link KTable} @@ -176,7 +177,7 @@ public interface KTable { /** * Materialize this stream to a topic using default serializers specified in the config - * and producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}. + * and producer's {@link DefaultPartitioner}. * * @param topic the topic name */ @@ -187,7 +188,7 @@ public interface KTable { * and a customizable {@link StreamPartitioner} to determine the distribution of records to partitions. * * @param partitioner the function used to determine how records are distributed among partitions of the topic, - * if not specified producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * if not specified producer's {@link DefaultPartitioner} will be used * @param topic the topic name */ void to(StreamPartitioner partitioner, String topic); @@ -196,7 +197,7 @@ public interface KTable { * Materialize this stream to a topic. If {@code keySerde} provides a * {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} is used - * — otherwise producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} is used. + * — otherwise producer's {@link DefaultPartitioner} is used. 
* * @param keySerde key serde used to send key-value pairs, * if not specified the default serde defined in the configs will be used @@ -216,7 +217,7 @@ public interface KTable { * @param partitioner the function used to determine how records are distributed among partitions of the topic, * if not specified and {@code keySerde} provides a {@link org.apache.kafka.streams.kstream.internals.WindowedSerializer} for the key * {@link org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner} will be used - * — otherwise {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used + * — otherwise {@link DefaultPartitioner} will be used * @param topic the topic name */ void to(Serde keySerde, Serde valSerde, StreamPartitioner partitioner, String topic); @@ -237,7 +238,6 @@ public interface KTable { * @param mapper @param mapper the instance of {@link KeyValueMapper} * @param the new key type * - * @return a {@link KStream} that contains records with new keys of different type for each update of this {@link KTable} * @return a {@link KStream} that contains the transformed records from this {@link KTable}; * the records are no longer treated as updates on a primary-keyed table, * but rather as normal key-value pairs in a record stream diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java b/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java index fbb037849bc0..0c51c5011877 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java @@ -16,9 +16,11 @@ */ package org.apache.kafka.streams.processor; +import org.apache.kafka.clients.producer.internals.DefaultPartitioner; + /** * Determine how records are distributed among the partitions in a Kafka topic. If not specified, the underlying producer's - * {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used to determine the partition. + * {@link DefaultPartitioner} will be used to determine the partition. *

    * Kafka topics are divided into one or more partitions. Since each partition must fit on the servers that host it, so * using multiple partitions allows the topic to scale beyond a size that will fit on a single machine. Partitions also enable you From f4bb4a6515dfbd5986ae9b3190bbb58dd8cf4e41 Mon Sep 17 00:00:00 2001 From: Eno Thereska Date: Tue, 3 May 2016 13:26:22 -0700 Subject: [PATCH 080/267] MINOR: Added more integration tests Author: Eno Thereska Reviewers: Ismael Juma, Michael G. Noll, Guozhang Wang Closes #1285 from enothereska/more-integration-tests --- checkstyle/import-control.xml | 1 + .../integration/FanoutIntegrationTest.java | 166 +++++++++++ .../InternalTopicIntegrationTest.java | 16 +- .../integration/JoinIntegrationTest.java | 266 ++++++++++++++++++ .../MapFunctionIntegrationTest.java | 127 +++++++++ .../PassThroughIntegrationTest.java | 113 ++++++++ .../integration/WordCountIntegrationTest.java | 149 ++++++++++ 7 files changed, 830 insertions(+), 8 deletions(-) create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml index 7a45515e7bcf..5f52cced89d8 100644 --- a/checkstyle/import-control.xml +++ b/checkstyle/import-control.xml @@ -148,6 +148,7 @@ + diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java new file mode 100644 index 000000000000..a7b478507ae6 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.integration; + + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.kstream.ValueMapper; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Properties; + +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +/** + * End-to-end integration test that demonstrates "fan-out", using an embedded Kafka cluster. + * + * This example shows how you can read from one input topic/stream, transform the data (here: + * trivially) in two different ways via two intermediate streams, and then write the respective + * results to two output topics. + * + *

    + * {@code
    + *
    + *                                         +---map()---> stream2 ---to()---> Kafka topic B
    + *                                         |
    + * Kafka topic A ---stream()--> stream1 ---+
    + *                                         |
    + *                                         +---map()---> stream3 ---to()---> Kafka topic C
    + *
    + * }
    + * 
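+ * In outline, the topology built in the test body below corresponds to the following DSL
+ * calls (upperCaseMapper and lowerCaseMapper are placeholder names for the anonymous
+ * ValueMapper instances defined in the test):
+ *
+ * {@code
+ * KStream<byte[], String> stream1 = builder.stream(INPUT_TOPIC_A);
+ * stream1.mapValues(upperCaseMapper).to(OUTPUT_TOPIC_B);
+ * stream1.mapValues(lowerCaseMapper).to(OUTPUT_TOPIC_C);
+ * }
+ *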
    + */ +public class FanoutIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + private static final String INPUT_TOPIC_A = "A"; + private static final String OUTPUT_TOPIC_B = "B"; + private static final String OUTPUT_TOPIC_C = "C"; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(INPUT_TOPIC_A); + CLUSTER.createTopic(OUTPUT_TOPIC_B); + CLUSTER.createTopic(OUTPUT_TOPIC_C); + } + + @Test + public void shouldFanoutTheInput() throws Exception { + List inputValues = Arrays.asList("Hello", "World"); + List expectedValuesForB = new ArrayList<>(); + List expectedValuesForC = new ArrayList<>(); + for (String input : inputValues) { + expectedValuesForB.add(input.toUpperCase(Locale.getDefault())); + expectedValuesForC.add(input.toLowerCase(Locale.getDefault())); + } + + // + // Step 1: Configure and start the processor topology. + // + KStreamBuilder builder = new KStreamBuilder(); + + Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "fanout-integration-test"); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + + KStream stream1 = builder.stream(INPUT_TOPIC_A); + KStream stream2 = stream1.mapValues( + new ValueMapper() { + @Override + public String apply(String value) { + return value.toUpperCase(Locale.getDefault()); + } + }); + KStream stream3 = stream1.mapValues( + new ValueMapper() { + @Override + public String apply(String value) { + return value.toLowerCase(Locale.getDefault()); + } + }); + stream2.to(OUTPUT_TOPIC_B); + stream3.to(OUTPUT_TOPIC_C); + + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + + // + // Step 2: Produce some input data to the input topic. + // + Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + IntegrationTestUtils.produceValuesSynchronously(INPUT_TOPIC_A, inputValues, producerConfig); + + // Give the stream processing application some time to do its work. + Thread.sleep(5000); + streams.close(); + + // + // Step 3: Verify the application's output data. 
+ // + + // Verify output topic B + Properties consumerConfigB = new Properties(); + consumerConfigB.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfigB.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-integration-test-standard-consumer-topicB"); + consumerConfigB.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfigB.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); + consumerConfigB.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + List actualValuesForB = IntegrationTestUtils.readValues(OUTPUT_TOPIC_B, consumerConfigB, inputValues.size()); + assertThat(actualValuesForB, equalTo(expectedValuesForB)); + + // Verify output topic C + Properties consumerConfigC = new Properties(); + consumerConfigC.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfigC.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-integration-test-standard-consumer-topicC"); + consumerConfigC.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfigC.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); + consumerConfigC.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + List actualValuesForC = IntegrationTestUtils.readValues(OUTPUT_TOPIC_C, consumerConfigC, inputValues.size()); + assertThat(actualValuesForC, equalTo(expectedValuesForC)); + } + +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java index 2a3e7670e287..66111c4279cd 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java @@ -55,7 +55,7 @@ */ public class InternalTopicIntegrationTest { @ClassRule - public static EmbeddedSingleNodeKafkaCluster cluster = new EmbeddedSingleNodeKafkaCluster(); + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); private static final String DEFAULT_INPUT_TOPIC = "inputTopic"; private static final String DEFAULT_OUTPUT_TOPIC = "outputTopic"; private static final int DEFAULT_ZK_SESSION_TIMEOUT_MS = 10 * 1000; @@ -63,8 +63,8 @@ public class InternalTopicIntegrationTest { @BeforeClass public static void startKafkaCluster() throws Exception { - cluster.createTopic(DEFAULT_INPUT_TOPIC); - cluster.createTopic(DEFAULT_OUTPUT_TOPIC); + CLUSTER.createTopic(DEFAULT_INPUT_TOPIC); + CLUSTER.createTopic(DEFAULT_OUTPUT_TOPIC); } /** @@ -79,12 +79,12 @@ private boolean isUsingCompactionForStateChangelogTopics() { // only ZooKeeper and will be returned when listing topics, but Kafka itself does not create the // topic. 
ZkClient zkClient = new ZkClient( - cluster.zKConnectString(), + CLUSTER.zKConnectString(), DEFAULT_ZK_SESSION_TIMEOUT_MS, DEFAULT_ZK_CONNECTION_TIMEOUT_MS, ZKStringSerializer$.MODULE$); boolean isSecure = false; - ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(cluster.zKConnectString()), isSecure); + ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(CLUSTER.zKConnectString()), isSecure); Map topicConfigs = AdminUtils.fetchAllTopicConfigs(zkUtils); Iterator it = topicConfigs.iterator(); @@ -118,8 +118,8 @@ public void shouldCompactTopicsForStateChangelogs() throws Exception { Properties streamsConfiguration = new Properties(); streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "compact-topics-integration-test"); - streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()); - streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, cluster.zKConnectString()); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); @@ -153,7 +153,7 @@ public KeyValue apply(String key, String value) { // Step 2: Produce some input data to the input topic. // Properties producerConfig = new Properties(); - producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java new file mode 100644 index 000000000000..1fc0ba67ad0a --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java @@ -0,0 +1,266 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.kafka.streams.integration; + + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.LongDeserializer; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.KeyValueMapper; +import org.apache.kafka.streams.kstream.Reducer; +import org.apache.kafka.streams.kstream.ValueJoiner; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +/** + * End-to-end integration test that demonstrates how to perform a join between a KStream and a + * KTable (think: KStream.leftJoin(KTable)), i.e. an example of a stateful computation. + */ +public class JoinIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + private static final String USER_CLICKS_TOPIC = "user-clicks"; + private static final String USER_REGIONS_TOPIC = "user-regions"; + private static final String OUTPUT_TOPIC = "output-topic"; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(USER_CLICKS_TOPIC); + CLUSTER.createTopic(USER_REGIONS_TOPIC); + CLUSTER.createTopic(OUTPUT_TOPIC); + } + + /** + * Tuple for a region and its associated number of clicks. + */ + private static final class RegionWithClicks { + + private final String region; + private final long clicks; + + public RegionWithClicks(String region, long clicks) { + if (region == null || region.isEmpty()) { + throw new IllegalArgumentException("region must be set"); + } + if (clicks < 0) { + throw new IllegalArgumentException("clicks must not be negative"); + } + this.region = region; + this.clicks = clicks; + } + + public String getRegion() { + return region; + } + + public long getClicks() { + return clicks; + } + + } + + @Test + public void shouldCountClicksPerRegion() throws Exception { + // Input 1: Clicks per user (multiple records allowed per user). + List> userClicks = Arrays.asList( + new KeyValue<>("alice", 13L), + new KeyValue<>("bob", 4L), + new KeyValue<>("chao", 25L), + new KeyValue<>("bob", 19L), + new KeyValue<>("dave", 56L), + new KeyValue<>("eve", 78L), + new KeyValue<>("alice", 40L), + new KeyValue<>("fang", 99L) + ); + + // Input 2: Region per user (multiple records allowed per user). + List> userRegions = Arrays.asList( + new KeyValue<>("alice", "asia"), /* Alice lived in Asia originally... */ + new KeyValue<>("bob", "americas"), + new KeyValue<>("chao", "asia"), + new KeyValue<>("dave", "europe"), + new KeyValue<>("alice", "europe"), /* ...but moved to Europe some time later. 
*/ + new KeyValue<>("eve", "americas"), + new KeyValue<>("fang", "asia") + ); + + List> expectedClicksPerRegion = Arrays.asList( + new KeyValue<>("europe", 13L), + new KeyValue<>("americas", 4L), + new KeyValue<>("asia", 25L), + new KeyValue<>("americas", 23L), + new KeyValue<>("europe", 69L), + new KeyValue<>("americas", 101L), + new KeyValue<>("europe", 109L), + new KeyValue<>("asia", 124L) + ); + + // + // Step 1: Configure and start the processor topology. + // + final Serde stringSerde = Serdes.String(); + final Serde longSerde = Serdes.Long(); + + Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "join-integration-test"); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + // Explicitly place the state directory under /tmp so that we can remove it via + // `purgeLocalStreamsState` below. Once Streams is updated to expose the effective + // StreamsConfig configuration (so we can retrieve whatever state directory Streams came up + // with automatically) we don't need to set this anymore and can update `purgeLocalStreamsState` + // accordingly. + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); + + // Remove any state from previous test runs + IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration); + + KStreamBuilder builder = new KStreamBuilder(); + + // This KStream contains information such as "alice" -> 13L. + // + // Because this is a KStream ("record stream"), multiple records for the same user will be + // considered as separate click-count events, each of which will be added to the total count. + KStream userClicksStream = builder.stream(stringSerde, longSerde, USER_CLICKS_TOPIC); + + // This KTable contains information such as "alice" -> "europe". + // + // Because this is a KTable ("changelog stream"), only the latest value (here: region) for a + // record key will be considered at the time when a new user-click record (see above) is + // received for the `leftJoin` below. Any previous region values are being considered out of + // date. This behavior is quite different to the KStream for user clicks above. + // + // For example, the user "alice" will be considered to live in "europe" (although originally she + // lived in "asia") because, at the time her first user-click record is being received and + // subsequently processed in the `leftJoin`, the latest region update for "alice" is "europe" + // (which overrides her previous region value of "asia"). + KTable userRegionsTable = + builder.table(stringSerde, stringSerde, USER_REGIONS_TOPIC); + + // Compute the number of clicks per region, e.g. "europe" -> 13L. + // + // The resulting KTable is continuously being updated as new data records are arriving in the + // input KStream `userClicksStream` and input KTable `userRegionsTable`. + KTable clicksPerRegion = userClicksStream + // Join the stream against the table. + // + // Null values possible: In general, null values are possible for region (i.e. the value of + // the KTable we are joining against) so we must guard against that (here: by setting the + // fallback region "UNKNOWN"). 
In this specific example this is not really needed because + // we know, based on the test setup, that all users have appropriate region entries at the + // time we perform the join. + // + // Also, we need to return a tuple of (region, clicks) for each user. But because Java does + // not support tuples out-of-the-box, we must use a custom class `RegionWithClicks` to + // achieve the same effect. + .leftJoin(userRegionsTable, new ValueJoiner() { + @Override + public RegionWithClicks apply(Long clicks, String region) { + RegionWithClicks regionWithClicks = new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks); + return regionWithClicks; + } + }) + // Change the stream from -> to -> + .map(new KeyValueMapper>() { + @Override + public KeyValue apply(String key, RegionWithClicks value) { + return new KeyValue<>(value.getRegion(), value.getClicks()); + } + }) + // Compute the total per region by summing the individual click counts per region. + .reduceByKey(new Reducer() { + @Override + public Long apply(Long value1, Long value2) { + return value1 + value2; + } + }, stringSerde, longSerde, "ClicksPerRegionUnwindowed"); + + // Write the (continuously updating) results to the output topic. + clicksPerRegion.to(stringSerde, longSerde, OUTPUT_TOPIC); + + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + + // + // Step 2: Publish user-region information. + // + // To keep this code example simple and easier to understand/reason about, we publish all + // user-region records before any user-click records (cf. step 3). In practice though, + // data records would typically be arriving concurrently in both input streams/topics. + Properties userRegionsProducerConfig = new Properties(); + userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + IntegrationTestUtils.produceKeyValuesSynchronously(USER_REGIONS_TOPIC, userRegions, userRegionsProducerConfig); + + // + // Step 3: Publish some user click events. + // + Properties userClicksProducerConfig = new Properties(); + userClicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + userClicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + userClicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + userClicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class); + IntegrationTestUtils.produceKeyValuesSynchronously(USER_CLICKS_TOPIC, userClicks, userClicksProducerConfig); + + // Give the stream processing application some time to do its work. + Thread.sleep(5000); + streams.close(); + + // + // Step 4: Verify the application's output data. 
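+ // For reference, expectedClicksPerRegion above is simply the running total per region
+ // derived from the inputs: e.g. "americas" goes 4 (bob) -> 23 (bob again: 4 + 19) -> 101
+ // (eve: 23 + 78), and alice's clicks are counted under "europe" because that is her latest
+ // region at the time her click records are processed.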
+ // + Properties consumerConfig = new Properties(); + consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "join-integration-test-standard-consumer"); + consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); + List> actualClicksPerRegion = IntegrationTestUtils.readKeyValues(OUTPUT_TOPIC, consumerConfig); + assertThat(actualClicksPerRegion, equalTo(expectedClicksPerRegion)); + } + +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java new file mode 100644 index 000000000000..47c00c10a043 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + + +package org.apache.kafka.streams.integration; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.kstream.ValueMapper; + +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Properties; + +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +/** + * End-to-end integration test based on a simple map, using an embedded Kafka cluster. 
+ */ +public class MapFunctionIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + private static final String DEFAULT_INPUT_TOPIC = "inputTopic"; + private static final String DEFAULT_OUTPUT_TOPIC = "outputTopic"; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(DEFAULT_INPUT_TOPIC); + CLUSTER.createTopic(DEFAULT_OUTPUT_TOPIC); + } + + @Test + public void shouldUppercaseTheInput() throws Exception { + List inputValues = Arrays.asList("hello", "world"); + List expectedValues = new ArrayList<>(); + for (String input : inputValues) { + expectedValues.add(input.toUpperCase(Locale.getDefault())); + } + + // + // Step 1: Configure and start the processor topology. + // + KStreamBuilder builder = new KStreamBuilder(); + + Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "map-function-integration-test"); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + + KStream input = builder.stream(DEFAULT_INPUT_TOPIC); + KStream uppercased = input.mapValues(new ValueMapper() { + @Override + public String apply(String value) { + return value.toUpperCase(Locale.getDefault()); + } + }); + uppercased.to(DEFAULT_OUTPUT_TOPIC); + + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + + // + // Step 2: Produce some input data to the input topic. + // + Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); + + // Give the stream processing application some time to do its work. + Thread.sleep(5000); + streams.close(); + + // + // Step 3: Verify the application's output data. 
+ // + Properties consumerConfig = new Properties(); + consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "map-function-integration-test-standard-consumer"); + consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); + consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + List actualValues = IntegrationTestUtils.readValues(DEFAULT_OUTPUT_TOPIC, consumerConfig, inputValues.size()); + assertThat(actualValues, equalTo(expectedValues)); + } + +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java new file mode 100644 index 000000000000..2627a3ac57bf --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.kafka.streams.integration; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +/** + * End-to-end integration test that reads data from an input topic and writes the same data as-is to + * a new output topic, using an embedded Kafka cluster. 
+ */ +public class PassThroughIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + private static final String DEFAULT_INPUT_TOPIC = "inputTopic"; + private static final String DEFAULT_OUTPUT_TOPIC = "outputTopic"; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(DEFAULT_INPUT_TOPIC); + CLUSTER.createTopic(DEFAULT_OUTPUT_TOPIC); + } + + @Test + public void shouldWriteTheInputDataAsIsToTheOutputTopic() throws Exception { + List inputValues = Arrays.asList( + "hello world", + "the world is not enough", + "the world of the stock market is coming to an end" + ); + + // + // Step 1: Configure and start the processor topology. + // + KStreamBuilder builder = new KStreamBuilder(); + + Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pass-through-integration-test"); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + + // Write the input data as-is to the output topic. + builder.stream(DEFAULT_INPUT_TOPIC).to(DEFAULT_OUTPUT_TOPIC); + + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + + // + // Step 2: Produce some input data to the input topic. + // + Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); + + // Give the stream processing application some time to do its work. + Thread.sleep(5000); + streams.close(); + + // + // Step 3: Verify the application's output data. 
+ // + Properties consumerConfig = new Properties(); + consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "pass-through-integration-test-standard-consumer"); + consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + List actualValues = IntegrationTestUtils.readValues(DEFAULT_OUTPUT_TOPIC, consumerConfig, inputValues.size()); + assertThat(actualValues, equalTo(inputValues)); + } +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java new file mode 100644 index 000000000000..5c32a6c4271b --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.kafka.streams.integration; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.LongDeserializer; +import org.apache.kafka.common.serialization.Serde; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.kstream.KeyValueMapper; +import org.apache.kafka.streams.kstream.ValueMapper; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Properties; + +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; + + +/** + * End-to-end integration test based on a simple word count example, using an embedded Kafka + * cluster. 
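+ *
+ * In outline, the topology in the test below is (splitIntoWords and selectWordAsKey are
+ * placeholder names for the anonymous ValueMapper/KeyValueMapper instances defined in the test):
+ *
+ * {@code
+ * textLines.flatMapValues(splitIntoWords)   // "hello world" -> ("hello"), ("world")
+ *          .map(selectWordAsKey)            // re-key each record by the word itself
+ *          .countByKey("Counts")            // running count per word, as a KTable
+ *          .toStream()
+ *          .to(stringSerde, longSerde, DEFAULT_OUTPUT_TOPIC);
+ * }
+ *
+ * Because countByKey emits an updated count for every incoming record, the output contains
+ * running totals rather than final counts, which is why the expected result below is
+ * ("hello",1), ("world",1), ("world",2), ("hello",2), ("world",3) for the four input lines.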
+ */ +public class WordCountIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + private static final String DEFAULT_INPUT_TOPIC = "inputTopic"; + private static final String DEFAULT_OUTPUT_TOPIC = "outputTopic"; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(DEFAULT_INPUT_TOPIC); + CLUSTER.createTopic(DEFAULT_OUTPUT_TOPIC); + } + + @Test + public void shouldCountWords() throws Exception { + List inputValues = Arrays.asList("hello", "world", "world", "hello world"); + List> expectedWordCounts = Arrays.asList( + new KeyValue<>("hello", 1L), + new KeyValue<>("world", 1L), + new KeyValue<>("world", 2L), + new KeyValue<>("hello", 2L), + new KeyValue<>("world", 3L) + ); + + // + // Step 1: Configure and start the processor topology. + // + final Serde stringSerde = Serdes.String(); + final Serde longSerde = Serdes.Long(); + + Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-integration-test"); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + // Explicitly place the state directory under /tmp so that we can remove it via + // `purgeLocalStreamsState` below. Once Streams is updated to expose the effective + // StreamsConfig configuration (so we can retrieve whatever state directory Streams came up + // with automatically) we don't need to set this anymore and can update `purgeLocalStreamsState` + // accordingly. + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); + + KStreamBuilder builder = new KStreamBuilder(); + + KStream textLines = builder.stream(DEFAULT_INPUT_TOPIC); + + KStream wordCounts = textLines + .flatMapValues(new ValueMapper>() { + @Override + public Iterable apply(String value) { + return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")); + } + }).map(new KeyValueMapper>() { + @Override + public KeyValue apply(String key, String value) { + return new KeyValue(value, value); + } + }).countByKey("Counts") + .toStream(); + + wordCounts.to(stringSerde, longSerde, DEFAULT_OUTPUT_TOPIC); + + // Remove any state from previous test runs + IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration); + + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + + // + // Step 2: Produce some input data to the input topic. 
+ // + Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); + + // + // Step 3: Verify the application's output data. + // + Thread.sleep(5000); + streams.close(); + Properties consumerConfig = new Properties(); + consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-integration-test-standard-consumer"); + consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); + consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); + List> actualWordCounts = IntegrationTestUtils.readKeyValues(DEFAULT_OUTPUT_TOPIC, consumerConfig); + assertThat(actualWordCounts, equalTo(expectedWordCounts)); + } + +} From a0f40770ab766c956c7f6d9a3eff0d089896abee Mon Sep 17 00:00:00 2001 From: Yuto Kawamura Date: Tue, 3 May 2016 10:47:23 -0700 Subject: [PATCH 081/267] KAFKA-3642: Fix NPE from ProcessorStateManager when the changelog topic not exists Issue: https://issues.apache.org/jira/browse/KAFKA-3642 Author: Yuto Kawamura Reviewers: Guozhang Wang Closes #1289 from kawamuray/KAFKA-3642-streams-NPE --- .../kafka/clients/consumer/MockConsumer.java | 6 +---- .../internals/ProcessorStateManager.java | 6 ++++- .../internals/StreamPartitionAssignor.java | 27 ++++++++++++------- .../internals/ProcessorStateManagerTest.java | 4 +-- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java index 8dce1f1f9411..9ab4c29493da 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java @@ -268,11 +268,7 @@ public void updateEndOffsets(Map newOffsets) { @Override public List partitionsFor(String topic) { ensureNotClosed(); - List parts = this.partitions.get(topic); - if (parts == null) - return Collections.emptyList(); - else - return parts; + return this.partitions.get(topic); } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java index 0cdf44cbd0ca..1d97384a9bf5 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java @@ -186,7 +186,11 @@ public void register(StateStore store, boolean loggingEnabled, StateRestoreCallb // ignore } - for (PartitionInfo partitionInfo : restoreConsumer.partitionsFor(topic)) { + List partitionInfos = restoreConsumer.partitionsFor(topic); + if (partitionInfos == null) { + throw new StreamsException("Could not find partition info for topic: " + topic); + } + for (PartitionInfo partitionInfo : partitionInfos) { if 
(partitionInfo.partition() == partition) { partitionNotFound = false; break; diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java index 341e66a6862c..f2eea36c11c1 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java @@ -152,7 +152,6 @@ public Subscription subscription(Set topics) { * @param topicToTaskIds Map that contains the topic names to be created * @param compactTopic If true, the topic should be a compacted topic. This is used for * change log topics usually. - * @param outPartitionInfo If true, compute and return all partitions created * @param postPartitionPhase If true, the computation for calculating the number of partitions * is slightly different. Set to true after the initial topic-to-partition * assignment. @@ -160,7 +159,6 @@ public Subscription subscription(Set topics) { */ private Map prepareTopic(Map> topicToTaskIds, boolean compactTopic, - boolean outPartitionInfo, boolean postPartitionPhase) { Map partitionInfos = new HashMap<>(); // if ZK is specified, prepare the internal source topic before calling partition grouper @@ -192,13 +190,24 @@ private Map prepareTopic(Map> partitions = streamThread.restoreConsumer.partitionsFor(topic); } while (partitions == null || partitions.size() != numPartitions); - if (outPartitionInfo) { - for (PartitionInfo partition : partitions) - partitionInfos.put(new TopicPartition(partition.topic(), partition.partition()), partition); - } + for (PartitionInfo partition : partitions) + partitionInfos.put(new TopicPartition(partition.topic(), partition.partition()), partition); } log.info("Completed validating internal topics in partition assignor."); + } else { + List missingTopics = new ArrayList<>(); + for (String topic : topicToTaskIds.keySet()) { + List partitions = streamThread.restoreConsumer.partitionsFor(topic); + if (partitions == null) { + missingTopics.add(topic); + } + } + if (!missingTopics.isEmpty()) { + log.warn("Topic {} do not exists but couldn't created as the config '{}' isn't supplied", + missingTopics, StreamsConfig.ZOOKEEPER_CONNECT_CONFIG); + + } } return partitionInfos; @@ -284,7 +293,7 @@ public Map assign(Cluster metadata, Map internalPartitionInfos = prepareTopic(internalSourceTopicToTaskIds, false, true, false); + Map internalPartitionInfos = prepareTopic(internalSourceTopicToTaskIds, false, false); internalSourceTopicToTaskIds.clear(); Cluster metadataWithInternalTopics = metadata; @@ -380,9 +389,9 @@ public Map assign(Cluster metadata, Map Date: Wed, 4 May 2016 14:26:30 -0700 Subject: [PATCH 082/267] KAFKA-2236; Offset request reply racing with segment rolling Author: William Thurston Author: Ismael Juma Reviewers: Ismael Juma, Guozhang Wang Closes #1318 from ijuma/KAFKA-2236-offset-request-reply-segment-rolling-race (cherry picked from commit c46cc480214080844ef0ca04d96f1db61b1f2ea3) Signed-off-by: Guozhang Wang --- .../main/scala/kafka/server/KafkaApis.scala | 9 ++++---- .../unit/kafka/server/LogOffsetTest.scala | 22 ++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/kafka/server/KafkaApis.scala b/core/src/main/scala/kafka/server/KafkaApis.scala index cf7814edff61..eb6358dc8c8f 100644 --- a/core/src/main/scala/kafka/server/KafkaApis.scala +++ 
b/core/src/main/scala/kafka/server/KafkaApis.scala @@ -579,17 +579,18 @@ class KafkaApis(val requestChannel: RequestChannel, } } - private def fetchOffsetsBefore(log: Log, timestamp: Long, maxNumOffsets: Int): Seq[Long] = { + private[server] def fetchOffsetsBefore(log: Log, timestamp: Long, maxNumOffsets: Int): Seq[Long] = { val segsArray = log.logSegments.toArray var offsetTimeArray: Array[(Long, Long)] = null - if (segsArray.last.size > 0) + val lastSegmentHasSize = segsArray.last.size > 0 + if (lastSegmentHasSize) offsetTimeArray = new Array[(Long, Long)](segsArray.length + 1) else offsetTimeArray = new Array[(Long, Long)](segsArray.length) for (i <- 0 until segsArray.length) offsetTimeArray(i) = (segsArray(i).baseOffset, segsArray(i).lastModified) - if (segsArray.last.size > 0) + if (lastSegmentHasSize) offsetTimeArray(segsArray.length) = (log.logEndOffset, SystemTime.milliseconds) var startIndex = -1 @@ -1048,4 +1049,4 @@ class KafkaApis(val requestChannel: RequestChannel, if (!authorize(request.session, ClusterAction, Resource.ClusterResource)) throw new ClusterAuthorizationException(s"Request $request is not authorized.") } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala index d5c696ee9ff1..463cd8a55e2a 100755 --- a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala +++ b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala @@ -18,12 +18,14 @@ package kafka.server import java.io.File +import java.util.concurrent.atomic.AtomicLong import java.util.{Properties, Random} import kafka.admin.AdminUtils import kafka.api.{FetchRequestBuilder, OffsetRequest, PartitionOffsetRequestInfo} import kafka.common.TopicAndPartition import kafka.consumer.SimpleConsumer +import kafka.log.{Log, LogSegment} import kafka.message.{ByteBufferMessageSet, Message, NoCompressionCodec} import kafka.utils.TestUtils._ import kafka.utils._ @@ -31,11 +33,12 @@ import kafka.zk.ZooKeeperTestHarness import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.protocol.Errors import org.apache.kafka.common.utils.Utils +import org.easymock.{EasyMock, IAnswer} import org.junit.Assert._ import org.junit.{After, Before, Test} class LogOffsetTest extends ZooKeeperTestHarness { - val random = new Random() + val random = new Random() var logDir: File = null var topicLogDir: File = null var server: KafkaServer = null @@ -194,6 +197,23 @@ class LogOffsetTest extends ZooKeeperTestHarness { assertEquals(Seq(0L), consumerOffsets) } + /* We test that `fetchOffsetsBefore` works correctly if `LogSegment.size` changes after each invocation (simulating + * a race condition) */ + @Test + def testFetchOffsetsBeforeWithChangingSegmentSize() { + val log = EasyMock.niceMock(classOf[Log]) + val logSegment = EasyMock.niceMock(classOf[LogSegment]) + EasyMock.expect(logSegment.size).andStubAnswer(new IAnswer[Long] { + private val value = new AtomicLong(0) + def answer: Long = value.getAndIncrement() + }) + EasyMock.replay(logSegment) + val logSegments = Seq(logSegment) + EasyMock.expect(log.logSegments).andStubReturn(logSegments) + EasyMock.replay(log) + server.apis.fetchOffsetsBefore(log, System.currentTimeMillis, 100) + } + private def createBrokerConfig(nodeId: Int): Properties = { val props = new Properties props.put("broker.id", nodeId.toString) From 1587aeed9a551c7c755666040fe26d066dba6857 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Wed, 4 May 2016 16:08:08 -0700 Subject: [PATCH 083/267] 
KAFKA 3656: Remove logging outstanding messages when producer flush fails Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1319 from Ishiihara/kafka-3656 --- .../org/apache/kafka/connect/runtime/WorkerSourceTask.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java index 602af4a533f7..fd551abd3f7b 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java @@ -286,9 +286,7 @@ public boolean commitOffsets() { try { long timeoutMs = timeout - time.milliseconds(); if (timeoutMs <= 0) { - log.error( - "Failed to flush {}, timed out while waiting for producer to flush outstanding " - + "messages, {} left ({})", this, outstandingMessages.size(), outstandingMessages); + log.error("Failed to flush {}, timed out while waiting for producer to flush outstanding {} messages", this, outstandingMessages.size()); finishFailedFlush(); return false; } From 21351b5755a8ab90b79c6090d97389325a25fc4a Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Thu, 5 May 2016 00:23:34 +0100 Subject: [PATCH 084/267] KAFKA-3639; Configure default serdes upon construction Author: Guozhang Wang Reviewers: Michael G. Noll , Matthias J. Sax , Ismael Juma Closes #1311 from guozhangwang/K3639 (cherry picked from commit c8c6ac3f6d2d590261acb35fabbf7418ae102d4e) Signed-off-by: Ismael Juma --- .../kafka/common/serialization/Serde.java | 24 +++- .../kafka/common/serialization/Serdes.java | 103 +++++++----------- .../apache/kafka/streams/StreamsConfig.java | 10 +- .../kafka/streams/StreamsConfigTest.java | 32 +++++- 4 files changed, 99 insertions(+), 70 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/serialization/Serde.java b/clients/src/main/java/org/apache/kafka/common/serialization/Serde.java index cc7944eb891a..42b8c1e7f637 100644 --- a/clients/src/main/java/org/apache/kafka/common/serialization/Serde.java +++ b/clients/src/main/java/org/apache/kafka/common/serialization/Serde.java @@ -13,12 +13,32 @@ package org.apache.kafka.common.serialization; +import java.io.Closeable; +import java.util.Map; + /** * The interface for wrapping a serializer and deserializer for the given data type. * - * @param + * @param Type to be serialized from and deserialized into. + * + * A class that implements this interface is expected to have a constructor with no parameter. */ -public interface Serde { +public interface Serde extends Closeable { + + /** + * Configure this class, which will configure the underlying serializer and deserializer. + * + * @param configs configs in key/value pairs + * @param isKey whether is for key or value + */ + void configure(Map configs, boolean isKey); + + /** + * Close this serde class, which will close the underlying serializer and deserializer. + * This method has to be idempotent because it might be called multiple times. 
+ */ + @Override + void close(); Serializer serializer(); diff --git a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java index d744522bcec5..9075a932a5db 100644 --- a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java +++ b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java @@ -16,93 +16,84 @@ import org.apache.kafka.common.utils.Bytes; import java.nio.ByteBuffer; +import java.util.Map; /** * Factory for creating serializers / deserializers. */ public class Serdes { - static public final class LongSerde implements Serde { - @Override - public Serializer serializer() { - return new LongSerializer(); - } + static private class WrapperSerde implements Serde { + final private Serializer serializer; + final private Deserializer deserializer; - @Override - public Deserializer deserializer() { - return new LongDeserializer(); + public WrapperSerde(Serializer serializer, Deserializer deserializer) { + this.serializer = serializer; + this.deserializer = deserializer; } - } - static public final class IntegerSerde implements Serde { @Override - public Serializer serializer() { - return new IntegerSerializer(); + public void configure(Map configs, boolean isKey) { + serializer.configure(configs, isKey); + deserializer.configure(configs, isKey); } @Override - public Deserializer deserializer() { - return new IntegerDeserializer(); + public void close() { + serializer.close(); + deserializer.close(); } - } - static public final class DoubleSerde implements Serde { @Override - public Serializer serializer() { - return new DoubleSerializer(); + public Serializer serializer() { + return serializer; } @Override - public Deserializer deserializer() { - return new DoubleDeserializer(); + public Deserializer deserializer() { + return deserializer; } } - static public final class StringSerde implements Serde { - @Override - public Serializer serializer() { - return new StringSerializer(); - } - - @Override - public Deserializer deserializer() { - return new StringDeserializer(); + static public final class LongSerde extends WrapperSerde { + public LongSerde() { + super(new LongSerializer(), new LongDeserializer()); } } - static public final class ByteBufferSerde implements Serde { - @Override - public Serializer serializer() { - return new ByteBufferSerializer(); + static public final class IntegerSerde extends WrapperSerde { + public IntegerSerde() { + super(new IntegerSerializer(), new IntegerDeserializer()); } + } - @Override - public Deserializer deserializer() { - return new ByteBufferDeserializer(); + static public final class DoubleSerde extends WrapperSerde { + public DoubleSerde() { + super(new DoubleSerializer(), new DoubleDeserializer()); } } - static public final class BytesSerde implements Serde { - @Override - public Serializer serializer() { - return new BytesSerializer(); + static public final class StringSerde extends WrapperSerde { + public StringSerde() { + super(new StringSerializer(), new StringDeserializer()); } + } - @Override - public Deserializer deserializer() { - return new BytesDeserializer(); + static public final class ByteBufferSerde extends WrapperSerde { + public ByteBufferSerde() { + super(new ByteBufferSerializer(), new ByteBufferDeserializer()); } } - static public final class ByteArraySerde implements Serde { - @Override - public Serializer serializer() { - return new ByteArraySerializer(); + static public final class BytesSerde extends 
WrapperSerde { + public BytesSerde() { + super(new BytesSerializer(), new BytesDeserializer()); } + } - @Override - public Deserializer deserializer() { - return new ByteArrayDeserializer(); + static public final class ByteArraySerde extends WrapperSerde { + public ByteArraySerde() { + super(new ByteArraySerializer(), new ByteArrayDeserializer()); } } @@ -154,17 +145,7 @@ static public Serde serdeFrom(final Serializer serializer, final Deser throw new IllegalArgumentException("deserializer must not be null"); } - return new Serde() { - @Override - public Serializer serializer() { - return serializer; - } - - @Override - public Deserializer deserializer() { - return deserializer; - } - }; + return new WrapperSerde<>(serializer, deserializer); } /* diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java index 99eb58f0699f..fac29141ffd9 100644 --- a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java +++ b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java @@ -300,11 +300,17 @@ private void removeStreamsSpecificConfigs(Map props) { } public Serde keySerde() { - return getConfiguredInstance(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serde.class); + Serde serde = getConfiguredInstance(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serde.class); + serde.configure(originals(), true); + + return serde; } public Serde valueSerde() { - return getConfiguredInstance(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serde.class); + Serde serde = getConfiguredInstance(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serde.class); + serde.configure(originals(), false); + + return serde; } public static void main(String[] args) { diff --git a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java index 0dacde79f50a..81b406f60931 100644 --- a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java @@ -18,10 +18,12 @@ package org.apache.kafka.streams; import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.streams.processor.internals.StreamThread; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.Serializer; import org.junit.Before; import org.junit.Test; +import java.util.HashMap; import java.util.Map; import java.util.Properties; import static org.junit.Assert.assertEquals; @@ -31,13 +33,15 @@ public class StreamsConfigTest { private Properties props = new Properties(); private StreamsConfig streamsConfig; - private StreamThread streamThreadPlaceHolder; - @Before public void setUp() { props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-config-test"); props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + props.put("key.deserializer.encoding", "UTF8"); + props.put("value.deserializer.encoding", "UTF-16"); streamsConfig = new StreamsConfig(props); } @@ -49,8 +53,7 @@ public void testGetProducerConfigs() throws Exception { @Test public void testGetConsumerConfigs() throws Exception { - Map returnedProps = - streamsConfig.getConsumerConfigs(streamThreadPlaceHolder, "example-application", "client"); + Map returnedProps = streamsConfig.getConsumerConfigs(null, 
"example-application", "client"); assertEquals(returnedProps.get(ConsumerConfig.CLIENT_ID_CONFIG), "client-consumer"); assertEquals(returnedProps.get(ConsumerConfig.GROUP_ID_CONFIG), "example-application"); @@ -62,4 +65,23 @@ public void testGetRestoreConsumerConfigs() throws Exception { assertEquals(returnedProps.get(ConsumerConfig.CLIENT_ID_CONFIG), "client-restore-consumer"); assertNull(returnedProps.get(ConsumerConfig.GROUP_ID_CONFIG)); } + + @Test + public void defaultSerdeShouldBeConfigured() { + Map serializerConfigs = new HashMap(); + serializerConfigs.put("key.serializer.encoding", "UTF8"); + serializerConfigs.put("value.serializer.encoding", "UTF-16"); + Serializer serializer = Serdes.String().serializer(); + + String str = "my string for testing"; + String topic = "my topic"; + + serializer.configure(serializerConfigs, true); + assertEquals("Should get the original string after serialization and deserialization with the configured encoding", + str, streamsConfig.keySerde().deserializer().deserialize(topic, serializer.serialize(topic, str))); + + serializer.configure(serializerConfigs, false); + assertEquals("Should get the original string after serialization and deserialization with the configured encoding", + str, streamsConfig.valueSerde().deserializer().deserialize(topic, serializer.serialize(topic, str))); + } } From 91130e4242f8000016a97a0e81a242ac41e5107c Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Wed, 4 May 2016 18:16:08 -0700 Subject: [PATCH 085/267] KAFKA-3652; Return error response for unsupported version of ApiVersionsRequest Handle unsupported version of ApiVersionsRequest during SASL auth as well as normal operation by returning an error response. Author: Rajini Sivaram Reviewers: Ismael Juma , Jun Rao Closes #1310 from rajinisivaram/KAFKA-3652 (cherry picked from commit 64451af9e08de428064dc232cd6dea0ea0b2a81d) Signed-off-by: Jun Rao --- .../kafka/common/protocol/Protocol.java | 6 +- .../SaslServerAuthenticator.java | 77 ++++++++++++------- .../authenticator/SaslAuthenticatorTest.java | 62 +++++++++++++++ .../scala/kafka/network/RequestChannel.scala | 13 +++- .../main/scala/kafka/server/KafkaApis.scala | 4 +- .../kafka/server/ApiVersionsRequestTest.scala | 8 +- .../server/SaslApiVersionsRequestTest.scala | 17 +++- 7 files changed, 147 insertions(+), 40 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java index 99cdbf9f5798..326b780e03db 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java @@ -848,6 +848,10 @@ public class Protocol { } } + public static boolean apiVersionSupported(short apiKey, short apiVersion) { + return apiKey < CURR_VERSION.length && apiVersion >= MIN_VERSIONS[apiKey] && apiVersion <= CURR_VERSION[apiKey]; + } + private static String indentString(int size) { StringBuilder b = new StringBuilder(size); for (int i = 0; i < size; i++) @@ -1008,4 +1012,4 @@ public static void main(String[] args) { System.out.println(toHtml()); } -} \ No newline at end of file +} diff --git a/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java b/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java index a9c19a58f887..e1074a1baac6 100644 --- a/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java +++ 
b/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java @@ -51,6 +51,7 @@ import org.apache.kafka.common.errors.AuthenticationException; import org.apache.kafka.common.errors.IllegalSaslStateException; import org.apache.kafka.common.errors.UnsupportedSaslMechanismException; +import org.apache.kafka.common.errors.UnsupportedVersionException; import org.apache.kafka.common.network.Authenticator; import org.apache.kafka.common.network.Mode; import org.apache.kafka.common.network.NetworkSend; @@ -58,6 +59,7 @@ import org.apache.kafka.common.network.TransportLayer; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.common.protocol.Protocol; import org.apache.kafka.common.protocol.types.SchemaException; import org.apache.kafka.common.requests.AbstractRequest; import org.apache.kafka.common.requests.AbstractRequestResponse; @@ -75,7 +77,7 @@ public class SaslServerAuthenticator implements Authenticator { private static final Logger LOG = LoggerFactory.getLogger(SaslServerAuthenticator.class); public enum SaslState { - HANDSHAKE_REQUEST, AUTHENTICATE, COMPLETE, FAILED + GSSAPI_OR_HANDSHAKE_REQUEST, HANDSHAKE_REQUEST, AUTHENTICATE, COMPLETE, FAILED } private final String node; @@ -85,7 +87,7 @@ public enum SaslState { private final String host; // Current SASL state - private SaslState saslState = SaslState.HANDSHAKE_REQUEST; + private SaslState saslState = SaslState.GSSAPI_OR_HANDSHAKE_REQUEST; // Next SASL state to be set when outgoing writes associated with the current SASL state complete private SaslState pendingSaslState = null; private SaslServer saslServer; @@ -215,6 +217,9 @@ public void authenticate() throws IOException { try { switch (saslState) { case HANDSHAKE_REQUEST: + handleKafkaRequest(clientToken); + break; + case GSSAPI_OR_HANDSHAKE_REQUEST: if (handleKafkaRequest(clientToken)) break; // For default GSSAPI, fall through to authenticate using the client token as the first GSSAPI packet. @@ -288,39 +293,53 @@ private boolean handleKafkaRequest(byte[] requestBytes) throws IOException, Auth try { ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes); RequestHeader requestHeader = RequestHeader.parse(requestBuffer); - AbstractRequest request = AbstractRequest.getRequest(requestHeader.apiKey(), requestHeader.apiVersion(), requestBuffer); + ApiKeys apiKey = ApiKeys.forId(requestHeader.apiKey()); + // A valid Kafka request header was received. SASL authentication tokens are now expected only + // following a SaslHandshakeRequest since this is not a GSSAPI client token from a Kafka 0.9.0.x client. 
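// For context: the version check a few lines below relies on the Protocol.apiVersionSupported(...)
// helper added earlier in this patch. A minimal standalone restatement of that bounds check
// (the array parameters stand in for Protocol.MIN_VERSIONS / Protocol.CURR_VERSION; this sketch
// is an illustration, not the actual Protocol class):
final class ApiVersionBoundsSketch {
    static boolean supported(short[] minVersions, short[] currVersions, short apiKey, short apiVersion) {
        // A request version is usable only when the api key is known to this broker and the
        // version lies within the [min, current] range published for that key.
        return apiKey < currVersions.length
                && apiVersion >= minVersions[apiKey]
                && apiVersion <= currVersions[apiKey];
    }
}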
+ setSaslState(SaslState.HANDSHAKE_REQUEST); isKafkaRequest = true; - ApiKeys apiKey = ApiKeys.forId(requestHeader.apiKey()); - LOG.debug("Handle Kafka request {}", apiKey); - switch (apiKey) { - case API_VERSIONS: - handleApiVersionsRequest(requestHeader, (ApiVersionsRequest) request); - break; - case SASL_HANDSHAKE: - clientMechanism = handleHandshakeRequest(requestHeader, (SaslHandshakeRequest) request); - break; - default: - throw new IllegalSaslStateException("Unexpected Kafka request of type " + apiKey + " during SASL handshake."); + if (!Protocol.apiVersionSupported(requestHeader.apiKey(), requestHeader.apiVersion())) { + if (apiKey == ApiKeys.API_VERSIONS) + sendKafkaResponse(requestHeader, ApiVersionsResponse.fromError(Errors.UNSUPPORTED_VERSION)); + else + throw new UnsupportedVersionException("Version " + requestHeader.apiVersion() + " is not supported for apiKey " + apiKey); + } else { + AbstractRequest request = AbstractRequest.getRequest(requestHeader.apiKey(), requestHeader.apiVersion(), requestBuffer); + + LOG.debug("Handle Kafka request {}", apiKey); + switch (apiKey) { + case API_VERSIONS: + handleApiVersionsRequest(requestHeader, (ApiVersionsRequest) request); + break; + case SASL_HANDSHAKE: + clientMechanism = handleHandshakeRequest(requestHeader, (SaslHandshakeRequest) request); + break; + default: + throw new IllegalSaslStateException("Unexpected Kafka request of type " + apiKey + " during SASL handshake."); + } } } catch (SchemaException | IllegalArgumentException e) { - // SchemaException is thrown if the request is not in Kafka format. IIlegalArgumentException is thrown - // if the API key is invalid. For compatibility with 0.9.0.x where the first packet is a GSSAPI token - // starting with 0x60, revert to GSSAPI for both these exceptions. - if (LOG.isDebugEnabled()) { - StringBuilder tokenBuilder = new StringBuilder(); - for (byte b : requestBytes) { - tokenBuilder.append(String.format("%02x", b)); - if (tokenBuilder.length() >= 20) - break; + if (saslState == SaslState.GSSAPI_OR_HANDSHAKE_REQUEST) { + // SchemaException is thrown if the request is not in Kafka format. IIlegalArgumentException is thrown + // if the API key is invalid. For compatibility with 0.9.0.x where the first packet is a GSSAPI token + // starting with 0x60, revert to GSSAPI for both these exceptions. 
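// Background on the "starting with 0x60" remark above, in case it is unfamiliar: a GSS-API
// initial context token (RFC 2743, section 3.1) is DER-encoded with the [APPLICATION 0]
// constructed tag, whose first octet is 0x60, which is why a 0.9.0.x client's first packet can
// be told apart from a Kafka request header. A hypothetical helper illustrating the heuristic
// (not part of the actual code, which simply falls back to GSSAPI on a parse failure):
final class GssapiFirstPacketSketch {
    static boolean looksLikeGssapiToken(byte[] firstPacket) {
        return firstPacket != null && firstPacket.length > 0 && (firstPacket[0] & 0xFF) == 0x60;
    }
}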
+ if (LOG.isDebugEnabled()) { + StringBuilder tokenBuilder = new StringBuilder(); + for (byte b : requestBytes) { + tokenBuilder.append(String.format("%02x", b)); + if (tokenBuilder.length() >= 20) + break; + } + LOG.debug("Received client packet of length {} starting with bytes 0x{}, process as GSSAPI packet", requestBytes.length, tokenBuilder); } - LOG.debug("Received client packet of length {} starting with bytes 0x{}, process as GSSAPI packet", requestBytes.length, tokenBuilder); - } - if (enabledMechanisms.contains(SaslConfigs.GSSAPI_MECHANISM)) { - LOG.debug("First client packet is not a SASL mechanism request, using default mechanism GSSAPI"); - clientMechanism = SaslConfigs.GSSAPI_MECHANISM; + if (enabledMechanisms.contains(SaslConfigs.GSSAPI_MECHANISM)) { + LOG.debug("First client packet is not a SASL mechanism request, using default mechanism GSSAPI"); + clientMechanism = SaslConfigs.GSSAPI_MECHANISM; + } else + throw new UnsupportedSaslMechanismException("Exception handling first SASL packet from client, GSSAPI is not supported by server", e); } else - throw new UnsupportedSaslMechanismException("Exception handling first SASL packet from client, GSSAPI is not supported by server", e); + throw e; } if (clientMechanism != null) { createSaslServer(clientMechanism); diff --git a/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslAuthenticatorTest.java b/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslAuthenticatorTest.java index 368b5a78ce03..97fe3d8ef731 100644 --- a/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslAuthenticatorTest.java +++ b/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslAuthenticatorTest.java @@ -47,6 +47,7 @@ import org.apache.kafka.common.requests.MetadataRequest; import org.apache.kafka.common.requests.RequestHeader; import org.apache.kafka.common.requests.RequestSend; +import org.apache.kafka.common.requests.ResponseHeader; import org.apache.kafka.common.requests.SaslHandshakeRequest; import org.apache.kafka.common.requests.SaslHandshakeResponse; import org.apache.kafka.common.security.JaasUtils; @@ -243,6 +244,62 @@ public void testUnauthenticatedApiVersionsRequestOverSsl() throws Exception { testUnauthenticatedApiVersionsRequest(SecurityProtocol.SASL_SSL); } + /** + * Tests that unsupported version of ApiVersionsRequest before SASL handshake request + * returns error response and does not result in authentication failure. This test + * is similar to {@link #testUnauthenticatedApiVersionsRequest(SecurityProtocol)} + * where a non-SASL client is used to send requests that are processed by + * {@link SaslServerAuthenticator} of the server prior to client authentication. + */ + @Test + public void testApiVersionsRequestWithUnsupportedVersion() throws Exception { + SecurityProtocol securityProtocol = SecurityProtocol.SASL_PLAINTEXT; + configureMechanisms("PLAIN", Arrays.asList("PLAIN")); + server = NetworkTestUtils.createEchoServer(securityProtocol, saslServerConfigs); + + // Send ApiVersionsRequest with unsupported version and validate error response. 
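// The probe below uses Short.MAX_VALUE as a version that is guaranteed to be unsupported.
// A hedged sketch of the client-side downgrade pattern this enables (hypothetical helper,
// not a real Kafka client API): if the broker rejects the probed version with
// UNSUPPORTED_VERSION, fall back to version 0 of ApiVersionsRequest, which any broker that
// implements the API accepts.
final class ApiVersionsProbeSketch {
    interface Exchange {
        boolean isUnsupportedVersion(short requestVersion);
    }

    static short chooseRequestVersion(Exchange exchange, short preferredVersion) {
        return exchange.isUnsupportedVersion(preferredVersion) ? (short) 0 : preferredVersion;
    }
}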
+ String node = "1"; + createClientConnection(SecurityProtocol.PLAINTEXT, node); + RequestHeader header = new RequestHeader(ApiKeys.API_VERSIONS.id, Short.MAX_VALUE, "someclient", 1); + selector.send(new NetworkSend(node, RequestSend.serialize(header, new ApiVersionsRequest().toStruct()))); + ByteBuffer responseBuffer = waitForResponse(); + ResponseHeader.parse(responseBuffer); + ApiVersionsResponse response = ApiVersionsResponse.parse(responseBuffer); + assertEquals(Errors.UNSUPPORTED_VERSION.code(), response.errorCode()); + + // Send ApiVersionsRequest with a supported version. This should succeed. + sendVersionRequestReceiveResponse(node); + + // Test that client can authenticate successfully + sendHandshakeRequestReceiveResponse(node); + authenticateUsingSaslPlainAndCheckConnection(node); + } + + /** + * Tests that unsupported version of SASL handshake request returns error + * response and fails authentication. This test is similar to + * {@link #testUnauthenticatedApiVersionsRequest(SecurityProtocol)} + * where a non-SASL client is used to send requests that are processed by + * {@link SaslServerAuthenticator} of the server prior to client authentication. + */ + @Test + public void testSaslHandshakeRequestWithUnsupportedVersion() throws Exception { + SecurityProtocol securityProtocol = SecurityProtocol.SASL_PLAINTEXT; + configureMechanisms("PLAIN", Arrays.asList("PLAIN")); + server = NetworkTestUtils.createEchoServer(securityProtocol, saslServerConfigs); + + // Send ApiVersionsRequest and validate error response. + String node1 = "invalid1"; + createClientConnection(SecurityProtocol.PLAINTEXT, node1); + RequestHeader header = new RequestHeader(ApiKeys.SASL_HANDSHAKE.id, Short.MAX_VALUE, "someclient", 2); + selector.send(new NetworkSend(node1, RequestSend.serialize(header, new SaslHandshakeRequest("PLAIN").toStruct()))); + NetworkTestUtils.waitForChannelClose(selector, node1); + selector.close(); + + // Test good connection still works + createAndCheckClientConnection(securityProtocol, "good1"); + } + /** * Tests that any invalid data during Kafka SASL handshake request flow * or the actual SASL authentication flow result in authentication failure @@ -485,6 +542,11 @@ private void testUnauthenticatedApiVersionsRequest(SecurityProtocol securityProt SaslHandshakeResponse handshakeResponse = sendHandshakeRequestReceiveResponse(node); assertEquals(Collections.singletonList("PLAIN"), handshakeResponse.enabledMechanisms()); + // Complete manual authentication and check send/receive succeed + authenticateUsingSaslPlainAndCheckConnection(node); + } + + private void authenticateUsingSaslPlainAndCheckConnection(String node) throws Exception { // Authenticate using PLAIN username/password String authString = "\u0000" + TestJaasConfig.USERNAME + "\u0000" + TestJaasConfig.PASSWORD; selector.send(new NetworkSend(node, ByteBuffer.wrap(authString.getBytes("UTF-8")))); diff --git a/core/src/main/scala/kafka/network/RequestChannel.scala b/core/src/main/scala/kafka/network/RequestChannel.scala index 17c5b9b3fbed..e2000dbfd04c 100644 --- a/core/src/main/scala/kafka/network/RequestChannel.scala +++ b/core/src/main/scala/kafka/network/RequestChannel.scala @@ -28,8 +28,8 @@ import kafka.metrics.KafkaMetricsGroup import kafka.utils.{Logging, SystemTime} import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.network.Send -import org.apache.kafka.common.protocol.{ApiKeys, SecurityProtocol} -import org.apache.kafka.common.requests.{RequestSend, ProduceRequest, AbstractRequest, 
RequestHeader} +import org.apache.kafka.common.protocol.{ApiKeys, SecurityProtocol, Protocol} +import org.apache.kafka.common.requests.{RequestSend, ProduceRequest, AbstractRequest, RequestHeader, ApiVersionsRequest} import org.apache.kafka.common.security.auth.KafkaPrincipal import org.apache.log4j.Logger @@ -84,8 +84,13 @@ object RequestChannel extends Logging { null val body: AbstractRequest = if (requestObj == null) - try AbstractRequest.getRequest(header.apiKey, header.apiVersion, buffer) - catch { + try { + // For unsupported version of ApiVersionsRequest, create a dummy request to enable an error response to be returned later + if (header.apiKey == ApiKeys.API_VERSIONS.id && !Protocol.apiVersionSupported(header.apiKey, header.apiVersion)) + new ApiVersionsRequest + else + AbstractRequest.getRequest(header.apiKey, header.apiVersion, buffer) + } catch { case ex: Throwable => throw new InvalidRequestException(s"Error getting request for apiKey: ${header.apiKey} and apiVersion: ${header.apiVersion}", ex) } diff --git a/core/src/main/scala/kafka/server/KafkaApis.scala b/core/src/main/scala/kafka/server/KafkaApis.scala index eb6358dc8c8f..086bd4b893db 100644 --- a/core/src/main/scala/kafka/server/KafkaApis.scala +++ b/core/src/main/scala/kafka/server/KafkaApis.scala @@ -1029,9 +1029,7 @@ class KafkaApis(val requestChannel: RequestChannel, // with client authentication which is performed at an earlier stage of the connection where the // ApiVersionRequest is not available. val responseHeader = new ResponseHeader(request.header.correlationId) - val isApiVersionsRequestVersionSupported = request.header.apiVersion <= Protocol.CURR_VERSION(ApiKeys.API_VERSIONS.id) && - request.header.apiVersion >= Protocol.MIN_VERSIONS(ApiKeys.API_VERSIONS.id) - val responseBody = if (isApiVersionsRequestVersionSupported) + val responseBody = if (Protocol.apiVersionSupported(ApiKeys.API_VERSIONS.id, request.header.apiVersion)) ApiVersionsResponse.apiVersionsResponse else ApiVersionsResponse.fromError(Errors.UNSUPPORTED_VERSION) diff --git a/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala b/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala index 8bf4d73643e9..f2dd60f24690 100644 --- a/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala @@ -17,7 +17,7 @@ package kafka.server -import org.apache.kafka.common.protocol.ApiKeys +import org.apache.kafka.common.protocol.{ApiKeys, Errors} import org.apache.kafka.common.requests.ApiVersionsResponse.ApiVersion import org.apache.kafka.common.requests.{ApiVersionsRequest, ApiVersionsResponse} import org.junit.Assert._ @@ -48,6 +48,12 @@ class ApiVersionsRequestTest extends BaseRequestTest { ApiVersionsRequestTest.validateApiVersionsResponse(apiVersionsResponse) } + @Test + def testApiVersionsRequestWithUnsupportedVersion() { + val apiVersionsResponse = sendApiVersionsRequest(new ApiVersionsRequest, Short.MaxValue) + assertEquals(Errors.UNSUPPORTED_VERSION.code(), apiVersionsResponse.errorCode) + } + private def sendApiVersionsRequest(request: ApiVersionsRequest, version: Short): ApiVersionsResponse = { val response = send(request, ApiKeys.API_VERSIONS, version) ApiVersionsResponse.parse(response) diff --git a/core/src/test/scala/unit/kafka/server/SaslApiVersionsRequestTest.scala b/core/src/test/scala/unit/kafka/server/SaslApiVersionsRequestTest.scala index 632665a02f59..85570087a3d4 100644 --- 
a/core/src/test/scala/unit/kafka/server/SaslApiVersionsRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/SaslApiVersionsRequestTest.scala @@ -19,11 +19,10 @@ package kafka.server import java.io.IOException import java.net.Socket import java.util.Collections -import org.apache.kafka.common.protocol.{ApiKeys, SecurityProtocol} +import org.apache.kafka.common.protocol.{ApiKeys, Errors, SecurityProtocol} import org.apache.kafka.common.requests.{ApiVersionsRequest, ApiVersionsResponse} import org.apache.kafka.common.requests.SaslHandshakeRequest import org.apache.kafka.common.requests.SaslHandshakeResponse -import org.apache.kafka.common.protocol.Errors import org.junit.Test import org.junit.Assert._ import kafka.api.SaslTestHarness @@ -64,6 +63,20 @@ class SaslApiVersionsRequestTest extends BaseRequestTest with SaslTestHarness { } } + @Test + def testApiVersionsRequestWithUnsupportedVersion() { + val plaintextSocket = connect(protocol = securityProtocol) + try { + val apiVersionsResponse = sendApiVersionsRequest(plaintextSocket, new ApiVersionsRequest, Short.MaxValue) + assertEquals(Errors.UNSUPPORTED_VERSION.code(), apiVersionsResponse.errorCode) + val apiVersionsResponse2 = sendApiVersionsRequest(plaintextSocket, new ApiVersionsRequest, 0) + ApiVersionsRequestTest.validateApiVersionsResponse(apiVersionsResponse2) + sendSaslHandshakeRequestValidateResponse(plaintextSocket) + } finally { + plaintextSocket.close() + } + } + private def sendApiVersionsRequest(socket: Socket, request: ApiVersionsRequest, version: Short): ApiVersionsResponse = { val response = send(socket, request, ApiKeys.API_VERSIONS, version) ApiVersionsResponse.parse(response) From e9d10108b47018578a53d6863084c41baa3bb579 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Wed, 4 May 2016 19:20:54 -0700 Subject: [PATCH 086/267] KAFKA-3527: Consumer commitAsync should not expose internal exceptions Author: Liquan Pei Reviewers: Grant Henke , Jason Gustafson , Ewen Cheslack-Postava Closes #1300 from Ishiihara/kafka-3527 (cherry picked from commit ad316509787787afeed6e2a24a62fd22cadd09c7) Signed-off-by: Ewen Cheslack-Postava --- .../consumer/CommitFailedException.java | 1 - .../consumer/OffsetCommitCallback.java | 13 +++++++- .../RetriableCommitFailedException.java | 32 +++++++++++++++++++ .../internals/ConsumerCoordinator.java | 9 +++++- .../internals/ConsumerCoordinatorTest.java | 12 ++++--- 5 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/CommitFailedException.java b/clients/src/main/java/org/apache/kafka/clients/consumer/CommitFailedException.java index 39468bd90c9d..26ef48e0b410 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/CommitFailedException.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/CommitFailedException.java @@ -31,5 +31,4 @@ public class CommitFailedException extends KafkaException { public CommitFailedException(String message) { super(message); } - } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetCommitCallback.java b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetCommitCallback.java index 97a06ad48015..dfa839101599 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetCommitCallback.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetCommitCallback.java @@ -14,6 +14,7 @@ import 
org.apache.kafka.common.TopicPartition; +import java.util.Collection; import java.util.Map; /** @@ -28,6 +29,16 @@ public interface OffsetCommitCallback { * * @param offsets A map of the offsets and associated metadata that this callback applies to * @param exception The exception thrown during processing of the request, or null if the commit completed successfully + * + * @throws org.apache.kafka.clients.consumer.CommitFailedException if the commit failed and cannot be retried. + * This can only occur if you are using automatic group management with {@link KafkaConsumer#subscribe(Collection)}, + * or if there is an active group with the same groupId which is using group management. + * @throws org.apache.kafka.common.errors.WakeupException if {@link KafkaConsumer#wakeup()} is called before or while this + * function is called + * @throws org.apache.kafka.common.errors.AuthorizationException if not authorized to the topic or to the + * configured groupId + * @throws org.apache.kafka.common.KafkaException for any other unrecoverable errors (e.g. if offset metadata + * is too large or if the committed offset is invalid). */ void onComplete(Map offsets, Exception exception); -} \ No newline at end of file +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java b/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java new file mode 100644 index 000000000000..459a8acbb553 --- /dev/null +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.clients.consumer; + +import org.apache.kafka.common.errors.RetriableException; + +public class RetriableCommitFailedException extends RetriableException { + + private static final long serialVersionUID = 1L; + + public RetriableCommitFailedException(String message) { + super(message); + } + + public RetriableCommitFailedException(String message, Throwable t) { + super(message, t); + } +} diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index 7486969380dc..d44d8ebc5d0b 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -23,7 +23,9 @@ import org.apache.kafka.common.Cluster; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.clients.consumer.RetriableCommitFailedException; import org.apache.kafka.common.errors.GroupAuthorizationException; +import org.apache.kafka.common.errors.RetriableException; import org.apache.kafka.common.errors.TopicAuthorizationException; import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.internals.TopicConstants; @@ -354,6 +356,7 @@ public void close() { } } + public void commitOffsetsAsync(final Map offsets, OffsetCommitCallback callback) { this.subscriptions.needRefreshCommits(); RequestFuture future = sendOffsetCommitRequest(offsets); @@ -368,7 +371,11 @@ public void onSuccess(Void value) { @Override public void onFailure(RuntimeException e) { - cb.onComplete(offsets, e); + if (e instanceof RetriableException) { + cb.onComplete(offsets, new RetriableCommitFailedException("Commit offsets failed with retriable exception. 
You should retry committing offsets.", e)); + } else { + cb.onComplete(offsets, e); + } } }); diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java index 5a174db16241..bb31acff539e 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java @@ -32,6 +32,7 @@ import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.ApiException; +import org.apache.kafka.clients.consumer.RetriableCommitFailedException; import org.apache.kafka.common.errors.DisconnectException; import org.apache.kafka.common.errors.GroupAuthorizationException; import org.apache.kafka.common.errors.OffsetMetadataTooLarge; @@ -85,7 +86,7 @@ public class ConsumerCoordinatorTest { private boolean autoCommitEnabled = false; private long autoCommitIntervalMs = 2000; private MockPartitionAssignor partitionAssignor = new MockPartitionAssignor(); - private List assignors = Arrays.asList(partitionAssignor); + private List assignors = Collections.singletonList(partitionAssignor); private MockTime time; private MockClient client; private Cluster cluster = TestUtils.singletonCluster(topicName, 1); @@ -98,6 +99,7 @@ public class ConsumerCoordinatorTest { private MockCommitCallback defaultOffsetCommitCallback; private ConsumerCoordinator coordinator; + @Before public void setup() { this.time = new MockTime(); @@ -898,7 +900,7 @@ public void testCommitOffsetAsyncFailedWithDefaultCallback() { client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), null); assertEquals(invokedBeforeTest + 1, defaultOffsetCommitCallback.invoked); - assertEquals(Errors.GROUP_COORDINATOR_NOT_AVAILABLE.exception(), defaultOffsetCommitCallback.exception); + assertTrue(defaultOffsetCommitCallback.exception instanceof RetriableCommitFailedException); } @Test @@ -913,7 +915,7 @@ public void testCommitOffsetAsyncCoordinatorNotAvailable() { assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); - assertEquals(Errors.GROUP_COORDINATOR_NOT_AVAILABLE.exception(), cb.exception); + assertTrue(cb.exception instanceof RetriableCommitFailedException); } @Test @@ -928,7 +930,7 @@ public void testCommitOffsetAsyncNotCoordinator() { assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); - assertEquals(Errors.NOT_COORDINATOR_FOR_GROUP.exception(), cb.exception); + assertTrue(cb.exception instanceof RetriableCommitFailedException); } @Test @@ -943,7 +945,7 @@ public void testCommitOffsetAsyncDisconnected() { assertTrue(coordinator.coordinatorUnknown()); assertEquals(1, cb.invoked); - assertTrue(cb.exception instanceof DisconnectException); + assertTrue(cb.exception instanceof RetriableCommitFailedException); } @Test From 1fb0d796bb93afe21a035764dc5e70292a76e061 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Thu, 5 May 2016 12:03:28 -0700 Subject: [PATCH 087/267] KAFKA-3659: Handle coordinator disconnects more gracefully in client Author: Jason Gustafson Reviewers: Grant Henke , Ewen Cheslack-Postava Closes #1322 from hachikuji/KAFKA-3659 (cherry picked from commit 32bf83e5a792c5ee9eb88660da71b73aad5bbc02) Signed-off-by: Ewen 
Cheslack-Postava --- .../kafka/clients/consumer/KafkaConsumer.java | 2 +- .../internals/AbstractCoordinator.java | 12 +- .../internals/ConsumerCoordinator.java | 4 +- .../org/apache/kafka/clients/MockClient.java | 24 ++- .../internals/AbstractCoordinatorTest.java | 137 ++++++++++++++++++ .../internals/ConsumerCoordinatorTest.java | 94 ++++++------ .../distributed/WorkerGroupMember.java | 4 +- .../distributed/WorkerCoordinatorTest.java | 8 +- 8 files changed, 225 insertions(+), 60 deletions(-) create mode 100644 clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index 7290a3844564..2373a13d1d65 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -938,7 +938,7 @@ public ConsumerRecords poll(long timeout) { */ private Map>> pollOnce(long timeout) { // TODO: Sub-requests should take into account the poll timeout (KAFKA-1894) - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // ensure we have partitions assigned if we expect to if (subscriptions.partitionsAutoAssigned()) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java index 15185d7de03a..6bb4406cdb36 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java @@ -171,9 +171,9 @@ protected abstract void onJoinComplete(int generation, ByteBuffer memberAssignment); /** - * Block until the coordinator for this group is known. + * Block until the coordinator for this group is known and is ready to receive requests. */ - public void ensureCoordinatorKnown() { + public void ensureCoordinatorReady() { while (coordinatorUnknown()) { RequestFuture future = sendGroupCoordinatorRequest(); client.poll(future); @@ -183,7 +183,13 @@ public void ensureCoordinatorKnown() { client.awaitMetadataUpdate(); else throw future.exception(); + } else if (coordinator != null && client.connectionFailed(coordinator)) { + // we found the coordinator, but the connection has failed, so mark + // it dead and backoff before retrying discovery + coordinatorDead(); + time.sleep(retryBackoffMs); } + } } @@ -208,7 +214,7 @@ public void ensureActiveGroup() { } while (needRejoin()) { - ensureCoordinatorKnown(); + ensureCoordinatorReady(); // ensure that there are no pending requests to the coordinator. This is important // in particular to avoid resending a pending JoinGroup request. 
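The behavioural change above is easiest to see in isolation: when the coordinator has been located but its connection has failed, the client now forgets the coordinator and backs off before the next lookup instead of retrying immediately. A standalone sketch of that rule, with invented names (the real logic lives in AbstractCoordinator.ensureCoordinatorReady()):

    final class CoordinatorBackoffSketch {
        private Integer coordinatorId;          // null means the coordinator is unknown
        private final long retryBackoffMs;

        CoordinatorBackoffSketch(long retryBackoffMs) {
            this.retryBackoffMs = retryBackoffMs;
        }

        void afterDiscovery(Integer discoveredId, boolean connectionFailed) throws InterruptedException {
            coordinatorId = discoveredId;
            if (coordinatorId != null && connectionFailed) {
                // Mark the coordinator dead and back off before rediscovering,
                // rather than spinning on a broken connection.
                coordinatorId = null;
                Thread.sleep(retryBackoffMs);
            }
        }
    }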
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index d44d8ebc5d0b..c1f373fb4478 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -321,7 +321,7 @@ public void refreshCommittedOffsetsIfNeeded() { */ public Map fetchCommittedOffsets(Set partitions) { while (true) { - ensureCoordinatorKnown(); + ensureCoordinatorReady(); // contact coordinator to fetch committed offsets RequestFuture> future = sendOffsetFetchRequest(partitions); @@ -397,7 +397,7 @@ public void commitOffsetsSync(Map offsets) { return; while (true) { - ensureCoordinatorKnown(); + ensureCoordinatorReady(); RequestFuture future = sendOffsetCommitRequest(offsets); client.poll(future); diff --git a/clients/src/test/java/org/apache/kafka/clients/MockClient.java b/clients/src/test/java/org/apache/kafka/clients/MockClient.java index 1c3efd4be2f4..527d2834f050 100644 --- a/clients/src/test/java/org/apache/kafka/clients/MockClient.java +++ b/clients/src/test/java/org/apache/kafka/clients/MockClient.java @@ -18,9 +18,11 @@ import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Queue; import java.util.Set; @@ -58,6 +60,7 @@ public FutureResponse(Struct responseBody, boolean disconnected, RequestMatcher private int correlation = 0; private Node node = null; private final Set ready = new HashSet<>(); + private final Map blackedOut = new HashMap<>(); private final Queue requests = new ArrayDeque<>(); private final Queue responses = new ArrayDeque<>(); private final Queue futureResponses = new ArrayDeque<>(); @@ -73,6 +76,8 @@ public boolean isReady(Node node, long now) { @Override public boolean ready(Node node, long now) { + if (isBlackedOut(node)) + return false; ready.add(node.idString()); return true; } @@ -82,9 +87,26 @@ public long connectionDelay(Node node, long now) { return 0; } + public void blackout(Node node, long duration) { + blackedOut.put(node, time.milliseconds() + duration); + } + + private boolean isBlackedOut(Node node) { + if (blackedOut.containsKey(node)) { + long expiration = blackedOut.get(node); + if (time.milliseconds() > expiration) { + blackedOut.remove(node); + return false; + } else { + return true; + } + } + return false; + } + @Override public boolean connectionFailed(Node node) { - return false; + return isBlackedOut(node); } public void disconnect(String node) { diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java new file mode 100644 index 000000000000..7a05eb1fb807 --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *
    + * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.clients.consumer.internals; + +import org.apache.kafka.clients.Metadata; +import org.apache.kafka.clients.MockClient; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.requests.GroupCoordinatorResponse; +import org.apache.kafka.common.requests.JoinGroupRequest; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.test.TestUtils; +import org.junit.Before; +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertTrue; + +public class AbstractCoordinatorTest { + + private static final ByteBuffer EMPTY_DATA = ByteBuffer.wrap(new byte[0]); + private static final int SESSION_TIMEOUT_MS = 30000; + private static final int HEARTBEAT_INTERVAL_MS = 3000; + private static final long RETRY_BACKOFF_MS = 100; + private static final long REQUEST_TIMEOUT_MS = 40000; + private static final String GROUP_ID = "dummy-group"; + private static final String METRIC_GROUP_PREFIX = "consumer"; + + private MockClient mockClient; + private MockTime mockTime; + private Node node; + private Node coordinatorNode; + private ConsumerNetworkClient consumerClient; + private DummyCoordinator coordinator; + + @Before + public void setupCoordinator() { + this.mockTime = new MockTime(); + this.mockClient = new MockClient(mockTime); + + Metadata metadata = new Metadata(); + this.consumerClient = new ConsumerNetworkClient(mockClient, metadata, mockTime, + RETRY_BACKOFF_MS, REQUEST_TIMEOUT_MS); + Metrics metrics = new Metrics(); + + Cluster cluster = TestUtils.singletonCluster("topic", 1); + metadata.update(cluster, mockTime.milliseconds()); + this.node = cluster.nodes().get(0); + mockClient.setNode(node); + + this.coordinatorNode = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port()); + this.coordinator = new DummyCoordinator(consumerClient, metrics, mockTime); + } + + @Test + public void testCoordinatorDiscoveryBackoff() { + mockClient.prepareResponse(groupCoordinatorResponse(node, Errors.NONE.code())); + mockClient.prepareResponse(groupCoordinatorResponse(node, Errors.NONE.code())); + + // blackout the coordinator for 50 milliseconds to simulate a disconnect. + // after backing off, we should be able to connect. 
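// Why the elapsed-time assertion at the end of this test is deterministic: MockTime only
// advances when the code under test calls sleep(), so endTime - initialTime measures exactly
// the back-off taken by ensureCoordinatorReady(). A minimal, hypothetical clock showing the
// same pattern (not the actual MockTime implementation):
final class FakeClockSketch {
    private long nowMs = 0L;

    long milliseconds() {
        return nowMs;
    }

    void sleep(long ms) {
        nowMs += ms;    // no real waiting; the clock simply jumps forward
    }
}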
+ mockClient.blackout(coordinatorNode, 50L); + + long initialTime = mockTime.milliseconds(); + coordinator.ensureCoordinatorReady(); + long endTime = mockTime.milliseconds(); + + assertTrue(endTime - initialTime >= RETRY_BACKOFF_MS); + } + + private Struct groupCoordinatorResponse(Node node, short error) { + GroupCoordinatorResponse response = new GroupCoordinatorResponse(error, node); + return response.toStruct(); + } + + public class DummyCoordinator extends AbstractCoordinator { + + public DummyCoordinator(ConsumerNetworkClient client, + Metrics metrics, + Time time) { + super(client, GROUP_ID, SESSION_TIMEOUT_MS, HEARTBEAT_INTERVAL_MS, metrics, + METRIC_GROUP_PREFIX, time, RETRY_BACKOFF_MS); + } + + @Override + protected String protocolType() { + return "dummy"; + } + + @Override + protected List metadata() { + return Collections.singletonList(new JoinGroupRequest.ProtocolMetadata("dummy-subprotocol", EMPTY_DATA)); + } + + @Override + protected Map performAssignment(String leaderId, String protocol, Map allMemberMetadata) { + Map assignment = new HashMap<>(); + for (Map.Entry metadata : allMemberMetadata.entrySet()) + assignment.put(metadata.getKey(), EMPTY_DATA); + return assignment; + } + + @Override + protected void onJoinPrepare(int generation, String memberId) { + + } + + @Override + protected void onJoinComplete(int generation, String memberId, String protocol, ByteBuffer memberAssignment) { + + } + } + +} diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java index bb31acff539e..82a854afcaf2 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java @@ -125,7 +125,7 @@ public void teardown() { @Test public void testNormalHeartbeat() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // normal heartbeat time.sleep(sessionTimeoutMs); @@ -143,7 +143,7 @@ public void testNormalHeartbeat() { @Test(expected = GroupAuthorizationException.class) public void testGroupDescribeUnauthorized() { client.prepareResponse(consumerMetadataResponse(node, Errors.GROUP_AUTHORIZATION_FAILED.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); } @Test(expected = GroupAuthorizationException.class) @@ -151,7 +151,7 @@ public void testGroupReadUnauthorized() { subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupLeaderResponse(0, "memberId", Collections.>emptyMap(), Errors.GROUP_AUTHORIZATION_FAILED.code())); @@ -161,7 +161,7 @@ public void testGroupReadUnauthorized() { @Test public void testCoordinatorNotAvailable() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // GROUP_COORDINATOR_NOT_AVAILABLE will mark coordinator as unknown time.sleep(sessionTimeoutMs); @@ -182,7 +182,7 @@ public void testCoordinatorNotAvailable() { @Test public void testNotCoordinator() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - 
coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // not_coordinator will mark coordinator as unknown time.sleep(sessionTimeoutMs); @@ -203,7 +203,7 @@ public void testNotCoordinator() { @Test public void testIllegalGeneration() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // illegal_generation will cause re-partition subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); @@ -227,7 +227,7 @@ public void testIllegalGeneration() { @Test public void testUnknownConsumerId() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // illegal_generation will cause re-partition subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); @@ -251,7 +251,7 @@ public void testUnknownConsumerId() { @Test public void testCoordinatorDisconnect() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // coordinator disconnect will mark coordinator as unknown time.sleep(sessionTimeoutMs); @@ -281,7 +281,7 @@ public void testJoinGroupInvalidGroupId() { metadata.update(cluster, time.milliseconds()); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupLeaderResponse(0, consumerId, Collections.>emptyMap(), Errors.INVALID_GROUP_ID.code())); @@ -300,7 +300,7 @@ public void testNormalJoinGroupLeader() { metadata.update(cluster, time.milliseconds()); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // normal join group Map> memberSubscriptions = Collections.singletonMap(consumerId, Arrays.asList(topicName)); @@ -338,7 +338,7 @@ public void testWakeupDuringJoin() { metadata.update(cluster, time.milliseconds()); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); Map> memberSubscriptions = Collections.singletonMap(consumerId, Arrays.asList(topicName)); partitionAssignor.prepare(Collections.singletonMap(consumerId, Arrays.asList(tp))); @@ -373,7 +373,7 @@ public void testNormalJoinGroupFollower() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // normal join group client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); @@ -404,7 +404,7 @@ public void testLeaveGroupOnClose() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); @@ -432,7 +432,7 @@ public void testMaybeLeaveGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupFollowerResponse(1, 
consumerId, "leader", Errors.NONE.code())); client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); @@ -462,7 +462,7 @@ public void testUnexpectedErrorOnSyncGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join initially, but let coordinator rebalance on sync client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); @@ -478,7 +478,7 @@ public void testUnknownMemberIdOnSyncGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join initially, but let coordinator returns unknown member id client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); @@ -508,7 +508,7 @@ public void testRebalanceInProgressOnSyncGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join initially, but let coordinator rebalance on sync client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); @@ -532,7 +532,7 @@ public void testIllegalGenerationOnSyncGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join initially, but let coordinator rebalance on sync client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); @@ -562,7 +562,7 @@ public void testMetadataChangeTriggersRebalance() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); @@ -597,7 +597,7 @@ public void testUpdateMetadataDuringRebalance() { metadata.update(TestUtils.singletonCluster(topic1, 1), time.milliseconds()); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // prepare initial rebalance Map> memberSubscriptions = Collections.singletonMap(consumerId, topics); @@ -658,7 +658,7 @@ public void testRejoinGroup() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join the group once client.prepareResponse(joinGroupFollowerResponse(1, "consumer", "leader", Errors.NONE.code())); @@ -686,7 +686,7 @@ public void testDisconnectInJoin() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // disconnected from original coordinator will cause re-discover and join again client.prepareResponse(joinGroupFollowerResponse(1, "consumer", "leader", Errors.NONE.code()), true); @@ -707,7 +707,7 @@ public void testInvalidSessionTimeout() { subscriptions.needReassignment(); 
client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // coordinator doesn't like the session timeout client.prepareResponse(joinGroupFollowerResponse(0, "consumer", "", Errors.INVALID_SESSION_TIMEOUT.code())); @@ -719,7 +719,7 @@ public void testCommitOffsetOnly() { subscriptions.assignFromUser(Arrays.asList(tp)); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); @@ -741,7 +741,7 @@ public void testAutoCommitDynamicAssignment() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupFollowerResponse(1, consumerId, "leader", Errors.NONE.code())); client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); @@ -767,7 +767,7 @@ public void testAutoCommitDynamicAssignmentRebalance() { subscriptions.needReassignment(); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // haven't joined, so should not cause a commit time.sleep(autoCommitIntervalMs); @@ -795,7 +795,7 @@ public void testAutoCommitManualAssignment() { subscriptions.seek(tp, 100); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); time.sleep(autoCommitIntervalMs); @@ -821,7 +821,7 @@ public void testAutoCommitManualAssignmentCoordinatorUnknown() { // now find the coordinator client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // sleep only for the retry backoff time.sleep(retryBackoffMs); @@ -836,7 +836,7 @@ public void testCommitOffsetMetadata() { subscriptions.assignFromUser(Arrays.asList(tp)); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); @@ -852,7 +852,7 @@ public void testCommitOffsetMetadata() { public void testCommitOffsetAsyncWithDefaultCallback() { int invokedBeforeTest = defaultOffsetCommitCallback.invoked; client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), null); assertEquals(invokedBeforeTest + 1, defaultOffsetCommitCallback.invoked); @@ -865,7 +865,7 @@ public void testCommitAfterLeaveGroup() { subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(joinGroupFollowerResponse(1, "consumer", "leader", Errors.NONE.code())); 
client.prepareResponse(syncGroupResponse(Arrays.asList(tp), Errors.NONE.code())); @@ -896,7 +896,7 @@ public boolean matches(ClientRequest request) { public void testCommitOffsetAsyncFailedWithDefaultCallback() { int invokedBeforeTest = defaultOffsetCommitCallback.invoked; client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code()))); coordinator.commitOffsetsAsync(Collections.singletonMap(tp, new OffsetAndMetadata(100L)), null); assertEquals(invokedBeforeTest + 1, defaultOffsetCommitCallback.invoked); @@ -906,7 +906,7 @@ public void testCommitOffsetAsyncFailedWithDefaultCallback() { @Test public void testCommitOffsetAsyncCoordinatorNotAvailable() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // async commit with coordinator not available MockCommitCallback cb = new MockCommitCallback(); @@ -921,7 +921,7 @@ public void testCommitOffsetAsyncCoordinatorNotAvailable() { @Test public void testCommitOffsetAsyncNotCoordinator() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // async commit with not coordinator MockCommitCallback cb = new MockCommitCallback(); @@ -936,7 +936,7 @@ public void testCommitOffsetAsyncNotCoordinator() { @Test public void testCommitOffsetAsyncDisconnected() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // async commit with coordinator disconnected MockCommitCallback cb = new MockCommitCallback(); @@ -951,7 +951,7 @@ public void testCommitOffsetAsyncDisconnected() { @Test public void testCommitOffsetSyncNotCoordinator() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // sync commit with coordinator disconnected (should connect, get metadata, and then submit the commit request) client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NOT_COORDINATOR_FOR_GROUP.code()))); @@ -963,7 +963,7 @@ public void testCommitOffsetSyncNotCoordinator() { @Test public void testCommitOffsetSyncCoordinatorNotAvailable() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // sync commit with coordinator disconnected (should connect, get metadata, and then submit the commit request) client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.GROUP_COORDINATOR_NOT_AVAILABLE.code()))); @@ -975,7 +975,7 @@ public void testCommitOffsetSyncCoordinatorNotAvailable() { @Test public void testCommitOffsetSyncCoordinatorDisconnected() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // sync commit with coordinator disconnected (should connect, get metadata, and then submit the commit request) client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.NONE.code())), true); @@ -988,7 +988,7 @@ public void testCommitOffsetSyncCoordinatorDisconnected() { public void 
testCommitOffsetMetadataTooLarge() { // since offset metadata is provided by the user, we have to propagate the exception so they can handle it client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.OFFSET_METADATA_TOO_LARGE.code()))); coordinator.commitOffsetsSync(Collections.singletonMap(tp, new OffsetAndMetadata(100L, "metadata"))); @@ -998,7 +998,7 @@ public void testCommitOffsetMetadataTooLarge() { public void testCommitOffsetIllegalGeneration() { // we cannot retry if a rebalance occurs before the commit completed client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.ILLEGAL_GENERATION.code()))); coordinator.commitOffsetsSync(Collections.singletonMap(tp, new OffsetAndMetadata(100L, "metadata"))); @@ -1008,7 +1008,7 @@ public void testCommitOffsetIllegalGeneration() { public void testCommitOffsetUnknownMemberId() { // we cannot retry if a rebalance occurs before the commit completed client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.UNKNOWN_MEMBER_ID.code()))); coordinator.commitOffsetsSync(Collections.singletonMap(tp, new OffsetAndMetadata(100L, "metadata"))); @@ -1018,7 +1018,7 @@ public void testCommitOffsetUnknownMemberId() { public void testCommitOffsetRebalanceInProgress() { // we cannot retry if a rebalance occurs before the commit completed client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.REBALANCE_IN_PROGRESS.code()))); coordinator.commitOffsetsSync(Collections.singletonMap(tp, new OffsetAndMetadata(100L, "metadata"))); @@ -1027,7 +1027,7 @@ public void testCommitOffsetRebalanceInProgress() { @Test(expected = KafkaException.class) public void testCommitOffsetSyncCallbackWithNonRetriableException() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // sync commit with invalid partitions should throw if we have no callback client.prepareResponse(offsetCommitResponse(Collections.singletonMap(tp, Errors.UNKNOWN.code())), false); @@ -1037,7 +1037,7 @@ public void testCommitOffsetSyncCallbackWithNonRetriableException() { @Test public void testRefreshOffset() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.needRefreshCommits(); @@ -1050,7 +1050,7 @@ public void testRefreshOffset() { @Test public void testRefreshOffsetLoadInProgress() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.needRefreshCommits(); @@ -1064,7 +1064,7 @@ public void testRefreshOffsetLoadInProgress() { @Test public void 
testRefreshOffsetNotCoordinatorForConsumer() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.needRefreshCommits(); @@ -1079,7 +1079,7 @@ public void testRefreshOffsetNotCoordinatorForConsumer() { @Test public void testRefreshOffsetWithNoFetchableOffsets() { client.prepareResponse(consumerMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.needRefreshCommits(); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMember.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMember.java index 85af549fe1d3..c21b9bfe1870 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMember.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMember.java @@ -131,7 +131,7 @@ public void stop() { } public void ensureActive() { - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); coordinator.ensureActiveGroup(); } @@ -143,7 +143,7 @@ public void poll(long timeout) { long remaining = timeout; while (remaining >= 0) { long start = time.milliseconds(); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); coordinator.ensureActiveGroup(); client.poll(remaining); remaining -= time.milliseconds() - start; diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java index f7423ec6f827..4c2ac4002b10 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java @@ -171,7 +171,7 @@ public void testNormalJoinGroupLeader() { final String consumerId = "leader"; client.prepareResponse(groupMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // normal join group Map memberConfigOffsets = new HashMap<>(); @@ -211,7 +211,7 @@ public void testNormalJoinGroupFollower() { final String memberId = "member"; client.prepareResponse(groupMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // normal join group client.prepareResponse(joinGroupFollowerResponse(1, memberId, "leader", Errors.NONE.code())); @@ -252,7 +252,7 @@ public void testJoinLeaderCannotAssign() { final String memberId = "member"; client.prepareResponse(groupMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // config mismatch results in assignment error client.prepareResponse(joinGroupFollowerResponse(1, memberId, "leader", Errors.NONE.code())); @@ -283,7 +283,7 @@ public void testRejoinGroup() { PowerMock.replayAll(); client.prepareResponse(groupMetadataResponse(node, Errors.NONE.code())); - coordinator.ensureCoordinatorKnown(); + coordinator.ensureCoordinatorReady(); // join the group once client.prepareResponse(joinGroupFollowerResponse(1, "consumer", "leader", Errors.NONE.code())); From 
a9df2e7ca2ee93ec723933985c746178293b6388 Mon Sep 17 00:00:00 2001 From: Geoff Anderson Date: Thu, 5 May 2016 13:12:11 -0700 Subject: [PATCH 088/267] KAFKA-3581: add timeouts to joins in background thread services This actually removes joins altogether, as well as references to self.worker_threads, which is best left as an implementation detail in BackgroundThreadService. This makes use of hachikuji 's recent ducktape patch, and updates ducktape dependency to 0.5.0. Author: Geoff Anderson Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1297 from granders/KAFKA-3581-systest-add-join-timeout (cherry picked from commit b6cd0e2791e0e6a6ef02d069b3001ffb477f7c6c) Signed-off-by: Ewen Cheslack-Postava --- .../sanity_checks/test_verifiable_producer.py | 2 +- tests/kafkatest/services/console_consumer.py | 11 +++++++--- .../services/kafka_log4j_appender.py | 9 +++------ .../performance/end_to_end_latency.py | 4 ++-- .../services/performance/performance.py | 11 +++++++++- .../performance/producer_performance.py | 1 + .../services/replica_verification_tool.py | 8 +++++++- .../services/simple_consumer_shell.py | 12 +++++------ .../kafkatest/services/verifiable_consumer.py | 20 +++++++++---------- .../kafkatest/services/verifiable_producer.py | 16 +++++++-------- tests/setup.py | 2 +- .../kafka/tools/VerifiableConsumer.java | 5 ----- .../kafka/tools/VerifiableProducer.java | 6 +----- 13 files changed, 55 insertions(+), 52 deletions(-) diff --git a/tests/kafkatest/sanity_checks/test_verifiable_producer.py b/tests/kafkatest/sanity_checks/test_verifiable_producer.py index e22d4222d5bd..f1bc2a0bdc7d 100644 --- a/tests/kafkatest/sanity_checks/test_verifiable_producer.py +++ b/tests/kafkatest/sanity_checks/test_verifiable_producer.py @@ -35,7 +35,7 @@ def __init__(self, test_context): self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) - self.num_messages = 100 + self.num_messages = 1000 # This will produce to source kafka cluster self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=self.num_messages, throughput=1000) diff --git a/tests/kafkatest/services/console_consumer.py b/tests/kafkatest/services/console_consumer.py index 5a33052c4472..9c7f56495c17 100644 --- a/tests/kafkatest/services/console_consumer.py +++ b/tests/kafkatest/services/console_consumer.py @@ -91,7 +91,7 @@ class ConsoleConsumer(JmxMixin, BackgroundThreadService): def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-group", new_consumer=False, message_validator=None, from_beginning=True, consumer_timeout_ms=None, version=TRUNK, client_id="console-consumer", print_key=False, jmx_object_names=None, jmx_attributes=[], - enable_systest_events=False): + enable_systest_events=False, stop_timeout_sec=15): """ Args: context: standard context @@ -108,6 +108,8 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro print_key if True, print each message's key in addition to its value enable_systest_events if True, console consumer will print additional lifecycle-related information only available in 0.10.0 and later. + stop_timeout_sec After stopping a node, wait up to stop_timeout_sec for the node to stop, + and the corresponding background thread to finish successfully. 
""" JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes) BackgroundThreadService.__init__(self, context, num_nodes) @@ -129,6 +131,7 @@ def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-gro self.client_id = client_id self.print_key = print_key self.log_level = "TRACE" + self.stop_timeout_sec = stop_timeout_sec self.enable_systest_events = enable_systest_events if self.enable_systest_events: @@ -259,8 +262,10 @@ def start_node(self, node): def stop_node(self, node): node.account.kill_process("console_consumer", allow_fail=True) - wait_until(lambda: not self.alive(node), timeout_sec=10, backoff_sec=.2, - err_msg="Timed out waiting for consumer to stop.") + + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): if self.alive(node): diff --git a/tests/kafkatest/services/kafka_log4j_appender.py b/tests/kafkatest/services/kafka_log4j_appender.py index 3732bb0e1273..c0af1a1df759 100644 --- a/tests/kafkatest/services/kafka_log4j_appender.py +++ b/tests/kafkatest/services/kafka_log4j_appender.py @@ -67,13 +67,10 @@ def start_cmd(self, node): def stop_node(self, node): node.account.kill_process("VerifiableLog4jAppender", allow_fail=False) - if self.worker_threads is None: - return - # block until the corresponding thread exits - if len(self.worker_threads) >= self.idx(node): - # Need to guard this because stop is preemptively called before the worker threads are added and started - self.worker_threads[self.idx(node) - 1].join() + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): node.account.kill_process("VerifiableLog4jAppender", clean_shutdown=False, allow_fail=False) diff --git a/tests/kafkatest/services/performance/end_to_end_latency.py b/tests/kafkatest/services/performance/end_to_end_latency.py index 6d21151d530d..2007d6540b4a 100644 --- a/tests/kafkatest/services/performance/end_to_end_latency.py +++ b/tests/kafkatest/services/performance/end_to_end_latency.py @@ -17,10 +17,11 @@ from kafkatest.services.security.security_config import SecurityConfig from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK, V_0_9_0_0, V_0_10_0_0 +from kafkatest.services.kafka.version import TRUNK, V_0_9_0_0 import os + class EndToEndLatencyService(PerformanceService): MESSAGE_BYTES = 21 # 0.8.X messages are fixed at 21 bytes, so we'll match that for other versions @@ -45,7 +46,6 @@ class EndToEndLatencyService(PerformanceService): "collect_default": True} } - def __init__(self, context, num_nodes, kafka, topic, num_records, compression_type="none", version=TRUNK, acks=1): super(EndToEndLatencyService, self).__init__(context, num_nodes) self.kafka = kafka diff --git a/tests/kafkatest/services/performance/performance.py b/tests/kafkatest/services/performance/performance.py index 1eab1976278f..dcc1a32783e1 100644 --- a/tests/kafkatest/services/performance/performance.py +++ b/tests/kafkatest/services/performance/performance.py @@ -18,15 +18,24 @@ class PerformanceService(BackgroundThreadService): - def __init__(self, context, num_nodes): + def __init__(self, context, num_nodes, stop_timeout_sec=30): super(PerformanceService, self).__init__(context, num_nodes) 
self.results = [None] * self.num_nodes self.stats = [[] for x in range(self.num_nodes)] + self.stop_timeout_sec = stop_timeout_sec + + def stop_node(self, node): + node.account.kill_process("java", clean_shutdown=True, allow_fail=True) + + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): node.account.kill_process("java", clean_shutdown=False, allow_fail=True) node.account.ssh("rm -rf /mnt/*", allow_fail=False) + def throughput(records_per_sec, mb_per_sec): """Helper method to ensure uniform representation of throughput data""" return { diff --git a/tests/kafkatest/services/performance/producer_performance.py b/tests/kafkatest/services/performance/producer_performance.py index f4887edb8cb0..efd6c0934187 100644 --- a/tests/kafkatest/services/performance/producer_performance.py +++ b/tests/kafkatest/services/performance/producer_performance.py @@ -36,6 +36,7 @@ class ProducerPerformanceService(JmxMixin, PerformanceService): def __init__(self, context, num_nodes, kafka, topic, num_records, record_size, throughput, version=TRUNK, settings={}, intermediate_stats=False, client_id="producer-performance", jmx_object_names=None, jmx_attributes=[]): + JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes) PerformanceService.__init__(self, context, num_nodes) diff --git a/tests/kafkatest/services/replica_verification_tool.py b/tests/kafkatest/services/replica_verification_tool.py index f6374fbcc259..7f77049a0f9b 100644 --- a/tests/kafkatest/services/replica_verification_tool.py +++ b/tests/kafkatest/services/replica_verification_tool.py @@ -20,6 +20,7 @@ import re + class ReplicaVerificationTool(BackgroundThreadService): logs = { @@ -28,7 +29,7 @@ class ReplicaVerificationTool(BackgroundThreadService): "collect_default": False} } - def __init__(self, context, num_nodes, kafka, topic, report_interval_ms, security_protocol="PLAINTEXT"): + def __init__(self, context, num_nodes, kafka, topic, report_interval_ms, security_protocol="PLAINTEXT", stop_timeout_sec=30): super(ReplicaVerificationTool, self).__init__(context, num_nodes) self.kafka = kafka @@ -37,6 +38,7 @@ def __init__(self, context, num_nodes, kafka, topic, report_interval_ms, securit self.security_protocol = security_protocol self.security_config = SecurityConfig(security_protocol) self.partition_lag = {} + self.stop_timeout_sec = stop_timeout_sec def _worker(self, idx, node): cmd = self.start_cmd(node) @@ -76,6 +78,10 @@ def start_cmd(self, node): def stop_node(self, node): node.account.kill_process("java", clean_shutdown=True, allow_fail=True) + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) + def clean_node(self, node): node.account.kill_process("java", clean_shutdown=False, allow_fail=True) node.account.ssh("rm -rf /mnt/replica_verification_tool.log", allow_fail=False) \ No newline at end of file diff --git a/tests/kafkatest/services/simple_consumer_shell.py b/tests/kafkatest/services/simple_consumer_shell.py index 8deee85d0d27..c44540dbe638 100644 --- a/tests/kafkatest/services/simple_consumer_shell.py +++ b/tests/kafkatest/services/simple_consumer_shell.py @@ -26,13 +26,14 @@ class SimpleConsumerShell(BackgroundThreadService): "collect_default": False} } - def __init__(self, context, 
num_nodes, kafka, topic, partition=0): + def __init__(self, context, num_nodes, kafka, topic, partition=0, stop_timeout_sec=30): super(SimpleConsumerShell, self).__init__(context, num_nodes) self.kafka = kafka self.topic = topic self.partition = partition self.output = "" + self.stop_timeout_sec = stop_timeout_sec def _worker(self, idx, node): cmd = self.start_cmd(node) @@ -56,13 +57,10 @@ def get_output(self): def stop_node(self, node): node.account.kill_process("SimpleConsumerShell", allow_fail=False) - if self.worker_threads is None: - return - # block until the corresponding thread exits - if len(self.worker_threads) >= self.idx(node): - # Need to guard this because stop is preemptively called before the worker threads are added and started - self.worker_threads[self.idx(node) - 1].join() + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): node.account.kill_process("SimpleConsumerShell", clean_shutdown=False, allow_fail=False) diff --git a/tests/kafkatest/services/verifiable_consumer.py b/tests/kafkatest/services/verifiable_consumer.py index d97bef3d3df4..55304dcb4ea1 100644 --- a/tests/kafkatest/services/verifiable_consumer.py +++ b/tests/kafkatest/services/verifiable_consumer.py @@ -15,22 +15,22 @@ from ducktape.services.background_thread import BackgroundThreadService -from kafkatest.services.kafka.directory import kafka_dir, KAFKA_TRUNK +from kafkatest.services.kafka.directory import kafka_dir from kafkatest.services.kafka.version import TRUNK -from kafkatest.services.security.security_config import SecurityConfig from kafkatest.services.kafka import TopicPartition import json import os import signal import subprocess -import time + class ConsumerState: Dead = 1 Rebalancing = 3 Joined = 2 + class ConsumerEventHandler(object): def __init__(self, node): @@ -111,6 +111,7 @@ def last_commit(self, tp): else: return None + class VerifiableConsumer(BackgroundThreadService): PERSISTENT_ROOT = "/mnt/verifiable_consumer" STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.stdout") @@ -135,7 +136,7 @@ class VerifiableConsumer(BackgroundThreadService): def __init__(self, context, num_nodes, kafka, topic, group_id, max_messages=-1, session_timeout_sec=30, enable_autocommit=False, assignment_strategy="org.apache.kafka.clients.consumer.RangeAssignor", - version=TRUNK): + version=TRUNK, stop_timeout_sec=30): super(VerifiableConsumer, self).__init__(context, num_nodes) self.log_level = "TRACE" @@ -149,6 +150,7 @@ def __init__(self, context, num_nodes, kafka, topic, group_id, self.prop_file = "" self.security_config = kafka.security_config.client_config(self.prop_file) self.prop_file += str(self.security_config) + self.stop_timeout_sec = stop_timeout_sec self.event_handlers = {} self.global_position = {} @@ -268,14 +270,10 @@ def kill_node(self, node, clean_shutdown=True, allow_fail=False): def stop_node(self, node, clean_shutdown=True): self.kill_node(node, clean_shutdown=clean_shutdown) - - if self.worker_threads is None: - return - # block until the corresponding thread exits - if len(self.worker_threads) >= self.idx(node): - # Need to guard this because stop is preemptively called before the worker threads are added and started - self.worker_threads[self.idx(node) - 1].join() + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the 
specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): self.kill_node(node, clean_shutdown=False) diff --git a/tests/kafkatest/services/verifiable_producer.py b/tests/kafkatest/services/verifiable_producer.py index 4fec77671948..a6a1bd980676 100644 --- a/tests/kafkatest/services/verifiable_producer.py +++ b/tests/kafkatest/services/verifiable_producer.py @@ -43,7 +43,8 @@ class VerifiableProducer(BackgroundThreadService): } def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, throughput=100000, - message_validator=is_int, compression_types=None, version=TRUNK, acks=None): + message_validator=is_int, compression_types=None, version=TRUNK, acks=None, + stop_timeout_sec=150): """ :param max_messages is a number of messages to be produced per producer :param message_validator checks for an expected format of messages produced. There are @@ -73,7 +74,7 @@ def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, throughput self.produced_count = {} self.clean_shutdown_nodes = set() self.acks = acks - + self.stop_timeout_sec = stop_timeout_sec @property def security_config(self): @@ -220,14 +221,11 @@ def each_produced_at_least(self, count): return True def stop_node(self, node): - self.kill_node(node, clean_shutdown=False, allow_fail=False) - if self.worker_threads is None: - return + self.kill_node(node, clean_shutdown=True, allow_fail=False) - # block until the corresponding thread exits - if len(self.worker_threads) >= self.idx(node): - # Need to guard this because stop is preemptively called before the worker threads are added and started - self.worker_threads[self.idx(node) - 1].join() + stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) + assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ + (str(node.account), str(self.stop_timeout_sec)) def clean_node(self, node): self.kill_node(node, clean_shutdown=False, allow_fail=False) diff --git a/tests/setup.py b/tests/setup.py index de3ea62a9d8e..910c0a2da586 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -30,5 +30,5 @@ license="apache2.0", packages=find_packages(), include_package_data=True, - install_requires=["ducktape==0.4.0", "requests>=2.5.0"] + install_requires=["ducktape==0.5.0", "requests>=2.5.0"] ) diff --git a/tools/src/main/java/org/apache/kafka/tools/VerifiableConsumer.java b/tools/src/main/java/org/apache/kafka/tools/VerifiableConsumer.java index 1880d7aa1c58..8db442e3f910 100644 --- a/tools/src/main/java/org/apache/kafka/tools/VerifiableConsumer.java +++ b/tools/src/main/java/org/apache/kafka/tools/VerifiableConsumer.java @@ -265,11 +265,6 @@ private static abstract class ConsumerEvent { public long timestamp() { return timestamp; } - - @JsonProperty("class") - public String clazz() { - return VerifiableConsumer.class.getName(); - } } private static class ShutdownComplete extends ConsumerEvent { diff --git a/tools/src/main/java/org/apache/kafka/tools/VerifiableProducer.java b/tools/src/main/java/org/apache/kafka/tools/VerifiableProducer.java index b511fb94c8c6..30f08e8880f9 100644 --- a/tools/src/main/java/org/apache/kafka/tools/VerifiableProducer.java +++ b/tools/src/main/java/org/apache/kafka/tools/VerifiableProducer.java @@ -84,7 +84,7 @@ public VerifiableProducer( this.topic = topic; this.throughput = throughput; this.maxMessages = maxMessages; - this.producer = new KafkaProducer(producerProps); + this.producer = new KafkaProducer<>(producerProps); this.valuePrefix = valuePrefix; } @@ 
-252,7 +252,6 @@ public void close() { String shutdownString() { Map data = new HashMap<>(); - data.put("class", this.getClass().toString()); data.put("name", "shutdown_complete"); return toJsonString(data); } @@ -265,7 +264,6 @@ String errorString(Exception e, String key, String value, Long nowMs) { assert e != null : "Expected non-null exception."; Map errorData = new HashMap<>(); - errorData.put("class", this.getClass().toString()); errorData.put("name", "producer_send_error"); errorData.put("time_ms", nowMs); @@ -282,7 +280,6 @@ String successString(RecordMetadata recordMetadata, String key, String value, Lo assert recordMetadata != null : "Expected non-null recordMetadata object."; Map successData = new HashMap<>(); - successData.put("class", this.getClass().toString()); successData.put("name", "producer_send_success"); successData.put("time_ms", nowMs); @@ -349,7 +346,6 @@ public void run() { double avgThroughput = 1000 * ((producer.numAcked) / (double) (stopMs - startMs)); Map data = new HashMap<>(); - data.put("class", producer.getClass().toString()); data.put("name", "tool_data"); data.put("sent", producer.numSent); data.put("acked", producer.numAcked); From 940468011a6d127e823c7a59709c4381fb31694f Mon Sep 17 00:00:00 2001 From: Eno Thereska Date: Thu, 5 May 2016 13:50:15 -0700 Subject: [PATCH 089/267] HOTFIX: Reverted timeouts to larger values Author: Eno Thereska Reviewers: Ismael Juma, Guozhang Wang Closes #1324 from enothereska/hotfix-timeouts (cherry picked from commit 7f4e3ccde820eedd962b4cfd3abaecd8a49b83a8) Signed-off-by: Guozhang Wang --- .../apache/kafka/streams/integration/FanoutIntegrationTest.java | 2 +- .../apache/kafka/streams/integration/JoinIntegrationTest.java | 2 +- .../kafka/streams/integration/MapFunctionIntegrationTest.java | 2 +- .../kafka/streams/integration/PassThroughIntegrationTest.java | 2 +- .../kafka/streams/integration/WordCountIntegrationTest.java | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java index a7b478507ae6..2e11cd23e1e3 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java @@ -135,7 +135,7 @@ public String apply(String value) { IntegrationTestUtils.produceValuesSynchronously(INPUT_TOPIC_A, inputValues, producerConfig); // Give the stream processing application some time to do its work. - Thread.sleep(5000); + Thread.sleep(10000); streams.close(); // diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java index 1fc0ba67ad0a..93e31e22652c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java @@ -247,7 +247,7 @@ public Long apply(Long value1, Long value2) { IntegrationTestUtils.produceKeyValuesSynchronously(USER_CLICKS_TOPIC, userClicks, userClicksProducerConfig); // Give the stream processing application some time to do its work. 
- Thread.sleep(5000); + Thread.sleep(10000); streams.close(); // diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java index 47c00c10a043..31ac4006aa25 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java @@ -108,7 +108,7 @@ public String apply(String value) { IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); // Give the stream processing application some time to do its work. - Thread.sleep(5000); + Thread.sleep(10000); streams.close(); // diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java index 2627a3ac57bf..e126ed8cb92e 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java @@ -95,7 +95,7 @@ public void shouldWriteTheInputDataAsIsToTheOutputTopic() throws Exception { IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); // Give the stream processing application some time to do its work. - Thread.sleep(5000); + Thread.sleep(10000); streams.close(); // diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java index 5c32a6c4271b..c8583d1da0ed 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java @@ -134,7 +134,7 @@ public KeyValue apply(String key, String value) { // // Step 3: Verify the application's output data. // - Thread.sleep(5000); + Thread.sleep(10000); streams.close(); Properties consumerConfig = new Properties(); consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); From a57d5125d4a699c5fdaf05ef055dd7a0e7a6accd Mon Sep 17 00:00:00 2001 From: Onur Karaman Date: Thu, 5 May 2016 23:25:03 +0100 Subject: [PATCH 090/267] KAFKA-3661; fix NPE in o.a.k.c.c.RoundRobinAssignor when topic metadata not found AbstractPartitionAssignor.assign has an ambiguous line in its documentation: > param partitionsPerTopic The number of partitions for each subscribed topic (may be empty for some topics) Does empty mean the topic has an entry with value zero, or that the entry is excluded from the map altogether? The current implementation in AbstractPartitionAssignor excludes the entry from partitionsPerTopic if the topic isn't in the metadata. RoundRobinAssignorTest.testOneConsumerNonexistentTopic interprets emptiness as providing the topic with a zero value. RangeAssignor interprets emptiness as excluding the entry from the map. RangeAssignorTest.testOneConsumerNonexistentTopic interprets emptiness as providing the topic with a zero value. This implementation chooses to solve the NPE by deciding to exclude topics from partitionsPerTopic when the topic is not in the metadata. 
Author: Onur Karaman Reviewers: Jason Gustafson , Ismael Juma Closes #1326 from onurkaraman/KAFKA-3661 (cherry picked from commit 8429db937e2134d9935d9dccd2ed0febc474fd66) Signed-off-by: Ismael Juma --- .../kafka/clients/consumer/RangeAssignor.java | 10 +--------- .../kafka/clients/consumer/RoundRobinAssignor.java | 7 +++---- .../internals/AbstractPartitionAssignor.java | 13 ++++++++++--- .../kafka/clients/consumer/RangeAssignorTest.java | 2 -- .../clients/consumer/RoundRobinAssignorTest.java | 2 -- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java b/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java index f23151c105d8..16c1d77c429a 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/RangeAssignor.java @@ -45,14 +45,6 @@ public String name() { return "range"; } - private List partitions(String topic, - int numPartitions) { - List partitions = new ArrayList<>(); - for (int i = 0; i < numPartitions; i++) - partitions.add(new TopicPartition(topic, i)); - return partitions; - } - private Map> consumersPerTopic(Map> consumerMetadata) { Map> res = new HashMap<>(); for (Map.Entry> subscriptionEntry : consumerMetadata.entrySet()) { @@ -84,7 +76,7 @@ public Map> assign(Map partitionsP int numPartitionsPerConsumer = numPartitionsForTopic / consumersForTopic.size(); int consumersWithExtraPartition = numPartitionsForTopic % consumersForTopic.size(); - List partitions = partitions(topic, numPartitionsForTopic); + List partitions = AbstractPartitionAssignor.partitions(topic, numPartitionsForTopic); for (int i = 0, n = consumersForTopic.size(); i < n; i++) { int start = numPartitionsPerConsumer * i + Math.min(i, consumersWithExtraPartition); int length = numPartitionsPerConsumer + (i + 1 > consumersWithExtraPartition ? 
0 : 1); diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/RoundRobinAssignor.java b/clients/src/main/java/org/apache/kafka/clients/consumer/RoundRobinAssignor.java index b8dc2530c6de..a5de595cd361 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/RoundRobinAssignor.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/RoundRobinAssignor.java @@ -65,10 +65,9 @@ public List allPartitionsSorted(Map partitionsP List allPartitions = new ArrayList<>(); for (String topic : topics) { - Integer partitions = partitionsPerTopic.get(topic); - for (int partition = 0; partition < partitions; partition++) { - allPartitions.add(new TopicPartition(topic, partition)); - } + Integer numPartitionsForTopic = partitionsPerTopic.get(topic); + if (numPartitionsForTopic != null) + allPartitions.addAll(AbstractPartitionAssignor.partitions(topic, numPartitionsForTopic)); } return allPartitions; } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractPartitionAssignor.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractPartitionAssignor.java index 12fa9137bc8f..4f90e66f2794 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractPartitionAssignor.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractPartitionAssignor.java @@ -33,9 +33,10 @@ public abstract class AbstractPartitionAssignor implements PartitionAssignor { /** * Perform the group assignment given the partition counts and member subscriptions - * @param partitionsPerTopic The number of partitions for each subscribed topic (may be empty for some topics) + * @param partitionsPerTopic The number of partitions for each subscribed topic. Topics not in metadata will be excluded + * from this map. * @param subscriptions Map from the memberId to their respective topic subscription - * @return Map from each member to the + * @return Map from each member to the list of partitions assigned to them. 
*/ public abstract Map> assign(Map partitionsPerTopic, Map> subscriptions); @@ -58,7 +59,7 @@ public Map assign(Cluster metadata, Map partitionsPerTopic = new HashMap<>(); for (String topic : allSubscribedTopics) { Integer numPartitions = metadata.partitionCountForTopic(topic); - if (numPartitions != null) + if (numPartitions != null && numPartitions > 0) partitionsPerTopic.put(topic, numPartitions); else log.debug("Skipping assignment for topic {} since no metadata is available", topic); @@ -87,4 +88,10 @@ protected static void put(Map> map, K key, V value) { list.add(value); } + protected static List partitions(String topic, int numPartitions) { + List partitions = new ArrayList<>(numPartitions); + for (int i = 0; i < numPartitions; i++) + partitions.add(new TopicPartition(topic, i)); + return partitions; + } } diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/RangeAssignorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/RangeAssignorTest.java index 13cce1381743..72febb02ca6e 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/RangeAssignorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/RangeAssignorTest.java @@ -53,8 +53,6 @@ public void testOneConsumerNonexistentTopic() { String consumerId = "consumer"; Map partitionsPerTopic = new HashMap<>(); - partitionsPerTopic.put(topic, 0); - Map> assignment = assignor.assign(partitionsPerTopic, Collections.singletonMap(consumerId, Arrays.asList(topic))); assertEquals(Collections.singleton(consumerId), assignment.keySet()); diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/RoundRobinAssignorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/RoundRobinAssignorTest.java index 31598cd24b13..1d62700e5cbe 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/RoundRobinAssignorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/RoundRobinAssignorTest.java @@ -47,8 +47,6 @@ public void testOneConsumerNonexistentTopic() { String consumerId = "consumer"; Map partitionsPerTopic = new HashMap<>(); - partitionsPerTopic.put(topic, 0); - Map> assignment = assignor.assign(partitionsPerTopic, Collections.singletonMap(consumerId, Arrays.asList(topic))); From f255cefb7d3932e300ccefaa3fabf806c7f713a0 Mon Sep 17 00:00:00 2001 From: Mayuresh Gharat Date: Thu, 5 May 2016 23:31:46 +0100 Subject: [PATCH 091/267] KAFKA-3651; Remove the condition variable waiting on memory availability in Bufferpool when a TimeoutException is thrown Whenever the BufferPool throws a "Failed to allocate memory within the configured max blocking time" exception, it should also remove the condition object from the waiters deque Author: MayureshGharat Reviewers: Chen Zhu , Ismael Juma Closes #1314 from MayureshGharat/kafka-3651 (cherry picked from commit 6856c5c214fb0a40b18cfb25db3dadae320c4142) Signed-off-by: Ismael Juma --- .../producer/internals/BufferPool.java | 26 +++++-- .../producer/internals/BufferPoolTest.java | 77 ++++++++++++++++++- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java index 55779711cef5..b42b0ec010fc 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java @@ -123,16 +123,25 @@ public ByteBuffer allocate(int size, long 
maxTimeToBlockMs) throws InterruptedEx // enough memory to allocate one while (accumulated < size) { long startWaitNs = time.nanoseconds(); - boolean waitingTimeElapsed = !moreMemory.await(remainingTimeToBlockNs, TimeUnit.NANOSECONDS); - long endWaitNs = time.nanoseconds(); - long timeNs = Math.max(0L, endWaitNs - startWaitNs); - this.waitTime.record(timeNs, time.milliseconds()); + long timeNs; + boolean waitingTimeElapsed; + try { + waitingTimeElapsed = !moreMemory.await(remainingTimeToBlockNs, TimeUnit.NANOSECONDS); + } catch (InterruptedException e) { + this.waiters.remove(moreMemory); + throw e; + } finally { + long endWaitNs = time.nanoseconds(); + timeNs = Math.max(0L, endWaitNs - startWaitNs); + this.waitTime.record(timeNs, time.milliseconds()); + } - if (waitingTimeElapsed) + if (waitingTimeElapsed) { + this.waiters.remove(moreMemory); throw new TimeoutException("Failed to allocate memory within the configured max blocking time " + maxTimeToBlockMs + " ms."); + } remainingTimeToBlockNs -= timeNs; - // check if we can satisfy this request from the free list, // otherwise allocate memory if (accumulated == 0 && size == this.poolableSize && !this.free.isEmpty()) { @@ -262,4 +271,9 @@ public int poolableSize() { public long totalMemory() { return this.totalMemory; } + + // package-private method used only for testing + Deque waiters() { + return this.waiters; + } } diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java index 88e894386b3c..48682b1be028 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BufferPoolTest.java @@ -26,11 +26,14 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Deque; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Condition; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.junit.Assert.assertEquals; @@ -148,8 +151,6 @@ public void run() { /** * Test if Timeout exception is thrown when there is not enough memory to allocate and the elapsed time is greater than the max specified block time. * And verify that the allocation should finish soon after the maxBlockTimeMs. 
- * - * @throws Exception */ @Test public void testBlockTimeout() throws Exception { @@ -174,6 +175,78 @@ public void testBlockTimeout() throws Exception { assertTrue("Allocation should finish not much later than maxBlockTimeMs", endTimeMs - beginTimeMs < maxBlockTimeMs + 1000); } + /** + * Test if the waiter that is waiting on availability of more memory is cleaned up when a timeout occurs + */ + @Test + public void testCleanupMemoryAvailabilityWaiterOnBlockTimeout() throws Exception { + BufferPool pool = new BufferPool(2, 1, metrics, time, metricGroup); + pool.allocate(1, maxBlockTimeMs); + try { + pool.allocate(2, maxBlockTimeMs); + fail("The buffer allocated more memory than its maximum value 2"); + } catch (TimeoutException e) { + // this is good + } + assertTrue(pool.queued() == 0); + } + + /** + * Test if the waiter that is waiting on availability of more memory is cleaned up when an interruption occurs + */ + @Test + public void testCleanupMemoryAvailabilityWaiterOnInterruption() throws Exception { + BufferPool pool = new BufferPool(2, 1, metrics, time, metricGroup); + long blockTime = 5000; + pool.allocate(1, maxBlockTimeMs); + Thread t1 = new Thread(new BufferPoolAllocator(pool, blockTime)); + Thread t2 = new Thread(new BufferPoolAllocator(pool, blockTime)); + // start thread t1 which will try to allocate more memory on to the Buffer pool + t1.start(); + // sleep for 500ms. Condition variable c1 associated with pool.allocate() by thread t1 will be inserted in the waiters queue. + Thread.sleep(500); + Deque waiters = pool.waiters(); + // get the condition object associated with pool.allocate() by thread t1 + Condition c1 = waiters.getFirst(); + // start thread t2 which will try to allocate more memory on to the Buffer pool + t2.start(); + // sleep for 500ms. Condition variable c2 associated with pool.allocate() by thread t2 will be inserted in the waiters queue. The waiters queue will have 2 entries c1 and c2. + Thread.sleep(500); + t1.interrupt(); + // sleep for 500ms. 
+ Thread.sleep(500); + // get the condition object associated with allocate() by thread t2 + Condition c2 = waiters.getLast(); + t2.interrupt(); + assertNotEquals(c1, c2); + t1.join(); + t2.join(); + // both the allocate() called by threads t1 and t2 should have been interrupted and the waiters queue should be empty + assertEquals(pool.queued(), 0); + } + + private static class BufferPoolAllocator implements Runnable { + BufferPool pool; + long maxBlockTimeMs; + + BufferPoolAllocator(BufferPool pool, long maxBlockTimeMs) { + this.pool = pool; + this.maxBlockTimeMs = maxBlockTimeMs; + } + + @Override + public void run() { + try { + pool.allocate(2, maxBlockTimeMs); + fail("The buffer allocated more memory than its maximum value 2"); + } catch (TimeoutException e) { + // this is good + } catch (InterruptedException e) { + // this can be neglected + } + } + } + /** * This test creates lots of threads that hammer on the pool */ From a037d1766383573f0ed2c542eb14356ba0457b5a Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Thu, 5 May 2016 16:55:23 -0700 Subject: [PATCH 092/267] HOTFIX: follow-up on KAFKA-725 to remove the check and return empty response instead of throw exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Author: Guozhang Wang Reviewers: Stig Døssing, Ismael Juma, Jun Rao Closes #1327 from guozhangwang/K725r (cherry picked from commit 4a076a03bee376853713f4b5784b66b18ad5535c) Signed-off-by: Guozhang Wang --- core/src/main/scala/kafka/log/Log.scala | 6 +-- .../src/main/scala/kafka/log/LogSegment.scala | 40 ++++++++++--------- .../scala/kafka/server/ReplicaManager.scala | 8 +--- .../test/scala/unit/kafka/log/LogTest.scala | 20 ++++++---- .../kafka/server/ReplicaManagerTest.scala | 18 +++++---- 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/core/src/main/scala/kafka/log/Log.scala b/core/src/main/scala/kafka/log/Log.scala index 8465b649ac77..e0ad73de9fb3 100644 --- a/core/src/main/scala/kafka/log/Log.scala +++ b/core/src/main/scala/kafka/log/Log.scala @@ -483,14 +483,14 @@ class Log(val dir: File, } /** - * Read messages from the log + * Read messages from the log. * * @param startOffset The offset to begin reading at * @param maxLength The maximum number of bytes to read - * @param maxOffset -The offset to read up to, exclusive. (i.e. the first offset NOT included in the resulting message set). + * @param maxOffset The offset to read up to, exclusive. (i.e. this offset NOT included in the resulting message set) * * @throws OffsetOutOfRangeException If startOffset is beyond the log end offset or before the base offset of the first segment. - * @return The fetch data information including fetch starting offset metadata and messages read + * @return The fetch data information including fetch starting offset metadata and messages read. 
*/ def read(startOffset: Long, maxLength: Int, maxOffset: Option[Long] = None): FetchDataInfo = { trace("Reading %d bytes from offset %d in log %s of length %d bytes".format(maxLength, startOffset, name, size)) diff --git a/core/src/main/scala/kafka/log/LogSegment.scala b/core/src/main/scala/kafka/log/LogSegment.scala index 3a4bbc86d6c8..37f757985a5c 100755 --- a/core/src/main/scala/kafka/log/LogSegment.scala +++ b/core/src/main/scala/kafka/log/LogSegment.scala @@ -113,7 +113,7 @@ class LogSegment(val log: FileMessageSet, * @param startOffset A lower bound on the first offset to include in the message set we read * @param maxSize The maximum number of bytes to include in the message set we read * @param maxOffset An optional maximum offset for the message set we read - * @param maxPosition An optional maximum position in the log segment that should be exposed for read. + * @param maxPosition The maximum position in the log segment that should be exposed for read * * @return The fetched data and the offset metadata of the first message whose offset is >= startOffset, * or null if the startOffset is larger than the largest offset in this log @@ -137,24 +137,26 @@ class LogSegment(val log: FileMessageSet, return FetchDataInfo(offsetMetadata, MessageSet.Empty) // calculate the length of the message set to read based on whether or not they gave us a maxOffset - val length = - maxOffset match { - case None => - // no max offset, just read until the max position - min((maxPosition - startPosition.position).toInt, maxSize) - case Some(offset) => { - // there is a max offset, translate it to a file position and use that to calculate the max read size - if(offset < startOffset) - throw new IllegalArgumentException("Attempt to read with a maximum offset (%d) less than the start offset (%d).".format(offset, startOffset)) - val mapping = translateOffset(offset, startPosition.position) - val endPosition = - if(mapping == null) - logSize // the max offset is off the end of the log, use the end of the file - else - mapping.position - min(min(maxPosition, endPosition) - startPosition.position, maxSize).toInt - } - } + val length = maxOffset match { + case None => + // no max offset, just read until the max position + min((maxPosition - startPosition.position).toInt, maxSize) + case Some(offset) => + // there is a max offset, translate it to a file position and use that to calculate the max read size; + // when the leader of a partition changes, it's possible for the new leader's high watermark to be less than the + // true high watermark in the previous leader for a short window. In this window, if a consumer fetches on an + // offset between new leader's high watermark and the log end offset, we want to return an empty response. 
+ if(offset < startOffset) + return FetchDataInfo(offsetMetadata, MessageSet.Empty) + val mapping = translateOffset(offset, startPosition.position) + val endPosition = + if(mapping == null) + logSize // the max offset is off the end of the log, use the end of the file + else + mapping.position + min(min(maxPosition, endPosition) - startPosition.position, maxSize).toInt + } + FetchDataInfo(offsetMetadata, log.read(startPosition.position, length)) } diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 888912bef911..534de271bfb7 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -522,12 +522,8 @@ class ReplicaManager(val config: KafkaConfig, getReplicaOrException(topic, partition) // decide whether to only fetch committed data (i.e. messages below high watermark) - val maxOffsetOpt = if (readOnlyCommitted) { - val maxOffset = localReplica.highWatermark.messageOffset - if(offset > maxOffset) - throw new OffsetOutOfRangeException("Request for offset %d beyond high watermark %d when reading from only committed offsets".format(offset, maxOffset)) - Some(maxOffset) - } + val maxOffsetOpt = if (readOnlyCommitted) + Some(localReplica.highWatermark.messageOffset) else None diff --git a/core/src/test/scala/unit/kafka/log/LogTest.scala b/core/src/test/scala/unit/kafka/log/LogTest.scala index 8c973a45f416..796f5c35d878 100755 --- a/core/src/test/scala/unit/kafka/log/LogTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogTest.scala @@ -222,7 +222,7 @@ class LogTest extends JUnitSuite { // now manually truncate off all but one message from the first segment to create a gap in the messages log.logSegments.head.truncateTo(1) - assertEquals("A read should now return the last message in the log", log.logEndOffset-1, log.read(1, 200, None).messageSet.head.offset) + assertEquals("A read should now return the last message in the log", log.logEndOffset - 1, log.read(1, 200, None).messageSet.head.offset) } /** @@ -235,23 +235,29 @@ class LogTest extends JUnitSuite { def testReadOutOfRange() { createEmptyLogs(logDir, 1024) val logProps = new Properties() + + // set up replica log starting with offset 1024 and with one message (at offset 1024) logProps.put(LogConfig.SegmentBytesProp, 1024: java.lang.Integer) val log = new Log(logDir, LogConfig(logProps), recoveryPoint = 0L, time.scheduler, time = time) log.append(new ByteBufferMessageSet(NoCompressionCodec, messages = new Message("42".getBytes))) - assertEquals("Reading just beyond end of log should produce 0 byte read.", 0, log.read(1025, 1000).messageSet.sizeInBytes) + + assertEquals("Reading at the log end offset should produce 0 byte read.", 0, log.read(1025, 1000).messageSet.sizeInBytes) + try { - log.read(0, 1025) - fail("Expected exception on invalid read.") + log.read(0, 1000) + fail("Reading below the log start offset should throw OffsetOutOfRangeException") } catch { - case e: OffsetOutOfRangeException => "This is good." + case e: OffsetOutOfRangeException => // This is good. } + try { log.read(1026, 1000) - fail("Expected exception on invalid read.") + fail("Reading at beyond the log end offset should throw OffsetOutOfRangeException") } catch { case e: OffsetOutOfRangeException => // This is good. 
} - assertEquals("Reading from maxOffset should produce 0 byte read.", 0, log.read(1024, 1000, Some(1024)).messageSet.sizeInBytes) + + assertEquals("Reading from below the specified maxOffset should produce 0 byte read.", 0, log.read(1025, 1000, Some(1024)).messageSet.sizeInBytes) } /** diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala index 2cdf924ad5e7..57398562385b 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala @@ -24,7 +24,7 @@ import java.util.concurrent.atomic.AtomicBoolean import kafka.api.{FetchResponsePartitionData, PartitionFetchInfo} import kafka.cluster.Broker import kafka.common.TopicAndPartition -import kafka.message.{ByteBufferMessageSet, Message} +import kafka.message.{MessageSet, ByteBufferMessageSet, Message} import kafka.utils.{MockScheduler, MockTime, TestUtils, ZkUtils} import org.I0Itec.zkclient.ZkClient import org.apache.kafka.common.metrics.Metrics @@ -35,7 +35,7 @@ import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse import org.apache.kafka.common.utils.{MockTime => JMockTime} import org.apache.kafka.common.{Node, TopicPartition} import org.easymock.EasyMock -import org.junit.Assert.{assertEquals, assertTrue, assertFalse} +import org.junit.Assert.{assertEquals, assertTrue} import org.junit.{Test, Before, After} import scala.collection.JavaConverters._ @@ -189,7 +189,7 @@ class ReplicaManagerTest { } @Test - def testFetchBeyondHighWatermarkNotAllowedForConsumer() { + def testFetchBeyondHighWatermarkReturnEmptyResponse() { val props = TestUtils.createBrokerConfig(1, TestUtils.MockZkConnect) props.put("log.dir", TestUtils.tempRelativeDir("data").getAbsolutePath) props.put("broker.id", Int.box(0)) @@ -218,7 +218,7 @@ class ReplicaManagerTest { def produceCallback(responseStatus: Map[TopicPartition, PartitionResponse]) = {} - // Append a message. + // Append a couple of messages. 
for(i <- 1 to 2) rm.appendMessages( timeout = 1000, @@ -229,8 +229,10 @@ class ReplicaManagerTest { var fetchCallbackFired = false var fetchError = 0 + var fetchedMessages: MessageSet = null def fetchCallback(responseStatus: Map[TopicAndPartition, FetchResponsePartitionData]) = { fetchError = responseStatus.values.head.error + fetchedMessages = responseStatus.values.head.messages fetchCallbackFired = true } @@ -238,25 +240,27 @@ class ReplicaManagerTest { rm.fetchMessages( timeout = 1000, replicaId = 1, - fetchMinBytes = 1, + fetchMinBytes = 0, fetchInfo = collection.immutable.Map(new TopicAndPartition(topic, 0) -> new PartitionFetchInfo(1, 100000)), responseCallback = fetchCallback) assertTrue(fetchCallbackFired) assertEquals("Should not give an exception", Errors.NONE.code, fetchError) + assertTrue("Should return some data", fetchedMessages.iterator.hasNext) fetchCallbackFired = false // Fetch a message above the high watermark as a consumer rm.fetchMessages( timeout = 1000, replicaId = -1, - fetchMinBytes = 1, + fetchMinBytes = 0, fetchInfo = collection.immutable.Map(new TopicAndPartition(topic, 0) -> new PartitionFetchInfo(1, 100000)), responseCallback = fetchCallback) assertTrue(fetchCallbackFired) - assertEquals("Should give OffsetOutOfRangeException", Errors.OFFSET_OUT_OF_RANGE.code, fetchError) + assertEquals("Should not give an exception", Errors.NONE.code, fetchError) + assertEquals("Should return empty response", MessageSet.Empty, fetchedMessages) } finally { rm.shutdown(checkpointHW = false) } From 69bc4cac46e4448a155766ad4c4bfcab687e9ee3 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Thu, 5 May 2016 22:24:03 -0700 Subject: [PATCH 093/267] KAFKA-3627: consumer fails to execute delayed tasks in poll when records are available Author: Jason Gustafson Reviewers: Liquan Pei , Jiangjie Qin , Guozhang Wang , Ewen Cheslack-Postava Closes #1295 from hachikuji/KAFKA-3627 (cherry picked from commit 2ff955044aa875176aaa58a9be4a79c494a3fb27) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/clients/consumer/KafkaConsumer.java | 69 ++- .../internals/ConsumerCoordinator.java | 7 +- .../internals/ConsumerNetworkClient.java | 55 ++- .../clients/consumer/internals/Fetcher.java | 13 +- .../consumer/internals/SubscriptionState.java | 35 +- .../org/apache/kafka/clients/MockClient.java | 55 ++- .../clients/consumer/KafkaConsumerTest.java | 442 +++++++++++++++++- .../consumer/internals/FetcherTest.java | 34 +- 8 files changed, 621 insertions(+), 89 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index 2373a13d1d65..2784644edcf6 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -504,7 +504,7 @@ public class KafkaConsumer implements Consumer { private static final AtomicInteger CONSUMER_CLIENT_ID_SEQUENCE = new AtomicInteger(1); private static final String JMX_PREFIX = "kafka.consumer"; - private String clientId; + private final String clientId; private final ConsumerCoordinator coordinator; private final Deserializer keyDeserializer; private final Deserializer valueDeserializer; @@ -517,7 +517,7 @@ public class KafkaConsumer implements Consumer { private final SubscriptionState subscriptions; private final Metadata metadata; private final long retryBackoffMs; - private long requestTimeoutMs; + private final long requestTimeoutMs; private boolean 
closed = false; // currentThread holds the threadId of the current thread accessing KafkaConsumer @@ -602,10 +602,11 @@ private KafkaConsumer(ConsumerConfig config, throw new ConfigException(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG + " should be greater than " + ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG + " and " + ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG); this.time = new SystemTime(); - clientId = config.getString(ConsumerConfig.CLIENT_ID_CONFIG); + String clientId = config.getString(ConsumerConfig.CLIENT_ID_CONFIG); if (clientId.length() <= 0) clientId = "consumer-" + CONSUMER_CLIENT_ID_SEQUENCE.getAndIncrement(); - Map metricsTags = new LinkedHashMap(); + this.clientId = clientId; + Map metricsTags = new LinkedHashMap<>(); metricsTags.put("client-id", clientId); MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ConsumerConfig.METRICS_NUM_SAMPLES_CONFIG)) .timeWindow(config.getLong(ConsumerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS) @@ -702,6 +703,35 @@ private KafkaConsumer(ConsumerConfig config, } } + // visible for testing + KafkaConsumer(String clientId, + ConsumerCoordinator coordinator, + Deserializer keyDeserializer, + Deserializer valueDeserializer, + Fetcher fetcher, + ConsumerInterceptors interceptors, + Time time, + ConsumerNetworkClient client, + Metrics metrics, + SubscriptionState subscriptions, + Metadata metadata, + long retryBackoffMs, + long requestTimeoutMs) { + this.clientId = clientId; + this.coordinator = coordinator; + this.keyDeserializer = keyDeserializer; + this.valueDeserializer = valueDeserializer; + this.fetcher = fetcher; + this.interceptors = interceptors; + this.time = time; + this.client = client; + this.metrics = metrics; + this.subscriptions = subscriptions; + this.metadata = metadata; + this.retryBackoffMs = retryBackoffMs; + this.requestTimeoutMs = requestTimeoutMs; + } + /** * Get the set of partitions currently assigned to this consumer. If subscription happened by directly assigning * partitions using {@link #assign(Collection)} then this will simply return the same partitions that @@ -910,14 +940,16 @@ public ConsumerRecords poll(long timeout) { // and avoid block waiting for their responses to enable pipelining while the user // is handling the fetched records. // - // NOTE that we use quickPoll() in this case which disables wakeups and delayed - // task execution since the consumed positions has already been updated and we - // must return these records to users to process before being interrupted or - // auto-committing offsets - fetcher.sendFetches(metadata.fetch()); - client.quickPoll(false); - return this.interceptors == null - ? new ConsumerRecords<>(records) : this.interceptors.onConsume(new ConsumerRecords<>(records)); + // NOTE: since the consumed position has already been updated, we must not allow + // wakeups or any other errors to be triggered prior to returning the fetched records. + // Additionally, pollNoWakeup does not allow automatic commits to get triggered. + fetcher.sendFetches(); + client.pollNoWakeup(); + + if (this.interceptors == null) + return new ConsumerRecords<>(records); + else + return this.interceptors.onConsume(new ConsumerRecords<>(records)); } long elapsed = time.milliseconds() - start; @@ -949,18 +981,21 @@ private Map>> pollOnce(long timeout) { if (!subscriptions.hasAllFetchPositions()) updateFetchPositions(this.subscriptions.missingFetchPositions()); + long now = time.milliseconds(); + + // execute delayed tasks (e.g. 
autocommits and heartbeats) prior to fetching records + client.executeDelayedTasks(now); + // init any new fetches (won't resend pending fetches) - Cluster cluster = this.metadata.fetch(); Map>> records = fetcher.fetchedRecords(); // if data is available already, e.g. from a previous network client poll() call to commit, // then just return it immediately - if (!records.isEmpty()) { + if (!records.isEmpty()) return records; - } - fetcher.sendFetches(cluster); - client.poll(timeout); + fetcher.sendFetches(); + client.poll(timeout, now); return fetcher.fetchedRecords(); } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index c1f373fb4478..a642512a2c59 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -379,9 +379,10 @@ public void onFailure(RuntimeException e) { } }); - // ensure commit has a chance to be transmitted (without blocking on its completion) - // note that we allow delayed tasks to be executed in case heartbeats need to be sent - client.quickPoll(true); + // ensure the commit has a chance to be transmitted (without blocking on its completion). + // Note that commits are treated as heartbeats by the coordinator, so there is no need to + // explicitly allow heartbeats through delayed task execution. + client.pollNoWakeup(); } /** diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java index d4c26568fdb0..b65a5b7b203e 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerNetworkClient.java @@ -40,7 +40,7 @@ /** * Higher level consumer access to the network layer with basic support for futures and - * task scheduling. NOT thread-safe! + * task scheduling. This class is not thread-safe, except for wakeup(). */ public class ConsumerNetworkClient implements Closeable { private static final Logger log = LoggerFactory.getLogger(ConsumerNetworkClient.class); @@ -53,8 +53,10 @@ public class ConsumerNetworkClient implements Closeable { private final Time time; private final long retryBackoffMs; private final long unsentExpiryMs; - // wakeup enabled flag need to be volatile since it is allowed to be accessed concurrently - volatile private boolean wakeupsEnabled = true; + + // this count is only accessed from the consumer's main thread + private int wakeupDisabledCount = 0; + public ConsumerNetworkClient(KafkaClient client, Metadata metadata, @@ -182,8 +184,7 @@ public boolean poll(RequestFuture future, long timeout) { } /** - * Poll for any network IO. All send requests will either be transmitted on the network - * or failed when this call completes. + * Poll for any network IO. * @param timeout The maximum time to wait for an IO event. * @throws WakeupException if {@link #wakeup()} is called from another thread */ @@ -191,15 +192,26 @@ public void poll(long timeout) { poll(timeout, time.milliseconds(), true); } + /** + * Poll for any network IO. 
+ * @param timeout timeout in milliseconds + * @param now current time in milliseconds + */ + public void poll(long timeout, long now) { + poll(timeout, now, true); + } + /** * Poll for network IO and return immediately. This will not trigger wakeups, * nor will it execute any delayed tasks. - * @param executeDelayedTasks Whether to allow delayed task execution (true allows) */ - public void quickPoll(boolean executeDelayedTasks) { + public void pollNoWakeup() { disableWakeups(); - poll(0, time.milliseconds(), executeDelayedTasks); - enableWakeups(); + try { + poll(0, time.milliseconds(), false); + } finally { + enableWakeups(); + } } private void poll(long timeout, long now, boolean executeDelayedTasks) { @@ -229,6 +241,16 @@ private void poll(long timeout, long now, boolean executeDelayedTasks) { failExpiredRequests(now); } + /** + * Execute delayed tasks now. + * @param now current time in milliseconds + * @throws WakeupException if a wakeup has been requested + */ + public void executeDelayedTasks(long now) { + delayedTasks.poll(now); + maybeTriggerWakeup(); + } + /** * Block until all pending requests from the given node have finished. * @param node The node to await requests from @@ -336,22 +358,29 @@ private boolean trySend(long now) { private void clientPoll(long timeout, long now) { client.poll(timeout, now); - if (wakeupsEnabled && wakeup.get()) { + maybeTriggerWakeup(); + } + + private void maybeTriggerWakeup() { + if (wakeupDisabledCount == 0 && wakeup.get()) { wakeup.set(false); throw new WakeupException(); } } public void disableWakeups() { - this.wakeupsEnabled = false; + wakeupDisabledCount++; } public void enableWakeups() { - this.wakeupsEnabled = true; + if (wakeupDisabledCount <= 0) + throw new IllegalStateException("Cannot enable wakeups since they were never disabled"); + + wakeupDisabledCount--; // re-wakeup the client if the flag was set since previous wake-up call // could be cleared by poll(0) while wakeups were disabled - if (wakeup.get()) + if (wakeupDisabledCount == 0 && wakeup.get()) this.client.wakeup(); } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index f6d338747668..0256fe7f536b 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -131,10 +131,9 @@ public Fetcher(ConsumerNetworkClient client, /** * Set-up a fetch request for any node that we have assigned partitions for which doesn't have one. * - * @param cluster The current cluster metadata */ - public void sendFetches(Cluster cluster) { - for (Map.Entry fetchEntry: createFetchRequests(cluster).entrySet()) { + public void sendFetches() { + for (Map.Entry fetchEntry: createFetchRequests().entrySet()) { final FetchRequest fetch = fetchEntry.getValue(); client.send(fetchEntry.getKey(), ApiKeys.FETCH, fetch) .addListener(new RequestFutureListener() { @@ -525,8 +524,9 @@ private Set fetchablePartitions() { * Create fetch requests for all nodes for which we have assigned partitions * that have no existing requests in flight. 
*/ - private Map createFetchRequests(Cluster cluster) { + private Map createFetchRequests() { // create the fetch info + Cluster cluster = metadata.fetch(); Map> fetchable = new HashMap<>(); for (TopicPartition partition : fetchablePartitions()) { Node node = cluster.leaderFor(partition); @@ -586,11 +586,14 @@ private void handleFetchResponse(ClientResponse resp, FetchRequest request) { ByteBuffer buffer = partition.recordSet; MemoryRecords records = MemoryRecords.readableRecords(buffer); List> parsed = new ArrayList<>(); + boolean skippedRecords = false; for (LogEntry logEntry : records) { // Skip the messages earlier than current position. if (logEntry.offset() >= position) { parsed.add(parseRecord(tp, logEntry)); bytes += logEntry.size(); + } else { + skippedRecords = true; } } @@ -599,7 +602,7 @@ private void handleFetchResponse(ClientResponse resp, FetchRequest request) { ConsumerRecord record = parsed.get(parsed.size() - 1); this.records.add(new PartitionRecords<>(fetchOffset, tp, parsed)); this.sensors.recordsFetchLag.record(partition.highWatermark - record.offset()); - } else if (buffer.limit() > 0) { + } else if (buffer.limit() > 0 && !skippedRecords) { // we did not read a single message from a non-empty buffer // because that message's size is larger than fetch size, in this case // record this exception diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java index e72a476ee5c5..ec351153f054 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java @@ -285,7 +285,7 @@ public Map allConsumed() { Map allConsumed = new HashMap<>(); for (Map.Entry entry : assignment.entrySet()) { TopicPartitionState state = entry.getValue(); - if (state.hasValidPosition) + if (state.hasValidPosition()) allConsumed.put(entry.getKey(), new OffsetAndMetadata(state.position)); } return allConsumed; @@ -304,7 +304,7 @@ public boolean hasDefaultOffsetResetPolicy() { } public boolean isOffsetResetNeeded(TopicPartition partition) { - return assignedState(partition).awaitingReset; + return assignedState(partition).awaitingReset(); } public OffsetResetStrategy resetStrategy(TopicPartition partition) { @@ -313,7 +313,7 @@ public OffsetResetStrategy resetStrategy(TopicPartition partition) { public boolean hasAllFetchPositions() { for (TopicPartitionState state : assignment.values()) - if (!state.hasValidPosition) + if (!state.hasValidPosition()) return false; return true; } @@ -321,7 +321,7 @@ public boolean hasAllFetchPositions() { public Set missingFetchPositions() { Set missing = new HashSet<>(); for (Map.Entry entry : assignment.entrySet()) - if (!entry.getValue().hasValidPosition) + if (!entry.getValue().hasValidPosition()) missing.add(entry.getKey()); return missing; } @@ -359,40 +359,39 @@ public ConsumerRebalanceListener listener() { } private static class TopicPartitionState { - private Long position; + private Long position; // last consumed position private OffsetAndMetadata committed; // last committed position - - private boolean hasValidPosition; // whether we have valid consumed and fetched positions private boolean paused; // whether this partition has been paused by the user - private boolean awaitingReset; // whether we are awaiting reset - private OffsetResetStrategy resetStrategy; // the reset strategy if awaitingReset 
is set + private OffsetResetStrategy resetStrategy; // the strategy to use if the offset needs resetting public TopicPartitionState() { this.paused = false; this.position = null; this.committed = null; - this.awaitingReset = false; - this.hasValidPosition = false; this.resetStrategy = null; } private void awaitReset(OffsetResetStrategy strategy) { - this.awaitingReset = true; this.resetStrategy = strategy; this.position = null; - this.hasValidPosition = false; + } + + public boolean awaitingReset() { + return resetStrategy != null; + } + + public boolean hasValidPosition() { + return position != null; } private void seek(long offset) { this.position = offset; - this.awaitingReset = false; this.resetStrategy = null; - this.hasValidPosition = true; } private void position(long offset) { - if (!hasValidPosition) - throw new IllegalStateException("Cannot update fetch position without valid consumed/fetched positions"); + if (!hasValidPosition()) + throw new IllegalStateException("Cannot set a new position without a valid current position"); this.position = offset; } @@ -409,7 +408,7 @@ private void resume() { } private boolean isFetchable() { - return !paused && hasValidPosition; + return !paused && hasValidPosition(); } } diff --git a/clients/src/test/java/org/apache/kafka/clients/MockClient.java b/clients/src/test/java/org/apache/kafka/clients/MockClient.java index 527d2834f050..9fbbb88c48c5 100644 --- a/clients/src/test/java/org/apache/kafka/clients/MockClient.java +++ b/clients/src/test/java/org/apache/kafka/clients/MockClient.java @@ -47,11 +47,13 @@ private class FutureResponse { public final Struct responseBody; public final boolean disconnected; public final RequestMatcher requestMatcher; + public Node node; - public FutureResponse(Struct responseBody, boolean disconnected, RequestMatcher requestMatcher) { + public FutureResponse(Struct responseBody, boolean disconnected, RequestMatcher requestMatcher, Node node) { this.responseBody = responseBody; this.disconnected = disconnected; this.requestMatcher = requestMatcher; + this.node = node; } } @@ -124,17 +126,23 @@ public void disconnect(String node) { @Override public void send(ClientRequest request, long now) { - if (!futureResponses.isEmpty()) { - FutureResponse futureResp = futureResponses.poll(); + Iterator iterator = futureResponses.iterator(); + while (iterator.hasNext()) { + FutureResponse futureResp = iterator.next(); + if (futureResp.node != null && !request.request().destination().equals(futureResp.node.idString())) + continue; + if (!futureResp.requestMatcher.matches(request)) throw new IllegalStateException("Next in line response did not match expected request"); ClientResponse resp = new ClientResponse(request, time.milliseconds(), futureResp.disconnected, futureResp.responseBody); responses.add(resp); - } else { - request.setSendTimeMs(now); - this.requests.add(request); + iterator.remove(); + return; } + + request.setSendTimeMs(now); + this.requests.add(request); } @Override @@ -163,10 +171,31 @@ public void respond(Struct body, boolean disconnected) { responses.add(new ClientResponse(request, time.milliseconds(), disconnected, body)); } + public void respondFrom(Struct body, Node node) { + respondFrom(body, node, false); + } + + public void respondFrom(Struct body, Node node, boolean disconnected) { + Iterator iterator = requests.iterator(); + while (iterator.hasNext()) { + ClientRequest request = iterator.next(); + if (request.request().destination().equals(node.idString())) { + iterator.remove(); + responses.add(new 
ClientResponse(request, time.milliseconds(), disconnected, body)); + return; + } + } + throw new IllegalArgumentException("No requests available to node " + node); + } + public void prepareResponse(Struct body) { prepareResponse(ALWAYS_TRUE, body, false); } + public void prepareResponseFrom(Struct body, Node node) { + prepareResponseFrom(ALWAYS_TRUE, body, node, false); + } + /** * Prepare a response for a request matching the provided matcher. If the matcher does not * match, {@link #send(ClientRequest, long)} will throw IllegalStateException @@ -177,10 +206,18 @@ public void prepareResponse(RequestMatcher matcher, Struct body) { prepareResponse(matcher, body, false); } + public void prepareResponseFrom(RequestMatcher matcher, Struct body, Node node) { + prepareResponseFrom(matcher, body, node, false); + } + public void prepareResponse(Struct body, boolean disconnected) { prepareResponse(ALWAYS_TRUE, body, disconnected); } + public void prepareResponseFrom(Struct body, Node node, boolean disconnected) { + prepareResponseFrom(ALWAYS_TRUE, body, node, disconnected); + } + /** * Prepare a response for a request matching the provided matcher. If the matcher does not * match, {@link #send(ClientRequest, long)} will throw IllegalStateException @@ -189,7 +226,11 @@ public void prepareResponse(Struct body, boolean disconnected) { * @param disconnected Whether the request was disconnected */ public void prepareResponse(RequestMatcher matcher, Struct body, boolean disconnected) { - futureResponses.add(new FutureResponse(body, disconnected, matcher)); + prepareResponseFrom(matcher, body, null, disconnected); + } + + public void prepareResponseFrom(RequestMatcher matcher, Struct body, Node node, boolean disconnected) { + futureResponses.add(new FutureResponse(body, disconnected, matcher, node)); } public void setNode(Node node) { diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java index 227279533d2f..694faf2f5f60 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java @@ -16,21 +16,57 @@ */ package org.apache.kafka.clients.consumer; +import org.apache.kafka.clients.ClientRequest; +import org.apache.kafka.clients.KafkaClient; +import org.apache.kafka.clients.Metadata; +import org.apache.kafka.clients.MockClient; +import org.apache.kafka.clients.consumer.internals.ConsumerCoordinator; +import org.apache.kafka.clients.consumer.internals.ConsumerInterceptors; +import org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient; +import org.apache.kafka.clients.consumer.internals.ConsumerProtocol; +import org.apache.kafka.clients.consumer.internals.Fetcher; +import org.apache.kafka.clients.consumer.internals.PartitionAssignor; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.Cluster; import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.Node; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.WakeupException; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.common.protocol.types.Struct; +import org.apache.kafka.common.record.CompressionType; +import org.apache.kafka.common.record.MemoryRecords; +import org.apache.kafka.common.requests.FetchResponse; +import 
org.apache.kafka.common.requests.GroupCoordinatorResponse; +import org.apache.kafka.common.requests.HeartbeatResponse; +import org.apache.kafka.common.requests.JoinGroupResponse; +import org.apache.kafka.common.requests.OffsetCommitRequest; +import org.apache.kafka.common.requests.OffsetCommitResponse; +import org.apache.kafka.common.requests.SyncGroupResponse; import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.Time; import org.apache.kafka.test.MockConsumerInterceptor; import org.apache.kafka.test.MockMetricsReporter; +import org.apache.kafka.test.TestUtils; import org.junit.Assert; import org.junit.Test; +import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Properties; +import java.util.concurrent.atomic.AtomicBoolean; import static java.util.Collections.singleton; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class KafkaConsumerTest { @@ -64,19 +100,19 @@ public void testSubscription() { consumer.subscribe(Collections.singletonList(topic)); assertEquals(singleton(topic), consumer.subscription()); - Assert.assertTrue(consumer.assignment().isEmpty()); + assertTrue(consumer.assignment().isEmpty()); consumer.subscribe(Collections.emptyList()); - Assert.assertTrue(consumer.subscription().isEmpty()); - Assert.assertTrue(consumer.assignment().isEmpty()); + assertTrue(consumer.subscription().isEmpty()); + assertTrue(consumer.assignment().isEmpty()); consumer.assign(Collections.singletonList(tp0)); - Assert.assertTrue(consumer.subscription().isEmpty()); + assertTrue(consumer.subscription().isEmpty()); assertEquals(singleton(tp0), consumer.assignment()); consumer.unsubscribe(); - Assert.assertTrue(consumer.subscription().isEmpty()); - Assert.assertTrue(consumer.assignment().isEmpty()); + assertTrue(consumer.subscription().isEmpty()); + assertTrue(consumer.assignment().isEmpty()); consumer.close(); } @@ -124,16 +160,16 @@ public void testPause() { consumer.assign(Collections.singletonList(tp0)); assertEquals(singleton(tp0), consumer.assignment()); - Assert.assertTrue(consumer.paused().isEmpty()); + assertTrue(consumer.paused().isEmpty()); consumer.pause(singleton(tp0)); assertEquals(singleton(tp0), consumer.paused()); consumer.resume(singleton(tp0)); - Assert.assertTrue(consumer.paused().isEmpty()); + assertTrue(consumer.paused().isEmpty()); consumer.unsubscribe(); - Assert.assertTrue(consumer.paused().isEmpty()); + assertTrue(consumer.paused().isEmpty()); consumer.close(); } @@ -147,4 +183,392 @@ private KafkaConsumer newConsumer() { return new KafkaConsumer( props, new ByteArrayDeserializer(), new ByteArrayDeserializer()); } + + @Test + public void verifyHeartbeatSent() { + String topic = "topic"; + TopicPartition partition = new TopicPartition(topic, 0); + + int sessionTimeoutMs = 30000; + int heartbeatIntervalMs = 3000; + int autoCommitIntervalMs = 10000; + + Time time = new MockTime(); + MockClient client = new MockClient(time); + Cluster cluster = TestUtils.singletonCluster(topic, 1); + Node node = cluster.nodes().get(0); + client.setNode(node); + Metadata metadata = new Metadata(0, Long.MAX_VALUE); + metadata.update(cluster, time.milliseconds()); + PartitionAssignor assignor = new 
RoundRobinAssignor(); + + final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor, + sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs); + + consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() { + @Override + public void onPartitionsRevoked(Collection partitions) { + + } + + @Override + public void onPartitionsAssigned(Collection partitions) { + // set initial position so we don't need a lookup + for (TopicPartition partition : partitions) + consumer.seek(partition, 0); + } + }); + + // lookup coordinator + client.prepareResponseFrom(new GroupCoordinatorResponse(Errors.NONE.code(), node).toStruct(), node); + + Node coordinator = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port()); + + // join group + client.prepareResponseFrom(joinGroupFollowerResponse(assignor, 1, "memberId", "leaderId", Errors.NONE.code()), coordinator); + + // sync group + client.prepareResponseFrom(syncGroupResponse(Arrays.asList(partition), Errors.NONE.code()), coordinator); + + // initial fetch + client.prepareResponseFrom(fetchResponse(partition, 0, 0), node); + + consumer.poll(0); + assertEquals(Collections.singleton(partition), consumer.assignment()); + + // heartbeat interval is 2 seconds + time.sleep(heartbeatIntervalMs); + + final AtomicBoolean heartbeatReceived = new AtomicBoolean(false); + client.prepareResponseFrom(new MockClient.RequestMatcher() { + @Override + public boolean matches(ClientRequest request) { + heartbeatReceived.set(true); + return true; + } + }, new HeartbeatResponse(Errors.NONE.code()).toStruct(), coordinator); + + consumer.poll(0); + + assertTrue(heartbeatReceived.get()); + } + + @Test + public void verifyHeartbeatSentWhenFetchedDataReady() { + String topic = "topic"; + TopicPartition partition = new TopicPartition(topic, 0); + + int sessionTimeoutMs = 30000; + int heartbeatIntervalMs = 3000; + int autoCommitIntervalMs = 10000; + + Time time = new MockTime(); + MockClient client = new MockClient(time); + Cluster cluster = TestUtils.singletonCluster(topic, 1); + Node node = cluster.nodes().get(0); + client.setNode(node); + Metadata metadata = new Metadata(0, Long.MAX_VALUE); + metadata.update(cluster, time.milliseconds()); + PartitionAssignor assignor = new RoundRobinAssignor(); + + final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor, + sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs); + consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() { + @Override + public void onPartitionsRevoked(Collection partitions) { + + } + + @Override + public void onPartitionsAssigned(Collection partitions) { + // set initial position so we don't need a lookup + for (TopicPartition partition : partitions) + consumer.seek(partition, 0); + } + }); + + // lookup coordinator + client.prepareResponseFrom(new GroupCoordinatorResponse(Errors.NONE.code(), node).toStruct(), node); + + Node coordinator = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port()); + + // join group + client.prepareResponseFrom(joinGroupFollowerResponse(assignor, 1, "memberId", "leaderId", Errors.NONE.code()), coordinator); + + // sync group + client.prepareResponseFrom(syncGroupResponse(Arrays.asList(partition), Errors.NONE.code()), coordinator); + + consumer.poll(0); + + // respond to the outstanding fetch so that we have data available on the next poll + client.respondFrom(fetchResponse(partition, 0, 5), node); + client.poll(0, time.milliseconds()); + + time.sleep(heartbeatIntervalMs); + + 
client.prepareResponseFrom(fetchResponse(partition, 5, 0), node); + final AtomicBoolean heartbeatReceived = new AtomicBoolean(false); + client.prepareResponseFrom(new MockClient.RequestMatcher() { + @Override + public boolean matches(ClientRequest request) { + heartbeatReceived.set(true); + return true; + } + }, new HeartbeatResponse(Errors.NONE.code()).toStruct(), coordinator); + + consumer.poll(0); + + assertTrue(heartbeatReceived.get()); + } + + @Test + public void testAutoCommitSentBeforePositionUpdate() { + String topic = "topic"; + final TopicPartition partition = new TopicPartition(topic, 0); + + int sessionTimeoutMs = 30000; + int heartbeatIntervalMs = 3000; + + // adjust auto commit interval lower than heartbeat so we don't need to deal with + // a concurrent heartbeat request + int autoCommitIntervalMs = 1000; + + Time time = new MockTime(); + MockClient client = new MockClient(time); + Cluster cluster = TestUtils.singletonCluster(topic, 1); + Node node = cluster.nodes().get(0); + client.setNode(node); + Metadata metadata = new Metadata(0, Long.MAX_VALUE); + metadata.update(cluster, time.milliseconds()); + PartitionAssignor assignor = new RoundRobinAssignor(); + + final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor, + sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs); + consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() { + @Override + public void onPartitionsRevoked(Collection partitions) { + + } + + @Override + public void onPartitionsAssigned(Collection partitions) { + // set initial position so we don't need a lookup + for (TopicPartition partition : partitions) + consumer.seek(partition, 0); + } + }); + + // lookup coordinator + client.prepareResponseFrom(new GroupCoordinatorResponse(Errors.NONE.code(), node).toStruct(), node); + + Node coordinator = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port()); + + // join group + client.prepareResponseFrom(joinGroupFollowerResponse(assignor, 1, "memberId", "leaderId", Errors.NONE.code()), coordinator); + + // sync group + client.prepareResponseFrom(syncGroupResponse(Arrays.asList(partition), Errors.NONE.code()), coordinator); + + consumer.poll(0); + + // respond to the outstanding fetch so that we have data available on the next poll + client.respondFrom(fetchResponse(partition, 0, 5), node); + client.poll(0, time.milliseconds()); + + time.sleep(autoCommitIntervalMs); + + client.prepareResponseFrom(fetchResponse(partition, 5, 0), node); + + // no data has been returned to the user yet, so the committed offset should be 0 + final AtomicBoolean commitReceived = new AtomicBoolean(false); + client.prepareResponseFrom(new MockClient.RequestMatcher() { + @Override + public boolean matches(ClientRequest request) { + OffsetCommitRequest commitRequest = new OffsetCommitRequest(request.request().body()); + OffsetCommitRequest.PartitionData partitionData = commitRequest.offsetData().get(partition); + if (partitionData.offset == 0) { + commitReceived.set(true); + return true; + } + return false; + } + }, new OffsetCommitResponse(Collections.singletonMap(partition, Errors.NONE.code())).toStruct(), coordinator); + + consumer.poll(0); + + assertTrue(commitReceived.get()); + } + + @Test + public void testWakeupWithFetchDataAvailable() { + String topic = "topic"; + final TopicPartition partition = new TopicPartition(topic, 0); + + int sessionTimeoutMs = 30000; + int heartbeatIntervalMs = 3000; + + // adjust auto commit interval lower than heartbeat so we don't need to deal with + 
// a concurrent heartbeat request + int autoCommitIntervalMs = 1000; + + Time time = new MockTime(); + MockClient client = new MockClient(time); + Cluster cluster = TestUtils.singletonCluster(topic, 1); + Node node = cluster.nodes().get(0); + client.setNode(node); + Metadata metadata = new Metadata(0, Long.MAX_VALUE); + metadata.update(cluster, time.milliseconds()); + PartitionAssignor assignor = new RoundRobinAssignor(); + + final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor, + sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs); + consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() { + @Override + public void onPartitionsRevoked(Collection partitions) { + + } + + @Override + public void onPartitionsAssigned(Collection partitions) { + // set initial position so we don't need a lookup + for (TopicPartition partition : partitions) + consumer.seek(partition, 0); + } + }); + + // lookup coordinator + client.prepareResponseFrom(new GroupCoordinatorResponse(Errors.NONE.code(), node).toStruct(), node); + + Node coordinator = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port()); + + // join group + client.prepareResponseFrom(joinGroupFollowerResponse(assignor, 1, "memberId", "leaderId", Errors.NONE.code()), coordinator); + + // sync group + client.prepareResponseFrom(syncGroupResponse(Arrays.asList(partition), Errors.NONE.code()), coordinator); + + consumer.poll(0); + + // respond to the outstanding fetch so that we have data available on the next poll + client.respondFrom(fetchResponse(partition, 0, 5), node); + client.poll(0, time.milliseconds()); + + consumer.wakeup(); + + try { + consumer.poll(0); + fail(); + } catch (WakeupException e) { + } + + // make sure the position hasn't been updated + assertEquals(0, consumer.position(partition)); + + // the next poll should return the completed fetch + ConsumerRecords records = consumer.poll(0); + assertEquals(5, records.count()); + } + + private Struct joinGroupFollowerResponse(PartitionAssignor assignor, int generationId, String memberId, String leaderId, short error) { + return new JoinGroupResponse(error, generationId, assignor.name(), memberId, leaderId, + Collections.emptyMap()).toStruct(); + } + + private Struct syncGroupResponse(List partitions, short error) { + ByteBuffer buf = ConsumerProtocol.serializeAssignment(new PartitionAssignor.Assignment(partitions)); + return new SyncGroupResponse(error, buf).toStruct(); + } + + private Struct fetchResponse(TopicPartition tp, long fetchOffset, int count) { + MemoryRecords records = MemoryRecords.emptyRecords(ByteBuffer.allocate(1024), CompressionType.NONE); + for (int i = 0; i < count; i++) + records.append(fetchOffset + i, 0L, ("key-" + i).getBytes(), ("value-" + i).getBytes()); + records.close(); + FetchResponse response = new FetchResponse(Collections.singletonMap( + tp, new FetchResponse.PartitionData(Errors.NONE.code(), 5, records.buffer())), 0); + return response.toStruct(); + } + + private KafkaConsumer newConsumer(Time time, + KafkaClient client, + Metadata metadata, + PartitionAssignor assignor, + int sessionTimeoutMs, + int heartbeatIntervalMs, + int autoCommitIntervalMs) { + // create a consumer with mocked time and mocked network + + String clientId = "mock-consumer"; + String groupId = "mock-group"; + String metricGroupPrefix = "consumer"; + long retryBackoffMs = 100; + long requestTimeoutMs = 30000; + boolean autoCommitEnabled = true; + boolean excludeInternalTopics = true; + int minBytes = 1; + int maxWaitMs = 500; 
+ int fetchSize = 1024 * 1024; + int maxPollRecords = Integer.MAX_VALUE; + boolean checkCrcs = true; + + Deserializer keyDeserializer = new StringDeserializer(); + Deserializer valueDeserializer = new StringDeserializer(); + + OffsetResetStrategy autoResetStrategy = OffsetResetStrategy.EARLIEST; + OffsetCommitCallback defaultCommitCallback = new ConsumerCoordinator.DefaultOffsetCommitCallback(); + List assignors = Arrays.asList(assignor); + ConsumerInterceptors interceptors = null; + + Metrics metrics = new Metrics(); + SubscriptionState subscriptions = new SubscriptionState(autoResetStrategy); + ConsumerNetworkClient consumerClient = new ConsumerNetworkClient(client, metadata, time, retryBackoffMs, requestTimeoutMs); + ConsumerCoordinator consumerCoordinator = new ConsumerCoordinator( + consumerClient, + groupId, + sessionTimeoutMs, + heartbeatIntervalMs, + assignors, + metadata, + subscriptions, + metrics, + metricGroupPrefix, + time, + retryBackoffMs, + defaultCommitCallback, + autoCommitEnabled, + autoCommitIntervalMs, + interceptors, + excludeInternalTopics); + + Fetcher fetcher = new Fetcher<>( + consumerClient, + minBytes, + maxWaitMs, + fetchSize, + maxPollRecords, + checkCrcs, + keyDeserializer, + valueDeserializer, + metadata, + subscriptions, + metrics, + metricGroupPrefix, + time, + retryBackoffMs); + + return new KafkaConsumer<>( + clientId, + consumerCoordinator, + keyDeserializer, + valueDeserializer, + fetcher, + interceptors, + time, + consumerClient, + metrics, + subscriptions, + metadata, + retryBackoffMs, + requestTimeoutMs); + } } diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java index 49bff1031a21..8fad30f986b7 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java @@ -123,7 +123,7 @@ public void testFetchNormal() { subscriptions.seek(tp, 0); // normal fetch - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 0)); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); @@ -158,7 +158,7 @@ public void testFetchMaxPollRecords() { client.prepareResponse(matchesOffset(tp, 1), fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 0)); client.prepareResponse(matchesOffset(tp, 4), fetchResponse(this.nextRecords.buffer(), Errors.NONE.code(), 100L, 0)); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(2, records.size()); @@ -166,14 +166,14 @@ public void testFetchMaxPollRecords() { assertEquals(1, records.get(0).offset()); assertEquals(2, records.get(1).offset()); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(1, records.size()); assertEquals(4L, (long) subscriptions.position(tp)); assertEquals(3, records.get(0).offset()); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(2, records.size()); @@ -198,7 +198,7 @@ public void testFetchNonContinuousRecords() { subscriptions.seek(tp, 0); // normal fetch - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(records.buffer(), Errors.NONE.code(), 100L, 0)); 
consumerClient.poll(0); consumerRecords = fetcher.fetchedRecords().get(tp); @@ -223,7 +223,7 @@ public void testFetchRecordTooLarge() { records.close(); // resize the limit of the buffer to pretend it is only fetch-size large - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse((ByteBuffer) records.buffer().limit(this.fetchSize), Errors.NONE.code(), 100L, 0)); consumerClient.poll(0); fetcher.fetchedRecords(); @@ -235,7 +235,7 @@ public void testUnauthorizedTopic() { subscriptions.seek(tp, 0); // resize the limit of the buffer to pretend it is only fetch-size large - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.TOPIC_AUTHORIZATION_FAILED.code(), 100L, 0)); consumerClient.poll(0); try { @@ -252,7 +252,7 @@ public void testFetchDuringRebalance() { subscriptions.assignFromSubscribed(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); // Now the rebalance happens and fetch positions are cleared subscriptions.assignFromSubscribed(Arrays.asList(tp)); @@ -268,7 +268,7 @@ public void testInFlightFetchOnPausedPartition() { subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); subscriptions.pause(tp); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 0)); @@ -282,7 +282,7 @@ public void testFetchOnPausedPartition() { subscriptions.seek(tp, 0); subscriptions.pause(tp); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); assertTrue(client.requests().isEmpty()); } @@ -291,7 +291,7 @@ public void testFetchNotLeaderForPartition() { subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.NOT_LEADER_FOR_PARTITION.code(), 100L, 0)); consumerClient.poll(0); assertEquals(0, fetcher.fetchedRecords().size()); @@ -303,7 +303,7 @@ public void testFetchUnknownTopicOrPartition() { subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.UNKNOWN_TOPIC_OR_PARTITION.code(), 100L, 0)); consumerClient.poll(0); assertEquals(0, fetcher.fetchedRecords().size()); @@ -315,7 +315,7 @@ public void testFetchOffsetOutOfRange() { subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.OFFSET_OUT_OF_RANGE.code(), 100L, 0)); consumerClient.poll(0); assertTrue(subscriptions.isOffsetResetNeeded(tp)); @@ -328,7 +328,7 @@ public void testFetchedRecordsAfterSeek() { subscriptionsNoAutoReset.assignFromUser(Arrays.asList(tp)); subscriptionsNoAutoReset.seek(tp, 0); - fetcherNoAutoReset.sendFetches(cluster); + fetcherNoAutoReset.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.OFFSET_OUT_OF_RANGE.code(), 100L, 0)); consumerClient.poll(0); assertFalse(subscriptionsNoAutoReset.isOffsetResetNeeded(tp)); @@ -341,7 +341,7 @@ public void testFetchOffsetOutOfRangeException() { subscriptionsNoAutoReset.assignFromUser(Arrays.asList(tp)); subscriptionsNoAutoReset.seek(tp, 0); - fetcherNoAutoReset.sendFetches(cluster); + fetcherNoAutoReset.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), 
Errors.OFFSET_OUT_OF_RANGE.code(), 100L, 0)); consumerClient.poll(0); assertFalse(subscriptionsNoAutoReset.isOffsetResetNeeded(tp)); @@ -360,7 +360,7 @@ public void testFetchDisconnected() { subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 0); - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 0), true); consumerClient.poll(0); assertEquals(0, fetcher.fetchedRecords().size()); @@ -519,7 +519,7 @@ public void testQuotaMetrics() throws Exception { } this.records.close(); } - fetcher.sendFetches(cluster); + fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 100 * i)); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); From 4d0b06e838cfddd7c8c3a1a92287afba6d5ee3e9 Mon Sep 17 00:00:00 2001 From: Dustin Cote Date: Fri, 6 May 2016 16:33:01 +0100 Subject: [PATCH 094/267] KAFKA-3666; Update links for new consumer API Pull request to update the consumer API links in the docs. Author: Dustin Cote Reviewers: Ismael Juma Closes #1331 from cotedm/KAFKA-3666 (cherry picked from commit 78de891ace41547d83901c3c18a9a01a517ccba1) Signed-off-by: Ismael Juma --- docs/ops.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ops.html b/docs/ops.html index 8b1cc234c640..f64a701717d2 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -134,7 +134,7 @@

Checking consumer position
-Note, however, after 0.9.0, the kafka.tools.ConsumerOffsetChecker tool is deprecated and you should use the kafka.admin.ConsumerGroupCommand (or the bin/kafka-consumer-groups.sh script) to manage consumer groups, including consumers created with the new consumer-groups API.
+Note, however, after 0.9.0, the kafka.tools.ConsumerOffsetChecker tool is deprecated and you should use the kafka.admin.ConsumerGroupCommand (or the bin/kafka-consumer-groups.sh script) to manage consumer groups, including consumers created with the new consumer API.

 Managing Consumer Groups

@@ -156,7 +156,7 @@ Managing Consumer Groups
-When you're using the new consumer-groups API where the broker handles coordination of partition handling and rebalance, you can manage the groups with the "--new-consumer" flags:
+When you're using the new consumer API where the broker handles coordination of partition handling and rebalance, you can manage the groups with the "--new-consumer" flags:
  > bin/kafka-consumer-groups.sh --new-consumer --bootstrap-server broker1:9092 --list
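As a point of reference (this sketch is not part of the patch above), the groups managed by kafka-consumer-groups.sh --new-consumer are those created by consumers built on the new Java consumer API. A minimal sketch of such a consumer follows; the broker address matches the docs example, while the group id "my-group" and topic "my-topic" are placeholder names chosen for illustration.

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class NewConsumerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "broker1:9092");
        props.put("group.id", "my-group");                 // this group becomes visible to kafka-consumer-groups.sh
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        // the new consumer lets the broker-side coordinator handle partition assignment and rebalancing
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("my-topic"));
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (ConsumerRecord<String, String> record : records)
                System.out.printf("%s-%d@%d: %s%n",
                        record.topic(), record.partition(), record.offset(), record.value());
        }
    }
}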
    
    From cda571dfc0a918c7125b09c94c175a5e6c09e798 Mon Sep 17 00:00:00 2001
    From: Yuto Kawamura 
    Date: Fri, 6 May 2016 08:49:44 -0700
    Subject: [PATCH 095/267] KAFKA-3616: Make kafka producers/consumers injectable
     for KafkaStreams
    
    Ticket: https://issues.apache.org/jira/browse/KAFKA-3616
    
    Author: Yuto Kawamura 
    
    Reviewers: Guozhang Wang 
    
    Closes #1264 from kawamuray/kafka-3616-inject-clients
    ---
     .../kafka/streams/KafkaClientSupplier.java    | 47 ++++++++++++++++
     .../apache/kafka/streams/KafkaStreams.java    | 17 +++++-
     .../internals/DefaultKafkaClientSupplier.java | 45 +++++++++++++++
     .../processor/internals/StreamThread.java     | 56 ++++---------------
     .../StreamPartitionAssignorTest.java          | 56 ++++---------------
     .../processor/internals/StreamThreadTest.java | 42 +++++++-------
     .../apache/kafka/test/MockClientSupplier.java | 52 +++++++++++++++++
     7 files changed, 201 insertions(+), 114 deletions(-)
     create mode 100644 streams/src/main/java/org/apache/kafka/streams/KafkaClientSupplier.java
     create mode 100644 streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultKafkaClientSupplier.java
     create mode 100644 streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java
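Before the diffs, a short sketch of how the injectable clients are meant to be used. It assumes the KafkaClientSupplier interface as introduced by this patch, with byte[]-keyed Producer/Consumer return types and Map<String, Object> configs (the generic parameters are flattened out of the diff text below); the class name LoggingClientSupplier is purely illustrative and not part of the patch.

import java.util.Map;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.streams.KafkaClientSupplier;

public class LoggingClientSupplier implements KafkaClientSupplier {
    @Override
    public Producer<byte[], byte[]> getProducer(Map<String, Object> config) {
        // a custom supplier can log, tweak, or wrap the configs before creating the real client
        System.out.println("creating producer with config: " + config);
        return new KafkaProducer<>(config, new ByteArraySerializer(), new ByteArraySerializer());
    }

    @Override
    public Consumer<byte[], byte[]> getConsumer(Map<String, Object> config) {
        // consumer for the source topics of the topology
        return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer());
    }

    @Override
    public Consumer<byte[], byte[]> getRestoreConsumer(Map<String, Object> config) {
        // consumer used to restore state stores from internal changelog topics
        return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer());
    }
}

// With the new three-argument constructor added in this patch, the supplier is passed in alongside
// the TopologyBuilder and StreamsConfig:
//     KafkaStreams streams = new KafkaStreams(builder, new StreamsConfig(props), new LoggingClientSupplier());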
    
    diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaClientSupplier.java b/streams/src/main/java/org/apache/kafka/streams/KafkaClientSupplier.java
    new file mode 100644
    index 000000000000..e0312f92304a
    --- /dev/null
    +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaClientSupplier.java
    @@ -0,0 +1,47 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + * 

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams; + +import java.util.Map; + +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.producer.Producer; + +public interface KafkaClientSupplier { + /** + * Creates an instance of Producer which is used to produce records. + * @param config producer config which supplied by {@link StreamsConfig} given to {@link KafkaStreams} + * @return an instance of kafka Producer + */ + Producer getProducer(Map config); + + /** + * Creates an instance of Consumer which is used to consume records of source topics. + * @param config consumer config which supplied by {@link StreamsConfig} given to {@link KafkaStreams} + * @return an instance of kafka Consumer + */ + Consumer getConsumer(Map config); + + /** + * Creates an instance of Consumer which is used to consume records of internal topics. + * @param config restore consumer config which supplied by {@link StreamsConfig} given to + * {@link KafkaStreams} + * @return an instance of kafka Consumer + */ + Consumer getRestoreConsumer(Map config); +} diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java index 45024f224010..b3e3f5d6f059 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java @@ -25,6 +25,7 @@ import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.processor.TopologyBuilder; +import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier; import org.apache.kafka.streams.processor.internals.StreamThread; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -104,7 +105,7 @@ public class KafkaStreams { * @param props properties for the {@link StreamsConfig} */ public KafkaStreams(TopologyBuilder builder, Properties props) { - this(builder, new StreamsConfig(props)); + this(builder, new StreamsConfig(props), new DefaultKafkaClientSupplier()); } /** @@ -114,6 +115,18 @@ public KafkaStreams(TopologyBuilder builder, Properties props) { * @param config the stream configs */ public KafkaStreams(TopologyBuilder builder, StreamsConfig config) { + this(builder, config, new DefaultKafkaClientSupplier()); + } + + /** + * Construct the stream instance. 
+ * + * @param builder the processor topology builder specifying the computational logic + * @param config the stream configs + * @param clientSupplier the kafka clients supplier which provides underlying producer and consumer clients + * for this {@link KafkaStreams} instance + */ + public KafkaStreams(TopologyBuilder builder, StreamsConfig config, KafkaClientSupplier clientSupplier) { // create the metrics Time time = new SystemTime(); @@ -138,7 +151,7 @@ public KafkaStreams(TopologyBuilder builder, StreamsConfig config) { this.threads = new StreamThread[config.getInt(StreamsConfig.NUM_STREAM_THREADS_CONFIG)]; for (int i = 0; i < this.threads.length; i++) { - this.threads[i] = new StreamThread(builder, config, applicationId, clientId, processId, metrics, time); + this.threads[i] = new StreamThread(builder, config, clientSupplier, applicationId, clientId, processId, metrics, time); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultKafkaClientSupplier.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultKafkaClientSupplier.java new file mode 100644 index 000000000000..be17008b77fd --- /dev/null +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultKafkaClientSupplier.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams.processor.internals; + +import java.util.Map; + +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.streams.KafkaClientSupplier; + +public class DefaultKafkaClientSupplier implements KafkaClientSupplier { + @Override + public Producer getProducer(Map config) { + return new KafkaProducer<>(config, new ByteArraySerializer(), new ByteArraySerializer()); + } + + @Override + public Consumer getConsumer(Map config) { + return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer()); + } + + @Override + public Consumer getRestoreConsumer(Map config) { + return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer()); + } +} diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index d4cb78c30fcf..72eeef54266d 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -22,8 +22,6 @@ import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.MetricName; @@ -35,10 +33,9 @@ import org.apache.kafka.common.metrics.stats.Count; import org.apache.kafka.common.metrics.stats.Max; import org.apache.kafka.common.metrics.stats.Rate; -import org.apache.kafka.common.serialization.ByteArrayDeserializer; -import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.KafkaClientSupplier; import org.apache.kafka.streams.StreamsConfig; import org.apache.kafka.streams.StreamsMetrics; import org.apache.kafka.streams.errors.StreamsException; @@ -151,24 +148,12 @@ public void onPartitionsRevoked(Collection assignment) { public StreamThread(TopologyBuilder builder, StreamsConfig config, + KafkaClientSupplier clientSupplier, String applicationId, String clientId, UUID processId, Metrics metrics, Time time) { - this(builder, config, null , null, null, applicationId, clientId, processId, metrics, time); - } - - StreamThread(TopologyBuilder builder, - StreamsConfig config, - Producer producer, - Consumer consumer, - Consumer restoreConsumer, - String applicationId, - String clientId, - UUID processId, - Metrics metrics, - Time time) { super("StreamThread-" + STREAM_THREAD_ID_SEQUENCE.getAndIncrement()); this.applicationId = applicationId; @@ 
-180,9 +165,16 @@ public StreamThread(TopologyBuilder builder, this.partitionGrouper = config.getConfiguredInstance(StreamsConfig.PARTITION_GROUPER_CLASS_CONFIG, PartitionGrouper.class); // set the producer and consumer clients - this.producer = (producer != null) ? producer : createProducer(); - this.consumer = (consumer != null) ? consumer : createConsumer(); - this.restoreConsumer = (restoreConsumer != null) ? restoreConsumer : createRestoreConsumer(); + String threadName = getName(); + String threadClientId = clientId + "-" + threadName; + log.info("Creating producer client for stream thread [{}]", threadName); + this.producer = clientSupplier.getProducer(config.getProducerConfigs(threadClientId)); + log.info("Creating consumer client for stream thread [{}]", threadName); + this.consumer = clientSupplier.getConsumer( + config.getConsumerConfigs(this, applicationId, threadClientId)); + log.info("Creating restore consumer client for stream thread [{}]", threadName); + this.restoreConsumer = clientSupplier.getRestoreConsumer( + config.getRestoreConsumerConfigs(threadClientId)); // initialize the task list this.activeTasks = new HashMap<>(); @@ -213,30 +205,6 @@ public void partitionAssignor(StreamPartitionAssignor partitionAssignor) { this.partitionAssignor = partitionAssignor; } - private Producer createProducer() { - String threadName = this.getName(); - log.info("Creating producer client for stream thread [" + threadName + "]"); - return new KafkaProducer<>(config.getProducerConfigs(this.clientId + "-" + threadName), - new ByteArraySerializer(), - new ByteArraySerializer()); - } - - private Consumer createConsumer() { - String threadName = this.getName(); - log.info("Creating consumer client for stream thread [" + threadName + "]"); - return new KafkaConsumer<>(config.getConsumerConfigs(this, this.applicationId, this.clientId + "-" + threadName), - new ByteArrayDeserializer(), - new ByteArrayDeserializer()); - } - - private Consumer createRestoreConsumer() { - String threadName = this.getName(); - log.info("Creating restore consumer client for stream thread [" + threadName + "]"); - return new KafkaConsumer<>(config.getRestoreConsumerConfigs(this.clientId + "-" + threadName), - new ByteArrayDeserializer(), - new ByteArrayDeserializer()); - } - /** * Execute the stream processors * @throws KafkaException for any Kafka-related exceptions diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java index 3e8b110fde9b..17bda54bea5f 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java @@ -18,15 +18,12 @@ package org.apache.kafka.streams.processor.internals; import org.apache.kafka.clients.consumer.MockConsumer; -import org.apache.kafka.clients.consumer.OffsetResetStrategy; import org.apache.kafka.clients.consumer.internals.PartitionAssignor; -import org.apache.kafka.clients.producer.MockProducer; import org.apache.kafka.common.Cluster; import org.apache.kafka.common.Node; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.metrics.Metrics; -import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.common.utils.Utils; import 
org.apache.kafka.streams.StreamsConfig; @@ -34,6 +31,7 @@ import org.apache.kafka.streams.processor.TopologyBuilder; import org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo; import org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo; +import org.apache.kafka.test.MockClientSupplier; import org.apache.kafka.test.MockProcessorSupplier; import org.apache.kafka.test.MockStateStoreSupplier; import org.apache.kafka.test.MockTimestampExtractor; @@ -98,17 +96,11 @@ private Properties configProps() { }; } - private ByteArraySerializer serializer = new ByteArraySerializer(); - @SuppressWarnings("unchecked") @Test public void testSubscription() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); @@ -122,7 +114,7 @@ public void testSubscription() throws Exception { String clientId = "client-id"; UUID processId = UUID.randomUUID(); - StreamThread thread = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", clientId, processId, new Metrics(), new SystemTime()) { + StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), "test", clientId, processId, new Metrics(), new SystemTime()) { @Override public Set prevTasks() { return prevTasks; @@ -152,10 +144,6 @@ public Set cachedTasks() { public void testAssignBasic() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); @@ -173,9 +161,8 @@ public void testAssignBasic() throws Exception { UUID uuid1 = UUID.randomUUID(); UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - String client2 = "client2"; - StreamThread thread10 = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); @@ -224,10 +211,6 @@ public void testAssignBasic() throws Exception { public void testAssignWithNewTasks() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); @@ -244,9 +227,8 @@ public void testAssignWithNewTasks() throws Exception { UUID uuid1 = UUID.randomUUID(); UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - String client2 = "client2"; - 
StreamThread thread10 = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); @@ -288,10 +270,6 @@ public void testAssignWithNewTasks() throws Exception { public void testAssignWithStates() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); @@ -316,9 +294,8 @@ public void testAssignWithStates() throws Exception { UUID uuid1 = UUID.randomUUID(); UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - String client2 = "client2"; - StreamThread thread10 = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); @@ -354,10 +331,6 @@ public void testAssignWithStandbyReplicas() throws Exception { props.setProperty(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, "1"); StreamsConfig config = new StreamsConfig(props); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); @@ -376,9 +349,8 @@ public void testAssignWithStandbyReplicas() throws Exception { UUID uuid1 = UUID.randomUUID(); UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - String client2 = "client2"; - StreamThread thread10 = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); @@ -470,10 +442,6 @@ private AssignmentInfo checkAssignment(PartitionAssignor.Assignment assignment) public void testOnAssignment() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopicPartition t2p3 = new TopicPartition("topic2", 3); TopologyBuilder builder = new TopologyBuilder(); @@ -484,7 +452,7 @@ public void testOnAssignment() throws Exception { UUID uuid = UUID.randomUUID(); String client1 = "client1"; - 
StreamThread thread = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid, new Metrics(), new SystemTime()); + StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread, "test", client1)); @@ -507,10 +475,6 @@ public void testOnAssignment() throws Exception { public void testAssignWithInternalTopics() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addInternalTopic("topicX"); builder.addSource("source1", "topic1"); @@ -522,14 +486,14 @@ public void testAssignWithInternalTopics() throws Exception { Set allTasks = Utils.mkSet(task0, task1, task2); UUID uuid1 = UUID.randomUUID(); - UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - StreamThread thread10 = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, "test", client1, uuid1, new Metrics(), new SystemTime()); + MockClientSupplier clientSupplier = new MockClientSupplier(); + StreamThread thread10 = new StreamThread(builder, config, clientSupplier, "test", client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); - MockInternalTopicManager internalTopicManager = new MockInternalTopicManager(mockRestoreConsumer); + MockInternalTopicManager internalTopicManager = new MockInternalTopicManager(clientSupplier.restoreConsumer); partitionAssignor.setInternalTopicManager(internalTopicManager); Map subscriptions = new HashMap<>(); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java index e387a59d68e7..4ae31e4ac655 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java @@ -19,27 +19,25 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; -import org.apache.kafka.clients.consumer.MockConsumer; -import org.apache.kafka.clients.consumer.OffsetResetStrategy; import org.apache.kafka.clients.consumer.internals.PartitionAssignor; -import org.apache.kafka.clients.producer.MockProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.common.Cluster; import org.apache.kafka.common.Node; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.metrics.Metrics; -import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.kafka.common.utils.MockTime; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.common.utils.Utils; import 
org.apache.kafka.streams.StreamsConfig; import org.apache.kafka.streams.processor.TaskId; import org.apache.kafka.streams.processor.TopologyBuilder; +import org.apache.kafka.test.MockClientSupplier; import org.apache.kafka.test.MockProcessorSupplier; import org.apache.kafka.test.MockTimestampExtractor; import org.junit.Test; @@ -148,28 +146,22 @@ protected void initializeOffsetLimits() { } } - private ByteArraySerializer serializer = new ByteArraySerializer(); - @SuppressWarnings("unchecked") @Test public void testPartitionAssignmentChange() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - final MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); - TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); builder.addSource("source3", "topic3"); builder.addProcessor("processor", new MockProcessorSupplier(), "source2", "source3"); - StreamThread thread = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, applicationId, clientId, processId, new Metrics(), new SystemTime()) { + StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), applicationId, clientId, processId, new Metrics(), new SystemTime()) { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { ProcessorTopology topology = builder.build("X", id.topicGroupId); - return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, mockRestoreConsumer, config); + return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; @@ -279,15 +271,12 @@ public void testMaybeClean() throws Exception { stateDir3.mkdir(); extraDir.mkdir(); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - final MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); MockTime mockTime = new MockTime(); TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); - StreamThread thread = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, applicationId, clientId, processId, new Metrics(), mockTime) { + StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), applicationId, clientId, processId, new Metrics(), mockTime) { @Override public void maybeClean() { super.maybeClean(); @@ -296,7 +285,7 @@ public void maybeClean() { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { ProcessorTopology topology = builder.build("X", id.topicGroupId); - return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, mockRestoreConsumer, config); + return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; @@ -401,15 +390,12 @@ public void testMaybeCommit() throws Exception { StreamsConfig config = new StreamsConfig(props); - MockProducer producer = new MockProducer<>(true, serializer, serializer); - MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); - final MockConsumer mockRestoreConsumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); MockTime mockTime = new 
MockTime(); TopologyBuilder builder = new TopologyBuilder(); builder.addSource("source1", "topic1"); - StreamThread thread = new StreamThread(builder, config, producer, consumer, mockRestoreConsumer, applicationId, clientId, processId, new Metrics(), mockTime) { + StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), applicationId, clientId, processId, new Metrics(), mockTime) { @Override public void maybeCommit() { super.maybeCommit(); @@ -418,7 +404,7 @@ public void maybeCommit() { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { ProcessorTopology topology = builder.build("X", id.topicGroupId); - return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, mockRestoreConsumer, config); + return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; @@ -475,6 +461,18 @@ protected StreamTask createStreamTask(TaskId id, Collection part } } + @Test + public void testInjectClients() { + TopologyBuilder builder = new TopologyBuilder(); + StreamsConfig config = new StreamsConfig(configProps()); + MockClientSupplier clientSupplier = new MockClientSupplier(); + StreamThread thread = new StreamThread(builder, config, clientSupplier, applicationId, + clientId, processId, new Metrics(), new MockTime()); + assertSame(clientSupplier.producer, thread.producer); + assertSame(clientSupplier.consumer, thread.consumer); + assertSame(clientSupplier.restoreConsumer, thread.restoreConsumer); + } + private void initPartitionGrouper(StreamsConfig config, StreamThread thread) { StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); diff --git a/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java b/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java new file mode 100644 index 000000000000..3861ff854dd0 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kafka.test; + +import java.util.Map; + +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.MockConsumer; +import org.apache.kafka.clients.consumer.OffsetResetStrategy; +import org.apache.kafka.clients.producer.MockProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.streams.KafkaClientSupplier; + +public class MockClientSupplier implements KafkaClientSupplier { + private static final ByteArraySerializer BYTE_ARRAY_SERIALIZER = new ByteArraySerializer(); + + public final MockProducer producer = + new MockProducer<>(true, BYTE_ARRAY_SERIALIZER, BYTE_ARRAY_SERIALIZER); + public final MockConsumer consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); + public final MockConsumer restoreConsumer = new MockConsumer<>(OffsetResetStrategy.LATEST); + + @Override + public Producer getProducer(Map config) { + return producer; + } + + @Override + public Consumer getConsumer(Map config) { + return consumer; + } + + @Override + public Consumer getRestoreConsumer(Map config) { + return restoreConsumer; + } +} From aabf825145645828075f341eba1483384df18f26 Mon Sep 17 00:00:00 2001 From: Geoff Anderson Date: Fri, 6 May 2016 11:10:27 -0700 Subject: [PATCH 096/267] KAFKA-3592: System test - configurable paths This patch adds logic for the following: - remove hard-coded paths to various scripts and jars in kafkatest service classes - provide a mechanism for overriding path resolution logic with a "pluggable" path resolver class Author: Geoff Anderson Reviewers: Ewen Cheslack-Postava Closes #1245 from granders/configurable-install-path (cherry picked from commit 54092c12ed276b4bf91741e3c7fd315443f3c0b1) Signed-off-by: Ewen Cheslack-Postava --- .gitignore | 1 + tests/README.md | 15 ++ .../benchmarks/core/benchmark_test.py | 9 +- tests/kafkatest/directory_layout/__init__.py | 14 ++ .../kafkatest/directory_layout/kafka_path.py | 137 ++++++++++++++++++ .../sanity_checks/test_console_consumer.py | 18 +-- .../sanity_checks/test_kafka_version.py | 4 +- .../test_performance_services.py | 6 +- .../sanity_checks/test_verifiable_producer.py | 6 +- tests/kafkatest/services/connect.py | 18 ++- tests/kafkatest/services/console_consumer.py | 19 ++- tests/kafkatest/services/kafka/directory.py | 32 ---- tests/kafkatest/services/kafka/kafka.py | 74 ++++++---- .../services/kafka_log4j_appender.py | 8 +- tests/kafkatest/services/mirror_maker.py | 13 +- tests/kafkatest/services/monitor/jmx.py | 9 +- .../performance/consumer_performance.py | 10 +- .../performance/end_to_end_latency.py | 15 +- .../services/performance/performance.py | 3 +- .../performance/producer_performance.py | 21 +-- .../services/replica_verification_tool.py | 8 +- .../kafkatest/services/security/kafka_acls.py | 10 +- tests/kafkatest/services/security/minikdc.py | 37 ++--- .../services/simple_consumer_shell.py | 8 +- tests/kafkatest/services/streams.py | 13 +- .../kafkatest/services/verifiable_consumer.py | 18 +-- .../kafkatest/services/verifiable_producer.py | 29 ++-- tests/kafkatest/services/zookeeper.py | 25 ++-- .../client/message_format_change_test.py | 12 +- .../compatibility_test_new_broker_test.py | 13 +- .../core/security_rolling_upgrade_test.py | 3 +- tests/kafkatest/tests/core/upgrade_test.py | 9 +- .../core/zookeeper_security_upgrade_test.py | 4 +- .../kafkatest/{services/kafka => }/version.py | 23 ++- tests/setup.cfg | 30 ++++ tests/setup.py | 29 +++- tests/unit/__init__.py | 
14 ++ tests/unit/directory_layout/__init__.py | 14 ++ .../directory_layout/check_project_paths.py | 90 ++++++++++++ tests/unit/setup.cfg | 23 +++ tests/unit/version/__init__.py | 15 ++ tests/unit/version/check_version.py | 33 +++++ 42 files changed, 649 insertions(+), 243 deletions(-) create mode 100644 tests/kafkatest/directory_layout/__init__.py create mode 100644 tests/kafkatest/directory_layout/kafka_path.py delete mode 100644 tests/kafkatest/services/kafka/directory.py rename tests/kafkatest/{services/kafka => }/version.py (72%) create mode 100644 tests/setup.cfg create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/directory_layout/__init__.py create mode 100644 tests/unit/directory_layout/check_project_paths.py create mode 100644 tests/unit/setup.cfg create mode 100644 tests/unit/version/__init__.py create mode 100644 tests/unit/version/check_version.py diff --git a/.gitignore b/.gitignore index 50e1f85f074d..73972e6a4f36 100644 --- a/.gitignore +++ b/.gitignore @@ -37,5 +37,6 @@ results tests/results .ducktape tests/.ducktape +.cache docs/generated/ diff --git a/tests/README.md b/tests/README.md index 143711d8b492..8a779a41d4db 100644 --- a/tests/README.md +++ b/tests/README.md @@ -134,3 +134,18 @@ the test driver machine. * To halt your workers without destroying persistent state, run `vagrant halt`. Run `vagrant destroy -f` to destroy all traces of your workers. +Unit Tests +---------- +The system tests have unit tests! The various services in the python `kafkatest` module are reasonably complex, and intended to be reusable. Hence we have unit tests +for the system service classes. + +Where are the unit tests? +* The kafkatest unit tests are located under kafka/tests/unit + +How do I run the unit tests? +* cd kafka/tests # The base system test directory +* python setup.py test + +How can I add a unit test? +* Follow the naming conventions - module name starts with "check", class name begins with "Check", test method name begins with "check" +* These naming conventions are defined in "setup.cfg". We use "check" to distinguish unit tests from system tests, which use "test" in the various names. diff --git a/tests/kafkatest/benchmarks/core/benchmark_test.py b/tests/kafkatest/benchmarks/core/benchmark_test.py index 83f4b2a23adf..4dbf902432cb 100644 --- a/tests/kafkatest/benchmarks/core/benchmark_test.py +++ b/tests/kafkatest/benchmarks/core/benchmark_test.py @@ -13,16 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from ducktape.mark import matrix +from ducktape.mark import parametrize from ducktape.services.service import Service from ducktape.tests.test import Test -from ducktape.mark import parametrize -from ducktape.mark import matrix -from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import TRUNK, KafkaVersion from kafkatest.services.performance import ProducerPerformanceService, EndToEndLatencyService, ConsumerPerformanceService, throughput, latency, compute_aggregate_throughput - +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.version import TRUNK, KafkaVersion TOPIC_REP_ONE = "topic-replication-factor-one" TOPIC_REP_THREE = "topic-replication-factor-three" diff --git a/tests/kafkatest/directory_layout/__init__.py b/tests/kafkatest/directory_layout/__init__.py new file mode 100644 index 000000000000..ec2014340d78 --- /dev/null +++ b/tests/kafkatest/directory_layout/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/kafkatest/directory_layout/kafka_path.py b/tests/kafkatest/directory_layout/kafka_path.py new file mode 100644 index 000000000000..0e60affa2426 --- /dev/null +++ b/tests/kafkatest/directory_layout/kafka_path.py @@ -0,0 +1,137 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import os + +from kafkatest.version import get_version, KafkaVersion, TRUNK + + +"""This module serves a few purposes: + +First, it gathers information about path layout in a single place, and second, it +makes the layout of the Kafka installation pluggable, so that users are not forced +to use the layout assumed in the KafkaPathResolver class. 
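A custom resolver is assumed to expose the same constructor signature and lookup methods as the KafkaSystemTestPathResolver defined below (home, bin, script, jar and scratch_space); that contract is inferred from this module rather than documented elsewhere. A minimal sketch of such a resolver, with the module path, class name and /usr/local install root chosen purely for illustration (matching the "my.path.resolver.CustomResolverClass" name used in the ducktape example that follows):

# my/path/resolver.py -- illustrative only, not part of this patch.
import os


class CustomResolverClass(object):
    # Sketch of a resolver for hosts where a single Kafka build lives under /usr/local/kafka.
    INSTALL_ROOT = "/usr/local"  # assumed local convention, not a Kafka default

    def __init__(self, context, project="kafka"):
        self.context = context
        self.project = project

    def home(self, node_or_version=None):
        # One flat install per host in this sketch, so the requested version is ignored.
        return os.path.join(self.INSTALL_ROOT, self.project)

    def bin(self, node_or_version=None):
        return os.path.join(self.home(node_or_version), "bin")

    def script(self, script_name, node_or_version=None):
        return os.path.join(self.bin(node_or_version), script_name)

    def jar(self, jar_name, node_or_version=None):
        # A real resolver would map jar_name to a concrete glob, as the JARS table below does.
        return os.path.join(self.home(node_or_version), "libs", "*.jar")

    def scratch_space(self, service_instance):
        return os.path.join("/mnt", service_instance.service_id)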
+ +To run system tests using your own path resolver, use for example: + +ducktape --globals '{"kafka-path-resolver": "my.path.resolver.CustomResolverClass"}' +""" + +SCRATCH_ROOT = "/mnt" +KAFKA_INSTALL_ROOT = "/opt" +KAFKA_PATH_RESOLVER_KEY = "kafka-path-resolver" +KAFKA_PATH_RESOLVER = "kafkatest.directory_layout.kafka_path.KafkaSystemTestPathResolver" + +# Variables for jar path resolution +CORE_JAR_NAME = "core" +CORE_LIBS_JAR_NAME = "core-libs" +CORE_DEPENDANT_TEST_LIBS_JAR_NAME = "core-dependant-testlibs" +TOOLS_JAR_NAME = "tools" +TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME = "tools-dependant-libs" + +JARS = { + "trunk": { + CORE_JAR_NAME: "core/build/*/*.jar", + CORE_LIBS_JAR_NAME: "core/build/libs/*.jar", + CORE_DEPENDANT_TEST_LIBS_JAR_NAME: "core/build/dependant-testlibs/*.jar", + TOOLS_JAR_NAME: "tools/build/libs/kafka-tools*.jar", + TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME: "tools/build/dependant-libs*/*.jar" + } +} + + +def create_path_resolver(context, project="kafka"): + """Factory for generating a path resolver class + + This will first check for a fully qualified path resolver classname in context.globals. + + If present, construct a new instance, else default to KafkaSystemTestPathResolver + """ + assert project is not None + + if KAFKA_PATH_RESOLVER_KEY in context.globals: + resolver_fully_qualified_classname = context.globals[KAFKA_PATH_RESOLVER_KEY] + else: + resolver_fully_qualified_classname = KAFKA_PATH_RESOLVER + + # Using the fully qualified classname, import the resolver class + (module_name, resolver_class_name) = resolver_fully_qualified_classname.rsplit('.', 1) + cluster_mod = importlib.import_module(module_name) + path_resolver_class = getattr(cluster_mod, resolver_class_name) + path_resolver = path_resolver_class(context, project) + + return path_resolver + + +class KafkaPathResolverMixin(object): + """Mixin to automatically provide pluggable path resolution functionality to any class using it. + + Keep life simple, and don't add a constructor to this class: + Since use of a mixin entails multiple inheritence, it is *much* simpler to reason about the interaction of this + class with subclasses if we don't have to worry about method resolution order, constructor signatures etc. + """ + + @property + def path(self): + if not hasattr(self, "_path"): + setattr(self, "_path", create_path_resolver(self.context, "kafka")) + if hasattr(self.context, "logger") and self.context.logger is not None: + self.context.logger.debug("Using path resolver %s" % self._path.__class__.__name__) + + return self._path + + +class KafkaSystemTestPathResolver(object): + """Path resolver for Kafka system tests which assumes the following layout: + + /opt/kafka-trunk # Current version of kafka under test + /opt/kafka-0.9.0.1 # Example of an older version of kafka installed from tarball + /opt/kafka- # Other previous versions of kafka + ... 
+ """ + def __init__(self, context, project="kafka"): + self.context = context + self.project = project + + def home(self, node_or_version=TRUNK): + version = self._version(node_or_version) + home_dir = self.project + if version is not None: + home_dir += "-%s" % str(version) + + return os.path.join(KAFKA_INSTALL_ROOT, home_dir) + + def bin(self, node_or_version=TRUNK): + version = self._version(node_or_version) + return os.path.join(self.home(version), "bin") + + def script(self, script_name, node_or_version=TRUNK): + version = self._version(node_or_version) + return os.path.join(self.bin(version), script_name) + + def jar(self, jar_name, node_or_version=TRUNK): + version = self._version(node_or_version) + return os.path.join(self.home(version), JARS[str(version)][jar_name]) + + def scratch_space(self, service_instance): + return os.path.join(SCRATCH_ROOT, service_instance.service_id) + + def _version(self, node_or_version): + if isinstance(node_or_version, KafkaVersion): + return node_or_version + else: + return get_version(node_or_version) + diff --git a/tests/kafkatest/sanity_checks/test_console_consumer.py b/tests/kafkatest/sanity_checks/test_console_consumer.py index d6a152a10bce..773a56180aeb 100644 --- a/tests/kafkatest/sanity_checks/test_console_consumer.py +++ b/tests/kafkatest/sanity_checks/test_console_consumer.py @@ -13,21 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import time + +from ducktape.mark import matrix +from ducktape.mark import parametrize from ducktape.tests.test import Test from ducktape.utils.util import wait_until -from ducktape.mark import parametrize -from ducktape.mark import matrix -from kafkatest.services.zookeeper import ZookeeperService -from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import LATEST_0_8_2 from kafkatest.services.console_consumer import ConsoleConsumer -from kafkatest.utils.remote_account import line_count, file_exists +from kafkatest.services.kafka import KafkaService from kafkatest.services.verifiable_producer import VerifiableProducer -from kafkatest.services.security.security_config import SecurityConfig - - -import time +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.utils.remote_account import line_count, file_exists +from kafkatest.version import LATEST_0_8_2 class ConsoleConsumerTest(Test): diff --git a/tests/kafkatest/sanity_checks/test_kafka_version.py b/tests/kafkatest/sanity_checks/test_kafka_version.py index f5f5d5fe83f0..b33c59058272 100644 --- a/tests/kafkatest/sanity_checks/test_kafka_version.py +++ b/tests/kafkatest/sanity_checks/test_kafka_version.py @@ -15,10 +15,10 @@ from ducktape.tests.test import Test -from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService, config_property -from kafkatest.services.kafka.version import LATEST_0_8_2, TRUNK +from kafkatest.services.zookeeper import ZookeeperService from kafkatest.utils import is_version +from kafkatest.version import LATEST_0_8_2, TRUNK class KafkaVersionTest(Test): diff --git a/tests/kafkatest/sanity_checks/test_performance_services.py b/tests/kafkatest/sanity_checks/test_performance_services.py index 16d5d32132fe..94a61bc89f4f 100644 --- a/tests/kafkatest/sanity_checks/test_performance_services.py +++ b/tests/kafkatest/sanity_checks/test_performance_services.py @@ -13,14 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -from ducktape.tests.test import Test from ducktape.mark import parametrize +from ducktape.tests.test import Test -from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import TRUNK, LATEST_0_8_2, LATEST_0_9, KafkaVersion from kafkatest.services.performance import ProducerPerformanceService, ConsumerPerformanceService, EndToEndLatencyService from kafkatest.services.performance import latency, compute_aggregate_throughput +from kafkatest.services.zookeeper import ZookeeperService +from kafkatest.version import TRUNK, LATEST_0_8_2, LATEST_0_9, KafkaVersion class PerformanceServiceTest(Test): diff --git a/tests/kafkatest/sanity_checks/test_verifiable_producer.py b/tests/kafkatest/sanity_checks/test_verifiable_producer.py index f1bc2a0bdc7d..23932f3f27cd 100644 --- a/tests/kafkatest/sanity_checks/test_verifiable_producer.py +++ b/tests/kafkatest/sanity_checks/test_verifiable_producer.py @@ -14,15 +14,15 @@ # limitations under the License. +from ducktape.mark import parametrize from ducktape.tests.test import Test from ducktape.utils.util import wait_until -from ducktape.mark import parametrize -from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import LATEST_0_8_2, LATEST_0_9, TRUNK, KafkaVersion from kafkatest.services.verifiable_producer import VerifiableProducer +from kafkatest.services.zookeeper import ZookeeperService from kafkatest.utils import is_version +from kafkatest.version import LATEST_0_8_2, LATEST_0_9, TRUNK, KafkaVersion class TestVerifiableProducer(Test): diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index 51dade3471b5..1eb2dd58c646 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -13,14 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json +import os.path +import random +import signal + +import requests +from ducktape.errors import DucktapeError from ducktape.services.service import Service from ducktape.utils.util import wait_until -from ducktape.errors import DucktapeError -from kafkatest.services.kafka.directory import kafka_dir -import signal, random, requests, os.path, json +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin + -class ConnectServiceBase(Service): +class ConnectServiceBase(KafkaPathResolverMixin, Service): """Base class for Kafka Connect services providing some common settings and functionality""" PERSISTENT_ROOT = "/mnt/connect" @@ -156,7 +162,7 @@ def node(self): def start_cmd(self, node, connector_configs): cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE cmd += "export KAFKA_OPTS=%s; " % self.security_config.kafka_opts - cmd += "/opt/%s/bin/connect-standalone.sh %s " % (kafka_dir(node), self.CONFIG_FILE) + cmd += "%s %s " % (self.path.script("connect-standalone.sh", node), self.CONFIG_FILE) cmd += " ".join(connector_configs) cmd += " & echo $! 
>&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE) return cmd @@ -195,7 +201,7 @@ def __init__(self, context, num_nodes, kafka, files, offsets_topic="connect-offs def start_cmd(self, node): cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE cmd += "export KAFKA_OPTS=%s; " % self.security_config.kafka_opts - cmd += "/opt/%s/bin/connect-distributed.sh %s " % (kafka_dir(node), self.CONFIG_FILE) + cmd += "%s %s " % (self.path.script("connect-distributed.sh", node), self.CONFIG_FILE) cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE) return cmd diff --git a/tests/kafkatest/services/console_consumer.py b/tests/kafkatest/services/console_consumer.py index 9c7f56495c17..2bd093c1def9 100644 --- a/tests/kafkatest/services/console_consumer.py +++ b/tests/kafkatest/services/console_consumer.py @@ -13,17 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ducktape.utils.util import wait_until -from ducktape.services.background_thread import BackgroundThreadService - -from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK, LATEST_0_8_2, LATEST_0_9, V_0_10_0_0 -from kafkatest.services.monitor.jmx import JmxMixin - import itertools import os import subprocess +from ducktape.services.background_thread import BackgroundThreadService +from ducktape.utils.util import wait_until + +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin +from kafkatest.services.monitor.jmx import JmxMixin +from kafkatest.version import TRUNK, LATEST_0_8_2, LATEST_0_9, V_0_10_0_0 """ 0.8.2.1 ConsoleConsumer options @@ -66,7 +65,7 @@ """ -class ConsoleConsumer(JmxMixin, BackgroundThreadService): +class ConsoleConsumer(KafkaPathResolverMixin, JmxMixin, BackgroundThreadService): # Root directory for persistent output PERSISTENT_ROOT = "/mnt/console_consumer" STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stdout") @@ -165,7 +164,7 @@ def start_cmd(self, node): args['config_file'] = ConsoleConsumer.CONFIG_FILE args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE args['jmx_port'] = self.jmx_port - args['kafka_dir'] = kafka_dir(node) + args['console_consumer'] = self.path.script("kafka-console-consumer.sh", node) args['broker_list'] = self.kafka.bootstrap_servers(self.security_config.security_protocol) args['kafka_opts'] = self.security_config.kafka_opts @@ -173,7 +172,7 @@ def start_cmd(self, node): "export LOG_DIR=%(log_dir)s; " \ "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j_config)s\"; " \ "export KAFKA_OPTS=%(kafka_opts)s; " \ - "/opt/%(kafka_dir)s/bin/kafka-console-consumer.sh " \ + "%(console_consumer)s " \ "--topic %(topic)s --consumer.config %(config_file)s" % args if self.new_consumer: diff --git a/tests/kafkatest/services/kafka/directory.py b/tests/kafkatest/services/kafka/directory.py deleted file mode 100644 index 59af1fcad043..000000000000 --- a/tests/kafkatest/services/kafka/directory.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2015 Confluent Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# "trunk" installation of kafka -KAFKA_TRUNK = "kafka-trunk" - - -def kafka_dir(node=None): - """Return name of kafka directory for the given node. - - This provides a convenient way to support different versions of kafka or kafka tools running - on different nodes. - """ - if node is None: - return KAFKA_TRUNK - - if not hasattr(node, "version"): - return KAFKA_TRUNK - - return "kafka-" + str(node.version) \ No newline at end of file diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py index a74bb004c811..6ff7d0c7b553 100644 --- a/tests/kafkatest/services/kafka/kafka.py +++ b/tests/kafkatest/services/kafka/kafka.py @@ -13,28 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +import json +import os.path +import re +import signal +import subprocess +import time + from ducktape.services.service import Service from ducktape.utils.util import wait_until from config import KafkaConfig +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin from kafkatest.services.kafka import config_property -from kafkatest.services.kafka.version import TRUNK -from kafkatest.services.kafka.directory import kafka_dir, KAFKA_TRUNK - from kafkatest.services.monitor.jmx import JmxMixin -from kafkatest.services.security.security_config import SecurityConfig from kafkatest.services.security.minikdc import MiniKdc -import json -import re -import signal -import subprocess -import time -import os.path -import collections +from kafkatest.services.security.security_config import SecurityConfig +from kafkatest.version import TRUNK Port = collections.namedtuple('Port', ['name', 'number', 'open']) -class KafkaService(JmxMixin, Service): + +class KafkaService(KafkaPathResolverMixin, JmxMixin, Service): PERSISTENT_ROOT = "/mnt" STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "kafka.log") @@ -84,6 +85,7 @@ def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAI self.topics = topics self.minikdc = None self.authorizer_class_name = authorizer_class_name + # # In a heavily loaded and not very fast machine, it is # sometimes necessary to give more time for the zk client @@ -174,7 +176,11 @@ def start_cmd(self, node): cmd = "export JMX_PORT=%d; " % self.jmx_port cmd += "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG cmd += "export KAFKA_OPTS=%s; " % self.security_config.kafka_opts - cmd += "/opt/" + kafka_dir(node) + "/bin/kafka-server-start.sh %s 1>> %s 2>> %s &" % (KafkaService.CONFIG_FILE, KafkaService.STDOUT_CAPTURE, KafkaService.STDERR_CAPTURE) + cmd += "%s %s 1>> %s 2>> %s &" % \ + (self.path.script("kafka-server-start.sh", node), + KafkaService.CONFIG_FILE, + KafkaService.STDOUT_CAPTURE, + KafkaService.STDERR_CAPTURE) return cmd def start_node(self, node): @@ -239,8 +245,9 @@ def create_topic(self, topic_cfg, node=None): if node is None: node = self.nodes[0] self.logger.info("Creating topic %s with settings %s", topic_cfg["topic"], topic_cfg) + kafka_topic_script = self.path.script("kafka-topics.sh", node) - cmd = 
"/opt/%s/bin/kafka-topics.sh " % kafka_dir(node) + cmd = kafka_topic_script + " " cmd += "--zookeeper %(zk_connect)s --create --topic %(topic)s --partitions %(partitions)d --replication-factor %(replication)d" % { 'zk_connect': self.zk.connect_setting(), 'topic': topic_cfg.get("topic"), @@ -263,8 +270,8 @@ def create_topic(self, topic_cfg, node=None): def describe_topic(self, topic, node=None): if node is None: node = self.nodes[0] - cmd = "/opt/%s/bin/kafka-topics.sh --zookeeper %s --topic %s --describe" % \ - (kafka_dir(node), self.zk.connect_setting(), topic) + cmd = "%s --zookeeper %s --topic %s --describe" % \ + (self.path.script("kafka-topics.sh", node), self.zk.connect_setting(), topic) output = "" for line in node.account.ssh_capture(cmd): output += line @@ -274,8 +281,8 @@ def alter_message_format(self, topic, msg_format_version, node=None): if node is None: node = self.nodes[0] self.logger.info("Altering message format version for topic %s with format %s", topic, msg_format_version) - cmd = "/opt/%s/bin/kafka-configs.sh --zookeeper %s --entity-name %s --entity-type topics --alter --add-config message.format.version=%s" % \ - (kafka_dir(node), self.zk.connect_setting(), topic, msg_format_version) + cmd = "%s --zookeeper %s --entity-name %s --entity-type topics --alter --add-config message.format.version=%s" % \ + (self.path.script("kafka-configs.sh", node), self.zk.connect_setting(), topic, msg_format_version) self.logger.info("Running alter message format command...\n%s" % cmd) node.account.ssh(cmd) @@ -322,7 +329,7 @@ def verify_reassign_partitions(self, reassignment, node=None): # create command cmd = "echo %s > %s && " % (json_str, json_file) - cmd += "/opt/%s/bin/kafka-reassign-partitions.sh " % kafka_dir(node) + cmd += "%s " % self.path.script("kafka-reassign-partitions.sh", node) cmd += "--zookeeper %s " % self.zk.connect_setting() cmd += "--reassignment-json-file %s " % json_file cmd += "--verify " @@ -355,7 +362,7 @@ def execute_reassign_partitions(self, reassignment, node=None): # create command cmd = "echo %s > %s && " % (json_str, json_file) - cmd += "/opt/%s/bin/kafka-reassign-partitions.sh " % kafka_dir(node) + cmd += "%s " % self.path.script( "kafka-reassign-partitions.sh", node) cmd += "--zookeeper %s " % self.zk.connect_setting() cmd += "--reassignment-json-file %s " % json_file cmd += "--execute" @@ -386,8 +393,8 @@ def search_data_files(self, topic, messages): # Check each data file to see if it contains the messages we want for log in files: - cmd = "/opt/%s/bin/kafka-run-class.sh kafka.tools.DumpLogSegments --print-data-log --files %s " \ - "| grep -E \"%s\"" % (kafka_dir(node), log.strip(), payload_match) + cmd = "%s kafka.tools.DumpLogSegments --print-data-log --files %s | grep -E \"%s\"" % \ + (self.path.script("kafka-run-class.sh", node), log.strip(), payload_match) for line in node.account.ssh_capture(cmd, allow_fail=True): for val in messages: @@ -429,6 +436,7 @@ def list_consumer_groups(self, node=None, new_consumer=False, command_config=Non """ if node is None: node = self.nodes[0] + consumer_group_script = self.path.script("kafka-consumer-groups.sh", node) if command_config is None: command_config = "" @@ -436,11 +444,12 @@ def list_consumer_groups(self, node=None, new_consumer=False, command_config=Non command_config = "--command-config " + command_config if new_consumer: - cmd = "/opt/%s/bin/kafka-consumer-groups.sh --new-consumer --bootstrap-server %s %s --list" % \ - (kafka_dir(node), self.bootstrap_servers(self.security_protocol), command_config) 
+ cmd = "%s --new-consumer --bootstrap-server %s %s --list" % \ + (consumer_group_script, + self.bootstrap_servers(self.security_protocol), + command_config) else: - cmd = "/opt/%s/bin/kafka-consumer-groups.sh --zookeeper %s %s --list" % \ - (kafka_dir(node), self.zk.connect_setting(), command_config) + cmd = "%s --zookeeper %s %s --list" % (consumer_group_script, self.zk.connect_setting(), command_config) output = "" self.logger.debug(cmd) for line in node.account.ssh_capture(cmd): @@ -454,6 +463,7 @@ def describe_consumer_group(self, group, node=None, new_consumer=False, command_ """ if node is None: node = self.nodes[0] + consumer_group_script = self.path.script("kafka-consumer-groups.sh", node) if command_config is None: command_config = "" @@ -461,11 +471,11 @@ def describe_consumer_group(self, group, node=None, new_consumer=False, command_ command_config = "--command-config " + command_config if new_consumer: - cmd = "/opt/%s/bin/kafka-consumer-groups.sh --new-consumer --bootstrap-server %s %s --group %s --describe" % \ - (kafka_dir(node), self.bootstrap_servers(self.security_protocol), command_config, group) + cmd = "%s --new-consumer --bootstrap-server %s %s --group %s --describe" % \ + (consumer_group_script, self.bootstrap_servers(self.security_protocol), command_config, group) else: - cmd = "/opt/%s/bin/kafka-consumer-groups.sh --zookeeper %s %s --group %s --describe" % \ - (kafka_dir(node), self.zk.connect_setting(), command_config, group) + cmd = "%s --zookeeper %s %s --group %s --describe" % \ + (consumer_group_script, self.zk.connect_setting(), command_config, group) output = "" self.logger.debug(cmd) for line in node.account.ssh_capture(cmd): @@ -506,8 +516,8 @@ def controller(self): def get_offset_shell(self, topic, partitions, max_wait_ms, offsets, time): node = self.nodes[0] - cmd = "/opt/%s/bin/" % kafka_dir(node) - cmd += "kafka-run-class.sh kafka.tools.GetOffsetShell" + cmd = self.path.script("kafka-run-class.sh", node) + cmd += " kafka.tools.GetOffsetShell" cmd += " --topic %s --broker-list %s --max-wait-ms %s --offsets %s --time %s" % (topic, self.bootstrap_servers(self.security_protocol), max_wait_ms, offsets, time) if partitions: diff --git a/tests/kafkatest/services/kafka_log4j_appender.py b/tests/kafkatest/services/kafka_log4j_appender.py index c0af1a1df759..c50cab423eba 100644 --- a/tests/kafkatest/services/kafka_log4j_appender.py +++ b/tests/kafkatest/services/kafka_log4j_appender.py @@ -15,11 +15,11 @@ from ducktape.services.background_thread import BackgroundThreadService -from kafkatest.services.kafka.directory import kafka_dir +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin from kafkatest.services.security.security_config import SecurityConfig -class KafkaLog4jAppender(BackgroundThreadService): +class KafkaLog4jAppender(KafkaPathResolverMixin, BackgroundThreadService): logs = { "producer_log": { @@ -43,8 +43,8 @@ def _worker(self, idx, node): node.account.ssh(cmd) def start_cmd(self, node): - cmd = "/opt/%s/bin/" % kafka_dir(node) - cmd += "kafka-run-class.sh org.apache.kafka.tools.VerifiableLog4jAppender" + cmd = self.path.script("kafka-run-class.sh", node) + cmd += " org.apache.kafka.tools.VerifiableLog4jAppender" cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers(self.security_protocol)) if self.max_messages > 0: diff --git a/tests/kafkatest/services/mirror_maker.py b/tests/kafkatest/services/mirror_maker.py index cb4b2c1ac9a0..fdaf4c84f4dc 100644 --- a/tests/kafkatest/services/mirror_maker.py +++ 
b/tests/kafkatest/services/mirror_maker.py @@ -13,14 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import subprocess + from ducktape.services.service import Service from ducktape.utils.util import wait_until -from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.security.security_config import SecurityConfig - -import os -import subprocess +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin """ 0.8.2.1 MirrorMaker options @@ -56,7 +55,7 @@ """ -class MirrorMaker(Service): +class MirrorMaker(KafkaPathResolverMixin, Service): # Root directory for persistent output PERSISTENT_ROOT = "/mnt/mirror_maker" @@ -114,7 +113,7 @@ def start_cmd(self, node): cmd = "export LOG_DIR=%s;" % MirrorMaker.LOG_DIR cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % MirrorMaker.LOG4J_CONFIG cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts - cmd += " /opt/%s/bin/kafka-run-class.sh kafka.tools.MirrorMaker" % kafka_dir(node) + cmd += " %s kafka.tools.MirrorMaker" % self.path.script("kafka-run-class.sh", node) cmd += " --consumer.config %s" % MirrorMaker.CONSUMER_CONFIG cmd += " --producer.config %s" % MirrorMaker.PRODUCER_CONFIG cmd += " --offset.commit.interval.ms %s" % str(self.offset_commit_interval_ms) diff --git a/tests/kafkatest/services/monitor/jmx.py b/tests/kafkatest/services/monitor/jmx.py index 06c7dc82d91d..ea407b0ebf59 100644 --- a/tests/kafkatest/services/monitor/jmx.py +++ b/tests/kafkatest/services/monitor/jmx.py @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kafkatest.services.kafka.directory import kafka_dir class JmxMixin(object): """This mixin helps existing service subclasses start JmxTool on their worker nodes and collect jmx stats. - Note that this is not a service in its own right. + A couple things worth noting: + - this is not a service in its own right. + - we assume the service using JmxMixin also uses KafkaPathResolverMixin """ def __init__(self, num_nodes, jmx_object_names=None, jmx_attributes=[]): self.jmx_object_names = jmx_object_names @@ -38,8 +39,8 @@ def start_jmx_tool(self, idx, node): if self.started[idx-1] or self.jmx_object_names is None: return - cmd = "/opt/%s/bin/kafka-run-class.sh kafka.tools.JmxTool " \ - "--reporting-interval 1000 --jmx-url service:jmx:rmi:///jndi/rmi://127.0.0.1:%d/jmxrmi" % (kafka_dir(node), self.jmx_port) + cmd = "%s kafka.tools.JmxTool " % self.path.script("kafka-run-class.sh", node) + cmd += "--reporting-interval 1000 --jmx-url service:jmx:rmi:///jndi/rmi://127.0.0.1:%d/jmxrmi" % self.jmx_port for jmx_object_name in self.jmx_object_names: cmd += " --object-name %s" % jmx_object_name for jmx_attribute in self.jmx_attributes: diff --git a/tests/kafkatest/services/performance/consumer_performance.py b/tests/kafkatest/services/performance/consumer_performance.py index def27b15fa6d..b0f99d7ddedc 100644 --- a/tests/kafkatest/services/performance/consumer_performance.py +++ b/tests/kafkatest/services/performance/consumer_performance.py @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
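To make the JmxMixin note above concrete: a service that wants JMX stats now mixes in both KafkaPathResolverMixin and JmxMixin, so the self.path property is available when JmxTool is launched. A minimal sketch of that composition, following the pattern of the services changed in this patch (the service name, topic and command are hypothetical):

# Hypothetical service illustrating the mixin composition used throughout this patch.
from ducktape.services.background_thread import BackgroundThreadService

from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.monitor.jmx import JmxMixin


class ExampleJmxService(KafkaPathResolverMixin, JmxMixin, BackgroundThreadService):
    def __init__(self, context, num_nodes, kafka, jmx_object_names=None, jmx_attributes=[]):
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        BackgroundThreadService.__init__(self, context, num_nodes)
        self.kafka = kafka

    def _worker(self, idx, node):
        # Scripts are resolved through the pluggable path layout instead of a hard-coded
        # /opt/kafka-trunk prefix; JmxMixin resolves kafka-run-class.sh via the same self.path.
        cmd = "%s --describe --zookeeper %s --topic example" % \
            (self.path.script("kafka-topics.sh", node), self.kafka.zk.connect_setting())
        node.account.ssh(cmd)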
-from kafkatest.services.performance import PerformanceService -from kafkatest.services.security.security_config import SecurityConfig -from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK, V_0_9_0_0 import os +from kafkatest.services.performance import PerformanceService +from kafkatest.services.security.security_config import SecurityConfig +from kafkatest.version import TRUNK, V_0_9_0_0 + class ConsumerPerformanceService(PerformanceService): """ @@ -135,7 +135,7 @@ def start_cmd(self, node): cmd = "export LOG_DIR=%s;" % ConsumerPerformanceService.LOG_DIR cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % ConsumerPerformanceService.LOG4J_CONFIG - cmd += " /opt/%s/bin/kafka-consumer-perf-test.sh" % kafka_dir(node) + cmd += " %s" % self.path.script("kafka-consumer-perf-test.sh", node) for key, value in self.args.items(): cmd += " --%s %s" % (key, value) diff --git a/tests/kafkatest/services/performance/end_to_end_latency.py b/tests/kafkatest/services/performance/end_to_end_latency.py index 2007d6540b4a..917ac85bc5dc 100644 --- a/tests/kafkatest/services/performance/end_to_end_latency.py +++ b/tests/kafkatest/services/performance/end_to_end_latency.py @@ -13,13 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + from kafkatest.services.performance import PerformanceService from kafkatest.services.security.security_config import SecurityConfig +from kafkatest.version import TRUNK, V_0_9_0_0 -from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK, V_0_9_0_0 - -import os class EndToEndLatencyService(PerformanceService): @@ -77,16 +76,16 @@ def start_cmd(self, node): 'zk_connect': self.kafka.zk.connect_setting(), 'bootstrap_servers': self.kafka.bootstrap_servers(self.security_config.security_protocol), 'config_file': EndToEndLatencyService.CONFIG_FILE, - 'kafka_dir': kafka_dir(node) + 'kafka_run_class': self.path.script("kafka-run-class.sh", node) }) cmd = "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % EndToEndLatencyService.LOG4J_CONFIG if node.version >= V_0_9_0_0: - cmd += "KAFKA_OPTS=%(kafka_opts)s /opt/%(kafka_dir)s/bin/kafka-run-class.sh kafka.tools.EndToEndLatency " % args + cmd += "KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s kafka.tools.EndToEndLatency " % args cmd += "%(bootstrap_servers)s %(topic)s %(num_records)d %(acks)d %(message_bytes)d %(config_file)s" % args else: # Set fetch max wait to 0 to match behavior in later versions - cmd += "KAFKA_OPTS=%(kafka_opts)s /opt/%(kafka_dir)s/bin/kafka-run-class.sh kafka.tools.TestEndToEndLatency " % args + cmd += "KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s kafka.tools.TestEndToEndLatency " % args cmd += "%(bootstrap_servers)s %(zk_connect)s %(topic)s %(num_records)d 0 %(acks)d" % args cmd += " 2>> %(stderr)s | tee -a %(stdout)s" % {'stdout': EndToEndLatencyService.STDOUT_CAPTURE, @@ -104,7 +103,7 @@ def _worker(self, idx, node): if node.version >= V_0_9_0_0: client_config += "compression_type=%(compression_type)s" % self.args node.account.create_file(EndToEndLatencyService.CONFIG_FILE, client_config) - + self.security_config.setup_node(node) cmd = self.start_cmd(node) diff --git a/tests/kafkatest/services/performance/performance.py b/tests/kafkatest/services/performance/performance.py index dcc1a32783e1..d6d4f14d4e0f 100644 --- 
a/tests/kafkatest/services/performance/performance.py +++ b/tests/kafkatest/services/performance/performance.py @@ -14,9 +14,10 @@ # limitations under the License. from ducktape.services.background_thread import BackgroundThreadService +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin -class PerformanceService(BackgroundThreadService): +class PerformanceService(KafkaPathResolverMixin, BackgroundThreadService): def __init__(self, context, num_nodes, stop_timeout_sec=30): super(PerformanceService, self).__init__(context, num_nodes) diff --git a/tests/kafkatest/services/performance/producer_performance.py b/tests/kafkatest/services/performance/producer_performance.py index efd6c0934187..7131df14d417 100644 --- a/tests/kafkatest/services/performance/producer_performance.py +++ b/tests/kafkatest/services/performance/producer_performance.py @@ -13,16 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import subprocess + from ducktape.utils.util import wait_until +from kafkatest.directory_layout.kafka_path import TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME from kafkatest.services.monitor.jmx import JmxMixin from kafkatest.services.performance import PerformanceService from kafkatest.services.security.security_config import SecurityConfig -from kafkatest.services.kafka.directory import kafka_dir, KAFKA_TRUNK -from kafkatest.services.kafka.version import TRUNK, V_0_9_0_0 - -import os -import subprocess +from kafkatest.version import TRUNK, V_0_9_0_0 class ProducerPerformanceService(JmxMixin, PerformanceService): @@ -84,7 +84,7 @@ def start_cmd(self, node): 'bootstrap_servers': self.kafka.bootstrap_servers(self.security_config.security_protocol), 'jmx_port': self.jmx_port, 'client_id': self.client_id, - 'kafka_directory': kafka_dir(node) + 'kafka_run_class': self.path.script("kafka-run-class.sh", node) }) cmd = "" @@ -92,12 +92,15 @@ def start_cmd(self, node): if node.version < TRUNK: # In order to ensure more consistent configuration between versions, always use the ProducerPerformance # tool from trunk - cmd += "for file in /opt/%s/tools/build/libs/kafka-tools*.jar; do CLASSPATH=$CLASSPATH:$file; done; " % KAFKA_TRUNK - cmd += "for file in /opt/%s/tools/build/dependant-libs-${SCALA_VERSION}*/*.jar; do CLASSPATH=$CLASSPATH:$file; done; " % KAFKA_TRUNK + tools_jar = self.path.jar(TOOLS_JAR_NAME, TRUNK) + tools_dependant_libs_jar = self.path.jar(TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME, TRUNK) + + cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_jar + cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_dependant_libs_jar cmd += "export CLASSPATH; " cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % ProducerPerformanceService.LOG4J_CONFIG - cmd += "JMX_PORT=%(jmx_port)d KAFKA_OPTS=%(kafka_opts)s /opt/%(kafka_directory)s/bin/kafka-run-class.sh org.apache.kafka.tools.ProducerPerformance " \ + cmd += "JMX_PORT=%(jmx_port)d KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s org.apache.kafka.tools.ProducerPerformance " \ "--topic %(topic)s --num-records %(num_records)d --record-size %(record_size)d --throughput %(throughput)d --producer-props bootstrap.servers=%(bootstrap_servers)s client.id=%(client_id)s" % args self.security_config.setup_node(node) diff --git a/tests/kafkatest/services/replica_verification_tool.py b/tests/kafkatest/services/replica_verification_tool.py index 7f77049a0f9b..20332427f6a4 100644 --- 
a/tests/kafkatest/services/replica_verification_tool.py +++ b/tests/kafkatest/services/replica_verification_tool.py @@ -15,13 +15,13 @@ from ducktape.services.background_thread import BackgroundThreadService -from kafkatest.services.kafka.directory import kafka_dir +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin from kafkatest.services.security.security_config import SecurityConfig import re -class ReplicaVerificationTool(BackgroundThreadService): +class ReplicaVerificationTool(KafkaPathResolverMixin, BackgroundThreadService): logs = { "producer_log": { @@ -68,8 +68,8 @@ def get_lag_for_partition(self, topic, partition): return lag def start_cmd(self, node): - cmd = "/opt/%s/bin/" % kafka_dir(node) - cmd += "kafka-run-class.sh kafka.tools.ReplicaVerificationTool" + cmd = self.path.script("kafka-run-class.sh", node) + cmd += " kafka.tools.ReplicaVerificationTool" cmd += " --broker-list %s --topic-white-list %s --time -2 --report-interval-ms %s" % (self.kafka.bootstrap_servers(self.security_protocol), self.topic, self.report_interval_ms) cmd += " 2>> /mnt/replica_verification_tool.log | tee -a /mnt/replica_verification_tool.log &" diff --git a/tests/kafkatest/services/security/kafka_acls.py b/tests/kafkatest/services/security/kafka_acls.py index eb853545c65e..5fcb5e7f4fa0 100644 --- a/tests/kafkatest/services/security/kafka_acls.py +++ b/tests/kafkatest/services/security/kafka_acls.py @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kafkatest.services.kafka.directory import kafka_dir +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin -class ACLs(): - def __init__(self): - pass +class ACLs(KafkaPathResolverMixin): + def __init__(self, context): + self.context = context def set_acls(self, protocol, kafka, zk, topic, group): node = kafka.nodes[0] @@ -35,7 +35,7 @@ def set_acls(self, protocol, kafka, zk, topic, group): self.acls_command(node, ACLs.consume_acl(setting, topic, group, client_principal)) def acls_command(self, node, properties): - cmd = "/opt/%s/bin/kafka-acls.sh %s" % (kafka_dir(node), properties) + cmd = "%s %s" % (self.path.script("kafka-acls.sh", node), properties) node.account.ssh(cmd) @staticmethod diff --git a/tests/kafkatest/services/security/minikdc.py b/tests/kafkatest/services/security/minikdc.py index d83aede11a89..0e7bb1bcb808 100644 --- a/tests/kafkatest/services/security/minikdc.py +++ b/tests/kafkatest/services/security/minikdc.py @@ -13,18 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
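The ReplicaVerificationTool and ACLs changes above show the shape of the whole patch: each service now mixes in KafkaPathResolverMixin and asks self.path for scripts instead of hard-coding /opt/<kafka_dir>/bin paths. A minimal sketch of a service adopting the pattern follows; the class MyShellTool and its arguments are hypothetical, while the mixin, the ducktape base class and the self.path.script(...) call are the ones used in these diffs.

from ducktape.services.background_thread import BackgroundThreadService

from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin


class MyShellTool(KafkaPathResolverMixin, BackgroundThreadService):
    # Hypothetical service: only illustrates the resolver pattern used throughout this patch.

    def __init__(self, context, num_nodes, kafka, topic):
        super(MyShellTool, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.topic = topic

    def start_cmd(self, node):
        # Resolve the script for whatever Kafka version is installed on this node,
        # rather than interpolating a hard-coded /opt/<kafka_dir>/bin prefix.
        cmd = self.path.script("kafka-run-class.sh", node)
        cmd += " kafka.tools.SimpleConsumerShell"
        cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers())
        return cmd
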
-from ducktape.services.service import Service -from kafkatest.services.kafka.directory import kafka_dir - import os -from tempfile import mkstemp -from shutil import move -from os import remove, close -from io import open import uuid +from io import open +from os import remove, close +from shutil import move +from tempfile import mkstemp + +from ducktape.services.service import Service +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin, CORE_LIBS_JAR_NAME, CORE_DEPENDANT_TEST_LIBS_JAR_NAME +from kafkatest.version import TRUNK -class MiniKdc(Service): + +class MiniKdc(KafkaPathResolverMixin, Service): logs = { "minikdc_log": { @@ -40,7 +42,7 @@ class MiniKdc(Service): LOCAL_KEYTAB_FILE = "/tmp/" + str(uuid.uuid4().get_hex()) + "_keytab" LOCAL_KRB5CONF_FILE = "/tmp/" + str(uuid.uuid4().get_hex()) + "_krb5.conf" - def __init__(self, context, kafka_nodes, extra_principals = ""): + def __init__(self, context, kafka_nodes, extra_principals=""): super(MiniKdc, self).__init__(context, 1) self.kafka_nodes = kafka_nodes self.extra_principals = extra_principals @@ -66,9 +68,13 @@ def start_node(self, node): principals = 'client ' + kafka_principals + self.extra_principals self.logger.info("Starting MiniKdc with principals " + principals) - jar_paths = self.core_jar_paths(node, "dependant-testlibs") + self.core_jar_paths(node, "libs") - classpath = ":".join(jar_paths) - cmd = "INCLUDE_TEST_JARS=true CLASSPATH=%s /opt/%s/bin/kafka-run-class.sh kafka.security.minikdc.MiniKdc %s %s %s %s 1>> %s 2>> %s &" % (classpath, kafka_dir(node), MiniKdc.WORK_DIR, MiniKdc.PROPS_FILE, MiniKdc.KEYTAB_FILE, principals, MiniKdc.LOG_FILE, MiniKdc.LOG_FILE) + core_libs_jar = self.path.jar(CORE_LIBS_JAR_NAME, TRUNK) + core_dependant_test_libs_jar = self.path.jar(CORE_DEPENDANT_TEST_LIBS_JAR_NAME, TRUNK) + + cmd = "for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_libs_jar + cmd += " for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_dependant_test_libs_jar + cmd += " export CLASSPATH;" + cmd += " %s kafka.security.minikdc.MiniKdc %s %s %s %s 1>> %s 2>> %s &" % (self.path.script("kafka-run-class.sh", node), MiniKdc.WORK_DIR, MiniKdc.PROPS_FILE, MiniKdc.KEYTAB_FILE, principals, MiniKdc.LOG_FILE, MiniKdc.LOG_FILE) self.logger.debug("Attempting to start MiniKdc on %s with command: %s" % (str(node.account), cmd)) with node.account.monitor_log(MiniKdc.LOG_FILE) as monitor: node.account.ssh(cmd) @@ -77,14 +83,9 @@ def start_node(self, node): node.account.scp_from(MiniKdc.KEYTAB_FILE, MiniKdc.LOCAL_KEYTAB_FILE) node.account.scp_from(MiniKdc.KRB5CONF_FILE, MiniKdc.LOCAL_KRB5CONF_FILE) - #KDC is set to bind openly (via 0.0.0.0). Change krb5.conf to hold the specific KDC address + # KDC is set to bind openly (via 0.0.0.0). 
Change krb5.conf to hold the specific KDC address self.replace_in_file(MiniKdc.LOCAL_KRB5CONF_FILE, '0.0.0.0', node.account.hostname) - def core_jar_paths(self, node, lib_dir_name): - lib_dir = "/opt/%s/core/build/%s" % (kafka_dir(node), lib_dir_name) - jars = node.account.ssh_capture("ls " + lib_dir) - return [os.path.join(lib_dir, jar.strip()) for jar in jars] - def stop_node(self, node): self.logger.info("Stopping %s on %s" % (type(self).__name__, node.account.hostname)) node.account.kill_process("apacheds", allow_fail=False) diff --git a/tests/kafkatest/services/simple_consumer_shell.py b/tests/kafkatest/services/simple_consumer_shell.py index c44540dbe638..7204748955d0 100644 --- a/tests/kafkatest/services/simple_consumer_shell.py +++ b/tests/kafkatest/services/simple_consumer_shell.py @@ -15,10 +15,10 @@ from ducktape.services.background_thread import BackgroundThreadService -from kafkatest.services.kafka.directory import kafka_dir +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin -class SimpleConsumerShell(BackgroundThreadService): +class SimpleConsumerShell(KafkaPathResolverMixin, BackgroundThreadService): logs = { "simple_consumer_shell_log": { @@ -45,8 +45,8 @@ def _worker(self, idx, node): self.logger.debug(self.output) def start_cmd(self, node): - cmd = "/opt/%s/bin/" % kafka_dir(node) - cmd += "kafka-run-class.sh kafka.tools.SimpleConsumerShell" + cmd = self.path.script("kafka-run-class.sh", node) + cmd += " kafka.tools.SimpleConsumerShell" cmd += " --topic %s --broker-list %s --partition %s --no-wait-at-logend" % (self.topic, self.kafka.bootstrap_servers(), self.partition) cmd += " 2>> /mnt/get_simple_consumer_shell.log | tee -a /mnt/get_simple_consumer_shell.log &" diff --git a/tests/kafkatest/services/streams.py b/tests/kafkatest/services/streams.py index 53d967e1bc8b..875daeeed041 100644 --- a/tests/kafkatest/services/streams.py +++ b/tests/kafkatest/services/streams.py @@ -13,15 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os.path +import signal + from ducktape.services.service import Service from ducktape.utils.util import wait_until -from kafkatest.services.kafka.directory import kafka_dir -import signal -import os.path +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin -class StreamsSmokeTestBaseService(Service): +class StreamsSmokeTestBaseService(KafkaPathResolverMixin, Service): """Base class for Streams Smoke Test services providing some common settings and functionality""" PERSISTENT_ROOT = "/mnt/streams" @@ -105,10 +106,10 @@ def start_cmd(self, node): args['stderr'] = self.STDERR_FILE args['pidfile'] = self.PID_FILE args['log4j'] = self.LOG4J_CONFIG_FILE - args['kafka_dir'] = kafka_dir(node) + args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node) cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \ - "INCLUDE_TEST_JARS=true /opt/%(kafka_dir)s/bin/kafka-run-class.sh org.apache.kafka.streams.smoketest.StreamsSmokeTest " \ + "INCLUDE_TEST_JARS=true %(kafka_run_class)s org.apache.kafka.streams.smoketest.StreamsSmokeTest " \ " %(command)s %(kafka)s %(zk)s %(state_dir)s " \ " & echo $! 
>&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args diff --git a/tests/kafkatest/services/verifiable_consumer.py b/tests/kafkatest/services/verifiable_consumer.py index 55304dcb4ea1..9c6abddaa609 100644 --- a/tests/kafkatest/services/verifiable_consumer.py +++ b/tests/kafkatest/services/verifiable_consumer.py @@ -13,17 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ducktape.services.background_thread import BackgroundThreadService - -from kafkatest.services.kafka.directory import kafka_dir -from kafkatest.services.kafka.version import TRUNK -from kafkatest.services.kafka import TopicPartition - import json import os import signal import subprocess +from ducktape.services.background_thread import BackgroundThreadService + +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin +from kafkatest.services.kafka import TopicPartition +from kafkatest.version import TRUNK + class ConsumerState: Dead = 1 @@ -112,7 +112,7 @@ def last_commit(self, tp): return None -class VerifiableConsumer(BackgroundThreadService): +class VerifiableConsumer(KafkaPathResolverMixin, BackgroundThreadService): PERSISTENT_ROOT = "/mnt/verifiable_consumer" STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.stdout") STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.stderr") @@ -226,9 +226,9 @@ def start_cmd(self, node): cmd += "export LOG_DIR=%s;" % VerifiableConsumer.LOG_DIR cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % VerifiableConsumer.LOG4J_CONFIG - cmd += "/opt/" + kafka_dir(node) + "/bin/kafka-run-class.sh org.apache.kafka.tools.VerifiableConsumer" \ + cmd += self.path.script("kafka-run-class.sh", node) + " org.apache.kafka.tools.VerifiableConsumer" \ " --group-id %s --topic %s --broker-list %s --session-timeout %s --assignment-strategy %s %s" % \ - (self.group_id, self.topic, self.kafka.bootstrap_servers(self.security_config.security_protocol), + (self.group_id, self.topic, self.kafka.bootstrap_servers(self.security_config.security_protocol), self.session_timeout_sec*1000, self.assignment_strategy, "--enable-autocommit" if self.enable_autocommit else "") if self.max_messages > 0: diff --git a/tests/kafkatest/services/verifiable_producer.py b/tests/kafkatest/services/verifiable_producer.py index a6a1bd980676..dbdf71f36c24 100644 --- a/tests/kafkatest/services/verifiable_producer.py +++ b/tests/kafkatest/services/verifiable_producer.py @@ -13,19 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
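MiniKdc and ProducerPerformance above, and VerifiableProducer just below, share a second use of the resolver: when an older-version node needs a class that only exists in trunk, the trunk jars are resolved by name and prepended to the CLASSPATH. A compressed sketch of that assembly; the standalone helper function is hypothetical, while the jar-name constants and the path.jar(...) call are the ones in the diffs.

from kafkatest.directory_layout.kafka_path import TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME
from kafkatest.version import TRUNK


def trunk_tools_classpath_cmd(path):
    # Returns a shell fragment that puts the trunk-built tools jars on the CLASSPATH,
    # so pre-0.9 nodes can still run org.apache.kafka.tools.* classes.
    cmd = ""
    for jar_name in (TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME):
        jar_glob = path.jar(jar_name, TRUNK)  # e.g. a glob under the trunk install directory
        cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % jar_glob
    cmd += "export CLASSPATH; "
    return cmd
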
-from ducktape.services.background_thread import BackgroundThreadService - -from kafkatest.services.kafka.directory import kafka_dir, KAFKA_TRUNK -from kafkatest.services.kafka.version import TRUNK, LATEST_0_8_2 -from kafkatest.utils import is_int, is_int_with_prefix - import json import os import signal import subprocess import time -class VerifiableProducer(BackgroundThreadService): +from ducktape.services.background_thread import BackgroundThreadService + +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin, TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME +from kafkatest.utils import is_int, is_int_with_prefix +from kafkatest.version import TRUNK, LATEST_0_8_2 + + +class VerifiableProducer(KafkaPathResolverMixin, BackgroundThreadService): PERSISTENT_ROOT = "/mnt/verifiable_producer" STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_producer.stdout") LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs") @@ -110,7 +111,6 @@ def _worker(self, idx, node): cmd = self.start_cmd(node, idx) self.logger.debug("VerifiableProducer %d command: %s" % (idx, cmd)) - self.produced_count[idx] = 0 last_produced_time = time.time() prev_msg = None @@ -147,20 +147,23 @@ def _worker(self, idx, node): self.clean_shutdown_nodes.add(node) def start_cmd(self, node, idx): - cmd = "" if node.version <= LATEST_0_8_2: # 0.8.2.X releases do not have VerifiableProducer.java, so cheat and add # the tools jar from trunk to the classpath - cmd += "for file in /opt/%s/tools/build/libs/kafka-tools*.jar; do CLASSPATH=$CLASSPATH:$file; done; " % KAFKA_TRUNK - cmd += "for file in /opt/%s/tools/build/dependant-libs-${SCALA_VERSION}*/*.jar; do CLASSPATH=$CLASSPATH:$file; done; " % KAFKA_TRUNK + tools_jar = self.path.jar(TOOLS_JAR_NAME, TRUNK) + tools_dependant_libs_jar = self.path.jar(TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME, TRUNK) + + cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_jar + cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_dependant_libs_jar cmd += "export CLASSPATH; " cmd += "export LOG_DIR=%s;" % VerifiableProducer.LOG_DIR cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % VerifiableProducer.LOG4J_CONFIG - cmd += "/opt/" + kafka_dir(node) + "/bin/kafka-run-class.sh org.apache.kafka.tools.VerifiableProducer" \ - " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers(self.security_config.security_protocol)) + cmd += " " + self.path.script("kafka-run-class.sh", node) + cmd += " org.apache.kafka.tools.VerifiableProducer" + cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers(self.security_config.security_protocol)) if self.max_messages > 0: cmd += " --max-messages %s" % str(self.max_messages) if self.throughput > 0: diff --git a/tests/kafkatest/services/zookeeper.py b/tests/kafkatest/services/zookeeper.py index fb7358781158..07e2c0ca54bd 100644 --- a/tests/kafkatest/services/zookeeper.py +++ b/tests/kafkatest/services/zookeeper.py @@ -14,18 +14,18 @@ # limitations under the License. 
+import re +import subprocess +import time + from ducktape.services.service import Service -from kafkatest.services.kafka.directory import kafka_dir +from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin from kafkatest.services.security.security_config import SecurityConfig -from kafkatest.services.kafka.directory import kafka_dir, KAFKA_TRUNK - -import subprocess -import time -import re +from kafkatest.version import TRUNK -class ZookeeperService(Service): +class ZookeeperService(KafkaPathResolverMixin, Service): logs = { "zk_log": { @@ -73,7 +73,7 @@ def start_node(self, node): node.account.create_file("/mnt/zookeeper.properties", config_file) start_cmd = "export KAFKA_OPTS=\"%s\";" % self.kafka_opts - start_cmd += "/opt/%s/bin/zookeeper-server-start.sh " % kafka_dir(node) + start_cmd += "%s " % self.path.script("zookeeper-server-start.sh", node) start_cmd += "/mnt/zookeeper.properties 1>> %(path)s 2>> %(path)s &" % self.logs["zk_log"] node.account.ssh(start_cmd) @@ -111,16 +111,17 @@ def connect_setting(self): # the use of ZooKeeper ACLs. # def zookeeper_migration(self, node, zk_acl): - la_migra_cmd = "/opt/%s/bin/zookeeper-security-migration.sh --zookeeper.acl=%s --zookeeper.connect=%s" % (kafka_dir(node), zk_acl, self.connect_setting()) + la_migra_cmd = "%s --zookeeper.acl=%s --zookeeper.connect=%s" % \ + (self.path.script("zookeeper-security-migration.sh", node), zk_acl, self.connect_setting()) node.account.ssh(la_migra_cmd) def query(self, path): """ Queries zookeeper for data associated with 'path' and returns all fields in the schema """ - kafka_dir = KAFKA_TRUNK - cmd = "/opt/%s/bin/kafka-run-class.sh kafka.tools.ZooKeeperMainWrapper -server %s get %s" % \ - (kafka_dir, self.connect_setting(), path) + kafka_run_class = self.path.script("kafka-run-class.sh", TRUNK) + cmd = "%s kafka.tools.ZooKeeperMainWrapper -server %s get %s" % \ + (kafka_run_class, self.connect_setting(), path) self.logger.debug(cmd) node = self.nodes[0] diff --git a/tests/kafkatest/tests/client/message_format_change_test.py b/tests/kafkatest/tests/client/message_format_change_test.py index 357fd17a9e30..a1ebf22257ea 100644 --- a/tests/kafkatest/tests/client/message_format_change_test.py +++ b/tests/kafkatest/tests/client/message_format_change_test.py @@ -12,18 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
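One detail worth noting in the ZookeeperService diff above: the resolver accepts either a cluster node or a version constant, so query() pins kafka-run-class.sh to TRUNK while the consumer and producer services resolve it per node (the check_project_paths unit test added later in this patch exercises exactly this). A short hypothetical sketch of the two call styles:

from kafkatest.directory_layout.kafka_path import create_path_resolver
from kafkatest.version import TRUNK


def run_class_script(context, node=None):
    # Hypothetical helper: per-node resolution when a node is given, otherwise pin to trunk.
    path = create_path_resolver(context)
    if node is not None:
        return path.script("kafka-run-class.sh", node)   # uses node.version, defaulting to trunk
    return path.script("kafka-run-class.sh", TRUNK)      # version-pinned, as in ZookeeperService.query()
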
-from ducktape.tests.test import Test from ducktape.mark import parametrize from ducktape.utils.util import wait_until -from kafkatest.services.zookeeper import ZookeeperService + +from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import LATEST_0_9, LATEST_0_10, TRUNK, KafkaVersion from kafkatest.services.verifiable_producer import VerifiableProducer -from kafkatest.services.console_consumer import ConsoleConsumer -from kafkatest.utils import is_int +from kafkatest.services.zookeeper import ZookeeperService from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest -from kafkatest.services.kafka import config_property -import time +from kafkatest.utils import is_int +from kafkatest.version import LATEST_0_9, LATEST_0_10, TRUNK, KafkaVersion class MessageFormatChangeTest(ProduceConsumeValidateTest): diff --git a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py index 2c261df6c6df..85fc9aec1657 100644 --- a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py +++ b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py @@ -12,17 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ducktape.tests.test import Test from ducktape.mark import parametrize from ducktape.utils.util import wait_until -from kafkatest.services.zookeeper import ZookeeperService + +from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import LATEST_0_9, LATEST_0_8_2, TRUNK, KafkaVersion +from kafkatest.services.kafka import config_property from kafkatest.services.verifiable_producer import VerifiableProducer -from kafkatest.services.console_consumer import ConsoleConsumer -from kafkatest.utils import is_int +from kafkatest.services.zookeeper import ZookeeperService from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest -from kafkatest.services.kafka import config_property +from kafkatest.utils import is_int +from kafkatest.version import LATEST_0_9, LATEST_0_8_2, TRUNK, KafkaVersion + # Compatibility tests for moving to a new broker (e.g., 0.10.x) and using a mix of old and new clients (e.g., 0.9.x) class ClientCompatibilityTestNewBroker(ProduceConsumeValidateTest): diff --git a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py index fdbedca174ff..39774902a6cb 100644 --- a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py +++ b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py @@ -16,7 +16,6 @@ from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.security.security_config import SecurityConfig from kafkatest.services.verifiable_producer import VerifiableProducer from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.utils import is_int @@ -34,7 +33,7 @@ def __init__(self, test_context): super(TestSecurityRollingUpgrade, self).__init__(test_context=test_context) def setUp(self): - self.acls = ACLs() + self.acls = ACLs(self.test_context) self.topic = "test_topic" self.group = "group" self.producer_throughput = 100 diff --git a/tests/kafkatest/tests/core/upgrade_test.py b/tests/kafkatest/tests/core/upgrade_test.py index 
9926f11ee0e1..778d6a50ca3e 100644 --- a/tests/kafkatest/tests/core/upgrade_test.py +++ b/tests/kafkatest/tests/core/upgrade_test.py @@ -14,14 +14,15 @@ # limitations under the License. from ducktape.mark import parametrize -from kafkatest.services.zookeeper import ZookeeperService -from kafkatest.services.kafka import KafkaService -from kafkatest.services.kafka.version import LATEST_0_8_2, LATEST_0_9, TRUNK, KafkaVersion -from kafkatest.services.verifiable_producer import VerifiableProducer + from kafkatest.services.console_consumer import ConsoleConsumer +from kafkatest.services.kafka import KafkaService from kafkatest.services.kafka import config_property +from kafkatest.services.verifiable_producer import VerifiableProducer +from kafkatest.services.zookeeper import ZookeeperService from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest from kafkatest.utils import is_int +from kafkatest.version import LATEST_0_8_2, LATEST_0_9, TRUNK, KafkaVersion class TestUpgrade(ProduceConsumeValidateTest): diff --git a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py index 7f80deb027ea..7e722f7d31da 100644 --- a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py +++ b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py @@ -19,11 +19,9 @@ from kafkatest.services.kafka import KafkaService from kafkatest.services.verifiable_producer import VerifiableProducer from kafkatest.services.console_consumer import ConsoleConsumer -from kafkatest.services.security.security_config import SecurityConfig from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest from kafkatest.services.security.kafka_acls import ACLs from kafkatest.utils import is_int -import time class ZooKeeperSecurityUpgradeTest(ProduceConsumeValidateTest): """Tests a rolling upgrade for zookeeper. @@ -38,7 +36,7 @@ def setUp(self): self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 - self.acls = ACLs() + self.acls = ACLs(self.test_context) self.zk = ZookeeperService(self.test_context, num_nodes=3) diff --git a/tests/kafkatest/services/kafka/version.py b/tests/kafkatest/version.py similarity index 72% rename from tests/kafkatest/services/kafka/version.py rename to tests/kafkatest/version.py index dc2582b6a627..6b378e7e48e6 100644 --- a/tests/kafkatest/services/kafka/version.py +++ b/tests/kafkatest/version.py @@ -1,10 +1,11 @@ -# Copyright 2015 Confluent Inc. +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -12,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ from kafkatest.utils import kafkatest_version from distutils.version import LooseVersion @@ -52,6 +54,15 @@ def __str__(self): return LooseVersion.__str__(self) +def get_version(node=None): + """Return the version attached to the given node. + Default to trunk if node or node.version is undefined (aka None) + """ + if node is not None and hasattr(node, "version") and node.version is not None: + return node.version + else: + return TRUNK + TRUNK = KafkaVersion("trunk") # 0.8.2.X versions diff --git a/tests/setup.cfg b/tests/setup.cfg new file mode 100644 index 000000000000..c70f1e498bd0 --- /dev/null +++ b/tests/setup.cfg @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pytest configuration (can also be defined in in tox.ini or pytest.ini file) +# +# This file defines naming convention and root search directory for autodiscovery of +# pytest unit tests for the system test service classes. +# +# To ease possible confusion, 'check' instead of 'test' as a prefix for unit tests, since +# many system test files, classes, and methods have 'test' somewhere in the name +[pytest] +testpaths=unit +python_files=check_*.py +python_classes=Check +python_functions=check_* + +# don't search inside any resources directory for unit tests +norecursedirs = resources diff --git a/tests/setup.py b/tests/setup.py index 910c0a2da586..9961508ce29b 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -15,12 +15,33 @@ # see kafka.server.KafkaConfig for additional details and defaults import re +import sys from setuptools import find_packages, setup +from setuptools.command.test import test as TestCommand version = '' with open('kafkatest/__init__.py', 'r') as fd: - version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', - fd.read(), re.MULTILINE).group(1) + version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) + + +class PyTest(TestCommand): + user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] + + def initialize_options(self): + TestCommand.initialize_options(self) + self.pytest_args = [] + + def finalize_options(self): + TestCommand.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + # import here, cause outside the eggs aren't loaded + import pytest + print self.pytest_args + errno = pytest.main(self.pytest_args) + sys.exit(errno) setup(name="kafkatest", version=version, @@ -30,5 +51,7 @@ license="apache2.0", packages=find_packages(), include_package_data=True, - install_requires=["ducktape==0.5.0", "requests>=2.5.0"] + install_requires=["ducktape==0.5.0", "requests>=2.5.0"], + tests_require=["pytest", "mock"], + cmdclass={'test': PyTest}, ) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 
000000000000..ec2014340d78 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/directory_layout/__init__.py b/tests/unit/directory_layout/__init__.py new file mode 100644 index 000000000000..ec2014340d78 --- /dev/null +++ b/tests/unit/directory_layout/__init__.py @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/directory_layout/check_project_paths.py b/tests/unit/directory_layout/check_project_paths.py new file mode 100644 index 000000000000..284e4d5150c4 --- /dev/null +++ b/tests/unit/directory_layout/check_project_paths.py @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from kafkatest.directory_layout.kafka_path import create_path_resolver, KafkaSystemTestPathResolver, \ + KAFKA_PATH_RESOLVER_KEY +from kafkatest.version import V_0_9_0_1, TRUNK, KafkaVersion + + +class DummyContext(object): + def __init__(self): + self.globals = {} + + +class DummyPathResolver(object): + """Dummy class to help check path resolver creation.""" + def __init__(self, context, project_name): + pass + +class DummyNode(object): + """Fake node object""" + pass + +class CheckCreatePathResolver(object): + def check_create_path_resolver_override(self): + """Test override behavior when instantiating a path resolver using our factory function. + + If context.globals has an entry for a path resolver class, use that class instead of the default. + """ + mock_context = DummyContext() + mock_context.globals[KAFKA_PATH_RESOLVER_KEY] = \ + "unit.directory_layout.check_project_paths.DummyPathResolver" + + resolver = create_path_resolver(mock_context) + assert type(resolver) == DummyPathResolver + + def check_create_path_resolver_default(self): + """Test default behavior when instantiating a path resolver using our factory function. + """ + resolver = create_path_resolver(DummyContext()) + assert type(resolver) == KafkaSystemTestPathResolver + + def check_paths(self): + """Check expected path resolution without any version specified.""" + resolver = create_path_resolver(DummyContext()) + + assert resolver.home() == "/opt/kafka-trunk" + assert resolver.bin() == "/opt/kafka-trunk/bin" + assert resolver.script("kafka-run-class.sh") == "/opt/kafka-trunk/bin/kafka-run-class.sh" + + def check_versioned_source_paths(self): + """Check expected paths when using versions.""" + resolver = create_path_resolver(DummyContext()) + + assert resolver.home(V_0_9_0_1) == "/opt/kafka-0.9.0.1" + assert resolver.bin(V_0_9_0_1) == "/opt/kafka-0.9.0.1/bin" + assert resolver.script("kafka-run-class.sh", V_0_9_0_1) == "/opt/kafka-0.9.0.1/bin/kafka-run-class.sh" + + def check_node_or_version_helper(self): + """KafkaSystemTestPathResolver has a helper method which can take a node or version, and returns the version. + Check expected behavior here. + """ + resolver = create_path_resolver(DummyContext()) + + # Node with no version attribute should resolve to TRUNK + node = DummyNode() + assert resolver._version(node) == TRUNK + + # Node with version attribute should resolve to the version attribute + node.version = V_0_9_0_1 + assert resolver._version(node) == V_0_9_0_1 + + # A KafkaVersion object should resolve to itself + assert resolver._version(TRUNK) == TRUNK + version = KafkaVersion("999.999.999") + assert resolver._version(version) == version + + diff --git a/tests/unit/setup.cfg b/tests/unit/setup.cfg new file mode 100644 index 000000000000..e757a99fb3be --- /dev/null +++ b/tests/unit/setup.cfg @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# pytest configuration (can also be defined in in tox.ini or pytest.ini file) +# +# To ease possible confusion, prefix muckrake *unit* tests with 'check' instead of 'test', since +# many muckrake files, classes, and methods have 'test' somewhere in the name +[pytest] +python_files=check_*.py +python_classes=Check +python_functions=check_* diff --git a/tests/unit/version/__init__.py b/tests/unit/version/__init__.py new file mode 100644 index 000000000000..e556dc959238 --- /dev/null +++ b/tests/unit/version/__init__.py @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/tests/unit/version/check_version.py b/tests/unit/version/check_version.py new file mode 100644 index 000000000000..7b855518207d --- /dev/null +++ b/tests/unit/version/check_version.py @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from mock import Mock + +from kafkatest.version import TRUNK, V_0_8_2_2, get_version + + +class CheckVersion(object): + def check_get_version(self): + """Check default and override behavior of get_version""" + node = None + assert get_version(node) == TRUNK + + node = Mock() + node.version = None + assert get_version(node) == TRUNK + + node = Mock() + node.version = V_0_8_2_2 + assert get_version(node) == V_0_8_2_2 \ No newline at end of file From 7ad2d3df8eb0b3c23baff04efc30a0b4c6752b19 Mon Sep 17 00:00:00 2001 From: Mark Grover Date: Fri, 6 May 2016 21:47:00 +0100 Subject: [PATCH 097/267] KAFKA-3669; Add secondary constructor for KafkaConfig with a default value for doLog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …value for doLog Author: Mark Grover Reviewers: Grant Henke , Ismael Juma Closes #1334 from markgrover/kafka-3669-trunk (cherry picked from commit 58f9d7cf8a68824a8fc451cf7ff5fcfb696fa863) Signed-off-by: Ismael Juma --- core/src/main/scala/kafka/server/KafkaConfig.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala index c5da55d4acac..596cc587786d 100755 --- a/core/src/main/scala/kafka/server/KafkaConfig.scala +++ b/core/src/main/scala/kafka/server/KafkaConfig.scala @@ -758,6 +758,8 @@ object KafkaConfig { class KafkaConfig(val props: java.util.Map[_, _], doLog: Boolean) extends AbstractConfig(KafkaConfig.configDef, props, doLog) { + def this(props: java.util.Map[_, _]) = this(props, true) + /** ********* Zookeeper Configuration ***********/ val zkConnect: String = getString(KafkaConfig.ZkConnectProp) val zkSessionTimeoutMs: Int = getInt(KafkaConfig.ZkSessionTimeoutMsProp) From b904a427c0e5e5683590528e54da33226c075c0b Mon Sep 17 00:00:00 2001 From: Chen Zhu Date: Fri, 6 May 2016 21:57:53 +0100 Subject: [PATCH 098/267] KAFKA-3655; awaitFlushCompletion() in RecordAccumulator should always decrement flushesInProgress count Author: Chen Zhu Reviewers: Ismael Juma Closes #1315 from zhuchen1018/KAFKA-3655 (cherry picked from commit 717eea8350feb670e8ba3dd3505c708a8a52de71) Signed-off-by: Ismael Juma --- .../producer/internals/RecordAccumulator.java | 13 +++++--- .../internals/RecordAccumulatorTest.java | 31 +++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java index 1766609ace02..5339096efa75 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java @@ -430,8 +430,10 @@ public void deallocate(RecordBatch batch) { /** * Are there any threads currently waiting on a flush? 
+ * + * package private for test */ - private boolean flushInProgress() { + boolean flushInProgress() { return flushesInProgress.get() > 0; } @@ -453,9 +455,12 @@ private boolean appendsInProgress() { * Mark all partitions as ready to send and block until the send is complete */ public void awaitFlushCompletion() throws InterruptedException { - for (RecordBatch batch: this.incomplete.all()) - batch.produceFuture.await(); - this.flushesInProgress.decrementAndGet(); + try { + for (RecordBatch batch : this.incomplete.all()) + batch.produceFuture.await(); + } finally { + this.flushesInProgress.decrementAndGet(); + } } /** diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java index a39d2e82840a..b3a5a049a82d 100644 --- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java @@ -16,6 +16,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -39,6 +40,7 @@ import org.apache.kafka.common.record.Record; import org.apache.kafka.common.record.Records; import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.common.utils.SystemTime; import org.junit.After; import org.junit.Test; @@ -57,6 +59,7 @@ public class RecordAccumulatorTest { private PartitionInfo part2 = new PartitionInfo(topic, partition2, node1, null, null); private PartitionInfo part3 = new PartitionInfo(topic, partition3, node2, null, null); private MockTime time = new MockTime(); + private SystemTime systemTime = new SystemTime(); private byte[] key = "key".getBytes(); private byte[] value = "value".getBytes(); private int msgSize = Records.LOG_OVERHEAD + Record.recordSize(key, value); @@ -272,6 +275,34 @@ public void testFlush() throws Exception { assertFalse(accum.hasUnsent()); } + + private void delayedInterrupt(final Thread thread, final long delayMs) { + Thread t = new Thread() { + public void run() { + systemTime.sleep(delayMs); + thread.interrupt(); + } + }; + t.start(); + } + + @Test + public void testAwaitFlushComplete() throws Exception { + RecordAccumulator accum = new RecordAccumulator(4 * 1024, 64 * 1024, CompressionType.NONE, Long.MAX_VALUE, 100L, metrics, time); + accum.append(new TopicPartition(topic, 0), 0L, key, value, null, maxBlockTimeMs); + + accum.beginFlush(); + assertTrue(accum.flushInProgress()); + delayedInterrupt(Thread.currentThread(), 1000L); + try { + accum.awaitFlushCompletion(); + fail("awaitFlushCompletion should throw InterruptException"); + } catch (InterruptedException e) { + assertFalse("flushInProgress count should be decremented even if thread is interrupted", accum.flushInProgress()); + } + } + + @Test public void testAbortIncompleteBatches() throws Exception { long lingerMs = Long.MAX_VALUE; From 2277b3c43edc3b756c8262373846e93589998997 Mon Sep 17 00:00:00 2001 From: Jonathan Bond Date: Sat, 7 May 2016 09:05:55 +0100 Subject: [PATCH 099/267] KAFKA-3112; Warn instead of error on unresolvable bootstrap server so that unresolvable DNS names are ignored and only throw an error if no other bootstrap servers are resolvable. 
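The behavioural change in ClientUtils.parseAndValidateAddresses below: a bootstrap address that fails DNS resolution is now dropped with a warning instead of failing client construction, and a configuration error is raised only when nothing in bootstrap.servers resolves. A rough Python sketch of that filtering logic, purely for illustration; the helper name and the use of socket.getaddrinfo are assumptions, not Kafka client code.

import logging
import socket

log = logging.getLogger(__name__)


def parse_bootstrap_servers(urls):
    # Keep only the addresses that resolve; warn about the rest; fail only if none are usable.
    addresses = []
    for url in urls:
        host, _, port = url.rpartition(":")
        host = host.strip("[]")  # simplified IPv6 bracket handling
        if not host or not port.isdigit():
            raise ValueError("Invalid url in bootstrap.servers: %s" % url)
        try:
            socket.getaddrinfo(host, int(port))
        except socket.gaierror:
            log.warning("Removing server from bootstrap.servers as DNS resolution failed: %s", url)
            continue
        addresses.append((host, int(port)))
    if not addresses:
        raise ValueError("No resolvable bootstrap urls given in bootstrap.servers")
    return addresses
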
Author: Jonathan Bond Reviewers: Ewen Cheslack-Postava , Grant Henke , Ismael Juma Closes #792 from bondj/KAFKA-3112 (cherry picked from commit c4bbf342432291220242ad4177fd72a959ddcb94) Signed-off-by: Ismael Juma --- .../org/apache/kafka/clients/ClientUtils.java | 10 ++++++---- .../apache/kafka/clients/ClientUtilsTest.java | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/ClientUtils.java b/clients/src/main/java/org/apache/kafka/clients/ClientUtils.java index ad9c5d0a2936..0c12ea13450f 100644 --- a/clients/src/main/java/org/apache/kafka/clients/ClientUtils.java +++ b/clients/src/main/java/org/apache/kafka/clients/ClientUtils.java @@ -45,16 +45,18 @@ public static List parseAndValidateAddresses(List url throw new ConfigException("Invalid url in " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG + ": " + url); try { InetSocketAddress address = new InetSocketAddress(host, port); - if (address.isUnresolved()) - throw new ConfigException("DNS resolution failed for url in " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG + ": " + url); - addresses.add(address); + if (address.isUnresolved()) { + log.warn("Removing server from " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG + " as DNS resolution failed: " + url); + } else { + addresses.add(address); + } } catch (NumberFormatException e) { throw new ConfigException("Invalid port in " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG + ": " + url); } } } if (addresses.size() < 1) - throw new ConfigException("No bootstrap urls given in " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG); + throw new ConfigException("No resolvable bootstrap urls given in " + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG); return addresses; } diff --git a/clients/src/test/java/org/apache/kafka/clients/ClientUtilsTest.java b/clients/src/test/java/org/apache/kafka/clients/ClientUtilsTest.java index d6a4019c4b81..2c1db445180e 100644 --- a/clients/src/test/java/org/apache/kafka/clients/ClientUtilsTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/ClientUtilsTest.java @@ -18,8 +18,11 @@ import org.apache.kafka.common.config.ConfigException; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import java.net.InetSocketAddress; import java.util.Arrays; +import java.util.List; public class ClientUtilsTest { @@ -29,14 +32,24 @@ public void testParseAndValidateAddresses() { check("mydomain.com:8080"); check("[::1]:8000"); check("[2001:db8:85a3:8d3:1319:8a2e:370:7348]:1234", "mydomain.com:10000"); + List validatedAddresses = check("some.invalid.hostname.foo.bar:9999", "mydomain.com:10000"); + assertEquals(1, validatedAddresses.size()); + InetSocketAddress onlyAddress = validatedAddresses.get(0); + assertEquals("mydomain.com", onlyAddress.getHostName()); + assertEquals(10000, onlyAddress.getPort()); } @Test(expected = ConfigException.class) public void testNoPort() { check("127.0.0.1"); } + + @Test(expected = ConfigException.class) + public void testOnlyBadHostname() { + check("some.invalid.hostname.foo.bar:9999"); + } - private void check(String... url) { - ClientUtils.parseAndValidateAddresses(Arrays.asList(url)); + private List check(String... url) { + return ClientUtils.parseAndValidateAddresses(Arrays.asList(url)); } } From c8ef1778c91c05061dfce19f6edf0c22d8806fbe Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Sat, 7 May 2016 19:35:55 +0100 Subject: [PATCH 100/267] KAFKA-3160; Fix LZ4 Framing This contribution is my original work and I license the work under Apache 2.0. 
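The framing bug this patch fixes sits in the LZ4 frame descriptor's header checksum (HC): the frame spec computes XXHash32 (seed 0) over the descriptor bytes only and keeps bits 8-15 of the result, whereas the old Kafka code also fed the four magic bytes into the hash, which is why the reader and writer below grow ignoreFlagDescriptorChecksum / useBrokenFlagDescriptorChecksum flags for compatibility with older clients. A small Python sketch of the two calculations, assuming the third-party xxhash package and omitting the optional content-size field; the byte layout mirrors the Java diff below.

import struct

import xxhash  # third-party package, assumed available

LZ4_MAGIC = 0x184D2204


def frame_header(flg, bd, broken_hc=False):
    # Build magic + FLG + BD + HC with either the spec-correct or the old, broken checksum.
    header = struct.pack("<I", LZ4_MAGIC) + bytes(bytearray([flg, bd]))
    hashed = header if broken_hc else header[4:]  # the broken form hashes the magic bytes too
    hc = (xxhash.xxh32(hashed, seed=0).intdigest() >> 8) & 0xFF
    return header + bytes(bytearray([hc]))
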
Author: Dana Powers Author: Ismael Juma Reviewers: Jun Rao , Ismael Juma Closes #1212 from dpkp/KAFKA-3160 (cherry picked from commit 8fe2552239863f3a01d01708d55edf3c7082ff92) Signed-off-by: Ismael Juma --- .../kafka/common/record/Compressor.java | 7 +- .../record/KafkaLZ4BlockInputStream.java | 68 ++++++--- .../record/KafkaLZ4BlockOutputStream.java | 89 +++++++----- .../kafka/common/record/MemoryRecords.java | 12 +- .../kafka/common/record/KafkaLZ4Test.java | 137 ++++++++++++++++++ .../src/main/scala/kafka/log/LogCleaner.scala | 2 +- .../kafka/message/ByteBufferMessageSet.scala | 7 +- .../kafka/message/CompressionFactory.scala | 8 +- .../message/MessageCompressionTest.scala | 21 +++ .../kafka/message/MessageWriterTest.scala | 6 +- docs/upgrade.html | 5 + 11 files changed, 289 insertions(+), 73 deletions(-) create mode 100644 clients/src/test/java/org/apache/kafka/common/record/KafkaLZ4Test.java diff --git a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java index afa85a41194d..37d53b81dcba 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java @@ -77,7 +77,7 @@ public Constructor get() throws ClassNotFoundException, NoSuchMethodException { @Override public Constructor get() throws ClassNotFoundException, NoSuchMethodException { return Class.forName("org.apache.kafka.common.record.KafkaLZ4BlockInputStream") - .getConstructor(InputStream.class); + .getConstructor(InputStream.class, Boolean.TYPE); } }); @@ -275,7 +275,7 @@ static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, Comp } } - static public DataInputStream wrapForInput(ByteBufferInputStream buffer, CompressionType type) { + static public DataInputStream wrapForInput(ByteBufferInputStream buffer, CompressionType type, byte messageVersion) { try { switch (type) { case NONE: @@ -291,7 +291,8 @@ static public DataInputStream wrapForInput(ByteBufferInputStream buffer, Compres } case LZ4: try { - InputStream stream = (InputStream) lz4InputStreamSupplier.get().newInstance(buffer); + InputStream stream = (InputStream) lz4InputStreamSupplier.get().newInstance(buffer, + messageVersion == Record.MAGIC_VALUE_V0); return new DataInputStream(stream); } catch (Exception e) { throw new KafkaException(e); diff --git a/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockInputStream.java b/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockInputStream.java index 372d4f4fb430..92718d896bdc 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockInputStream.java +++ b/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockInputStream.java @@ -36,15 +36,14 @@ import net.jpountz.xxhash.XXHashFactory; /** - * A partial implementation of the v1.4.1 LZ4 Frame format. + * A partial implementation of the v1.5.1 LZ4 Frame format. 
* - * @see LZ4 Framing - * Format Spec + * @see LZ4 Frame Format */ public final class KafkaLZ4BlockInputStream extends FilterInputStream { public static final String PREMATURE_EOS = "Stream ended prematurely"; - public static final String NOT_SUPPORTED = "Stream unsupported"; + public static final String NOT_SUPPORTED = "Stream unsupported (invalid magic bytes)"; public static final String BLOCK_HASH_MISMATCH = "Block checksum mismatch"; public static final String DESCRIPTOR_HASH_MISMATCH = "Stream frame descriptor corrupted"; @@ -53,6 +52,7 @@ public final class KafkaLZ4BlockInputStream extends FilterInputStream { private final byte[] buffer; private final byte[] compressedBuffer; private final int maxBlockSize; + private final boolean ignoreFlagDescriptorChecksum; private FLG flg; private BD bd; private int bufferOffset; @@ -63,12 +63,14 @@ public final class KafkaLZ4BlockInputStream extends FilterInputStream { * Create a new {@link InputStream} that will decompress data using the LZ4 algorithm. * * @param in The stream to decompress + * @param ignoreFlagDescriptorChecksum for compatibility with old kafka clients, ignore incorrect HC byte * @throws IOException */ - public KafkaLZ4BlockInputStream(InputStream in) throws IOException { + public KafkaLZ4BlockInputStream(InputStream in, boolean ignoreFlagDescriptorChecksum) throws IOException { super(in); decompressor = LZ4Factory.fastestInstance().safeDecompressor(); checksum = XXHashFactory.fastestInstance().hash32(); + this.ignoreFlagDescriptorChecksum = ignoreFlagDescriptorChecksum; readHeader(); maxBlockSize = bd.getBlockMaximumSize(); buffer = new byte[maxBlockSize]; @@ -78,6 +80,25 @@ public KafkaLZ4BlockInputStream(InputStream in) throws IOException { finished = false; } + /** + * Create a new {@link InputStream} that will decompress data using the LZ4 algorithm. + * + * @param in The stream to decompress + * @throws IOException + */ + public KafkaLZ4BlockInputStream(InputStream in) throws IOException { + this(in, false); + } + + /** + * Check whether KafkaLZ4BlockInputStream is configured to ignore the + * Frame Descriptor checksum, which is useful for compatibility with + * old client implementations that use incorrect checksum calculations. + */ + public boolean ignoreFlagDescriptorChecksum() { + return this.ignoreFlagDescriptorChecksum; + } + /** * Reads the magic number and frame descriptor from the underlying {@link InputStream}. 
* @@ -87,25 +108,35 @@ private void readHeader() throws IOException { byte[] header = new byte[LZ4_MAX_HEADER_LENGTH]; // read first 6 bytes into buffer to check magic and FLG/BD descriptor flags - bufferOffset = 6; - if (in.read(header, 0, bufferOffset) != bufferOffset) { + int headerOffset = 6; + if (in.read(header, 0, headerOffset) != headerOffset) { throw new IOException(PREMATURE_EOS); } - if (MAGIC != Utils.readUnsignedIntLE(header, bufferOffset - 6)) { + if (MAGIC != Utils.readUnsignedIntLE(header, headerOffset - 6)) { throw new IOException(NOT_SUPPORTED); } - flg = FLG.fromByte(header[bufferOffset - 2]); - bd = BD.fromByte(header[bufferOffset - 1]); - // TODO read uncompressed content size, update flg.validate() - // TODO read dictionary id, update flg.validate() + flg = FLG.fromByte(header[headerOffset - 2]); + bd = BD.fromByte(header[headerOffset - 1]); - // check stream descriptor hash - byte hash = (byte) ((checksum.hash(header, 0, bufferOffset, 0) >> 8) & 0xFF); - header[bufferOffset++] = (byte) in.read(); - if (hash != header[bufferOffset - 1]) { - throw new IOException(DESCRIPTOR_HASH_MISMATCH); + if (flg.isContentSizeSet()) { + if (in.read(header, headerOffset, 8) != 8) + throw new IOException(PREMATURE_EOS); + headerOffset += 8; } + + // Final byte of Frame Descriptor is HC checksum + header[headerOffset++] = (byte) in.read(); + + // Old implementations produced incorrect HC checksums + if (ignoreFlagDescriptorChecksum) + return; + + int offset = 4; + int len = headerOffset - offset - 1; // dont include magic bytes or HC + byte hash = (byte) ((checksum.hash(header, offset, len, 0) >> 8) & 0xFF); + if (hash != header[headerOffset - 1]) + throw new IOException(DESCRIPTOR_HASH_MISMATCH); } /** @@ -120,7 +151,8 @@ private void readBlock() throws IOException { // Check for EndMark if (blockSize == 0) { finished = true; - // TODO implement content checksum, update flg.validate() + if (flg.isContentChecksumSet()) + Utils.readUnsignedIntLE(in); // TODO: verify this content checksum return; } else if (blockSize > maxBlockSize) { throw new IOException(String.format("Block size %s exceeded max: %s", blockSize, maxBlockSize)); diff --git a/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockOutputStream.java b/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockOutputStream.java index 7d23f4aa4c43..933b2cfaa30f 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockOutputStream.java +++ b/clients/src/main/java/org/apache/kafka/common/record/KafkaLZ4BlockOutputStream.java @@ -29,10 +29,9 @@ import net.jpountz.xxhash.XXHashFactory; /** - * A partial implementation of the v1.4.1 LZ4 Frame format. + * A partial implementation of the v1.5.1 LZ4 Frame format. * - * @see LZ4 Framing - * Format Spec + * @see LZ4 Frame Format */ public final class KafkaLZ4BlockOutputStream extends FilterOutputStream { @@ -49,6 +48,7 @@ public final class KafkaLZ4BlockOutputStream extends FilterOutputStream { private final LZ4Compressor compressor; private final XXHash32 checksum; + private final boolean useBrokenFlagDescriptorChecksum; private final FLG flg; private final BD bd; private final byte[] buffer; @@ -65,12 +65,15 @@ public final class KafkaLZ4BlockOutputStream extends FilterOutputStream { * values will generate an exception * @param blockChecksum Default: false. When true, a XXHash32 checksum is computed and appended to the stream for * every block of data + * @param useBrokenFlagDescriptorChecksum Default: false. 
When true, writes an incorrect FrameDescriptor checksum + * compatible with older kafka clients. * @throws IOException */ - public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockChecksum) throws IOException { + public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockChecksum, boolean useBrokenFlagDescriptorChecksum) throws IOException { super(out); compressor = LZ4Factory.fastestInstance().fastCompressor(); checksum = XXHashFactory.fastestInstance().hash32(); + this.useBrokenFlagDescriptorChecksum = useBrokenFlagDescriptorChecksum; bd = new BD(blockSize); flg = new FLG(blockChecksum); bufferOffset = 0; @@ -81,6 +84,20 @@ public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockC writeHeader(); } + /** + * Create a new {@link OutputStream} that will compress data using the LZ4 algorithm. + * + * @param out The output stream to compress + * @param blockSize Default: 4. The block size used during compression. 4=64kb, 5=256kb, 6=1mb, 7=4mb. All other + * values will generate an exception + * @param blockChecksum Default: false. When true, a XXHash32 checksum is computed and appended to the stream for + * every block of data + * @throws IOException + */ + public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockChecksum) throws IOException { + this(out, blockSize, blockChecksum, false); + } + /** * Create a new {@link OutputStream} that will compress data using the LZ4 algorithm. * @@ -90,7 +107,7 @@ public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize, boolean blockC * @throws IOException */ public KafkaLZ4BlockOutputStream(OutputStream out, int blockSize) throws IOException { - this(out, blockSize, false); + this(out, blockSize, false, false); } /** @@ -103,6 +120,19 @@ public KafkaLZ4BlockOutputStream(OutputStream out) throws IOException { this(out, BLOCKSIZE_64KB); } + public KafkaLZ4BlockOutputStream(OutputStream out, boolean useBrokenHC) throws IOException { + this(out, BLOCKSIZE_64KB, false, useBrokenHC); + } + + /** + * Check whether KafkaLZ4BlockInputStream is configured to write an + * incorrect Frame Descriptor checksum, which is useful for + * compatibility with old client implementations. + */ + public boolean useBrokenFlagDescriptorChecksum() { + return this.useBrokenFlagDescriptorChecksum; + } + /** * Writes the magic number and frame descriptor to the underlying {@link OutputStream}. 
* @@ -114,10 +144,17 @@ private void writeHeader() throws IOException { buffer[bufferOffset++] = flg.toByte(); buffer[bufferOffset++] = bd.toByte(); // TODO write uncompressed content size, update flg.validate() - // TODO write dictionary id, update flg.validate() + // compute checksum on all descriptor fields - int hash = (checksum.hash(buffer, 0, bufferOffset, 0) >> 8) & 0xFF; - buffer[bufferOffset++] = (byte) hash; + int offset = 4; + int len = bufferOffset - offset; + if (this.useBrokenFlagDescriptorChecksum) { + len += offset; + offset = 0; + } + byte hash = (byte) ((checksum.hash(buffer, offset, len, 0) >> 8) & 0xFF); + buffer[bufferOffset++] = hash; + // write out frame descriptor out.write(buffer, 0, bufferOffset); bufferOffset = 0; @@ -236,8 +273,7 @@ public static class FLG { private static final int VERSION = 1; - private final int presetDictionary; - private final int reserved1; + private final int reserved; private final int contentChecksum; private final int contentSize; private final int blockChecksum; @@ -249,18 +285,16 @@ public FLG() { } public FLG(boolean blockChecksum) { - this(0, 0, 0, 0, blockChecksum ? 1 : 0, 1, VERSION); + this(0, 0, 0, blockChecksum ? 1 : 0, 1, VERSION); } - private FLG(int presetDictionary, - int reserved1, + private FLG(int reserved, int contentChecksum, int contentSize, int blockChecksum, int blockIndependence, int version) { - this.presetDictionary = presetDictionary; - this.reserved1 = reserved1; + this.reserved = reserved; this.contentChecksum = contentChecksum; this.contentSize = contentSize; this.blockChecksum = blockChecksum; @@ -270,16 +304,14 @@ private FLG(int presetDictionary, } public static FLG fromByte(byte flg) { - int presetDictionary = (flg >>> 0) & 1; - int reserved1 = (flg >>> 1) & 1; + int reserved = (flg >>> 0) & 3; int contentChecksum = (flg >>> 2) & 1; int contentSize = (flg >>> 3) & 1; int blockChecksum = (flg >>> 4) & 1; int blockIndependence = (flg >>> 5) & 1; int version = (flg >>> 6) & 3; - return new FLG(presetDictionary, - reserved1, + return new FLG(reserved, contentChecksum, contentSize, blockChecksum, @@ -288,22 +320,13 @@ public static FLG fromByte(byte flg) { } public byte toByte() { - return (byte) (((presetDictionary & 1) << 0) | ((reserved1 & 1) << 1) | ((contentChecksum & 1) << 2) + return (byte) (((reserved & 3) << 0) | ((contentChecksum & 1) << 2) | ((contentSize & 1) << 3) | ((blockChecksum & 1) << 4) | ((blockIndependence & 1) << 5) | ((version & 3) << 6)); } private void validate() { - if (presetDictionary != 0) { - throw new RuntimeException("Preset dictionary is unsupported"); - } - if (reserved1 != 0) { - throw new RuntimeException("Reserved1 field must be 0"); - } - if (contentChecksum != 0) { - throw new RuntimeException("Content checksum is unsupported"); - } - if (contentSize != 0) { - throw new RuntimeException("Content size is unsupported"); + if (reserved != 0) { + throw new RuntimeException("Reserved bits must be 0"); } if (blockIndependence != 1) { throw new RuntimeException("Dependent block stream is unsupported"); @@ -313,10 +336,6 @@ private void validate() { } } - public boolean isPresetDictionarySet() { - return presetDictionary == 1; - } - public boolean isContentChecksumSet() { return contentChecksum == 1; } diff --git a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java index 7175953d6670..fcf7f446a459 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java +++ 
b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java @@ -187,10 +187,10 @@ public ByteBuffer buffer() { public Iterator iterator() { if (writable) { // flip on a duplicate buffer for reading - return new RecordsIterator((ByteBuffer) this.buffer.duplicate().flip(), CompressionType.NONE, false); + return new RecordsIterator((ByteBuffer) this.buffer.duplicate().flip(), false); } else { // do not need to flip for non-writable buffer - return new RecordsIterator(this.buffer.duplicate(), CompressionType.NONE, false); + return new RecordsIterator(this.buffer.duplicate(), false); } } @@ -224,11 +224,11 @@ public static class RecordsIterator extends AbstractIterator { private final ArrayDeque logEntries; private final long absoluteBaseOffset; - public RecordsIterator(ByteBuffer buffer, CompressionType type, boolean shallow) { - this.type = type; + public RecordsIterator(ByteBuffer buffer, boolean shallow) { + this.type = CompressionType.NONE; this.buffer = buffer; this.shallow = shallow; - this.stream = Compressor.wrapForInput(new ByteBufferInputStream(this.buffer), type); + this.stream = new DataInputStream(new ByteBufferInputStream(buffer)); this.logEntries = null; this.absoluteBaseOffset = -1; } @@ -238,7 +238,7 @@ private RecordsIterator(LogEntry entry) { this.type = entry.record().compressionType(); this.buffer = entry.record().value(); this.shallow = true; - this.stream = Compressor.wrapForInput(new ByteBufferInputStream(this.buffer), type); + this.stream = Compressor.wrapForInput(new ByteBufferInputStream(this.buffer), type, entry.record().magic()); long wrapperRecordOffset = entry.offset(); // If relative offset is used, we need to decompress the entire message first to compute // the absolute offset. diff --git a/clients/src/test/java/org/apache/kafka/common/record/KafkaLZ4Test.java b/clients/src/test/java/org/apache/kafka/common/record/KafkaLZ4Test.java new file mode 100644 index 000000000000..37877efe8fad --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/record/KafkaLZ4Test.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.record; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import net.jpountz.xxhash.XXHashFactory; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(value = Parameterized.class) +public class KafkaLZ4Test { + + private final boolean useBrokenFlagDescriptorChecksum; + private final boolean ignoreFlagDescriptorChecksum; + private final byte[] payload; + + public KafkaLZ4Test(boolean useBrokenFlagDescriptorChecksum, boolean ignoreFlagDescriptorChecksum, byte[] payload) { + this.useBrokenFlagDescriptorChecksum = useBrokenFlagDescriptorChecksum; + this.ignoreFlagDescriptorChecksum = ignoreFlagDescriptorChecksum; + this.payload = payload; + } + + @Parameters + public static Collection data() { + byte[] payload = new byte[1000]; + Arrays.fill(payload, (byte) 1); + List values = new ArrayList(); + for (boolean broken : Arrays.asList(false, true)) + for (boolean ignore : Arrays.asList(false, true)) + values.add(new Object[] {broken, ignore, payload}); + return values; + } + + @Test + public void testKafkaLZ4() throws IOException { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + KafkaLZ4BlockOutputStream lz4 = new KafkaLZ4BlockOutputStream(output, this.useBrokenFlagDescriptorChecksum); + lz4.write(this.payload, 0, this.payload.length); + lz4.flush(); + byte[] compressed = output.toByteArray(); + + // Check magic bytes stored as little-endian + int offset = 0; + assertEquals(compressed[offset++], 0x04); + assertEquals(compressed[offset++], 0x22); + assertEquals(compressed[offset++], 0x4D); + assertEquals(compressed[offset++], 0x18); + + // Check flg descriptor + byte flg = compressed[offset++]; + + // 2-bit version must be 01 + int version = (flg >>> 6) & 3; + assertEquals(version, 1); + + // Reserved bits should always be 0 + int reserved = flg & 3; + assertEquals(reserved, 0); + + // Check block descriptor + byte bd = compressed[offset++]; + + // Block max-size + int blockMaxSize = (bd >>> 4) & 7; + // Only supported values are 4 (64KB), 5 (256KB), 6 (1MB), 7 (4MB) + assertTrue(blockMaxSize >= 4); + assertTrue(blockMaxSize <= 7); + + // Multiple reserved bit ranges in block descriptor + reserved = bd & 15; + assertEquals(reserved, 0); + reserved = (bd >>> 7) & 1; + assertEquals(reserved, 0); + + // If flg descriptor sets content size flag + // there are 8 additional bytes before checksum + boolean contentSize = ((flg >>> 3) & 1) != 0; + if (contentSize) + offset += 8; + + // Checksum applies to frame descriptor: flg, bd, and optional contentsize + // so initial offset should be 4 (for magic bytes) + int off = 4; + int len = offset - 4; + + // Initial implementation of checksum incorrectly applied to full header + // including magic bytes + if (this.useBrokenFlagDescriptorChecksum) { + off = 0; + len = offset; + } + + int hash = XXHashFactory.fastestInstance().hash32().hash(compressed, off, len, 0); + + byte hc = compressed[offset++]; + assertEquals(hc, (byte) ((hash >> 8) & 0xFF)); + + ByteArrayInputStream input = new ByteArrayInputStream(compressed); + try { + KafkaLZ4BlockInputStream decompressed = new 
KafkaLZ4BlockInputStream(input, this.ignoreFlagDescriptorChecksum); + byte[] testPayload = new byte[this.payload.length]; + int ret = decompressed.read(testPayload, 0, this.payload.length); + assertEquals(ret, this.payload.length); + assertArrayEquals(this.payload, testPayload); + } catch (IOException e) { + assertTrue(this.useBrokenFlagDescriptorChecksum && !this.ignoreFlagDescriptorChecksum); + } + } +} diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala index e23234bb2fe4..0f742f918b9a 100644 --- a/core/src/main/scala/kafka/log/LogCleaner.scala +++ b/core/src/main/scala/kafka/log/LogCleaner.scala @@ -499,7 +499,7 @@ private[log] class Cleaner(val id: Int, val timestampType = firstMessageOffset.message.timestampType val messageWriter = new MessageWriter(math.min(math.max(MessageSet.messageSetSize(messages) / 2, 1024), 1 << 16)) messageWriter.write(codec = compressionCodec, timestamp = magicAndTimestamp.timestamp, timestampType = timestampType, magicValue = messageFormatVersion) { outputStream => - val output = new DataOutputStream(CompressionFactory(compressionCodec, outputStream)) + val output = new DataOutputStream(CompressionFactory(compressionCodec, messageFormatVersion, outputStream)) try { for (messageOffset <- messageAndOffsets) { val message = messageOffset.message diff --git a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala index 6f38715def5e..677355a0c450 100644 --- a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala +++ b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala @@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.JavaConverters._ +import org.apache.kafka.common.errors.CorruptRecordException import org.apache.kafka.common.errors.InvalidTimestampException import org.apache.kafka.common.record.TimestampType import org.apache.kafka.common.utils.Utils @@ -55,7 +56,7 @@ object ByteBufferMessageSet { var offset = -1L val messageWriter = new MessageWriter(math.min(math.max(MessageSet.messageSetSize(messages) / 2, 1024), 1 << 16)) messageWriter.write(codec = compressionCodec, timestamp = magicAndTimestamp.timestamp, timestampType = timestampType, magicValue = magicAndTimestamp.magic) { outputStream => - val output = new DataOutputStream(CompressionFactory(compressionCodec, outputStream)) + val output = new DataOutputStream(CompressionFactory(compressionCodec, magicAndTimestamp.magic, outputStream)) try { for (message <- messages) { offset = offsetAssigner.nextAbsoluteOffset() @@ -95,7 +96,7 @@ object ByteBufferMessageSet { if (wrapperMessage.payload == null) throw new KafkaException(s"Message payload is null: $wrapperMessage") val inputStream = new ByteBufferBackedInputStream(wrapperMessage.payload) - val compressed = new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, inputStream)) + val compressed = new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, wrapperMessage.magic, inputStream)) var lastInnerOffset = -1L val messageAndOffsets = if (wrapperMessageAndOffset.message.magic > MagicValue_V0) { @@ -107,7 +108,7 @@ object ByteBufferMessageSet { case eofe: EOFException => compressed.close() case ioe: IOException => - throw new KafkaException(ioe) + throw new CorruptRecordException(ioe) } Some(innerMessageAndOffsets) } else None diff --git a/core/src/main/scala/kafka/message/CompressionFactory.scala 
b/core/src/main/scala/kafka/message/CompressionFactory.scala index b047f68fb7f5..e02ed633153f 100644 --- a/core/src/main/scala/kafka/message/CompressionFactory.scala +++ b/core/src/main/scala/kafka/message/CompressionFactory.scala @@ -26,7 +26,7 @@ import org.apache.kafka.common.record.{KafkaLZ4BlockInputStream, KafkaLZ4BlockOu object CompressionFactory { - def apply(compressionCodec: CompressionCodec, stream: OutputStream): OutputStream = { + def apply(compressionCodec: CompressionCodec, messageVersion: Byte, stream: OutputStream): OutputStream = { compressionCodec match { case DefaultCompressionCodec => new GZIPOutputStream(stream) case GZIPCompressionCodec => new GZIPOutputStream(stream) @@ -34,13 +34,13 @@ object CompressionFactory { import org.xerial.snappy.SnappyOutputStream new SnappyOutputStream(stream) case LZ4CompressionCodec => - new KafkaLZ4BlockOutputStream(stream) + new KafkaLZ4BlockOutputStream(stream, messageVersion == Message.MagicValue_V0) case _ => throw new kafka.common.UnknownCodecException("Unknown Codec: " + compressionCodec) } } - def apply(compressionCodec: CompressionCodec, stream: InputStream): InputStream = { + def apply(compressionCodec: CompressionCodec, messageVersion: Byte, stream: InputStream): InputStream = { compressionCodec match { case DefaultCompressionCodec => new GZIPInputStream(stream) case GZIPCompressionCodec => new GZIPInputStream(stream) @@ -48,7 +48,7 @@ object CompressionFactory { import org.xerial.snappy.SnappyInputStream new SnappyInputStream(stream) case LZ4CompressionCodec => - new KafkaLZ4BlockInputStream(stream) + new KafkaLZ4BlockInputStream(stream, messageVersion == Message.MagicValue_V0) case _ => throw new kafka.common.UnknownCodecException("Unknown Codec: " + compressionCodec) } diff --git a/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala b/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala index 53b85eff3596..14385234b093 100644 --- a/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala +++ b/core/src/test/scala/unit/kafka/message/MessageCompressionTest.scala @@ -17,6 +17,9 @@ package kafka.message +import org.apache.kafka.common.record._ + +import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import scala.collection._ import org.scalatest.junit.JUnitSuite @@ -25,6 +28,24 @@ import org.junit.Assert._ class MessageCompressionTest extends JUnitSuite { + @Test + def testLZ4FramingV0() { + val output = CompressionFactory(LZ4CompressionCodec, Message.MagicValue_V0, new ByteArrayOutputStream()) + assertTrue(output.asInstanceOf[KafkaLZ4BlockOutputStream].useBrokenFlagDescriptorChecksum()) + + val input = CompressionFactory(LZ4CompressionCodec, Message.MagicValue_V0, new ByteArrayInputStream(Array[Byte](0x04, 0x22, 0x4D, 0x18, 0x60, 0x40, 0x1A))) + assertTrue(input.asInstanceOf[KafkaLZ4BlockInputStream].ignoreFlagDescriptorChecksum()) + } + + @Test + def testLZ4FramingV1() { + val output = CompressionFactory(LZ4CompressionCodec, Message.MagicValue_V1, new ByteArrayOutputStream()) + assertFalse(output.asInstanceOf[KafkaLZ4BlockOutputStream].useBrokenFlagDescriptorChecksum()) + + val input = CompressionFactory(LZ4CompressionCodec, Message.MagicValue_V1, new ByteArrayInputStream(Array[Byte](0x04, 0x22, 0x4D, 0x18, 0x60, 0x40, -126))) + assertFalse(input.asInstanceOf[KafkaLZ4BlockInputStream].ignoreFlagDescriptorChecksum()) + } + @Test def testSimpleCompressDecompress() { val codecs = mutable.ArrayBuffer[CompressionCodec](GZIPCompressionCodec) diff --git 
a/core/src/test/scala/unit/kafka/message/MessageWriterTest.scala b/core/src/test/scala/unit/kafka/message/MessageWriterTest.scala index 6f0ee1d57b72..a82a553370e0 100644 --- a/core/src/test/scala/unit/kafka/message/MessageWriterTest.scala +++ b/core/src/test/scala/unit/kafka/message/MessageWriterTest.scala @@ -36,7 +36,7 @@ class MessageWriterTest extends JUnitSuite { private def mkMessageWithWriter(key: Array[Byte] = null, bytes: Array[Byte], codec: CompressionCodec): Message = { val writer = new MessageWriter(100) writer.write(key = key, codec = codec, timestamp = Message.NoTimestamp, timestampType = TimestampType.CREATE_TIME, magicValue = Message.MagicValue_V1) { output => - val out = if (codec == NoCompressionCodec) output else CompressionFactory(codec, output) + val out = if (codec == NoCompressionCodec) output else CompressionFactory(codec, Message.MagicValue_V1, output) try { val p = rnd.nextInt(bytes.length) out.write(bytes, 0, p) @@ -53,14 +53,14 @@ class MessageWriterTest extends JUnitSuite { private def compress(bytes: Array[Byte], codec: CompressionCodec): Array[Byte] = { val baos = new ByteArrayOutputStream() - val out = CompressionFactory(codec, baos) + val out = CompressionFactory(codec, Message.MagicValue_V1, baos) out.write(bytes) out.close() baos.toByteArray } private def decompress(compressed: Array[Byte], codec: CompressionCodec): Array[Byte] = { - toArray(CompressionFactory(codec, new ByteArrayInputStream(compressed))) + toArray(CompressionFactory(codec, Message.MagicValue_V1, new ByteArrayInputStream(compressed))) } private def toArray(in: InputStream): Array[Byte] = { diff --git a/docs/upgrade.html b/docs/upgrade.html index b9c4bec012c1..486954c1c62f 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -80,6 +80,11 @@

    Potential breaking changes in 0.10.0.0
  • MirrorMakerMessageHandler no longer exposes the handle(record: MessageAndMetadata[Array[Byte], Array[Byte]]) method as it was never called.
  • The 0.7 KafkaMigrationTool is no longer packaged with Kafka. If you need to migrate from 0.7 to 0.10.0, please migrate to 0.8 first and then follow the documented upgrade process to upgrade from 0.8 to 0.10.0.
  • The new consumer has standardized its APIs to accept java.util.Collection as the sequence type for method parameters. Existing code may have to be updated to work with the 0.10.0 client library.
  • LZ4-compressed message handling was changed to use an interoperable framing specification (LZ4f v1.5.1). To maintain compatibility with old clients, this change only applies to Message format 0.10.0 and later. Clients that Produce/Fetch LZ4-compressed messages using v0/v1 (Message format 0.9.0) should continue to use the 0.9.0 framing implementation. Clients that use Produce/Fetch protocols v2 or later should use interoperable LZ4f framing. A list of interoperable LZ4 libraries is available at http://www.lz4.org/
    Notable changes in 0.10.0.0
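The framing change described in the LZ4 bullet above comes down to how the frame descriptor checksum (HC) is computed. The following is a minimal sketch, not part of the patch: it uses the same net.jpountz XXHash32 API that the patch itself uses, assumes the frame header bytes (magic, FLG, BD and the optional 8-byte content size) have already been assembled in a byte array, and the class and method names are illustrative only.

import net.jpountz.xxhash.XXHashFactory;

final class Lz4DescriptorChecksumSketch {

    // Spec-compliant HC (LZ4f v1.5.1, used for message format 0.10.0 and later):
    // hash only the descriptor bytes, skipping the 4 magic bytes, then keep the
    // second byte of the XXHash32 value.
    static byte correctHc(byte[] header, int descriptorEnd) {
        int offset = 4;                   // skip the 4 magic bytes
        int len = descriptorEnd - offset; // FLG + BD (+ optional 8-byte content size)
        int hash = XXHashFactory.fastestInstance().hash32().hash(header, offset, len, 0);
        return (byte) ((hash >> 8) & 0xFF);
    }

    // Legacy "broken" HC written by old clients (v0/v1 Produce/Fetch, message format 0.9.0):
    // the hash was taken from offset 0, so the magic bytes were incorrectly included.
    static byte brokenHc(byte[] header, int descriptorEnd) {
        int hash = XXHashFactory.fastestInstance().hash32().hash(header, 0, descriptorEnd, 0);
        return (byte) ((hash >> 8) & 0xFF);
    }
}

Per the patch, KafkaLZ4BlockOutputStream chooses between these two computations via its useBrokenFlagDescriptorChecksum flag, and KafkaLZ4BlockInputStream can skip the verification via ignoreFlagDescriptorChecksum, which is how data written with the 0.9.0-era framing remains readable.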
    From a1afbab3edadeed96825deb512a7961fbce26e1f Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Sun, 8 May 2016 10:45:47 -0700 Subject: [PATCH 101/267] KAFKA-3670; ControlledShutdownLeaderSelector should pick the preferred replica as the new leader, if possible Author: Ismael Juma Reviewers: Jun Rao Closes #1338 from ijuma/kafka-3670-controlled-shutdown-leader-selector-preferred-replica (cherry picked from commit 51f7a35c929d9aa04d821098a2266902f9178d7c) Signed-off-by: Jun Rao --- .../controller/PartitionLeaderSelector.scala | 9 +-- ...ControlledShutdownLeaderSelectorTest.scala | 73 +++++++++++++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 core/src/test/scala/unit/kafka/server/ControlledShutdownLeaderSelectorTest.scala diff --git a/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala b/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala index 5eed3829ff3c..9d8b0b6f71e2 100644 --- a/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala +++ b/core/src/main/scala/kafka/controller/PartitionLeaderSelector.scala @@ -185,13 +185,10 @@ class ControlledShutdownLeaderSelector(controllerContext: ControllerContext) val liveAssignedReplicas = assignedReplicas.filter(r => liveOrShuttingDownBrokerIds.contains(r)) val newIsr = currentLeaderAndIsr.isr.filter(brokerId => !controllerContext.shuttingDownBrokerIds.contains(brokerId)) - val newLeaderOpt = newIsr.headOption - newLeaderOpt match { + liveAssignedReplicas.filter(newIsr.contains).headOption match { case Some(newLeader) => - debug("Partition %s : current leader = %d, new leader = %d" - .format(topicAndPartition, currentLeader, newLeader)) - (LeaderAndIsr(newLeader, currentLeaderEpoch + 1, newIsr, currentLeaderIsrZkPathVersion + 1), - liveAssignedReplicas) + debug("Partition %s : current leader = %d, new leader = %d".format(topicAndPartition, currentLeader, newLeader)) + (LeaderAndIsr(newLeader, currentLeaderEpoch + 1, newIsr, currentLeaderIsrZkPathVersion + 1), liveAssignedReplicas) case None => throw new StateChangeFailedException(("No other replicas in ISR %s for %s besides" + " shutting down brokers %s").format(currentLeaderAndIsr.isr.mkString(","), topicAndPartition, controllerContext.shuttingDownBrokerIds.mkString(","))) diff --git a/core/src/test/scala/unit/kafka/server/ControlledShutdownLeaderSelectorTest.scala b/core/src/test/scala/unit/kafka/server/ControlledShutdownLeaderSelectorTest.scala new file mode 100644 index 000000000000..f032eb68524b --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/ControlledShutdownLeaderSelectorTest.scala @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package kafka.server + +import kafka.api.LeaderAndIsr +import kafka.common.TopicAndPartition +import kafka.controller.{ControlledShutdownLeaderSelector, ControllerContext} +import org.easymock.EasyMock +import org.junit.{Assert, Test} +import Assert._ +import kafka.cluster.Broker +import kafka.utils.ZkUtils + +import scala.collection.mutable + +class ControlledShutdownLeaderSelectorTest { + + @Test + def testSelectLeader() { + val topicPartition = TopicAndPartition("topic", 1) + val assignment = Seq(6, 5, 4, 3, 2, 1) + val preferredReplicaId = assignment.head + + val firstIsr = List(1, 3, 6) + val firstLeader = 1 + + val zkUtils = EasyMock.mock(classOf[ZkUtils]) + val controllerContext = new ControllerContext(zkUtils, zkSessionTimeout = 1000) + controllerContext.liveBrokers = assignment.map(Broker(_, Map.empty, None)).toSet + controllerContext.shuttingDownBrokerIds = mutable.Set(2, 3) + controllerContext.partitionReplicaAssignment = mutable.Map(topicPartition -> assignment) + + val leaderSelector = new ControlledShutdownLeaderSelector(controllerContext) + val firstLeaderAndIsr = new LeaderAndIsr(firstLeader, firstIsr) + val (secondLeaderAndIsr, secondReplicas) = leaderSelector.selectLeader(topicPartition, firstLeaderAndIsr) + + assertEquals(preferredReplicaId, secondLeaderAndIsr.leader) + assertEquals(Seq(1, 6), secondLeaderAndIsr.isr) + assertEquals(1, secondLeaderAndIsr.zkVersion) + assertEquals(1, secondLeaderAndIsr.leaderEpoch) + assertEquals(assignment, secondReplicas) + + controllerContext.shuttingDownBrokerIds += preferredReplicaId + + val deadBrokerId = 2 + controllerContext.liveBrokers = controllerContext.liveOrShuttingDownBrokers.filter(_.id != deadBrokerId) + controllerContext.shuttingDownBrokerIds -= deadBrokerId + + val (thirdLeaderAndIsr, thirdReplicas) = leaderSelector.selectLeader(topicPartition, secondLeaderAndIsr) + + assertEquals(1, thirdLeaderAndIsr.leader) + assertEquals(Seq(1), thirdLeaderAndIsr.isr) + assertEquals(2, thirdLeaderAndIsr.zkVersion) + assertEquals(2, thirdLeaderAndIsr.leaderEpoch) + assertEquals(Seq(6, 5, 4, 3, 1), thirdReplicas) + + } + +} From 9f86c62a845c7e6a6f100dd6f22af12118969785 Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Sun, 8 May 2016 22:27:58 +0100 Subject: [PATCH 102/267] KAFKA-3579; Reference both old and new consumer properties in `TopicCommand` Add references to the new consumer property 'max.partition.fetch.bytes' along with the old consumer property 'fetch.message.max.bytes' in the corresponding warning messages of TopicCommand. Also, create and leverage a static variable for the default value of the new consumer property. Also, use 'DEFAULT_...' for default propoerty constant names in the code instead of '..._DEFAULT'. 
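Stepping back to the KAFKA-3670 change above: the rule it introduces is "prefer the first assigned replica that is alive and still in the shrunken ISR" instead of "take the head of the shrunken ISR". The following is a minimal sketch, not part of the patch, assuming plain integer broker ids; the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;

final class ControlledShutdownLeaderChoiceSketch {

    static Optional<Integer> selectLeader(List<Integer> assignedReplicas,
                                          List<Integer> isr,
                                          Set<Integer> liveOrShuttingDownBrokers,
                                          Set<Integer> shuttingDownBrokers) {
        // Shrink the ISR by dropping brokers that are shutting down (order preserved).
        List<Integer> newIsr = new ArrayList<>();
        for (int brokerId : isr)
            if (!shuttingDownBrokers.contains(brokerId))
                newIsr.add(brokerId);

        // Old behaviour: return newIsr.isEmpty() ? Optional.empty() : Optional.of(newIsr.get(0));
        // New behaviour: walk the assignment (preferred replica first) and take the first
        // replica that is live (or shutting down gracefully) and still in the shrunken ISR.
        for (int replica : assignedReplicas)
            if (liveOrShuttingDownBrokers.contains(replica) && newIsr.contains(replica))
                return Optional.of(replica);

        return Optional.empty(); // the real selector raises StateChangeFailedException here
    }
}

With the fixture used in ControlledShutdownLeaderSelectorTest above (assignment 6, 5, 4, 3, 2, 1; ISR 1, 3, 6; brokers 2 and 3 shutting down), this yields 6, the preferred replica, where the old rule would have picked 1.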
Author: Vahid Hashemian Reviewers: Manikumar reddy O , Ashish Singh , Grant Henke , Ismael Juma Closes #1239 from vahidhashemian/KAFKA-3579 (cherry picked from commit 62b9fa22545a8e254b4ffd07ddc5bd3315542548) Signed-off-by: Ismael Juma --- .../clients/consumer/ConsumerConfig.java | 7 ++-- .../internals/ConsumerCoordinatorTest.java | 14 ++++---- .../main/scala/kafka/admin/TopicCommand.scala | 35 +++++++++++-------- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java index 69c4a3620925..6523d184585c 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java @@ -115,6 +115,7 @@ public class ConsumerConfig extends AbstractConfig { */ public static final String MAX_PARTITION_FETCH_BYTES_CONFIG = "max.partition.fetch.bytes"; private static final String MAX_PARTITION_FETCH_BYTES_DOC = "The maximum amount of data per-partition the server will return. The maximum total memory used for a request will be #partitions * max.partition.fetch.bytes. This size must be at least as large as the maximum message size the server allows or else it is possible for the producer to send messages larger than the consumer can fetch. If that happens, the consumer can get stuck trying to fetch a large message on a certain partition."; + public static final int DEFAULT_MAX_PARTITION_FETCH_BYTES = 1 * 1024 * 1024; /** send.buffer.bytes */ public static final String SEND_BUFFER_CONFIG = CommonClientConfigs.SEND_BUFFER_CONFIG; @@ -184,7 +185,7 @@ public class ConsumerConfig extends AbstractConfig { public static final String EXCLUDE_INTERNAL_TOPICS_CONFIG = "exclude.internal.topics"; private static final String EXCLUDE_INTERNAL_TOPICS_DOC = "Whether records from internal topics (such as offsets) should be exposed to the consumer. 
" + "If set to true the only way to receive records from an internal topic is subscribing to it."; - public static final boolean EXCLUDE_INTERNAL_TOPICS_DEFAULT = true; + public static final boolean DEFAULT_EXCLUDE_INTERNAL_TOPICS = true; static { CONFIG = new ConfigDef().define(BOOTSTRAP_SERVERS_CONFIG, @@ -231,7 +232,7 @@ public class ConsumerConfig extends AbstractConfig { CommonClientConfigs.CLIENT_ID_DOC) .define(MAX_PARTITION_FETCH_BYTES_CONFIG, Type.INT, - 1 * 1024 * 1024, + DEFAULT_MAX_PARTITION_FETCH_BYTES, atLeast(0), Importance.HIGH, MAX_PARTITION_FETCH_BYTES_DOC) @@ -332,7 +333,7 @@ public class ConsumerConfig extends AbstractConfig { MAX_POLL_RECORDS_DOC) .define(EXCLUDE_INTERNAL_TOPICS_CONFIG, Type.BOOLEAN, - EXCLUDE_INTERNAL_TOPICS_DEFAULT, + DEFAULT_EXCLUDE_INTERNAL_TOPICS, Importance.MEDIUM, EXCLUDE_INTERNAL_TOPICS_DOC) diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java index 82a854afcaf2..fc5c929ef05c 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java @@ -114,7 +114,7 @@ public void setup() { this.partitionAssignor.clear(); client.setNode(node); - this.coordinator = buildCoordinator(metrics, assignors, ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, autoCommitEnabled); + this.coordinator = buildCoordinator(metrics, assignors, ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, autoCommitEnabled); } @After @@ -735,7 +735,7 @@ public void testAutoCommitDynamicAssignment() { final String consumerId = "consumer"; ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, true); subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); subscriptions.needReassignment(); @@ -761,7 +761,7 @@ public void testAutoCommitDynamicAssignmentRebalance() { final String consumerId = "consumer"; ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, true); subscriptions.subscribe(Arrays.asList(topicName), rebalanceListener); subscriptions.needReassignment(); @@ -789,7 +789,7 @@ public void testAutoCommitDynamicAssignmentRebalance() { @Test public void testAutoCommitManualAssignment() { ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, true); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 100); @@ -807,7 +807,7 @@ public void testAutoCommitManualAssignment() { @Test public void testAutoCommitManualAssignmentCoordinatorUnknown() { ConsumerCoordinator coordinator = buildCoordinator(new Metrics(), assignors, - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, true); + ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, true); subscriptions.assignFromUser(Arrays.asList(tp)); subscriptions.seek(tp, 100); @@ -1096,7 +1096,7 @@ public void testProtocolMetadataOrder() { try (Metrics metrics = new Metrics(time)) { ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(roundRobin, range), - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, false); + 
ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, false); List metadata = coordinator.metadata(); assertEquals(2, metadata.size()); assertEquals(roundRobin.name(), metadata.get(0).name()); @@ -1105,7 +1105,7 @@ public void testProtocolMetadataOrder() { try (Metrics metrics = new Metrics(time)) { ConsumerCoordinator coordinator = buildCoordinator(metrics, Arrays.asList(range, roundRobin), - ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_DEFAULT, false); + ConsumerConfig.DEFAULT_EXCLUDE_INTERNAL_TOPICS, false); List metadata = coordinator.metadata(); assertEquals(2, metadata.size()); assertEquals(range.name(), metadata.get(0).name()); diff --git a/core/src/main/scala/kafka/admin/TopicCommand.scala b/core/src/main/scala/kafka/admin/TopicCommand.scala index 9f1014f286b7..ee9cf65e81da 100755 --- a/core/src/main/scala/kafka/admin/TopicCommand.scala +++ b/core/src/main/scala/kafka/admin/TopicCommand.scala @@ -20,7 +20,7 @@ package kafka.admin import java.util.Properties import joptsimple._ import kafka.common.{AdminCommandFailedException, Topic, TopicExistsException} -import kafka.consumer.{ConsumerConfig, Whitelist} +import kafka.consumer.{ConsumerConfig => OldConsumerConfig, Whitelist} import kafka.coordinator.GroupCoordinator import kafka.log.{Defaults, LogConfig} import kafka.server.ConfigType @@ -31,6 +31,7 @@ import org.apache.kafka.common.security.JaasUtils import org.apache.kafka.common.utils.Utils import scala.collection.JavaConversions._ import scala.collection._ +import org.apache.kafka.clients.consumer.{ConsumerConfig => NewConsumerConfig} import org.apache.kafka.common.internals.TopicConstants @@ -379,29 +380,33 @@ object TopicCommand extends Logging { def shortMessageSizeWarning(maxMessageBytes: Int): String = { "\n\n" + "*****************************************************************************************************\n" + - "*** WARNING: you are creating a topic where the max.message.bytes is greater than the consumer ***\n" + - "*** default. This operation is potentially dangerous. Consumers will get failures if their ***\n" + - "*** fetch.message.max.bytes < the value you are using. ***\n" + + "*** WARNING: you are creating a topic where the max.message.bytes is greater than the broker's ***\n" + + "*** default max.message.bytes. This operation is potentially dangerous. Consumers will get ***\n" + + s"*** failures if their fetch.message.max.bytes (old consumer) or ${NewConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG} ***\n"+ + "*** (new consumer) < the value you are using. ***\n" + "*****************************************************************************************************\n" + s"- value set here: $maxMessageBytes\n" + - s"- Default Consumer fetch.message.max.bytes: ${ConsumerConfig.FetchSize}\n" + + s"- Default Old Consumer fetch.message.max.bytes: ${OldConsumerConfig.FetchSize}\n" + + s"- Default New Consumer ${NewConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG}: ${NewConsumerConfig.DEFAULT_MAX_PARTITION_FETCH_BYTES}\n" + s"- Default Broker max.message.bytes: ${kafka.server.Defaults.MessageMaxBytes}\n\n" } def longMessageSizeWarning(maxMessageBytes: Int): String = { "\n\n" + - "****************************************************************************************************\n" + - "*** WARNING: you are creating a topic where the max.message.bytes is greater than the broker ***\n" + - "*** default. This operation is dangerous. 
There are two potential side effects: ***\n" + - "*** - Consumers will get failures if their fetch.message.max.bytes < the value you are using ***\n" + - "*** - Producer requests larger than replica.fetch.max.bytes will not replicate and hence have ***\n" + - "*** a higher risk of data loss ***\n" + - "*** You should ensure both of these settings are greater than the value set here before using ***\n" + - "*** this topic. ***\n" + - "****************************************************************************************************\n" + + "*****************************************************************************************************\n" + + "*** WARNING: you are creating a topic where the max.message.bytes is greater than the broker's ***\n" + + "*** default max.message.bytes. This operation is dangerous. There are two potential side effects: ***\n" + + "*** - Consumers will get failures if their fetch.message.max.bytes (old consumer) or ***\n" + + s"*** ${NewConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG} (new consumer) < the value you are using ***\n" + + "*** - Producer requests larger than replica.fetch.max.bytes will not replicate and hence have ***\n" + + "*** a higher risk of data loss ***\n" + + "*** You should ensure both of these settings are greater than the value set here before using ***\n" + + "*** this topic. ***\n" + + "*****************************************************************************************************\n" + s"- value set here: $maxMessageBytes\n" + s"- Default Broker replica.fetch.max.bytes: ${kafka.server.Defaults.ReplicaFetchMaxBytes}\n" + s"- Default Broker max.message.bytes: ${kafka.server.Defaults.MessageMaxBytes}\n" + - s"- Default Consumer fetch.message.max.bytes: ${ConsumerConfig.FetchSize}\n\n" + s"- Default Old Consumer fetch.message.max.bytes: ${OldConsumerConfig.FetchSize}\n" + + s"- Default New Consumer ${NewConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG}: ${NewConsumerConfig.DEFAULT_MAX_PARTITION_FETCH_BYTES}\n\n" } } From 120baf9781e84defc6ac337aec6efc7877adcff9 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Sun, 8 May 2016 22:26:26 -0700 Subject: [PATCH 103/267] KAFKA 3671: Move topics to SinkConnectorConfig Author: Liquan Pei Reviewers: Dan Norwood , Ewen Cheslack-Postava Closes #1335 from Ishiihara/sink-connector-config (cherry picked from commit d1bb2b9df105169c47f16d057c887acb7f8fe818) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/AbstractHerder.java | 11 +++- .../connect/runtime/ConnectorConfig.java | 20 +++---- .../connect/runtime/SinkConnectorConfig.java | 46 ++++++++++++++++ .../runtime/SourceConnectorConfig.java | 27 +++++++++ .../distributed/DistributedHerder.java | 13 ++++- .../runtime/standalone/StandaloneHerder.java | 19 +++++-- .../kafka/connect/runtime/WorkerTest.java | 8 +-- .../distributed/DistributedHerderTest.java | 10 ++-- .../standalone/StandaloneHerderTest.java | 55 +++++++++++-------- 9 files changed, 155 insertions(+), 54 deletions(-) create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java index ee2085987228..43fc4d1e3ebf 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java +++ 
b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java @@ -29,6 +29,7 @@ import org.apache.kafka.connect.runtime.rest.entities.ConfigValueInfo; import org.apache.kafka.connect.runtime.rest.entities.ConnectorPluginInfo; import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo; +import org.apache.kafka.connect.source.SourceConnector; import org.apache.kafka.connect.storage.ConfigBackingStore; import org.apache.kafka.connect.storage.StatusBackingStore; import org.apache.kafka.connect.tools.VerifiableSinkConnector; @@ -232,10 +233,14 @@ public ConnectorStateInfo.TaskState taskStatus(ConnectorTaskId id) { @Override public ConfigInfos validateConfigs(String connType, Map connectorConfig) { - ConfigDef connectorConfigDef = ConnectorConfig.configDef(); - List connectorConfigValues = connectorConfigDef.validate(connectorConfig); - Connector connector = getConnector(connType); + ConfigDef connectorConfigDef; + if (connector instanceof SourceConnector) { + connectorConfigDef = SourceConnectorConfig.configDef(); + } else { + connectorConfigDef = SinkConnectorConfig.configDef(); + } + List connectorConfigValues = connectorConfigDef.validate(connectorConfig); Config config = connector.validate(connectorConfig); ConfigDef configDef = connector.config(); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java index e4395523263f..0cbfe214caab 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java @@ -22,12 +22,12 @@ import org.apache.kafka.common.config.ConfigDef.Importance; import org.apache.kafka.common.config.ConfigDef.Type; import org.apache.kafka.common.config.ConfigDef.Width; -import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; - import java.util.HashMap; import java.util.Map; +import static org.apache.kafka.common.config.ConfigDef.Range.atLeast; + /** *

    * Configuration options for Connectors. These only include Kafka Connect system-level configuration @@ -40,7 +40,7 @@ *

    */ public class ConnectorConfig extends AbstractConfig { - private static final String COMMON_GROUP = "Common"; + protected static final String COMMON_GROUP = "Common"; public static final String NAME_CONFIG = "name"; private static final String NAME_DOC = "Globally unique name to use for this connector."; @@ -60,19 +60,13 @@ public class ConnectorConfig extends AbstractConfig { private static final String TASK_MAX_DISPLAY = "Tasks max"; - public static final String TOPICS_CONFIG = "topics"; - private static final String TOPICS_DOC = ""; - public static final String TOPICS_DEFAULT = ""; - private static final String TOPICS_DISPLAY = "Topics"; - - private static ConfigDef config; + protected static ConfigDef config; static { config = new ConfigDef() .define(NAME_CONFIG, Type.STRING, Importance.HIGH, NAME_DOC, COMMON_GROUP, 1, Width.MEDIUM, NAME_DISPLAY) .define(CONNECTOR_CLASS_CONFIG, Type.STRING, Importance.HIGH, CONNECTOR_CLASS_DOC, COMMON_GROUP, 2, Width.LONG, CONNECTOR_CLASS_DISPLAY) - .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, atLeast(TASKS_MIN_CONFIG), Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY) - .define(TOPICS_CONFIG, Type.LIST, TOPICS_DEFAULT, Importance.HIGH, TOPICS_DOC, COMMON_GROUP, 4, Width.LONG, TOPICS_DISPLAY); + .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, atLeast(TASKS_MIN_CONFIG), Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY); } public static ConfigDef configDef() { @@ -86,4 +80,8 @@ public ConnectorConfig() { public ConnectorConfig(Map props) { super(config, props); } + + public ConnectorConfig(ConfigDef subClassConfig, Map props) { + super(subClassConfig, props); + } } diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java new file mode 100644 index 000000000000..cbfc6d1a6405 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +package org.apache.kafka.connect.runtime; + +import org.apache.kafka.common.config.ConfigDef; + +import java.util.HashMap; +import java.util.Map; + +/** + * Configuration needed for all sink connectors + */ + +public class SinkConnectorConfig extends ConnectorConfig { + + public static final String TOPICS_CONFIG = "topics"; + private static final String TOPICS_DOC = ""; + public static final String TOPICS_DEFAULT = ""; + private static final String TOPICS_DISPLAY = "Topics"; + + static ConfigDef config = ConnectorConfig.configDef() + .define(TOPICS_CONFIG, ConfigDef.Type.LIST, TOPICS_DEFAULT, ConfigDef.Importance.HIGH, TOPICS_DOC, COMMON_GROUP, 4, ConfigDef.Width.LONG, TOPICS_DISPLAY); + + public SinkConnectorConfig() { + this(new HashMap()); + } + + public SinkConnectorConfig(Map props) { + super(config, props); + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java new file mode 100644 index 000000000000..ca9219f5449f --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +package org.apache.kafka.connect.runtime; + +import java.util.Map; + +public class SourceConnectorConfig extends ConnectorConfig { + + public SourceConnectorConfig(Map props) { + super(config, props); + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java index 037eba742816..a2beff3b4409 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java @@ -28,6 +28,8 @@ import org.apache.kafka.connect.runtime.AbstractHerder; import org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.runtime.HerderConnectorContext; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.connect.runtime.SourceConnectorConfig; import org.apache.kafka.connect.runtime.TargetState; import org.apache.kafka.connect.runtime.TaskConfig; import org.apache.kafka.connect.runtime.Worker; @@ -827,10 +829,15 @@ private void reconfigureConnector(final String connName, final Callback cb } Map configs = configState.connectorConfig(connName); - ConnectorConfig connConfig = new ConnectorConfig(configs); + + ConnectorConfig connConfig; List sinkTopics = null; - if (worker.isSinkConnector(connName)) - sinkTopics = connConfig.getList(ConnectorConfig.TOPICS_CONFIG); + if (worker.isSinkConnector(connName)) { + connConfig = new SinkConnectorConfig(configs); + sinkTopics = connConfig.getList(SinkConnectorConfig.TOPICS_CONFIG); + } else { + connConfig = new SourceConnectorConfig(configs); + } final List> taskProps = worker.connectorTaskConfigs(connName, connConfig.getInt(ConnectorConfig.TASKS_MAX_CONFIG), sinkTopics); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java index 2316baefb365..8dbda74b8e7a 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java @@ -23,6 +23,8 @@ import org.apache.kafka.connect.runtime.AbstractHerder; import org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.runtime.HerderConnectorContext; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.connect.runtime.SourceConnectorConfig; import org.apache.kafka.connect.runtime.TargetState; import org.apache.kafka.connect.runtime.TaskConfig; import org.apache.kafka.connect.runtime.Worker; @@ -251,11 +253,20 @@ private String startConnector(Map connectorProps) { private List> recomputeTaskConfigs(String connName) { Map config = configState.connectorConfig(connName); - ConnectorConfig connConfig = new ConnectorConfig(config); - return worker.connectorTaskConfigs(connName, - connConfig.getInt(ConnectorConfig.TASKS_MAX_CONFIG), - connConfig.getList(ConnectorConfig.TOPICS_CONFIG)); + ConnectorConfig 
connConfig; + if (worker.isSinkConnector(connName)) { + connConfig = new SinkConnectorConfig(config); + return worker.connectorTaskConfigs(connName, + connConfig.getInt(ConnectorConfig.TASKS_MAX_CONFIG), + connConfig.getList(SinkConnectorConfig.TOPICS_CONFIG)); + } else { + connConfig = new SourceConnectorConfig(config); + return worker.connectorTaskConfigs(connName, + connConfig.getInt(ConnectorConfig.TASKS_MAX_CONFIG), + null); + } + } private void createConnectorTasks(String connName, TargetState initialState) { diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java index 2004c993a2c6..ec4f0253c1e3 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java @@ -97,7 +97,7 @@ public void testStartAndStopConnector() throws Exception { EasyMock.expect(connector.version()).andReturn("1.0"); Map props = new HashMap<>(); - props.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + props.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1"); props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID); props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, WorkerTestConnector.class.getName()); @@ -155,7 +155,7 @@ public void testAddConnectorByAlias() throws Exception { EasyMock.expect(connector.version()).andReturn("1.0"); Map props = new HashMap<>(); - props.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + props.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1"); props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID); props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, "WorkerTestConnector"); @@ -208,7 +208,7 @@ public void testAddConnectorByShortAlias() throws Exception { EasyMock.expect(connector.version()).andReturn("1.0"); Map props = new HashMap<>(); - props.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + props.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1"); props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID); props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, "WorkerTest"); @@ -274,7 +274,7 @@ public void testReconfigureConnectorTasks() throws Exception { EasyMock.expect(connector.version()).andReturn("1.0"); Map props = new HashMap<>(); - props.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + props.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1"); props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID); props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, WorkerTestConnector.class.getName()); diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java index e62b66366cb2..fbccc55963e7 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java @@ -24,6 +24,7 @@ import org.apache.kafka.connect.errors.NotFoundException; import org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.runtime.Herder; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; import org.apache.kafka.connect.runtime.TargetState; import 
org.apache.kafka.connect.runtime.TaskConfig; import org.apache.kafka.connect.runtime.Worker; @@ -96,18 +97,18 @@ public class DistributedHerderTest { static { CONN1_CONFIG.put(ConnectorConfig.NAME_CONFIG, CONN1); CONN1_CONFIG.put(ConnectorConfig.TASKS_MAX_CONFIG, MAX_TASKS.toString()); - CONN1_CONFIG.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + CONN1_CONFIG.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); CONN1_CONFIG.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, BogusSourceConnector.class.getName()); } private static final Map CONN1_CONFIG_UPDATED = new HashMap<>(CONN1_CONFIG); static { - CONN1_CONFIG_UPDATED.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar,baz"); + CONN1_CONFIG_UPDATED.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar,baz"); } private static final Map CONN2_CONFIG = new HashMap<>(); static { CONN2_CONFIG.put(ConnectorConfig.NAME_CONFIG, CONN2); CONN2_CONFIG.put(ConnectorConfig.TASKS_MAX_CONFIG, MAX_TASKS.toString()); - CONN2_CONFIG.put(ConnectorConfig.TOPICS_CONFIG, "foo,bar"); + CONN2_CONFIG.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar"); CONN2_CONFIG.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, BogusSourceConnector.class.getName()); } private static final Map TASK_CONFIG = new HashMap<>(); @@ -935,9 +936,6 @@ public Object answer() throws Throwable { herder.tick(); assertTrue(connectorConfigCb.isDone()); assertEquals(CONN1_CONFIG_UPDATED, connectorConfigCb.get()); - // The config passed to Worker should - assertEquals(Arrays.asList("foo", "bar", "baz"), - capturedUpdatedConfig.getValue().getList(ConnectorConfig.TOPICS_CONFIG)); PowerMock.verifyAll(); } diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java index 10e51946cd11..e70b968d420c 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java @@ -92,7 +92,7 @@ public void testCreateSourceConnector() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); PowerMock.verifyAll(); } @@ -109,8 +109,8 @@ public void testCreateConnectorAlreadyExists() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); PowerMock.verifyAll(); } @@ -122,7 +122,7 @@ public void testCreateSinkConnector() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSinkConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSinkConnector.class, true), false, createCallback); PowerMock.verifyAll(); } @@ -139,7 +139,7 @@ public void 
testDestroyConnector() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback> futureCb = new FutureCallback<>(); herder.putConnectorConfig(CONNECTOR_NAME, null, true, futureCb); futureCb.get(1000L, TimeUnit.MILLISECONDS); @@ -164,13 +164,13 @@ public void testRestartConnector() throws Exception { worker.stopConnector(CONNECTOR_NAME); EasyMock.expectLastCall(); - worker.startConnector(EasyMock.eq(new ConnectorConfig(connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class))), + worker.startConnector(EasyMock.eq(new ConnectorConfig(connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false))), EasyMock.anyObject(HerderConnectorContext.class), EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); EasyMock.expectLastCall(); PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartConnector(CONNECTOR_NAME, cb); @@ -191,7 +191,7 @@ public void testRestartConnectorFailureOnStop() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartConnector(CONNECTOR_NAME, cb); @@ -213,13 +213,13 @@ public void testRestartConnectorFailureOnStart() throws Exception { EasyMock.expectLastCall(); RuntimeException e = new RuntimeException(); - worker.startConnector(EasyMock.eq(new ConnectorConfig(connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class))), + worker.startConnector(EasyMock.eq(new ConnectorConfig(connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false))), EasyMock.anyObject(HerderConnectorContext.class), EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); EasyMock.expectLastCall().andThrow(e); PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartConnector(CONNECTOR_NAME, cb); @@ -247,7 +247,7 @@ public void testRestartTask() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartTask(taskId, cb); @@ -269,7 +269,7 @@ public void testRestartTaskFailureOnStop() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, 
false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartTask(taskId, cb); @@ -297,7 +297,7 @@ public void testRestartTaskFailureOnStart() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); FutureCallback cb = new FutureCallback<>(); herder.restartTask(taskId, cb); @@ -325,7 +325,7 @@ public void testCreateAndStop() throws Exception { PowerMock.replayAll(); - herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class), false, createCallback); + herder.putConnectorConfig(CONNECTOR_NAME, connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false), false, createCallback); herder.stop(); PowerMock.verifyAll(); @@ -333,7 +333,7 @@ public void testCreateAndStop() throws Exception { @Test public void testAccessors() throws Exception { - Map connConfig = connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class); + Map connConfig = connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false); Callback> listConnectorsCb = PowerMock.createMock(Callback.class); Callback connectorInfoCb = PowerMock.createMock(Callback.class); @@ -388,7 +388,7 @@ public void testAccessors() throws Exception { @Test public void testPutConnectorConfig() throws Exception { - Map connConfig = connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class); + Map connConfig = connectorConfig(CONNECTOR_NAME, BogusSourceConnector.class, false); Map newConnConfig = new HashMap<>(connConfig); newConnConfig.put("foo", "bar"); @@ -410,8 +410,10 @@ public void testPutConnectorConfig() throws Exception { EasyMock.expectLastCall(); EasyMock.expect(worker.isRunning(CONNECTOR_NAME)).andReturn(true); // Generate same task config, which should result in no additional action to restart tasks - EasyMock.expect(worker.connectorTaskConfigs(CONNECTOR_NAME, DEFAULT_MAX_TASKS, TOPICS_LIST)) + EasyMock.expect(worker.connectorTaskConfigs(CONNECTOR_NAME, DEFAULT_MAX_TASKS, null)) .andReturn(Collections.singletonList(taskConfig(BogusSourceTask.class, false))); + worker.isSinkConnector(CONNECTOR_NAME); + EasyMock.expectLastCall().andReturn(false); ConnectorInfo newConnInfo = new ConnectorInfo(CONNECTOR_NAME, newConnConfig, Arrays.asList(new ConnectorTaskId(CONNECTOR_NAME, 0))); putConnectorConfigCb.onCompletion(null, new Herder.Created<>(false, newConnInfo)); EasyMock.expectLastCall(); @@ -448,10 +450,11 @@ private void expectAdd(String name, Class connClass, Class taskClass, boolean sink) throws Exception { - Map connectorProps = connectorConfig(name, connClass); + + Map connectorProps = connectorConfig(name, connClass, sink); worker.startConnector(EasyMock.eq(new ConnectorConfig(connectorProps)), EasyMock.anyObject(HerderConnectorContext.class), - EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); + EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); EasyMock.expectLastCall(); EasyMock.expect(worker.isRunning(name)).andReturn(true); @@ -462,11 +465,15 @@ private void expectAdd(String name, // And we should instantiate the tasks. 
For a sink task, we should see added properties for // the input topic partitions Map generatedTaskProps = taskConfig(taskClass, sink); - EasyMock.expect(worker.connectorTaskConfigs(CONNECTOR_NAME, DEFAULT_MAX_TASKS, TOPICS_LIST)) - .andReturn(Collections.singletonList(generatedTaskProps)); + + EasyMock.expect(worker.connectorTaskConfigs(CONNECTOR_NAME, DEFAULT_MAX_TASKS, sink ? TOPICS_LIST : null)) + .andReturn(Collections.singletonList(generatedTaskProps)); worker.startTask(new ConnectorTaskId(CONNECTOR_NAME, 0), new TaskConfig(generatedTaskProps), herder, TargetState.STARTED); EasyMock.expectLastCall(); + + worker.isSinkConnector(CONNECTOR_NAME); + PowerMock.expectLastCall().andReturn(sink); } private void expectStop() { @@ -483,11 +490,13 @@ private void expectDestroy() { expectStop(); } - private static HashMap connectorConfig(String name, Class connClass) { + private static HashMap connectorConfig(String name, Class connClass, boolean sink) { HashMap connectorProps = new HashMap<>(); connectorProps.put(ConnectorConfig.NAME_CONFIG, name); - connectorProps.put(SinkConnector.TOPICS_CONFIG, TOPICS_LIST_STR); connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, connClass.getName()); + if (sink) { + connectorProps.put(SinkConnector.TOPICS_CONFIG, TOPICS_LIST_STR); + } return connectorProps; } From 76fa376e23e3442d4a57d239d696632d521279a6 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Sun, 8 May 2016 23:50:43 -0700 Subject: [PATCH 104/267] KAFKA-3673: Connect tests don't handle concurrent config changes Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1340 from Ishiihara/connect-test-failure (cherry picked from commit dbafc631ad78c96f85361a3d5e1c4d203cedb26f) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/services/connect.py | 38 ++++++++++--------- .../tests/connect/connect_rest_test.py | 25 ++++++------ tests/kafkatest/utils/util.py | 12 ++++++ 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index 1eb2dd58c646..cf67c301b09e 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -22,6 +22,7 @@ from ducktape.errors import DucktapeError from ducktape.services.service import Service from ducktape.utils.util import wait_until +from kafkatest.utils.util import retry_on_exception from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin @@ -102,31 +103,30 @@ def clean_node(self, node): def config_filenames(self): return [os.path.join(self.PERSISTENT_ROOT, "connect-connector-" + str(idx) + ".properties") for idx, template in enumerate(self.connector_config_templates or [])] + def list_connectors(self, node=None, retries=0, retry_backoff=.01): + return self._rest_with_retry('/connectors', node=node, retries=retries, retry_backoff=retry_backoff) - def list_connectors(self, node=None): - return self._rest('/connectors', node=node) - - def create_connector(self, config, node=None): + def create_connector(self, config, node=None, retries=0, retry_backoff=.01): create_request = { 'name': config['name'], 'config': config } - return self._rest('/connectors', create_request, node=node, method="POST") + return self._rest_with_retry('/connectors', create_request, node=node, method="POST", retries=retries, retry_backoff=retry_backoff) - def get_connector(self, name, node=None): - return self._rest('/connectors/' + name, node=node) + def get_connector(self, name, node=None, retries=0, retry_backoff=.01): + return 
self._rest_with_retry('/connectors/' + name, node=node, retries=retries, retry_backoff=retry_backoff) - def get_connector_config(self, name, node=None): - return self._rest('/connectors/' + name + '/config', node=node) + def get_connector_config(self, name, node=None, retries=0, retry_backoff=.01): + return self._rest_with_retry('/connectors/' + name + '/config', node=node, retries=retries, retry_backoff=retry_backoff) - def set_connector_config(self, name, config, node=None): - return self._rest('/connectors/' + name + '/config', config, node=node, method="PUT") + def set_connector_config(self, name, config, node=None, retries=0, retry_backoff=.01): + return self._rest_with_retry('/connectors/' + name + '/config', config, node=node, method="PUT", retries=retries, retry_backoff=retry_backoff) - def get_connector_tasks(self, name, node=None): - return self._rest('/connectors/' + name + '/tasks', node=node) + def get_connector_tasks(self, name, node=None, retries=0, retry_backoff=.01): + return self._rest_with_retry('/connectors/' + name + '/tasks', node=node, retries=retries, retry_backoff=retry_backoff) - def delete_connector(self, name, node=None): - return self._rest('/connectors/' + name, node=node, method="DELETE") + def delete_connector(self, name, node=None, retries=0, retry_backoff=.01): + return self._rest_with_retry('/connectors/' + name, node=node, method="DELETE", retries=retries, retry_backoff=retry_backoff) def _rest(self, path, body=None, node=None, method="GET"): if node is None: @@ -144,10 +144,13 @@ def _rest(self, path, body=None, node=None, method="GET"): else: return resp.json() + def _rest_with_retry(self, path, body=None, node=None, method="GET", retries=0, retry_backoff=.01): + return retry_on_exception(lambda: self._rest(path, body, node, method), ConnectRestError, retries, retry_backoff) def _base_url(self, node): return 'http://' + node.account.externally_routable_ip + ':' + '8083' + class ConnectStandaloneService(ConnectServiceBase): """Runs Kafka Connect in standalone mode.""" @@ -223,8 +226,6 @@ def start_node(self, node): raise RuntimeError("No process ids recorded") - - class ConnectRestError(RuntimeError): def __init__(self, status, msg, url): self.status = status @@ -235,7 +236,6 @@ def __unicode__(self): return "Kafka Connect REST call failed: returned " + self.status + " for " + self.url + ". Response: " + self.message - class VerifiableConnector(object): def messages(self): """ @@ -261,6 +261,7 @@ def stop(self): self.logger.info("Destroying connector %s %s", type(self).__name__, self.name) self.cc.delete_connector(self.name) + class VerifiableSource(VerifiableConnector): """ Helper class for running a verifiable source connector on a Kafka Connect cluster and analyzing the output. @@ -284,6 +285,7 @@ def start(self): 'throughput': self.throughput }) + class VerifiableSink(VerifiableConnector): """ Helper class for running a verifiable sink connector on a Kafka Connect cluster and analyzing the output. 
diff --git a/tests/kafkatest/tests/connect/connect_rest_test.py b/tests/kafkatest/tests/connect/connect_rest_test.py index 69a8cb797020..63b9bb11a58e 100644 --- a/tests/kafkatest/tests/connect/connect_rest_test.py +++ b/tests/kafkatest/tests/connect/connect_rest_test.py @@ -15,8 +15,12 @@ from kafkatest.tests.kafka_test import KafkaTest from kafkatest.services.connect import ConnectDistributedService, ConnectRestError +from kafkatest.utils.util import retry_on_exception from ducktape.utils.util import wait_until -import hashlib, subprocess, json, itertools +import subprocess +import json +import itertools + class ConnectRestApiTest(KafkaTest): """ @@ -65,10 +69,10 @@ def test_rest_api(self): sink_connector_props = self.render("connect-file-sink.properties") for connector_props in [source_connector_props, sink_connector_props]: connector_config = self._config_dict_from_props(connector_props) - self.cc.create_connector(connector_config) + self.cc.create_connector(connector_config, retries=120, retry_backoff=1) # We should see the connectors appear - wait_until(lambda: set(self.cc.list_connectors()) == set(["local-file-source", "local-file-sink"]), + wait_until(lambda: set(self.cc.list_connectors(retries=5, retry_backoff=1)) == set(["local-file-source", "local-file-sink"]), timeout_sec=10, err_msg="Connectors that were just created did not appear in connector listing") # We'll only do very simple validation that the connectors and tasks really ran. @@ -76,7 +80,6 @@ def test_rest_api(self): node.account.ssh("echo -e -n " + repr(self.INPUTS) + " >> " + self.INPUT_FILE) wait_until(lambda: self.validate_output(self.INPUT_LIST), timeout_sec=120, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.") - # Trying to create the same connector again should cause an error try: self.cc.create_connector(self._config_dict_from_props(source_connector_props)) @@ -97,19 +100,18 @@ def test_rest_api(self): expected_sink_info = { 'name': 'local-file-sink', 'config': self._config_dict_from_props(sink_connector_props), - 'tasks': [{ 'connector': 'local-file-sink', 'task': 0 }] + 'tasks': [{'connector': 'local-file-sink', 'task': 0 }] } sink_info = self.cc.get_connector("local-file-sink") assert expected_sink_info == sink_info, "Incorrect info:" + json.dumps(sink_info) sink_config = self.cc.get_connector_config("local-file-sink") assert expected_sink_info['config'] == sink_config, "Incorrect config: " + json.dumps(sink_config) - # Validate that we can get info about tasks. This info should definitely be available now without waiting since # we've already seen data appear in files. 
# TODO: It would be nice to validate a complete listing, but that doesn't make sense for the file connectors expected_source_task_info = [{ - 'id': { 'connector': 'local-file-source', 'task': 0 }, + 'id': {'connector': 'local-file-source', 'task': 0}, 'config': { 'task.class': 'org.apache.kafka.connect.file.FileStreamSourceTask', 'file': self.INPUT_FILE, @@ -119,7 +121,7 @@ def test_rest_api(self): source_task_info = self.cc.get_connector_tasks("local-file-source") assert expected_source_task_info == source_task_info, "Incorrect info:" + json.dumps(source_task_info) expected_sink_task_info = [{ - 'id': { 'connector': 'local-file-sink', 'task': 0 }, + 'id': {'connector': 'local-file-sink', 'task': 0}, 'config': { 'task.class': 'org.apache.kafka.connect.file.FileStreamSinkTask', 'file': self.OUTPUT_FILE, @@ -139,9 +141,9 @@ def test_rest_api(self): node.account.ssh("echo -e -n " + repr(self.LONER_INPUTS) + " >> " + self.INPUT_FILE2) wait_until(lambda: self.validate_output(self.LONGER_INPUT_LIST), timeout_sec=120, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.") - self.cc.delete_connector("local-file-source") - self.cc.delete_connector("local-file-sink") - wait_until(lambda: len(self.cc.list_connectors()) == 0, timeout_sec=10, err_msg="Deleted connectors did not disappear from REST listing") + self.cc.delete_connector("local-file-source", retries=120, retry_backoff=1) + self.cc.delete_connector("local-file-sink", retries=120, retry_backoff=1) + wait_until(lambda: len(self.cc.list_connectors(retries=5, retry_backoff=1)) == 0, timeout_sec=10, err_msg="Deleted connectors did not disappear from REST listing") def validate_output(self, input): input_set = set(input) @@ -151,7 +153,6 @@ def validate_output(self, input): ])) return input_set == output_set - def file_contents(self, node, file): try: # Convert to a list here or the CalledProcessError may be returned during a call to the generator instead of diff --git a/tests/kafkatest/utils/util.py b/tests/kafkatest/utils/util.py index 0b10dbf0f445..c043bec743fb 100644 --- a/tests/kafkatest/utils/util.py +++ b/tests/kafkatest/utils/util.py @@ -15,6 +15,7 @@ from kafkatest import __version__ as __kafkatest_version__ import re +import time def kafkatest_version(): @@ -71,3 +72,14 @@ def is_int_with_prefix(msg): raise Exception("Unexpected message format. Message should be of format: integer " "prefix dot integer value, but one of the two parts (before or after dot) " "are not integers. 
Message: %s" % (msg)) + + +def retry_on_exception(fun, exception, retries, retry_backoff=.01): + exception_to_throw = None + for i in range(0, retries + 1): + try: + return fun() + except exception as e: + exception_to_throw = e + time.sleep(retry_backoff) + raise exception_to_throw From 18f643b82b05904516295e2734af0d8a9fb053ea Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Sun, 8 May 2016 23:52:13 -0700 Subject: [PATCH 105/267] MINOR: Move connect.start() to try catch block Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1347 from Ishiihara/connect-standalone --- .../java/org/apache/kafka/connect/cli/ConnectStandalone.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/cli/ConnectStandalone.java b/connect/runtime/src/main/java/org/apache/kafka/connect/cli/ConnectStandalone.java index 4ade18c878fc..b75783cd2f7b 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/cli/ConnectStandalone.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/cli/ConnectStandalone.java @@ -77,9 +77,9 @@ public static void main(String[] args) throws Exception { Herder herder = new StandaloneHerder(worker); final Connect connect = new Connect(herder, rest); - connect.start(); - + try { + connect.start(); for (final String connectorPropsFile : Arrays.copyOfRange(args, 1, args.length)) { Map connectorProps = Utils.propsToStringMap(Utils.loadProps(connectorPropsFile)); FutureCallback> cb = new FutureCallback<>(new Callback>() { From 1ee36c940ee656e575b8a0aa7cc66df380bb0978 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 9 May 2016 00:12:30 -0700 Subject: [PATCH 106/267] KAFKA-3674: Ensure connector target state changes propagated to worker Author: Jason Gustafson Reviewers: Ewen Cheslack-Postava Closes #1341 from hachikuji/KAFKA-3674 (cherry picked from commit 8911660e2e7d9553502974393ad1aa04852c2da2) Signed-off-by: Ewen Cheslack-Postava --- .../distributed/DistributedHerder.java | 22 +- .../storage/KafkaConfigBackingStore.java | 30 +- .../connect/runtime/WorkerSourceTaskTest.java | 4 +- .../distributed/DistributedHerderTest.java | 204 ++++++++++++++ .../storage/KafkaConfigBackingStoreTest.java | 258 +++++++++++++++++- 5 files changed, 498 insertions(+), 20 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java index a2beff3b4409..afabbeb1e37f 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java @@ -309,15 +309,21 @@ private void processConnectorConfigUpdates(Set connectorConfigUpdates) { } private void processTargetStateChanges(Set connectorTargetStateChanges) { - if (!connectorTargetStateChanges.isEmpty()) { - for (String connector : connectorTargetStateChanges) { - if (worker.connectorNames().contains(connector)) { - TargetState targetState = configState.targetState(connector); - worker.setTargetState(connector, targetState); - if (targetState == TargetState.STARTED) - reconfigureConnectorTasksWithRetry(connector); - } + for (String connector : connectorTargetStateChanges) { + TargetState targetState = configState.targetState(connector); + if (!configState.connectors().contains(connector)) { + log.debug("Received target state change for unknown connector: {}", 
connector); + continue; } + + // we must propagate the state change to the worker so that the connector's + // tasks can transition to the new target state + worker.setTargetState(connector, targetState); + + // additionally, if the worker is running the connector itself, then we need to + // request reconfiguration to ensure that config changes while paused take effect + if (worker.ownsConnector(connector) && targetState == TargetState.STARTED) + reconfigureConnectorTasksWithRetry(connector); } } diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java index 9412e42629dc..a894f3102c2c 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java @@ -317,8 +317,14 @@ public void putConnectorConfig(String connector, Map properties) @Override public void removeConnectorConfig(String connector) { log.debug("Removing connector configuration for connector {}", connector); - updateConnectorConfig(connector, null); - configLog.send(TARGET_STATE_KEY(connector), null); + try { + configLog.send(CONNECTOR_KEY(connector), null); + configLog.send(TARGET_STATE_KEY(connector), null); + configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + log.error("Failed to remove connector configuration from Kafka: ", e); + throw new ConnectException("Error removing connector configuration from Kafka", e); + } } @Override @@ -437,8 +443,19 @@ public void onCompletion(Throwable error, ConsumerRecord record) if (record.key().startsWith(TARGET_STATE_PREFIX)) { String connectorName = record.key().substring(TARGET_STATE_PREFIX.length()); + boolean removed = false; synchronized (lock) { - if (value.value() != null) { + if (value.value() == null) { + // When connector configs are removed, we also write tombstones for the target state. + log.debug("Removed target state for connector {} due to null value in topic.", connectorName); + connectorTargetStates.remove(connectorName); + removed = true; + + // If for some reason we still have configs for the connector, add back the default + // STARTED state to ensure each connector always has a valid target state. + if (connectorConfigs.containsKey(connectorName)) + connectorTargetStates.put(connectorName, TargetState.STARTED); + } else { if (!(value.value() instanceof Map)) { log.error("Found target state ({}) in wrong format: {}", record.key(), value.value().getClass()); return; @@ -461,8 +478,11 @@ public void onCompletion(Throwable error, ConsumerRecord record) } } - if (!starting) + // Note that we do not notify the update listener if the target state has been removed. + // Instead we depend on the removal callback of the connector config itself to notify the worker. 
+ if (!starting && !removed) updateListener.onConnectorTargetStateChange(connectorName); + } else if (record.key().startsWith(CONNECTOR_PREFIX)) { String connectorName = record.key().substring(CONNECTOR_PREFIX.length()); boolean removed = false; @@ -487,6 +507,8 @@ public void onCompletion(Throwable error, ConsumerRecord record) log.debug("Updating configuration for connector " + connectorName + " configuration: " + newConnectorConfig); connectorConfigs.put(connectorName, (Map) newConnectorConfig); + // Set the initial state of the connector to STARTED, which ensures that any connectors + // which were created with 0.9 Connect will be initialized in the STARTED state. if (!connectorTargetStates.containsKey(connectorName)) connectorTargetStates.put(connectorName, TargetState.STARTED); } diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java index 0d805dae47ab..076878132932 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java @@ -203,7 +203,9 @@ public void testPause() throws Exception { int priorCount = count.get(); Thread.sleep(100); - assertEquals(priorCount, count.get()); + + // since the transition is observed asynchronously, the count could be off by one loop iteration + assertTrue(count.get() - priorCount <= 1); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java index fbccc55963e7..81e6be86d80c 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java @@ -130,6 +130,9 @@ public class DistributedHerderTest { private static final ClusterConfigState SNAPSHOT = new ClusterConfigState(1, Collections.singletonMap(CONN1, 3), Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED), TASK_CONFIGS_MAP, Collections.emptySet()); + private static final ClusterConfigState SNAPSHOT_PAUSED_CONN1 = new ClusterConfigState(1, Collections.singletonMap(CONN1, 3), + Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.PAUSED), + TASK_CONFIGS_MAP, Collections.emptySet()); private static final ClusterConfigState SNAPSHOT_UPDATED_CONN1_CONFIG = new ClusterConfigState(1, Collections.singletonMap(CONN1, 3), Collections.singletonMap(CONN1, CONN1_CONFIG_UPDATED), Collections.singletonMap(CONN1, TargetState.STARTED), TASK_CONFIGS_MAP, Collections.emptySet()); @@ -746,6 +749,207 @@ public void testConnectorConfigUpdate() throws Exception { PowerMock.verifyAll(); } + @Test + public void testConnectorPaused() throws Exception { + // ensure that target state changes are propagated to the worker + + EasyMock.expect(member.memberId()).andStubReturn("member"); + EasyMock.expect(worker.connectorNames()).andStubReturn(Collections.singleton(CONN1)); + + // join + expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList()); + expectPostRebalanceCatchup(SNAPSHOT); + worker.startConnector(EasyMock.anyObject(), EasyMock.anyObject(), + EasyMock.eq(herder), 
EasyMock.eq(TargetState.STARTED)); + PowerMock.expectLastCall(); + EasyMock.expect(worker.isRunning(CONN1)).andReturn(true); + EasyMock.expect(worker.connectorTaskConfigs(CONN1, MAX_TASKS, null)).andReturn(TASK_CONFIGS); + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + // handle the state change + member.wakeup(); + member.ensureActive(); + PowerMock.expectLastCall(); + + EasyMock.expect(configStorage.snapshot()).andReturn(SNAPSHOT_PAUSED_CONN1); + PowerMock.expectLastCall(); + + EasyMock.expect(worker.ownsConnector(CONN1)).andReturn(true); + + worker.setTargetState(CONN1, TargetState.PAUSED); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + PowerMock.replayAll(); + + herder.tick(); // join + configUpdateListener.onConnectorTargetStateChange(CONN1); // state changes to paused + herder.tick(); // worker should apply the state change + + PowerMock.verifyAll(); + } + + @Test + public void testConnectorResumed() throws Exception { + EasyMock.expect(member.memberId()).andStubReturn("member"); + EasyMock.expect(worker.connectorNames()).andStubReturn(Collections.singleton(CONN1)); + + // start with the connector paused + expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList()); + expectPostRebalanceCatchup(SNAPSHOT_PAUSED_CONN1); + worker.startConnector(EasyMock.anyObject(), EasyMock.anyObject(), + EasyMock.eq(herder), EasyMock.eq(TargetState.PAUSED)); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + // handle the state change + member.wakeup(); + member.ensureActive(); + PowerMock.expectLastCall(); + + EasyMock.expect(configStorage.snapshot()).andReturn(SNAPSHOT); + PowerMock.expectLastCall(); + + // we expect reconfiguration after resuming + EasyMock.expect(worker.ownsConnector(CONN1)).andReturn(true); + EasyMock.expect(worker.isRunning(CONN1)).andReturn(true); + EasyMock.expect(worker.connectorTaskConfigs(CONN1, MAX_TASKS, null)).andReturn(TASK_CONFIGS); + + worker.setTargetState(CONN1, TargetState.STARTED); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + PowerMock.replayAll(); + + herder.tick(); // join + configUpdateListener.onConnectorTargetStateChange(CONN1); // state changes to started + herder.tick(); // apply state change + + PowerMock.verifyAll(); + } + + @Test + public void testUnknownConnectorPaused() throws Exception { + EasyMock.expect(member.memberId()).andStubReturn("member"); + EasyMock.expect(worker.connectorNames()).andStubReturn(Collections.singleton(CONN1)); + + // join + expectRebalance(1, Collections.emptyList(), Collections.singletonList(TASK0)); + expectPostRebalanceCatchup(SNAPSHOT); + worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); + PowerMock.expectLastCall(); + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + // state change is ignored since we have no target state + member.wakeup(); + member.ensureActive(); + PowerMock.expectLastCall(); + + EasyMock.expect(configStorage.snapshot()).andReturn(SNAPSHOT); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + PowerMock.replayAll(); + + herder.tick(); // join + configUpdateListener.onConnectorTargetStateChange("unknown-connector"); + herder.tick(); // continue + + PowerMock.verifyAll(); + } + + @Test + public void testConnectorPausedRunningTaskOnly() throws Exception { + // even if we don't own the connector, we 
should still propagate target state + // changes to the worker so that tasks will transition correctly + + EasyMock.expect(member.memberId()).andStubReturn("member"); + EasyMock.expect(worker.connectorNames()).andStubReturn(Collections.emptySet()); + + // join + expectRebalance(1, Collections.emptyList(), Collections.singletonList(TASK0)); + expectPostRebalanceCatchup(SNAPSHOT); + worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED)); + PowerMock.expectLastCall(); + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + // handle the state change + member.wakeup(); + member.ensureActive(); + PowerMock.expectLastCall(); + + EasyMock.expect(configStorage.snapshot()).andReturn(SNAPSHOT_PAUSED_CONN1); + PowerMock.expectLastCall(); + + EasyMock.expect(worker.ownsConnector(CONN1)).andReturn(false); + + worker.setTargetState(CONN1, TargetState.PAUSED); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + PowerMock.replayAll(); + + herder.tick(); // join + configUpdateListener.onConnectorTargetStateChange(CONN1); // state changes to paused + herder.tick(); // apply state change + + PowerMock.verifyAll(); + } + + @Test + public void testConnectorResumedRunningTaskOnly() throws Exception { + // even if we don't own the connector, we should still propagate target state + // changes to the worker so that tasks will transition correctly + + EasyMock.expect(member.memberId()).andStubReturn("member"); + EasyMock.expect(worker.connectorNames()).andStubReturn(Collections.emptySet()); + + // join + expectRebalance(1, Collections.emptyList(), Collections.singletonList(TASK0)); + expectPostRebalanceCatchup(SNAPSHOT_PAUSED_CONN1); + worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.eq(herder), EasyMock.eq(TargetState.PAUSED)); + PowerMock.expectLastCall(); + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + // handle the state change + member.wakeup(); + member.ensureActive(); + PowerMock.expectLastCall(); + + EasyMock.expect(configStorage.snapshot()).andReturn(SNAPSHOT); + PowerMock.expectLastCall(); + + EasyMock.expect(worker.ownsConnector(CONN1)).andReturn(false); + + worker.setTargetState(CONN1, TargetState.STARTED); + PowerMock.expectLastCall(); + + member.poll(EasyMock.anyInt()); + PowerMock.expectLastCall(); + + PowerMock.replayAll(); + + herder.tick(); // join + configUpdateListener.onConnectorTargetStateChange(CONN1); // state changes to paused + herder.tick(); // apply state change + + PowerMock.verifyAll(); + } + @Test public void testTaskConfigAdded() { // Task config always requires rebalance diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java index 617177e1a4f1..f5bce8f43568 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java @@ -26,6 +26,7 @@ import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.runtime.TargetState; import org.apache.kafka.connect.runtime.distributed.ClusterConfigState; import org.apache.kafka.connect.runtime.distributed.DistributedConfig; import org.apache.kafka.connect.util.Callback; @@ -53,9 +54,11 
@@ import java.util.List; import java.util.Map; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; @RunWith(PowerMockRunner.class) @PrepareForTest(KafkaConfigBackingStore.class) @@ -107,6 +110,7 @@ public class KafkaConfigBackingStoreTest { new Struct(KafkaConfigBackingStore.TASK_CONFIGURATION_V0).put("properties", SAMPLE_CONFIGS.get(0)), new Struct(KafkaConfigBackingStore.TASK_CONFIGURATION_V0).put("properties", SAMPLE_CONFIGS.get(1)) ); + private static final Struct TARGET_STATE_PAUSED = new Struct(KafkaConfigBackingStore.TARGET_STATE_V0).put("state", "PAUSED"); private static final Struct TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR = new Struct(KafkaConfigBackingStore.CONNECTOR_TASKS_COMMIT_V0).put("tasks", 2); @@ -181,15 +185,10 @@ public void testPutConnectorConfig() throws Exception { EasyMock.expectLastCall(); // Config deletion - expectConvertWriteAndRead( - CONNECTOR_CONFIG_KEYS.get(1), KafkaConfigBackingStore.CONNECTOR_CONFIGURATION_V0, null, null, null); + expectConnectorRemoval(CONNECTOR_CONFIG_KEYS.get(1), TARGET_STATE_KEYS.get(1)); configUpdateListener.onConnectorConfigRemove(CONNECTOR_IDS.get(1)); EasyMock.expectLastCall(); - // Target state deletion - storeLog.send(TARGET_STATE_KEYS.get(1), null); - PowerMock.expectLastCall(); - expectStop(); PowerMock.replayAll(); @@ -220,9 +219,10 @@ public void testPutConnectorConfig() throws Exception { // Deletion should remove the second one we added configStorage.removeConnectorConfig(CONNECTOR_IDS.get(1)); configState = configStorage.snapshot(); - assertEquals(3, configState.offset()); + assertEquals(4, configState.offset()); assertEquals(SAMPLE_CONFIGS.get(0), configState.connectorConfig(CONNECTOR_IDS.get(0))); assertNull(configState.connectorConfig(CONNECTOR_IDS.get(1))); + assertNull(configState.targetState(CONNECTOR_IDS.get(1))); configStorage.stop(); @@ -345,6 +345,176 @@ public void testPutTaskConfigsZeroTasks() throws Exception { PowerMock.verifyAll(); } + @Test + public void testRestoreTargetState() throws Exception { + expectConfigure(); + List> existingRecords = Arrays.asList( + new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)), + new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(1)), + new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(2)), + new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TARGET_STATE_KEYS.get(0), CONFIGS_SERIALIZED.get(3)), + new ConsumerRecord<>(TOPIC, 0, 4, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(4))); + LinkedHashMap deserialized = new LinkedHashMap(); + deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(3), TARGET_STATE_PAUSED); + deserialized.put(CONFIGS_SERIALIZED.get(4), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR); + logOffset = 5; + + expectStart(existingRecords, deserialized); + + // Shouldn't see any callbacks since this is during startup + + expectStop(); + + PowerMock.replayAll(); + + 
configStorage.configure(DEFAULT_DISTRIBUTED_CONFIG); + configStorage.start(); + + // Should see a single connector with initial state paused + ClusterConfigState configState = configStorage.snapshot(); + assertEquals(5, configState.offset()); // Should always be next to be read, even if uncommitted + assertEquals(Arrays.asList(CONNECTOR_IDS.get(0)), new ArrayList<>(configState.connectors())); + assertEquals(TargetState.PAUSED, configState.targetState(CONNECTOR_IDS.get(0))); + + configStorage.stop(); + + PowerMock.verifyAll(); + } + + @Test + public void testBackgroundUpdateTargetState() throws Exception { + // verify that we handle target state changes correctly when they come up through the log + + expectConfigure(); + List> existingRecords = Arrays.asList( + new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)), + new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(1)), + new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(2)), + new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(3))); + LinkedHashMap deserialized = new LinkedHashMap(); + deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(3), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR); + logOffset = 5; + + expectStart(existingRecords, deserialized); + + expectRead(TARGET_STATE_KEYS.get(0), CONFIGS_SERIALIZED.get(0), TARGET_STATE_PAUSED); + + configUpdateListener.onConnectorTargetStateChange(CONNECTOR_IDS.get(0)); + EasyMock.expectLastCall(); + + expectStop(); + + PowerMock.replayAll(); + + configStorage.configure(DEFAULT_DISTRIBUTED_CONFIG); + configStorage.start(); + + // Should see a single connector with initial state paused + ClusterConfigState configState = configStorage.snapshot(); + assertEquals(TargetState.STARTED, configState.targetState(CONNECTOR_IDS.get(0))); + + configStorage.refresh(0, TimeUnit.SECONDS); + + configStorage.stop(); + + PowerMock.verifyAll(); + } + + @Test + public void testBackgroundConnectorDeletion() throws Exception { + // verify that we handle connector deletions correctly when they come up through the log + + expectConfigure(); + List> existingRecords = Arrays.asList( + new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)), + new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(1)), + new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(2)), + new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(3))); + LinkedHashMap deserialized = new LinkedHashMap(); + deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(3), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR); + logOffset = 5; + + 
expectStart(existingRecords, deserialized); + + LinkedHashMap serializedData = new LinkedHashMap<>(); + serializedData.put(CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)); + serializedData.put(TARGET_STATE_KEYS.get(0), CONFIGS_SERIALIZED.get(1)); + + Map deserializedData = new HashMap<>(); + deserializedData.put(CONNECTOR_CONFIG_KEYS.get(0), null); + deserializedData.put(TARGET_STATE_KEYS.get(0), null); + + expectRead(serializedData, deserializedData); + + configUpdateListener.onConnectorConfigRemove(CONNECTOR_IDS.get(0)); + EasyMock.expectLastCall(); + + expectStop(); + + PowerMock.replayAll(); + + configStorage.configure(DEFAULT_DISTRIBUTED_CONFIG); + configStorage.start(); + + // Should see a single connector with initial state paused + ClusterConfigState configState = configStorage.snapshot(); + assertEquals(TargetState.STARTED, configState.targetState(CONNECTOR_IDS.get(0))); + + configStorage.refresh(0, TimeUnit.SECONDS); + + configStorage.stop(); + + PowerMock.verifyAll(); + } + + @Test + public void testRestoreTargetStateUnexpectedDeletion() throws Exception { + expectConfigure(); + List> existingRecords = Arrays.asList( + new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)), + new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(1)), + new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(2)), + new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TARGET_STATE_KEYS.get(0), CONFIGS_SERIALIZED.get(3)), + new ConsumerRecord<>(TOPIC, 0, 4, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(4))); + LinkedHashMap deserialized = new LinkedHashMap(); + deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(3), null); + deserialized.put(CONFIGS_SERIALIZED.get(4), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR); + logOffset = 5; + + expectStart(existingRecords, deserialized); + + // Shouldn't see any callbacks since this is during startup + + expectStop(); + + PowerMock.replayAll(); + + configStorage.configure(DEFAULT_DISTRIBUTED_CONFIG); + configStorage.start(); + + // The target state deletion should reset the state to STARTED + ClusterConfigState configState = configStorage.snapshot(); + assertEquals(5, configState.offset()); // Should always be next to be read, even if uncommitted + assertEquals(Arrays.asList(CONNECTOR_IDS.get(0)), new ArrayList<>(configState.connectors())); + assertEquals(TargetState.STARTED, configState.targetState(CONNECTOR_IDS.get(0))); + + configStorage.stop(); + + PowerMock.verifyAll(); + } + @Test public void testRestore() throws Exception { // Restoring data should notify only of the latest values after loading is complete. 
This also validates @@ -385,6 +555,7 @@ public void testRestore() throws Exception { ClusterConfigState configState = configStorage.snapshot(); assertEquals(7, configState.offset()); // Should always be next to be read, even if uncommitted assertEquals(Arrays.asList(CONNECTOR_IDS.get(0)), new ArrayList<>(configState.connectors())); + assertEquals(TargetState.STARTED, configState.targetState(CONNECTOR_IDS.get(0))); // CONNECTOR_CONFIG_STRUCTS[2] -> SAMPLE_CONFIGS[2] assertEquals(SAMPLE_CONFIGS.get(2), configState.connectorConfig(CONNECTOR_IDS.get(0))); // Should see 2 tasks for that connector. Only config updates before the root key update should be reflected @@ -399,6 +570,51 @@ public void testRestore() throws Exception { PowerMock.verifyAll(); } + @Test + public void testRestoreConnectorDeletion() throws Exception { + // Restoring data should notify only of the latest values after loading is complete. This also validates + // that inconsistent state is ignored. + + expectConfigure(); + // Overwrite each type at least once to ensure we see the latest data after loading + List> existingRecords = Arrays.asList( + new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0)), + new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(1)), + new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(2)), + new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, CONNECTOR_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(3)), + new ConsumerRecord<>(TOPIC, 0, 4, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TARGET_STATE_KEYS.get(0), CONFIGS_SERIALIZED.get(4)), + new ConsumerRecord<>(TOPIC, 0, 5, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(5))); + + LinkedHashMap deserialized = new LinkedHashMap(); + deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0)); + deserialized.put(CONFIGS_SERIALIZED.get(3), null); + deserialized.put(CONFIGS_SERIALIZED.get(4), null); + deserialized.put(CONFIGS_SERIALIZED.get(5), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR); + + logOffset = 6; + expectStart(existingRecords, deserialized); + + // Shouldn't see any callbacks since this is during startup + + expectStop(); + + PowerMock.replayAll(); + + configStorage.configure(DEFAULT_DISTRIBUTED_CONFIG); + configStorage.start(); + + // Should see a single connector and its config should be the last one seen anywhere in the log + ClusterConfigState configState = configStorage.snapshot(); + assertEquals(6, configState.offset()); // Should always be next to be read, even if uncommitted + assertTrue(configState.connectors().isEmpty()); + + configStorage.stop(); + + PowerMock.verifyAll(); + } + @Test public void testRestoreZeroTasks() throws Exception { // Restoring data should notify only of the latest values after loading is complete. 
This also validates @@ -558,6 +774,22 @@ private void expectStop() { PowerMock.expectLastCall(); } + private void expectRead(LinkedHashMap serializedValues, + Map deserializedValues) { + expectReadToEnd(serializedValues); + for (Map.Entry deserializedValueEntry : deserializedValues.entrySet()) { + byte[] serializedValue = serializedValues.get(deserializedValueEntry.getKey()); + EasyMock.expect(converter.toConnectData(EasyMock.eq(TOPIC), EasyMock.aryEq(serializedValue))) + .andReturn(new SchemaAndValue(null, structToMap(deserializedValueEntry.getValue()))); + } + } + + private void expectRead(final String key, final byte[] serializedValue, Struct deserializedValue) { + LinkedHashMap serializedData = new LinkedHashMap<>(); + serializedData.put(key, serializedValue); + expectRead(serializedData, Collections.singletonMap(key, deserializedValue)); + } + // Expect a conversion & write to the underlying log, followed by a subsequent read when the data is consumed back // from the log. Validate the data that is captured when the conversion is performed matches the specified data // (by checking a single field's value) @@ -596,6 +828,15 @@ public Future answer() throws Throwable { }); } + private void expectConnectorRemoval(String configKey, String targetStateKey) { + expectConvertWriteRead(configKey, KafkaConfigBackingStore.CONNECTOR_CONFIGURATION_V0, null, null, null); + expectConvertWriteRead(targetStateKey, KafkaConfigBackingStore.TARGET_STATE_V0, null, null, null); + + LinkedHashMap recordsToRead = new LinkedHashMap<>(); + recordsToRead.put(configKey, null); + recordsToRead.put(targetStateKey, null); + expectReadToEnd(recordsToRead); + } private void expectConvertWriteAndRead(final String configKey, final Schema valueSchema, final byte[] serialized, final String dataFieldName, final Object dataFieldValue) { @@ -619,6 +860,9 @@ private void whiteboxAddConnector(String connectorName, Map conn // Generates a Map representation of Struct. 
Only does shallow traversal, so nested structs are not converted private Map structToMap(Struct struct) { + if (struct == null) + return null; + HashMap result = new HashMap<>(); for (Field field : struct.schema().fields()) result.put(field.name(), struct.get(field)); From 7a0b9eb06342d23c985fc898c2a69c6e93bc3318 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 9 May 2016 00:22:13 -0700 Subject: [PATCH 107/267] KAFKA-3675; Add lz4 to parametrized `test_upgrade` system test Author: Ismael Juma Reviewers: Ewen Cheslack-Postava Closes #1343 from ijuma/kafka-3675-lz4-test-upgrade (cherry picked from commit f5b98b8fa786ff0ca37c5cba6d43c3390d28436e) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/tests/core/upgrade_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/kafkatest/tests/core/upgrade_test.py b/tests/kafkatest/tests/core/upgrade_test.py index 778d6a50ca3e..790b69d1d8af 100644 --- a/tests/kafkatest/tests/core/upgrade_test.py +++ b/tests/kafkatest/tests/core/upgrade_test.py @@ -62,8 +62,12 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None): @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["none"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["snappy"], new_consumer=True) + @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"]) + @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"], new_consumer=True) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["none"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["snappy"], new_consumer=True) + @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["lz4"]) + @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["lz4"], new_consumer=True) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["none"]) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["snappy"]) def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types, new_consumer=False): From 9f583d96f68578fa6fa395805b0e1057b58d1ac7 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Mon, 9 May 2016 14:48:08 +0100 Subject: [PATCH 108/267] KAFKA-3662; Fix timing issue in SocketServerTest.tooBigRequestIsRejected Test sends large request using multiple writes of length followed by request body. The first write should succeed, but since the server closes the connection on processing the length that is too big, subsequent writes may fail. Modified test to handle this exception. 
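In outline, the pattern the test now follows looks roughly like the sketch below (illustrative Scala only, with assumed placeholder names such as sendOversizedRequest and payload; the real change is in the diff that follows):

    import java.io.{DataOutputStream, IOException}
    import java.net.Socket

    // Sketch: write the request size first, then the body. The broker closes the
    // connection as soon as it reads the oversized length, so the body write and
    // flush are allowed to fail with an IOException.
    def sendOversizedRequest(socket: Socket, payload: Array[Byte]): Unit = {
      val outgoing = new DataOutputStream(socket.getOutputStream)
      outgoing.writeInt(payload.length)  // this first write is expected to succeed
      try {
        outgoing.write(payload)
        outgoing.flush()
      } catch {
        case _: IOException => // fine: the broker already dropped the connection
      }
    }

The actual fix below also keeps receiveResponse inside the same try block, since reading the response can fail for the same reason.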
Author: Rajini Sivaram Reviewers: Ismael Juma Closes #1349 from rajinisivaram/KAFKA-3662 (cherry picked from commit bce3415b18fd0d2b7ab05330f2590f98939b8850) Signed-off-by: Ismael Juma --- .../test/scala/unit/kafka/network/SocketServerTest.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala index 81e5232e85cd..d2154306b94e 100644 --- a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala +++ b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala @@ -150,8 +150,13 @@ class SocketServerTest extends JUnitSuite { val tooManyBytes = new Array[Byte](server.config.socketRequestMaxBytes + 1) new Random().nextBytes(tooManyBytes) val socket = connect() - sendRequest(socket, tooManyBytes, Some(0)) + val outgoing = new DataOutputStream(socket.getOutputStream) + outgoing.writeInt(tooManyBytes.length) try { + // Server closes client connection when it processes the request length because + // it is too big. The write of request body may fail if the connection has been closed. + outgoing.write(tooManyBytes) + outgoing.flush() receiveResponse(socket) } catch { case e: IOException => // thats fine From 1142f51bf72095c8817db52c23d3a20d2099a5dd Mon Sep 17 00:00:00 2001 From: Kaufman Ng Date: Mon, 9 May 2016 17:01:16 +0100 Subject: [PATCH 109/267] KAFKA-3681; Connect doc formatting Author: Kaufman Ng Reviewers: Ismael Juma Closes #1351 from coughman/KAFKA-3681-connect-doc-formatting (cherry picked from commit b86378840a2a6defcbdc59c1c1462ecbfd2c47d4) Signed-off-by: Ismael Juma --- docs/connect.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/connect.html b/docs/connect.html index 5cd4130d691c..ca45165ccf08 100644 --- a/docs/connect.html +++ b/docs/connect.html @@ -192,7 +192,7 @@

    Connector Example
  • Although not used in the example, SourceTask also provides two APIs to commit offsets in the source system: commit and commitSourceRecord. The APIs are provided for source systems which have an acknowledgement mechanism for messages. Overriding these methods allows the source connector to acknowledge messages in the source system, either in bulk or individually, once they have been written to Kafka. -The commit API stores the offsets in the source system, up to the offsets that have been returned by poll. The implementation of this API should block until the commit is complete. The commitSourceRecord API saves the offset in the source system for each SourceRecord after it is written to Kafka. As Kafka Connect will record offsets automatically, SourceTasks are not required to implement them. In cases where a connector does need to acknowledge messages in the source system, only one of the APIs is typically required. +The commit API stores the offsets in the source system, up to the offsets that have been returned by poll. The implementation of this API should block until the commit is complete. The commitSourceRecord API saves the offset in the source system for each SourceRecord after it is written to Kafka. As Kafka Connect will record offsets automatically, SourceTasks are not required to implement them. In cases where a connector does need to acknowledge messages in the source system, only one of the APIs is typically required. Even with multiple tasks, this method implementation is usually pretty simple. It just has to determine the number of input tasks, which may require contacting the remote service it is pulling data from, and then divvy them up. Because some patterns for splitting work among tasks are so common, some utilities are provided in ConnectorUtils to simplify these cases. @@ -232,7 +232,7 @@
    Task Example - Source Task public List<SourceRecord> poll() throws InterruptedException { try { ArrayList<SourceRecord> records = new ArrayList<>(); - while (streamValid(stream) && records.isEmpty()) { + while (streamValid(stream) && records.isEmpty()) { LineAndOffset line = readToNextLine(stream); if (line != null) { Map sourcePartition = Collections.singletonMap("filename", filename); From 2a1b3b93b661a00055878948023d5b3901e18621 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Mon, 9 May 2016 09:31:11 -0700 Subject: [PATCH 110/267] KAFKA-3658: Validate retention period be longer than window size Author: Guozhang Wang Reviewers: Henry Cai, Ismael Juma Closes #1337 from guozhangwang/K3658 (cherry picked from commit 29a682e2ea1918b50c6fd4c36ce399242929e5ae) Signed-off-by: Guozhang Wang --- .../kafka/streams/kstream/internals/KStreamJoinWindow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamJoinWindow.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamJoinWindow.java index 864dc9c26962..2f4b04d252f1 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamJoinWindow.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamJoinWindow.java @@ -34,9 +34,9 @@ class KStreamJoinWindow implements ProcessorSupplier { KStreamJoinWindow(String windowName, long windowSizeMs, long retentionPeriodMs) { this.windowName = windowName; - if (windowSizeMs * 2 > retentionPeriodMs) + if (windowSizeMs > retentionPeriodMs) throw new TopologyBuilderException("The retention period of the join window " - + windowName + " must at least two times its window size."); + + windowName + " must be no smaller than its window size."); } @Override From 9181ecf247297ea7f7cdd2b69a578a487c74fdf5 Mon Sep 17 00:00:00 2001 From: Mickael Maison Date: Mon, 9 May 2016 18:10:40 +0100 Subject: [PATCH 111/267] KAFKA-3587; LogCleaner fails due to incorrect offset map computation Removed the overly pessimistic require and instead attempt to fill the dedup buffer. Use the (only) map until full; this may allow processing all dirty segments (optimism) or may stop in the middle of a dirty segment. In either case, do compaction using the map loaded that way. 
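The fill-until-full idea can be pictured roughly as below (illustrative Scala only, with an assumed placeholder name fillOffsetMap and simplified types; the real change is in buildOffsetMap and buildOffsetMapForSegment in the diff that follows):

    import scala.collection.mutable

    // Sketch: map keys to their latest offsets until the dedup map reaches its
    // desired size. Filling may stop in the middle of a dirty segment; compaction
    // then proceeds up to the last offset that made it into the map.
    def fillOffsetMap(entries: Iterator[(String, Long)],
                      map: mutable.Map[String, Long],
                      maxDesiredMapSize: Int): Option[Long] = {
      var lastMappedOffset: Option[Long] = None
      while (entries.hasNext && map.size < maxDesiredMapSize) {
        val (key, offset) = entries.next()
        map.put(key, offset)            // later occurrences overwrite earlier offsets
        lastMappedOffset = Some(offset)
      }
      lastMappedOffset                  // upper bound for this cleaning pass
    }

In the real cleaner the size check happens per message inside buildOffsetMapForSegment, which returns -1 when the map is full so that buildOffsetMap knows the segment was only partially mapped.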
This patch was developed with edoardocomar Author: Mickael Maison Reviewers: Jun Rao , Guozhang Wang Closes #1332 from mimaison/KAFKA-3587 (cherry picked from commit 2caf872c2e51d689c6ac20240c4a306e36d98b15) Signed-off-by: Ismael Juma --- .../src/main/scala/kafka/log/LogCleaner.scala | 27 ++++++++++----- .../scala/unit/kafka/log/CleanerTest.scala | 33 +++++++++++++++++++ 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala index 0f742f918b9a..c6636be09428 100644 --- a/core/src/main/scala/kafka/log/LogCleaner.scala +++ b/core/src/main/scala/kafka/log/LogCleaner.scala @@ -615,17 +615,19 @@ private[log] class Cleaner(val id: Int, // but we may be able to fit more (if there is lots of duplication in the dirty section of the log) var offset = dirty.head.baseOffset require(offset == start, "Last clean offset is %d but segment base offset is %d for log %s.".format(start, offset, log.name)) - val maxDesiredMapSize = (map.slots * this.dupBufferLoadFactor).toInt var full = false for (segment <- dirty if !full) { checkDone(log.topicAndPartition) - val segmentSize = segment.nextOffset() - segment.baseOffset - require(segmentSize <= maxDesiredMapSize, "%d messages in segment %s/%s but offset map can fit only %d. You can increase log.cleaner.dedupe.buffer.size or decrease log.cleaner.threads".format(segmentSize, log.name, segment.log.file.getName, maxDesiredMapSize)) - if (map.size + segmentSize <= maxDesiredMapSize) - offset = buildOffsetMapForSegment(log.topicAndPartition, segment, map) - else + val newOffset = buildOffsetMapForSegment(log.topicAndPartition, segment, map) + if (newOffset > -1L) + offset = newOffset + else { + // If not even one segment can fit in the map, compaction cannot happen + require(offset > start, "Unable to build the offset map for segment %s/%s. 
You can increase log.cleaner.dedupe.buffer.size or decrease log.cleaner.threads".format(log.name, segment.log.file.getName)) + debug("Offset map is full, %d segments fully mapped, segment with base offset %d is partially mapped".format(dirty.indexOf(segment), segment.baseOffset)) full = true + } } info("Offset map for log %s complete.".format(log.name)) offset @@ -637,11 +639,12 @@ private[log] class Cleaner(val id: Int, * @param segment The segment to index * @param map The map in which to store the key=>offset mapping * - * @return The final offset covered by the map + * @return The final offset covered by the map or -1 if the map is full */ private def buildOffsetMapForSegment(topicAndPartition: TopicAndPartition, segment: LogSegment, map: OffsetMap): Long = { var position = 0 var offset = segment.baseOffset + val maxDesiredMapSize = (map.slots * this.dupBufferLoadFactor).toInt while (position < segment.log.sizeInBytes) { checkDone(topicAndPartition) readBuffer.clear() @@ -650,8 +653,14 @@ private[log] class Cleaner(val id: Int, val startPosition = position for (entry <- messages) { val message = entry.message - if (message.hasKey) - map.put(message.key, entry.offset) + if (message.hasKey) { + if (map.size < maxDesiredMapSize) + map.put(message.key, entry.offset) + else { + // The map is full, stop looping and return + return -1L + } + } offset = entry.offset stats.indexMessagesRead(1) } diff --git a/core/src/test/scala/unit/kafka/log/CleanerTest.scala b/core/src/test/scala/unit/kafka/log/CleanerTest.scala index b6849f0dcdc7..752a260fb28a 100755 --- a/core/src/test/scala/unit/kafka/log/CleanerTest.scala +++ b/core/src/test/scala/unit/kafka/log/CleanerTest.scala @@ -423,6 +423,39 @@ class CleanerTest extends JUnitSuite { recoverAndCheck(config, cleanedKeys) } + + @Test + def testBuildOffsetMapFakeLarge() { + val map = new FakeOffsetMap(1000) + val logProps = new Properties() + logProps.put(LogConfig.SegmentBytesProp, 72: java.lang.Integer) + logProps.put(LogConfig.SegmentIndexBytesProp, 72: java.lang.Integer) + logProps.put(LogConfig.CleanupPolicyProp, LogConfig.Compact) + val logConfig = LogConfig(logProps) + val log = makeLog(config = logConfig) + val cleaner = makeCleaner(Int.MaxValue) + val start = 0 + val end = 2 + val offsetSeq = Seq(0L, 7206178L) + val offsets = writeToLog(log, (start until end) zip (start until end), offsetSeq) + val endOffset = cleaner.buildOffsetMap(log, start, end, map) + assertEquals("Last offset should be the end offset.", 7206178L, endOffset) + assertEquals("Should have the expected number of messages in the map.", end - start, map.size) + assertEquals("Map should contain first value", 0L, map.get(key(0))) + assertEquals("Map should contain second value", 7206178L, map.get(key(1))) + } + + private def writeToLog(log: Log, keysAndValues: Iterable[(Int, Int)], offsetSeq: Iterable[Long]): Iterable[Long] = { + for(((key, value), offset) <- keysAndValues.zip(offsetSeq)) + yield log.append(messageWithOffset(key, value, offset), assignOffsets = false).firstOffset + } + + private def messageWithOffset(key: Int, value: Int, offset: Long) = + new ByteBufferMessageSet(NoCompressionCodec, Seq(offset), + new Message(key = key.toString.getBytes, + bytes = value.toString.getBytes, + timestamp = Message.NoTimestamp, + magicValue = Message.MagicValue_V1)) def makeLog(dir: File = dir, config: LogConfig = logConfig) = From 48908fe60225e3af532888284eb347dd6b0a2ff9 Mon Sep 17 00:00:00 2001 From: Dana Powers Date: Mon, 9 May 2016 18:40:54 +0100 Subject: [PATCH 112/267] HOTFIX: 
KAFKA-3160 follow-up, catch decompression errors in constructor After testing KAFKA-3160 a bit more, I found that the error code was not being set properly in ProduceResponse. This happened because the validation error is raised in the CompressionFactory constructor, which was not wrapped in a try / catch. ijuma junrao (This contribution is my original work and I license the work under Apache 2.0.) Author: Dana Powers Author: Ismael Juma Reviewers: Jun Rao , Gwen Shapira , Ismael Juma Closes #1344 from dpkp/decompress_error_code (cherry picked from commit 4331bf4ff2d0e7ab1a24ea29382897162c1ed91c) Signed-off-by: Ismael Juma --- .../kafka/message/ByteBufferMessageSet.scala | 13 +-- .../message/InvalidMessageException.scala | 3 +- .../kafka/server/ProduceRequestTest.scala | 86 +++++++++++++++++++ 3 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala diff --git a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala index 677355a0c450..a116d4bc6767 100644 --- a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala +++ b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala @@ -23,11 +23,7 @@ import java.nio.ByteBuffer import java.nio.channels._ import java.io._ import java.util.ArrayDeque -import java.util.concurrent.atomic.AtomicLong -import scala.collection.JavaConverters._ - -import org.apache.kafka.common.errors.CorruptRecordException import org.apache.kafka.common.errors.InvalidTimestampException import org.apache.kafka.common.record.TimestampType import org.apache.kafka.common.utils.Utils @@ -96,7 +92,12 @@ object ByteBufferMessageSet { if (wrapperMessage.payload == null) throw new KafkaException(s"Message payload is null: $wrapperMessage") val inputStream = new ByteBufferBackedInputStream(wrapperMessage.payload) - val compressed = new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, wrapperMessage.magic, inputStream)) + val compressed = try { + new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, wrapperMessage.magic, inputStream)) + } catch { + case ioe: IOException => + throw new InvalidMessageException(s"Failed to instantiate input stream compressed with ${wrapperMessage.compressionCodec}", ioe) + } var lastInnerOffset = -1L val messageAndOffsets = if (wrapperMessageAndOffset.message.magic > MagicValue_V0) { @@ -108,7 +109,7 @@ object ByteBufferMessageSet { case eofe: EOFException => compressed.close() case ioe: IOException => - throw new CorruptRecordException(ioe) + throw new InvalidMessageException(s"Error while reading message from stream compressed with ${wrapperMessage.compressionCodec}", ioe) } Some(innerMessageAndOffsets) } else None diff --git a/core/src/main/scala/kafka/message/InvalidMessageException.scala b/core/src/main/scala/kafka/message/InvalidMessageException.scala index df22516848d8..ef83500aea5a 100644 --- a/core/src/main/scala/kafka/message/InvalidMessageException.scala +++ b/core/src/main/scala/kafka/message/InvalidMessageException.scala @@ -27,6 +27,7 @@ import org.apache.kafka.common.errors.CorruptRecordException * Because ByteBufferMessageSet.scala and Message.scala are used in both server and client code having * InvalidMessageException extend CorruptRecordException allows us to change server code without affecting the client. 
*/ -class InvalidMessageException(message: String) extends CorruptRecordException(message) { +class InvalidMessageException(message: String, throwable: Throwable) extends CorruptRecordException(message, throwable) { + def this(message: String) = this(null, null) def this() = this(null) } diff --git a/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala new file mode 100644 index 000000000000..67f7d413c755 --- /dev/null +++ b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.server + +import kafka.message.{ByteBufferMessageSet, LZ4CompressionCodec, Message} +import kafka.utils.TestUtils +import org.apache.kafka.common.TopicPartition +import org.apache.kafka.common.protocol.{ApiKeys, Errors, ProtoUtils} +import org.apache.kafka.common.requests.{ProduceRequest, ProduceResponse} +import org.junit.Assert._ +import org.junit.Test + +import scala.collection.JavaConverters._ + +/** + * Subclasses of `BaseProduceSendRequestTest` exercise the producer and produce request/response. This class + * complements those classes with tests that require lower-level access to the protocol. 
+ */ +class ProduceRequestTest extends BaseRequestTest { + + @Test + def testSimpleProduceRequest() { + val (partition, leader) = createTopicAndFindPartitionWithLeader("topic") + val messageBuffer = new ByteBufferMessageSet(new Message("value".getBytes, "key".getBytes, + System.currentTimeMillis(), 1: Byte)).buffer + val topicPartition = new TopicPartition("topic", partition) + val partitionRecords = Map(topicPartition -> messageBuffer) + val produceResponse = sendProduceRequest(leader, new ProduceRequest(-1, 3000, partitionRecords.asJava)) + assertEquals(1, produceResponse.responses.size) + val (tp, partitionResponse) = produceResponse.responses.asScala.head + assertEquals(topicPartition, tp) + assertEquals(Errors.NONE.code, partitionResponse.errorCode) + assertEquals(0, partitionResponse.baseOffset) + assertEquals(-1, partitionResponse.timestamp) + } + + /* returns a pair of partition id and leader id */ + private def createTopicAndFindPartitionWithLeader(topic: String): (Int, Int) = { + val partitionToLeader = TestUtils.createTopic(zkUtils, topic, 3, 2, servers) + partitionToLeader.collectFirst { + case (partition, Some(leader)) if leader != -1 => (partition, leader) + }.getOrElse(fail(s"No leader elected for topic $topic")) + } + + @Test + def testCorruptLz4ProduceRequest() { + val (partition, leader) = createTopicAndFindPartitionWithLeader("topic") + val messageBuffer = new ByteBufferMessageSet(LZ4CompressionCodec, new Message("value".getBytes, "key".getBytes, + System.currentTimeMillis(), 1: Byte)).buffer + // Change the lz4 checksum value so that it doesn't match the contents + messageBuffer.array.update(40, 0) + val topicPartition = new TopicPartition("topic", partition) + val partitionRecords = Map(topicPartition -> messageBuffer) + val produceResponse = sendProduceRequest(leader, new ProduceRequest(-1, 3000, partitionRecords.asJava)) + assertEquals(1, produceResponse.responses.size) + val (tp, partitionResponse) = produceResponse.responses.asScala.head + assertEquals(topicPartition, tp) + assertEquals(Errors.CORRUPT_MESSAGE.code, partitionResponse.errorCode) + assertEquals(-1, partitionResponse.baseOffset) + assertEquals(-1, partitionResponse.timestamp) + } + + private def sendProduceRequest(leaderId: Int, request: ProduceRequest): ProduceResponse = { + val socket = connect(s = servers.find(_.config.brokerId == leaderId).map(_.socketServer).getOrElse { + fail(s"Could not find broker with id $leaderId") + }) + val response = send(socket, request, ApiKeys.PRODUCE, ProtoUtils.latestVersion(ApiKeys.PRODUCE.id)) + ProduceResponse.parse(response) + } + +} From 7876318c28bb477f536ddb48401980eb466a04c2 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 9 May 2016 23:34:30 +0100 Subject: [PATCH 113/267] MINOR: Add toString implementations to Subscription and Assignment Author: Jason Gustafson Reviewers: Ismael Juma Closes #1354 from hachikuji/minor-add-missing-assignor-tostrings (cherry picked from commit ff8580da6fb1d65b206df544ebd574222abe7f3f) Signed-off-by: Ismael Juma --- .../consumer/internals/PartitionAssignor.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java index df8f2f14c920..02eddd1eede5 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java +++ 
b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/PartitionAssignor.java @@ -89,6 +89,12 @@ public ByteBuffer userData() { return userData; } + @Override + public String toString() { + return "Subscription(" + + "topics=" + topics + + ')'; + } } class Assignment { @@ -112,6 +118,12 @@ public ByteBuffer userData() { return userData; } + @Override + public String toString() { + return "Assignment(" + + "partitions=" + partitions + + ')'; + } } } From 57ae46f055a1e9db13a6e56a8997796a505bca03 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Mon, 9 May 2016 23:42:09 +0100 Subject: [PATCH 114/267] KAFKA-3421; Follow up to fix name of SourceTask method and add documentation of connector status REST API Author: Ewen Cheslack-Postava Reviewers: Ismael Juma Closes #1355 from ewencp/kafka-3421-follow-up (cherry picked from commit 36ed00d9bf82d48b0a02b6414734b7698c9ac4ea) Signed-off-by: Ismael Juma --- docs/connect.html | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/connect.html b/docs/connect.html index ca45165ccf08..c3cf58303f3a 100644 --- a/docs/connect.html +++ b/docs/connect.html @@ -95,7 +95,9 @@

    REST API

  • GET /connectors/{name} - get information about a specific connector
  • GET /connectors/{name}/config - get the configuration parameters for a specific connector
  • PUT /connectors/{name}/config - update the configuration parameters for a specific connector
  • +GET /connectors/{name}/status - get current status of the connector, including if it is running, failed, paused, etc., which worker it is assigned to, error information if it has failed, and the state of all its tasks (see the sketch after this list)
  • GET /connectors/{name}/tasks - get a list of tasks currently running for a connector
  • +GET /connectors/{name}/tasks/{taskid}/status - get current status of the task, including if it is running, failed, paused, etc., which worker it is assigned to, and error information if it has failed
  • DELETE /connectors/{name} - delete a connector, halting all tasks and deleting its configuration
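The two status endpoints above can be exercised with any HTTP client. The following is a minimal, illustrative Java sketch of polling a connector's status; the worker address (localhost:8083) and connector name (local-file-source) are assumptions for the example, not values defined by this patch.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class ConnectorStatusCheck {
        public static void main(String[] args) throws Exception {
            // Assumed worker address and connector name -- adjust for your deployment.
            URL url = new URL("http://localhost:8083/connectors/local-file-source/status");
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.setRequestProperty("Accept", "application/json");

            // Read the JSON body describing the connector state and the state of each of its tasks.
            StringBuilder body = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
            }
            System.out.println("HTTP " + connection.getResponseCode() + ": " + body);
        }
    }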
@@ -191,8 +193,8 @@
    Connector } -Although not used in the example, SourceTask also provides two APIs to commit offsets in the source system: commit and commitSourceRecord. The APIs are provided for source systems which have an acknowledgement mechanism for messages. Overriding these methods allows the source connector to acknowledge messages in the source system, either in bulk or individually, once they have been written to Kafka. -The commit API stores the offsets in the source system, up to the offsets that have been returned by poll. The implementation of this API should block until the commit is complete. The commitSourceRecord API saves the offset in the source system for each SourceRecord after it is written to Kafka. As Kafka Connect will record offsets automatically, SourceTasks are not required to implement them. In cases where a connector does need to acknowledge messages in the source system, only one of the APIs is typically required. +Although not used in the example, SourceTask also provides two APIs to commit offsets in the source system: commit and commitRecord. The APIs are provided for source systems which have an acknowledgement mechanism for messages. Overriding these methods allows the source connector to acknowledge messages in the source system, either in bulk or individually, once they have been written to Kafka. +The commit API stores the offsets in the source system, up to the offsets that have been returned by poll. The implementation of this API should block until the commit is complete. The commitRecord API saves the offset in the source system for each SourceRecord after it is written to Kafka. As Kafka Connect will record offsets automatically, SourceTasks are not required to implement them. In cases where a connector does need to acknowledge messages in the source system, only one of the APIs is typically required. Even with multiple tasks, this method implementation is usually pretty simple. It just has to determine the number of input tasks, which may require contacting the remote service it is pulling data from, and then divvy them up. Because some patterns for splitting work among tasks are so common, some utilities are provided in ConnectorUtils to simplify these cases. From f4ed61cbdd785339e8db12685bd22358747bd35b Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Mon, 9 May 2016 23:47:04 +0100 Subject: [PATCH 115/267] KAFKA-3634; Upgrade tests for SASL authentication Add a test for changing SASL mechanism using rolling upgrade and a test for rolling upgrade from 0.9.0.x to 0.10.0 with SASL/GSSAPI. 
Author: Rajini Sivaram Reviewers: Ben Stopford , Geoff Anderson , Ismael Juma Closes #1290 from rajinisivaram/KAFKA-3634 (cherry picked from commit 87285f36c9cd8e1d9861f6dfaacef978772fb7f1) Signed-off-by: Ismael Juma --- .../core/security_rolling_upgrade_test.py | 63 +++++++++++++++++++ tests/kafkatest/tests/core/upgrade_test.py | 6 +- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py index 39774902a6cb..e14d001972ab 100644 --- a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py +++ b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py @@ -20,6 +20,7 @@ from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.utils import is_int from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest +from ducktape.mark import parametrize from ducktape.mark import matrix from kafkatest.services.security.kafka_acls import ACLs import time @@ -74,6 +75,9 @@ def roll_in_secured_settings(self, client_protocol, broker_protocol): # Roll cluster to disable PLAINTEXT port self.kafka.close_port('PLAINTEXT') + self.set_authorizer_and_bounce(client_protocol, broker_protocol) + + def set_authorizer_and_bounce(self, client_protocol, broker_protocol): self.kafka.authorizer_class_name = KafkaService.SIMPLE_AUTHORIZER self.acls.set_acls(client_protocol, self.kafka, self.zk, self.topic, self.group) self.acls.set_acls(broker_protocol, self.kafka, self.zk, self.topic, self.group) @@ -85,6 +89,19 @@ def open_secured_port(self, client_protocol): self.kafka.start_minikdc() self.bounce() + def add_sasl_mechanism(self, new_client_sasl_mechanism): + self.kafka.client_sasl_mechanism = new_client_sasl_mechanism + self.kafka.start_minikdc() + self.bounce() + + def roll_in_sasl_mechanism(self, security_protocol, new_sasl_mechanism): + # Roll cluster to update inter-broker SASL mechanism. This disables the old mechanism. + self.kafka.interbroker_sasl_mechanism = new_sasl_mechanism + self.bounce() + + # Bounce again with ACLs for new mechanism + self.set_authorizer_and_bounce(security_protocol, security_protocol) + @matrix(client_protocol=["SSL", "SASL_PLAINTEXT", "SASL_SSL"]) def test_rolling_upgrade_phase_one(self, client_protocol): """ @@ -125,3 +142,49 @@ def test_rolling_upgrade_phase_two(self, client_protocol, broker_protocol): #Roll in the security protocol. Disable Plaintext. Ensure we can produce and Consume throughout self.run_produce_consume_validate(self.roll_in_secured_settings, client_protocol, broker_protocol) + + @parametrize(new_client_sasl_mechanism='PLAIN') + def test_rolling_upgrade_sasl_mechanism_phase_one(self, new_client_sasl_mechanism): + """ + Start with a SASL/GSSAPI cluster, add new SASL mechanism, via a rolling upgrade, ensuring we could produce + and consume throughout over SASL/GSSAPI. Finally check we can produce and consume using new mechanism. 
+ """ + self.kafka.interbroker_security_protocol = "SASL_SSL" + self.kafka.security_protocol = "SASL_SSL" + self.kafka.client_sasl_mechanism = "GSSAPI" + self.kafka.interbroker_sasl_mechanism = "GSSAPI" + self.kafka.start() + + # Create SASL/GSSAPI producer and consumer + self.create_producer_and_consumer() + + # Rolling upgrade, adding new SASL mechanism, ensuring the GSSAPI producer/consumer continues to run + self.run_produce_consume_validate(self.add_sasl_mechanism, new_client_sasl_mechanism) + + # Now we can produce and consume using the new SASL mechanism + self.kafka.client_sasl_mechanism = new_client_sasl_mechanism + self.create_producer_and_consumer() + self.run_produce_consume_validate(lambda: time.sleep(1)) + + @parametrize(new_sasl_mechanism='PLAIN') + def test_rolling_upgrade_sasl_mechanism_phase_two(self, new_sasl_mechanism): + """ + Start with a SASL cluster with GSSAPI for inter-broker and a second mechanism for clients (i.e. result of phase one). + Start Producer and Consumer using the second mechanism + Incrementally upgrade to set inter-broker to the second mechanism and disable GSSAPI + Incrementally upgrade again to add ACLs + Ensure the producer and consumer run throughout + """ + #Start with a broker that has GSSAPI for inter-broker and a second mechanism for clients + self.kafka.security_protocol = "SASL_SSL" + self.kafka.interbroker_security_protocol = "SASL_SSL" + self.kafka.client_sasl_mechanism = new_sasl_mechanism + self.kafka.interbroker_sasl_mechanism = "GSSAPI" + self.kafka.start() + + #Create Producer and Consumer using second mechanism + self.create_producer_and_consumer() + + #Roll in the second SASL mechanism for inter-broker, disabling first mechanism. Ensure we can produce and consume throughout + self.run_produce_consume_validate(self.roll_in_sasl_mechanism, self.kafka.security_protocol, new_sasl_mechanism) + diff --git a/tests/kafkatest/tests/core/upgrade_test.py b/tests/kafkatest/tests/core/upgrade_test.py index 790b69d1d8af..16a518d8a573 100644 --- a/tests/kafkatest/tests/core/upgrade_test.py +++ b/tests/kafkatest/tests/core/upgrade_test.py @@ -61,6 +61,7 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None): @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["none"]) + @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["none"], new_consumer=True, security_protocol="SASL_SSL") @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["snappy"], new_consumer=True) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"], new_consumer=True) @@ -70,7 +71,8 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None): @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["lz4"], new_consumer=True) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["none"]) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["snappy"]) - def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types, new_consumer=False): + def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types, + new_consumer=False, 
security_protocol="PLAINTEXT"): """Test upgrade of Kafka broker cluster from 0.8.2 or 0.9.0 to 0.10 from_kafka_version is a Kafka version to upgrade from: either 0.8.2.X or 0.9 @@ -93,6 +95,8 @@ def test_upgrade(self, from_kafka_version, to_message_format_version, compressio version=KafkaVersion(from_kafka_version), topics={self.topic: {"partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) + self.kafka.security_protocol = security_protocol + self.kafka.interbroker_security_protocol = security_protocol self.kafka.start() self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, From 7699f3a395cadd9d0078e9918744955b2c610136 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Mon, 9 May 2016 16:47:20 -0700 Subject: [PATCH 116/267] MINOR: Update protocol doc link in Introduction. Author: Ashish Singh Reviewers: Gwen Shapira Closes #1211 from SinghAsDev/MinorFixDocLink (cherry picked from commit eb1de107b0c29e322999d9052b82b1e042d8fcfd) Signed-off-by: Gwen Shapira --- docs/introduction.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/introduction.html b/docs/introduction.html index ad81e97f671b..c2e3554098f0 100644 --- a/docs/introduction.html +++ b/docs/introduction.html @@ -33,7 +33,7 @@

    1.1 Introduction

    -Communication between the clients and the servers is done with a simple, high-performance, language agnostic TCP protocol. We provide a Java client for Kafka, but clients are available in many languages. +Communication between the clients and the servers is done with a simple, high-performance, language agnostic TCP protocol. We provide a Java client for Kafka, but clients are available in many languages.

    Topics and Logs

    Let's first dive into the high-level abstraction Kafka provides—the topic. From e76d9e958157dc4838660811ec37af076e2e0454 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 9 May 2016 16:56:32 -0700 Subject: [PATCH 117/267] KAFKA-3676: system tests for connector pause/resume Author: Jason Gustafson Reviewers: Ewen Cheslack-Postava Closes #1345 from hachikuji/KAFKA-3676 (cherry picked from commit f96da638ee9a4e1e47ece1ea337ee071d911c3da) Signed-off-by: Ewen Cheslack-Postava --- .../storage/KafkaConfigBackingStore.java | 2 +- tests/kafkatest/services/connect.py | 28 +++- tests/kafkatest/services/kafka/kafka.py | 2 +- .../tests/connect/connect_distributed_test.py | 146 +++++++++++++++++- 4 files changed, 166 insertions(+), 12 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java index a894f3102c2c..9a93a4e48a22 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java @@ -469,7 +469,7 @@ public void onCompletion(Throwable error, ConsumerRecord record) try { TargetState state = TargetState.valueOf((String) targetState); - log.trace("Setting target state for connector {} to {}", connectorName, targetState); + log.debug("Setting target state for connector {} to {}", connectorName, targetState); connectorTargetStates.put(connectorName, state); } catch (IllegalArgumentException e) { log.error("Invalid target state for connector ({}): {}", connectorName, targetState); diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index cf67c301b09e..aad9ff3c009f 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -88,12 +88,15 @@ def stop_node(self, node, clean_shutdown=True): node.account.ssh("rm -f " + self.PID_FILE, allow_fail=False) - def restart(self): + def restart(self, clean_shutdown=True): # We don't want to do any clean up here, just restart the process. 
for node in self.nodes: self.logger.info("Restarting Kafka Connect on " + str(node.account)) - self.stop_node(node) - self.start_node(node) + self.restart_node(node, clean_shutdown) + + def restart_node(self, node, clean_shutdown=True): + self.stop_node(node, clean_shutdown) + self.start_node(node) def clean_node(self, node): node.account.kill_process("connect", clean_shutdown=False, allow_fail=True) @@ -128,6 +131,15 @@ def get_connector_tasks(self, name, node=None, retries=0, retry_backoff=.01): def delete_connector(self, name, node=None, retries=0, retry_backoff=.01): return self._rest_with_retry('/connectors/' + name, node=node, method="DELETE", retries=retries, retry_backoff=retry_backoff) + def get_connector_status(self, name, node=None): + return self._rest('/connectors/' + name + '/status', node=node) + + def pause_connector(self, name, node=None): + return self._rest('/connectors/' + name + '/pause', method="PUT") + + def resume_connector(self, name, node=None): + return self._rest('/connectors/' + name + '/resume', method="PUT") + def _rest(self, path, body=None, node=None, method="GET"): if node is None: node = random.choice(self.nodes) @@ -139,7 +151,7 @@ def _rest(self, path, body=None, node=None, method="GET"): self.logger.debug("%s %s response: %d", url, method, resp.status_code) if resp.status_code > 400: raise ConnectRestError(resp.status_code, resp.text, resp.url) - if resp.status_code == 204: + if resp.status_code == 204 or resp.status_code == 202: return None else: return resp.json() @@ -185,7 +197,7 @@ def start_node(self, node): self.logger.info("Starting Kafka Connect standalone process on " + str(node.account)) with node.account.monitor_log(self.LOG_FILE) as monitor: node.account.ssh(self.start_cmd(node, remote_connector_configs)) - monitor.wait_until('Kafka Connect started', timeout_sec=15, err_msg="Never saw message indicating Kafka Connect finished startup on " + str(node.account)) + monitor.wait_until('Kafka Connect started', timeout_sec=30, err_msg="Never saw message indicating Kafka Connect finished startup on " + str(node.account)) if len(self.pids(node)) == 0: raise RuntimeError("No process ids recorded") @@ -298,6 +310,12 @@ def __init__(self, cc, name="verifiable-sink", tasks=1, topics=["verifiable"]): self.tasks = tasks self.topics = topics + def flushed_messages(self): + return filter(lambda m: 'flushed' in m and m['flushed'], self.messages()) + + def received_messages(self): + return filter(lambda m: 'flushed' not in m or not m['flushed'], self.messages()) + def start(self): self.logger.info("Creating connector VerifiableSinkConnector %s", self.name) self.cc.create_connector({ diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py index 6ff7d0c7b553..334069d99544 100644 --- a/tests/kafkatest/services/kafka/kafka.py +++ b/tests/kafkatest/services/kafka/kafka.py @@ -227,7 +227,7 @@ def stop_node(self, node, clean_shutdown=True): for pid in pids: node.account.signal(pid, sig, allow_fail=False) - wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=20, err_msg="Kafka node failed to stop") + wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=60, err_msg="Kafka node failed to stop") def clean_node(self, node): JmxMixin.clean_node(self, node) diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py index 698a827b1712..d3ae2e169083 100644 --- a/tests/kafkatest/tests/connect/connect_distributed_test.py +++ 
b/tests/kafkatest/tests/connect/connect_distributed_test.py @@ -17,13 +17,14 @@ from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.connect import ConnectDistributedService, VerifiableSource, VerifiableSink +from kafkatest.services.connect import ConnectDistributedService, VerifiableSource, VerifiableSink, ConnectRestError from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.services.security.security_config import SecurityConfig from ducktape.utils.util import wait_until from ducktape.mark import matrix import subprocess, itertools, time from collections import Counter +import operator class ConnectDistributedTest(Test): """ @@ -73,6 +74,142 @@ def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT): self.zk.start() self.kafka.start() + def _start_connector(self, config_file): + connector_props = self.render(config_file) + connector_config = dict([line.strip().split('=', 1) for line in connector_props.split('\n') if line.strip() and not line.strip().startswith('#')]) + self.cc.create_connector(connector_config) + + def _connector_status(self, connector, node=None): + try: + return self.cc.get_connector_status(connector, node) + except ConnectRestError: + return None + + def _has_state(self, status, state): + return status is not None and status['connector']['state'] == state + + def _all_tasks_have_state(self, status, task_count, state): + if status is None: + return False + + tasks = status['tasks'] + if len(tasks) != task_count: + return False + + return reduce(operator.and_, [task['state'] == state for task in tasks], True) + + def is_running(self, connector, node=None): + status = self._connector_status(connector.name, node) + return self._has_state(status, 'RUNNING') and self._all_tasks_have_state(status, connector.tasks, 'RUNNING') + + def is_paused(self, connector, node=None): + status = self._connector_status(connector.name, node) + return self._has_state(status, 'PAUSED') and self._all_tasks_have_state(status, connector.tasks, 'PAUSED') + + def test_pause_and_resume_source(self): + """ + Verify that source connectors stop producing records when paused and begin again after + being resumed. 
+ """ + + self.setup_services() + self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) + self.cc.start() + + self.source = VerifiableSource(self.cc) + self.source.start() + + wait_until(lambda: self.is_running(self.source), timeout_sec=30, + err_msg="Failed to see connector transition to the RUNNING state") + + self.cc.pause_connector(self.source.name) + + # wait until all nodes report the paused transition + for node in self.cc.nodes: + wait_until(lambda: self.is_paused(self.source, node), timeout_sec=30, + err_msg="Failed to see connector transition to the PAUSED state") + + # verify that we do not produce new messages while paused + num_messages = len(self.source.messages()) + time.sleep(10) + assert num_messages == len(self.source.messages()), "Paused source connector should not produce any messages" + + self.cc.resume_connector(self.source.name) + + for node in self.cc.nodes: + wait_until(lambda: self.is_running(self.source, node), timeout_sec=30, + err_msg="Failed to see connector transition to the RUNNING state") + + # after resuming, we should see records produced again + wait_until(lambda: len(self.source.messages()) > num_messages, timeout_sec=30, + err_msg="Failed to produce messages after resuming source connector") + + def test_pause_and_resume_sink(self): + """ + Verify that sink connectors stop consuming records when paused and begin again after + being resumed. + """ + + self.setup_services() + self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) + self.cc.start() + + # use the verifiable source to produce a steady stream of messages + self.source = VerifiableSource(self.cc) + self.source.start() + + self.sink = VerifiableSink(self.cc) + self.sink.start() + + wait_until(lambda: self.is_running(self.sink), timeout_sec=30, + err_msg="Failed to see connector transition to the RUNNING state") + + self.cc.pause_connector(self.sink.name) + + # wait until all nodes report the paused transition + for node in self.cc.nodes: + wait_until(lambda: self.is_paused(self.sink, node), timeout_sec=30, + err_msg="Failed to see connector transition to the PAUSED state") + + # verify that we do not consume new messages while paused + num_messages = len(self.sink.received_messages()) + time.sleep(10) + assert num_messages == len(self.sink.received_messages()), "Paused sink connector should not consume any messages" + + self.cc.resume_connector(self.sink.name) + + for node in self.cc.nodes: + wait_until(lambda: self.is_running(self.sink, node), timeout_sec=30, + err_msg="Failed to see connector transition to the RUNNING state") + + # after resuming, we should see records consumed again + wait_until(lambda: len(self.sink.received_messages()) > num_messages, timeout_sec=30, + err_msg="Failed to consume messages after resuming source connector") + + + def test_pause_state_persistent(self): + """ + Verify that paused state is preserved after a cluster restart. 
+ """ + + self.setup_services() + self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) + self.cc.start() + + self.source = VerifiableSource(self.cc) + self.source.start() + + wait_until(lambda: self.is_running(self.source), timeout_sec=30, + err_msg="Failed to see connector transition to the RUNNING state") + + self.cc.pause_connector(self.source.name) + + self.cc.restart() + + # we should still be paused after restarting + for node in self.cc.nodes: + wait_until(lambda: self.is_paused(self.source, node), timeout_sec=30, + err_msg="Failed to see connector startup in PAUSED state") @matrix(security_protocol=[SecurityConfig.PLAINTEXT, SecurityConfig.SASL_SSL]) def test_file_source_and_sink(self, security_protocol): @@ -87,10 +224,9 @@ def test_file_source_and_sink(self, security_protocol): self.cc.start() self.logger.info("Creating connectors") - for connector_props in [self.render("connect-file-source.properties"), self.render("connect-file-sink.properties")]: - connector_config = dict([line.strip().split('=', 1) for line in connector_props.split('\n') if line.strip() and not line.strip().startswith('#')]) - self.cc.create_connector(connector_config) - + self._start_connector("connect-file-source.properties") + self._start_connector("connect-file-sink.properties") + # Generating data on the source node should generate new records and create new output on the sink node. Timeouts # here need to be more generous than they are for standalone mode because a) it takes longer to write configs, # do rebalancing of the group, etc, and b) without explicit leave group support, rebalancing takes awhile From cb20e8442145c82e718f8b5ae20d40d068b5b4d9 Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Mon, 9 May 2016 17:29:15 -0700 Subject: [PATCH 118/267] KAFKA-3608; Fix ZooKeeper structures and output format in documentation Author: Vahid Hashemian Reviewers: Gwen Shapira Closes #1257 from vahidhashemian/KAFKA-3608 (cherry picked from commit 18226ff0be6f98795b98dd505d8ac0b4f3cf8c07) Signed-off-by: Gwen Shapira --- docs/implementation.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/implementation.html b/docs/implementation.html index be81227c906b..0a36c22d9f83 100644 --- a/docs/implementation.html +++ b/docs/implementation.html @@ -282,7 +282,7 @@

    Notation

    Broker Node Registry

    -/brokers/ids/[0...N] --> host:port (ephemeral node)
    +/brokers/ids/[0...N] --> {"jmx_port":...,"timestamp":...,"endpoints":[...],"host":...,"version":...,"port":...} (ephemeral node)
     

    This is a list of all present broker nodes, each of which provides a unique logical broker id which identifies it to consumers (which must be given as part of its configuration). On startup, a broker node registers itself by creating a znode with the logical broker id under /brokers/ids. The purpose of the logical broker id is to allow a broker to be moved to a different physical machine without affecting consumers. An attempt to register a broker id that is already in use (say because two servers are configured with the same broker id) results in an error. @@ -292,7 +292,7 @@
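To make the registry layout above concrete, the sketch below lists the registered broker ids and prints each broker's registration JSON using the plain ZooKeeper client; the connect string localhost:2181 is an assumption, and for brevity the example does not wait for the session to be fully established.

    import java.util.List;
    import org.apache.zookeeper.ZooKeeper;

    public class BrokerRegistryDump {
        public static void main(String[] args) throws Exception {
            // Assumed ZooKeeper connect string -- adjust for your cluster.
            ZooKeeper zk = new ZooKeeper("localhost:2181", 30000, event -> { });
            try {
                // Each child of /brokers/ids is a logical broker id stored as an ephemeral znode.
                List<String> brokerIds = zk.getChildren("/brokers/ids", false);
                for (String id : brokerIds) {
                    byte[] data = zk.getData("/brokers/ids/" + id, false, null);
                    // The znode value is the registration JSON shown above (endpoints, host, port, ...).
                    System.out.println(id + " -> " + new String(data, "UTF-8"));
                }
            } finally {
                zk.close();
            }
        }
    }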

    Broker Node Registry

    Broker Topic Registry

    -/brokers/topics/[topic]/[0...N] --> nPartitions (ephemeral node)
    +/brokers/topics/[topic]/partitions/[0...N]/state --> {"controller_epoch":...,"leader":...,"version":...,"leader_epoch":...,"isr":[...]} (ephemeral node)
     

    @@ -317,7 +317,7 @@

    Consumer Id Registry

    In addition to the group_id which is shared by all consumers in a group, each consumer is given a transient, unique consumer_id (of the form hostname:uuid) for identification purposes. Consumer ids are registered in the following directory.

    -/consumers/[group_id]/ids/[consumer_id] --> {"topic1": #streams, ..., "topicN": #streams} (ephemeral node)
    +/consumers/[group_id]/ids/[consumer_id] --> {"version":...,"subscription":{...:...},"pattern":...,"timestamp":...} (ephemeral node)
     
    Each of the consumers in the group registers under its group and creates a znode with its consumer_id. The value of the znode contains a map of <topic, #streams>. This id is simply used to identify each of the consumers which is currently active within a group. This is an ephemeral node so it will disappear if the consumer process dies.

    @@ -327,7 +327,7 @@

Consumer Offsets

Consumers track the maximum offset they have consumed in each partition. This value is stored in a ZooKeeper directory if offsets.storage=zookeeper.

    -/consumers/[group_id]/offsets/[topic]/[broker_id-partition_id] --> offset_counter_value ((persistent node)
    +/consumers/[group_id]/offsets/[topic]/[partition_id] --> offset_counter_value ((persistent node)
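With offsets.storage=zookeeper, the committed offset for a single partition can be read straight from this path. The sketch below is illustrative only; the group, topic and partition values are placeholders.

    import org.apache.zookeeper.ZooKeeper;

    public class ConsumerOffsetLookup {
        public static void main(String[] args) throws Exception {
            // Placeholder group, topic and partition -- substitute real values.
            String path = "/consumers/my-group/offsets/my-topic/0";
            ZooKeeper zk = new ZooKeeper("localhost:2181", 30000, event -> { });
            try {
                // The znode value is the offset counter stored as a plain string.
                byte[] data = zk.getData(path, false, null);
                long offset = Long.parseLong(new String(data, "UTF-8"));
                System.out.println(path + " -> " + offset);
            } finally {
                zk.close();
            }
        }
    }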
     

    Partition Owner registry

    @@ -337,7 +337,7 @@

    Partition Owner registry

    -/consumers/[group_id]/owners/[topic]/[broker_id-partition_id] --> consumer_node_id (ephemeral node)
    +/consumers/[group_id]/owners/[topic]/[partition_id] --> consumer_node_id (ephemeral node)
     

    Broker node registration

    From 7c45a5ea76079a0ad402c3fa7c7461741dd857a9 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Mon, 9 May 2016 17:37:17 -0700 Subject: [PATCH 119/267] KAFKA-3684: SinkConnectorConfig does not return topics in config validation. Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1356 from Ishiihara/bug-fix-validate (cherry picked from commit 9575e93070a480eb1ef1e136a67ce0226914b937) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/AbstractHerder.java | 2 +- .../kafka/connect/runtime/ConnectorConfig.java | 16 +++++----------- .../connect/runtime/SinkConnectorConfig.java | 4 ++++ .../connect/runtime/SourceConnectorConfig.java | 4 ++++ 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java index 43fc4d1e3ebf..a29d216d2de9 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java @@ -241,7 +241,7 @@ public ConfigInfos validateConfigs(String connType, Map connecto connectorConfigDef = SinkConnectorConfig.configDef(); } List connectorConfigValues = connectorConfigDef.validate(connectorConfig); - + Config config = connector.validate(connectorConfig); ConfigDef configDef = connector.config(); Map configKeys = configDef.configKeys(); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java index 0cbfe214caab..9569b4beae0b 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectorConfig.java @@ -60,17 +60,11 @@ public class ConnectorConfig extends AbstractConfig { private static final String TASK_MAX_DISPLAY = "Tasks max"; - protected static ConfigDef config; - - static { - config = new ConfigDef() - .define(NAME_CONFIG, Type.STRING, Importance.HIGH, NAME_DOC, COMMON_GROUP, 1, Width.MEDIUM, NAME_DISPLAY) - .define(CONNECTOR_CLASS_CONFIG, Type.STRING, Importance.HIGH, CONNECTOR_CLASS_DOC, COMMON_GROUP, 2, Width.LONG, CONNECTOR_CLASS_DISPLAY) - .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, atLeast(TASKS_MIN_CONFIG), Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY); - } - public static ConfigDef configDef() { - return config; + return new ConfigDef() + .define(NAME_CONFIG, Type.STRING, Importance.HIGH, NAME_DOC, COMMON_GROUP, 1, Width.MEDIUM, NAME_DISPLAY) + .define(CONNECTOR_CLASS_CONFIG, Type.STRING, Importance.HIGH, CONNECTOR_CLASS_DOC, COMMON_GROUP, 2, Width.LONG, CONNECTOR_CLASS_DISPLAY) + .define(TASKS_MAX_CONFIG, Type.INT, TASKS_MAX_DEFAULT, atLeast(TASKS_MIN_CONFIG), Importance.HIGH, TASKS_MAX_DOC, COMMON_GROUP, 3, Width.SHORT, TASK_MAX_DISPLAY); } public ConnectorConfig() { @@ -78,7 +72,7 @@ public ConnectorConfig() { } public ConnectorConfig(Map props) { - super(config, props); + super(configDef(), props); } public ConnectorConfig(ConfigDef subClassConfig, Map props) { diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java index cbfc6d1a6405..7de3b0223002 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java 
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java @@ -40,6 +40,10 @@ public SinkConnectorConfig() { this(new HashMap()); } + public static ConfigDef configDef() { + return config; + } + public SinkConnectorConfig(Map props) { super(config, props); } diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java index ca9219f5449f..27b0408648dd 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java @@ -17,10 +17,14 @@ package org.apache.kafka.connect.runtime; +import org.apache.kafka.common.config.ConfigDef; + import java.util.Map; public class SourceConnectorConfig extends ConnectorConfig { + private static ConfigDef config = configDef(); + public SourceConnectorConfig(Map props) { super(config, props); } From 2a969664599c7f617fc16c47945a093e9950dd81 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Tue, 10 May 2016 08:00:51 +0100 Subject: [PATCH 120/267] MINOR: Add Kafka Streams API / upgrade notes Author: Guozhang Wang Reviewers: Michael G. Noll , Ismael Juma Closes #1321 from guozhangwang/KStreamsJavaDoc (cherry picked from commit 6f1873242c1a189770319e09f53467d26584112f) Signed-off-by: Ismael Juma --- docs/api.html | 19 +++++++++++++++++++ docs/documentation.html | 1 + docs/quickstart.html | 18 +++++++++--------- docs/upgrade.html | 1 + docs/uses.html | 2 +- 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/docs/api.html b/docs/api.html index 8d5be9b030da..c4572411c472 100644 --- a/docs/api.html +++ b/docs/api.html @@ -165,3 +165,22 @@

    2.2.3 New Consumer API

    javadocs. + +

    2.3 Streams API

    + +As of the 0.10.0 release we have added a new client library named Kafka Streams to let users implement their stream processing +applications with data stored in Kafka topics. Kafka Streams is considered alpha quality and its public APIs are likely to change in +future releases. +You can use Kafka Streams by adding a dependency on the streams jar using +the following example maven co-ordinates (you can change the version numbers with new releases): + +
    +	<dependency>
    +	    <groupId>org.apache.kafka</groupId>
    +	    <artifactId>kafka-streams</artifactId>
    +	    <version>0.10.0.0</version>
    +	</dependency>
    +
    + +Examples showing how to use this library are given in the +javadocs (note those classes annotated with @InterfaceStability.Unstable, indicating their public APIs may change without backward-compatibility in future releases). \ No newline at end of file diff --git a/docs/documentation.html b/docs/documentation.html index 70002ab8ec4e..ddc310218018 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -40,6 +40,7 @@
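To complement the dependency snippet above, here is a small, illustrative application against the 0.10.0 Streams API; the application id, broker address and topic names are assumptions for the example.

    import java.util.Properties;
    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.KafkaStreams;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KStreamBuilder;

    public class StreamsHelloWorld {
        public static void main(String[] args) {
            Properties props = new Properties();
            // Assumed application id and broker address.
            props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-hello-world");
            props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
            props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

            // Read from an input topic, upper-case each value, and write to an output topic.
            KStreamBuilder builder = new KStreamBuilder();
            KStream<String, String> source = builder.stream("streams-input");
            source.mapValues(value -> value.toUpperCase()).to("streams-output");

            KafkaStreams streams = new KafkaStreams(builder, props);
            streams.start();
        }
    }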

    Kafka 0.10.0 Documentation

  • 2.2.2 Old Simple Consumer API
  • 2.2.3 New Consumer API
  • +2.3 Streams API
  • 3. Configuration diff --git a/docs/quickstart.html b/docs/quickstart.html index 7a923c69fc00..4d4f7eae6836 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -258,15 +258,15 @@

    Step 8: Use of the WordCountDemo example code (converted to use Java 8 lambda expressions for easy reading).

    -KStream wordCounts = textLines
    -// Split each text line, by whitespace, into words.
    -.flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
    -// Ensure the words are available as message keys for the next aggregate operation.
    -.map((key, value) -> new KeyValue<>(value, value))
    -// Count the occurrences of each word (message key).
    -.countByKey(stringSerializer, longSerializer, stringDeserializer, longDeserializer, "Counts")
    -// Convert the resulted aggregate table into another stream.
    -.toStream();
    +KTable wordCounts = textLines
    +    // Split each text line, by whitespace, into words.
    +    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
    +
    +    // Ensure the words are available as record keys for the next aggregate operation.
    +    .map((key, value) -> new KeyValue<>(value, value))
    +
    +    // Count the occurrences of each word (record key) and store the results into a table named "Counts".
    +    .countByKey("Counts")
     

    diff --git a/docs/upgrade.html b/docs/upgrade.html index 486954c1c62f..4b8ec7eb9f08 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -90,6 +90,7 @@

Potential breaking changes in 0.10.0.0
    Notable changes in 0.10.0.0
      +
    • Starting from Kafka 0.10.0.0, a new client library named Kafka Streams is available for stream processing on data stored in Kafka topics. This new client library only works with 0.10.x and upward versioned brokers due to message format changes mentioned above. For more information please read this section.
    • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer.
  • The new consumer now exposes the configuration parameter exclude.internal.topics to restrict internal topics (such as the consumer offsets topic) from accidentally being included in regular expression subscriptions. By default, it is enabled. A configuration sketch follows this list.
    • The old Scala producer has been deprecated. Users should migrate their code to the Java producer included in the kafka-clients JAR as soon as possible.
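A hedged configuration sketch for the two new consumer settings called out above; the broker address, group id and topic are placeholders, and the values shown simply restate the documented defaults.

    import java.util.Collections;
    import java.util.Properties;
    import org.apache.kafka.clients.consumer.KafkaConsumer;

    public class NewConsumerConfigExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            // Placeholder broker address, group id and deserializers.
            props.put("bootstrap.servers", "localhost:9092");
            props.put("group.id", "example-group");
            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            // Settings mentioned in the notable changes: 64K receive buffer and internal-topic exclusion.
            props.put("receive.buffer.bytes", 65536);
            props.put("exclude.internal.topics", true);

            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
            consumer.subscribe(Collections.singletonList("example-topic"));
            // ... poll loop omitted for brevity ...
            consumer.close();
        }
    }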
diff --git a/docs/uses.html b/docs/uses.html index f769bedfcadf..5b97272598a0 100644 --- a/docs/uses.html +++ b/docs/uses.html @@ -45,7 +45,7 @@

      Log Aggregation

      Stream Processing

      -Many users end up doing stage-wise processing of data where data is consumed from topics of raw data and then aggregated, enriched, or otherwise transformed into new Kafka topics for further consumption. For example a processing flow for article recommendation might crawl article content from RSS feeds and publish it to an "articles" topic; further processing might help normalize or deduplicate this content to a topic of cleaned article content; a final stage might attempt to match this content to users. This creates a graph of real-time data flow out of the individual topics. Storm and Samza are popular frameworks for implementing these kinds of transformations. +Many users of Kafka process data in processing pipelines consisting of multiple stages, where raw input data is consumed from Kafka topics and then aggregated, enriched, or otherwise transformed into new topics for further consumption or follow-up processing. For example, a processing pipeline for recommending news articles might crawl article content from RSS feeds and publish it to an "articles" topic; further processing might normalize or deduplicate this content and published the cleansed article content to a new topic; a final processing stage might attempt to recommend this content to users. Such processing pipelines create graphs of real-time data flows based on the individual topics. Starting in 0.10.0.0, a light-weight but powerful stream processing library called Kafka Streams is available in Apache Kafka to perform such data processing as described above. Apart from Kafka Streams, alternative open source stream processing tools include Apache Storm and Apache Samza.

      Event Sourcing

      From 9ca393407ea8dfbacc5c8af388a6c815497751fd Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 10 May 2016 17:30:36 -0700 Subject: [PATCH 121/267] MINOR: Double timeout passed to `producer.close` in `sendAndVerifyTimestamp` We have had transient failures in this method when Jenkins is overloaded. Author: Ismael Juma Reviewers: Ewen Cheslack-Postava Closes #1359 from ijuma/increase-producer-close-timeout-in-test (cherry picked from commit fe0335ea1030ca61f38b343a67c78e6537d25717) Signed-off-by: Ewen Cheslack-Postava --- .../test/scala/integration/kafka/api/BaseProducerSendTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala index 15eeb63dd4c8..9489e70555bf 100644 --- a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala +++ b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala @@ -220,7 +220,7 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness { val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, partition, baseTimestamp + i, "key".getBytes, "value".getBytes) producer.send(record, callback) } - producer.close(5000L, TimeUnit.MILLISECONDS) + producer.close(10000L, TimeUnit.MILLISECONDS) assertEquals(s"Should have offset $numRecords but only successfully sent ${callback.offset}", numRecords, callback.offset) } finally { producer.close() From 5bfba1decaf3c11bbd7563c8993d7f68739bedb5 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Wed, 11 May 2016 13:05:58 +0100 Subject: [PATCH 122/267] MINOR: Ensure that selection key is cancelled on close Author: Rajini Sivaram Reviewers: Ismael Juma Closes #1368 from rajinisivaram/minor-channelclose (cherry picked from commit e20eba958d8de29cb4e3b6feea37ac3a1e1ab4f9) Signed-off-by: Ismael Juma --- .../common/network/PlaintextTransportLayer.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/network/PlaintextTransportLayer.java b/clients/src/main/java/org/apache/kafka/common/network/PlaintextTransportLayer.java index 3db4345c14f0..1135359f8d19 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/PlaintextTransportLayer.java +++ b/clients/src/main/java/org/apache/kafka/common/network/PlaintextTransportLayer.java @@ -30,11 +30,8 @@ import java.security.Principal; import org.apache.kafka.common.security.auth.KafkaPrincipal; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class PlaintextTransportLayer implements TransportLayer { - private static final Logger log = LoggerFactory.getLogger(PlaintextTransportLayer.class); private final SelectionKey key; private final SocketChannel socketChannel; private final Principal principal = KafkaPrincipal.ANONYMOUS; @@ -84,10 +81,13 @@ public boolean isConnected() { */ @Override public void close() throws IOException { - socketChannel.socket().close(); - socketChannel.close(); - key.attach(null); - key.cancel(); + try { + socketChannel.socket().close(); + socketChannel.close(); + } finally { + key.attach(null); + key.cancel(); + } } /** From 3178ecf11331e9e3e643c7ccdbd28afaf4b0e9b9 Mon Sep 17 00:00:00 2001 From: Ben Stopford Date: Wed, 11 May 2016 15:28:18 +0100 Subject: [PATCH 123/267] MINOR: Documentation for Rack Awareness Author: Ben Stopford Reviewers: Ismael Juma Closes #1369 from benstopford/rack-awareness-docs (cherry picked from commit 
6978115514fc74021e4f2c761402d6b0c954d50c) Signed-off-by: Ismael Juma --- docs/ops.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/ops.html b/docs/ops.html index f64a701717d2..faf545358602 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -98,6 +98,17 @@

Balancing leadership
    auto.leader.rebalance.enable=true +

      Balancing Replicas Across Racks

      +The rack awareness feature spreads replicas of the same partition across different racks. This extends the guarantees Kafka provides for broker-failure to cover rack-failure, limiting the risk of data loss should all the brokers on a rack fail at once. The feature can also be applied to other broker groupings such as availability zones in EC2. +

      +You can specify that a broker belongs to a particular rack by adding a property to the broker config: +
         broker.rack=my-rack-id
      +When a topic is created, modified or replicas are redistributed, the rack constraint will be honoured, ensuring replicas span as many racks as they can (a partition will span min(#racks, replication-factor) different racks). +

      +The algorithm used to assign replicas to brokers ensures that the number of leaders per broker will be constant, regardless of how brokers are distributed across racks. This ensures balanced throughput. +

+However, if racks are assigned different numbers of brokers, the assignment of replicas will not be even. Racks with fewer brokers will get more replicas, meaning they will use more storage and put more resources into replication. Hence it is sensible to configure an equal number of brokers per rack. +

      Mirroring data between clusters

      We refer to the process of replicating data between Kafka clusters "mirroring" to avoid confusion with the replication that happens amongst the nodes in a single cluster. Kafka comes with a tool for mirroring data between Kafka clusters. The tool reads from a source cluster and writes to a destination cluster, like this: From 8ec8267c2666e423414305e5daf89c1ce5fd96bd Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Wed, 11 May 2016 13:06:20 -0700 Subject: [PATCH 124/267] KAFKA-3690: Avoid to pass null to UnmodifiableMap Author: Liquan Pei Reviewers: Jason Gustafson , Ismael Juma , Ewen Cheslack-Postava Closes #1360 from Ishiihara/avoid-to-pass-null (cherry picked from commit bd8681cdd51d2878ea917941abe335f68a373716) Signed-off-by: Ewen Cheslack-Postava --- .../org/apache/kafka/connect/data/SchemaBuilder.java | 2 +- .../org/apache/kafka/connect/data/SchemaBuilderTest.java | 9 ++++++++- .../kafka/connect/storage/KafkaConfigBackingStore.java | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java index 3d2f5cca8427..32045f965122 100644 --- a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java +++ b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java @@ -181,7 +181,7 @@ public SchemaBuilder doc(String doc) { @Override public Map parameters() { - return Collections.unmodifiableMap(parameters); + return parameters == null ? null : Collections.unmodifiableMap(parameters); } /** diff --git a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java index 62020f372f5c..fdbaa0ae15e2 100644 --- a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java +++ b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java @@ -252,7 +252,14 @@ public void testArrayBuilderInvalidDefault() { @Test public void testMapBuilder() { - Schema schema = SchemaBuilder.map(Schema.INT8_SCHEMA, Schema.INT8_SCHEMA).build(); + // SchemaBuilder should also pass the check + Schema schema = SchemaBuilder.map(Schema.INT8_SCHEMA, Schema.INT8_SCHEMA); + assertTypeAndDefault(schema, Schema.Type.MAP, false, null); + assertEquals(schema.keySchema(), Schema.INT8_SCHEMA); + assertEquals(schema.valueSchema(), Schema.INT8_SCHEMA); + assertNoMetadata(schema); + + schema = SchemaBuilder.map(Schema.INT8_SCHEMA, Schema.INT8_SCHEMA).build(); assertTypeAndDefault(schema, Schema.Type.MAP, false, null); assertEquals(schema.keySchema(), Schema.INT8_SCHEMA); assertEquals(schema.valueSchema(), Schema.INT8_SCHEMA); diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java index 9a93a4e48a22..af8efee8e9d8 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java @@ -179,7 +179,7 @@ public static String COMMIT_TASKS_KEY(String connectorName) { // converter/serializer changes causing keys to change. We need to absolutely ensure that the keys remain precisely // the same. 
public static final Schema CONNECTOR_CONFIGURATION_V0 = SchemaBuilder.struct() - .field("properties", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA)) + .field("properties", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA).build()) .build(); public static final Schema TASK_CONFIGURATION_V0 = CONNECTOR_CONFIGURATION_V0; public static final Schema CONNECTOR_TASKS_COMMIT_V0 = SchemaBuilder.struct() From 2e078d9d8f8b5788ec7bf897cbfe65dce83c92b1 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Wed, 11 May 2016 21:11:17 +0100 Subject: [PATCH 125/267] MINOR: Improve handling of channel close exception Propagate IOException in SslTransportLayer channel.close to be consistent with PlaintextTransportLayer, close authenticator on channel close even if transport layer close fails. Author: Rajini Sivaram Reviewers: Ismael Juma Closes #1370 from rajinisivaram/minor-channelclose2 (cherry picked from commit b28bc57a1fdb9b56c89c3cb9c3df60afbeda521c) Signed-off-by: Ismael Juma --- .../kafka/common/network/Authenticator.java | 10 +-- .../kafka/common/network/KafkaChannel.java | 5 +- .../common/network/SslTransportLayer.java | 9 +-- .../org/apache/kafka/common/utils/Utils.java | 23 ++++++ .../apache/kafka/common/utils/UtilsTest.java | 80 +++++++++++++++++++ 5 files changed, 112 insertions(+), 15 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/network/Authenticator.java b/clients/src/main/java/org/apache/kafka/common/network/Authenticator.java index 6f01fe5c0999..0012f158f87f 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/Authenticator.java +++ b/clients/src/main/java/org/apache/kafka/common/network/Authenticator.java @@ -17,6 +17,7 @@ package org.apache.kafka.common.network; +import java.io.Closeable; import java.io.IOException; import java.util.Map; import java.security.Principal; @@ -27,7 +28,7 @@ /** * Authentication for Channel */ -public interface Authenticator { +public interface Authenticator extends Closeable { /** * Configures Authenticator using the provided parameters. 
@@ -54,11 +55,4 @@ public interface Authenticator { */ boolean complete(); - /** - * Closes this Authenticator - * - * @throws IOException if any I/O error occurs - */ - void close() throws IOException; - } diff --git a/clients/src/main/java/org/apache/kafka/common/network/KafkaChannel.java b/clients/src/main/java/org/apache/kafka/common/network/KafkaChannel.java index f72f91b8f005..16002eb6686b 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/KafkaChannel.java +++ b/clients/src/main/java/org/apache/kafka/common/network/KafkaChannel.java @@ -26,6 +26,8 @@ import java.security.Principal; +import org.apache.kafka.common.utils.Utils; + public class KafkaChannel { private final String id; private final TransportLayer transportLayer; @@ -42,8 +44,7 @@ public KafkaChannel(String id, TransportLayer transportLayer, Authenticator auth } public void close() throws IOException { - transportLayer.close(); - authenticator.close(); + Utils.closeAll(transportLayer, authenticator); } /** diff --git a/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java b/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java index d18d6b77a109..cfd618dfaa9d 100644 --- a/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java +++ b/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java @@ -141,7 +141,7 @@ public boolean isConnected() { * Sends a SSL close message and closes socketChannel. */ @Override - public void close() { + public void close() throws IOException { if (closing) return; closing = true; sslEngine.closeOutbound(); @@ -168,12 +168,11 @@ public void close() { try { socketChannel.socket().close(); socketChannel.close(); - } catch (IOException e) { - log.warn("Failed to close SSL socket channel: " + e); + } finally { + key.attach(null); + key.cancel(); } } - key.attach(null); - key.cancel(); } /** diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java index bd173ed63fd3..e74061870aa7 100755 --- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java +++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java @@ -14,6 +14,7 @@ import java.io.IOException; import java.io.InputStream; +import java.io.Closeable; import java.io.File; import java.io.FileInputStream; import java.io.OutputStream; @@ -676,4 +677,26 @@ public static void atomicMoveWithFallback(Path source, Path target) throws IOExc } } + /** + * Closes all the provided closeables. + * @throws IOException if any of the close methods throws an IOException. + * The first IOException is thrown with subsequent exceptions + * added as suppressed exceptions. + */ + public static void closeAll(Closeable... 
closeables) throws IOException { + IOException exception = null; + for (Closeable closeable : closeables) { + try { + closeable.close(); + } catch (IOException e) { + if (exception != null) + exception.addSuppressed(e); + else + exception = e; + } + } + if (exception != null) + throw exception; + } + } diff --git a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java index 1078578aa9ba..1af7e43f8e98 100755 --- a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java +++ b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java @@ -18,6 +18,8 @@ import java.util.Arrays; import java.util.Collections; +import java.io.Closeable; +import java.io.IOException; import java.nio.ByteBuffer; import org.junit.Test; @@ -26,6 +28,8 @@ import static org.apache.kafka.common.utils.Utils.getPort; import static org.apache.kafka.common.utils.Utils.formatAddress; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class UtilsTest { @@ -114,4 +118,80 @@ public void testMin() { assertEquals(1, Utils.min(2, 1, 3)); assertEquals(1, Utils.min(2, 3, 1)); } + + @Test + public void testCloseAll() { + TestCloseable[] closeablesWithoutException = TestCloseable.createCloseables(false, false, false); + try { + Utils.closeAll(closeablesWithoutException); + TestCloseable.checkClosed(closeablesWithoutException); + } catch (IOException e) { + fail("Unexpected exception: " + e); + } + + TestCloseable[] closeablesWithException = TestCloseable.createCloseables(true, true, true); + try { + Utils.closeAll(closeablesWithException); + fail("Expected exception not thrown"); + } catch (IOException e) { + TestCloseable.checkClosed(closeablesWithException); + TestCloseable.checkException(e, closeablesWithException); + } + + TestCloseable[] singleExceptionCloseables = TestCloseable.createCloseables(false, true, false); + try { + Utils.closeAll(singleExceptionCloseables); + fail("Expected exception not thrown"); + } catch (IOException e) { + TestCloseable.checkClosed(singleExceptionCloseables); + TestCloseable.checkException(e, singleExceptionCloseables[1]); + } + + TestCloseable[] mixedCloseables = TestCloseable.createCloseables(false, true, false, true, true); + try { + Utils.closeAll(mixedCloseables); + fail("Expected exception not thrown"); + } catch (IOException e) { + TestCloseable.checkClosed(mixedCloseables); + TestCloseable.checkException(e, mixedCloseables[1], mixedCloseables[3], mixedCloseables[4]); + } + } + + private static class TestCloseable implements Closeable { + private final int id; + private final IOException closeException; + private boolean closed; + + TestCloseable(int id, boolean exceptionOnClose) { + this.id = id; + this.closeException = exceptionOnClose ? new IOException("Test close exception " + id) : null; + } + + @Override + public void close() throws IOException { + closed = true; + if (closeException != null) + throw closeException; + } + + static TestCloseable[] createCloseables(boolean... exceptionOnClose) { + TestCloseable[] closeables = new TestCloseable[exceptionOnClose.length]; + for (int i = 0; i < closeables.length; i++) + closeables[i] = new TestCloseable(i, exceptionOnClose[i]); + return closeables; + } + + static void checkClosed(TestCloseable... 
closeables) { + for (TestCloseable closeable : closeables) + assertTrue("Close not invoked for " + closeable.id, closeable.closed); + } + + static void checkException(IOException e, TestCloseable... closeablesWithException) { + assertEquals(closeablesWithException[0].closeException, e); + Throwable[] suppressed = e.getSuppressed(); + assertEquals(closeablesWithException.length - 1, suppressed.length); + for (int i = 1; i < closeablesWithException.length; i++) + assertEquals(closeablesWithException[i].closeException, suppressed[i - 1]); + } + } } From dca78b586fd2561709793f163c7c3f05e194d768 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Wed, 11 May 2016 23:48:46 +0100 Subject: [PATCH 126/267] KAFKA-3694; Ensure broker Zk deregistration prior to restart in ReplicationTest Author: Jason Gustafson Reviewers: Geoff Anderson , Ismael Juma Closes #1365 from hachikuji/KAFKA-3694 (cherry picked from commit f892f0ca6d38cb21a93c2c05dd8b9a23c4165181) Signed-off-by: Ismael Juma --- tests/kafkatest/services/kafka/kafka.py | 16 +++++++++++++++- .../services/kafka/templates/kafka.properties | 1 + tests/kafkatest/tests/core/replication_test.py | 11 ++++------- .../kafkatest/tests/produce_consume_validate.py | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py index 334069d99544..a843a127b74d 100644 --- a/tests/kafkatest/services/kafka/kafka.py +++ b/tests/kafkatest/services/kafka/kafka.py @@ -66,7 +66,7 @@ class KafkaService(KafkaPathResolverMixin, JmxMixin, Service): def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT, interbroker_security_protocol=SecurityConfig.PLAINTEXT, client_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI, interbroker_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI, authorizer_class_name=None, topics=None, version=TRUNK, quota_config=None, jmx_object_names=None, - jmx_attributes=[], zk_connect_timeout=5000): + jmx_attributes=[], zk_connect_timeout=5000, zk_session_timeout=6000): """ :type context :type zk: ZookeeperService @@ -99,6 +99,11 @@ def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAI # for this constructor. self.zk_connect_timeout = zk_connect_timeout + # Also allow the session timeout to be provided explicitly, + # primarily so that test cases can depend on it when waiting + # e.g. brokers to deregister after a hard kill. 
+ self.zk_session_timeout = zk_session_timeout + self.port_mappings = { 'PLAINTEXT': Port('PLAINTEXT', 9092, False), 'SSL': Port('SSL', 9093, False), @@ -513,6 +518,15 @@ def controller(self): self.logger.info("Controller's ID: %d" % (controller_idx)) return self.get_node(controller_idx) + def is_registered(self, node): + """ + Check whether a broker is registered in Zookeeper + """ + self.logger.debug("Querying zookeeper to see if broker %s is registered", node) + broker_info = self.zk.query("/brokers/ids/%s" % self.idx(node)) + self.logger.debug("Broker info: %s", broker_info) + return broker_info is not None + def get_offset_shell(self, topic, partitions, max_wait_ms, offsets, time): node = self.nodes[0] diff --git a/tests/kafkatest/services/kafka/templates/kafka.properties b/tests/kafkatest/services/kafka/templates/kafka.properties index 1e4f17c0d3ca..1f2371302c8a 100644 --- a/tests/kafkatest/services/kafka/templates/kafka.properties +++ b/tests/kafkatest/services/kafka/templates/kafka.properties @@ -72,6 +72,7 @@ zookeeper.set.acl={{zk_set_acl}} {% endif %} zookeeper.connection.timeout.ms={{ zk_connect_timeout }} +zookeeper.session.timeout.ms={{ zk_session_timeout }} {% if replica_lag is defined %} replica.lag.time.max.ms={{replica_lag}} diff --git a/tests/kafkatest/tests/core/replication_test.py b/tests/kafkatest/tests/core/replication_test.py index 8e9474aec270..f8150341c499 100644 --- a/tests/kafkatest/tests/core/replication_test.py +++ b/tests/kafkatest/tests/core/replication_test.py @@ -65,15 +65,12 @@ def hard_bounce(test, broker_type): test.kafka.signal_node(prev_broker_node, sig=signal.SIGKILL) # Since this is a hard kill, we need to make sure the process is down and that - # zookeeper and the broker cluster have registered the loss of the leader/controller. - # Waiting for a new leader for the topic-partition/controller to be elected is a reasonable heuristic for this. + # zookeeper has registered the loss by expiring the broker's session timeout. 
- def role_reassigned(): - current_elected_broker = broker_node(test, broker_type) - return current_elected_broker is not None and current_elected_broker != prev_broker_node + wait_until(lambda: len(test.kafka.pids(prev_broker_node)) == 0 and not test.kafka.is_registered(prev_broker_node), + timeout_sec=test.kafka.zk_session_timeout + 5, + err_msg="Failed to see timely deregistration of hard-killed broker %s" % str(prev_broker_node.account)) - wait_until(lambda: len(test.kafka.pids(prev_broker_node)) == 0, timeout_sec=5) - wait_until(role_reassigned, timeout_sec=10, backoff_sec=.5) test.kafka.start_node(prev_broker_node) failures = { diff --git a/tests/kafkatest/tests/produce_consume_validate.py b/tests/kafkatest/tests/produce_consume_validate.py index 425b81659576..a5da7be1a885 100644 --- a/tests/kafkatest/tests/produce_consume_validate.py +++ b/tests/kafkatest/tests/produce_consume_validate.py @@ -35,7 +35,7 @@ def setup_producer_and_consumer(self): def start_producer_and_consumer(self): # Start background producer and consumer self.producer.start() - wait_until(lambda: self.producer.num_acked > 5, timeout_sec=10, + wait_until(lambda: self.producer.num_acked > 5, timeout_sec=20, err_msg="Producer failed to start in a reasonable amount of time.") self.consumer.start() wait_until(lambda: len(self.consumer.messages_consumed[1]) > 0, timeout_sec=60, From 05640c86eebc673f110604e0c7b3fa3315dfcd7b Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Wed, 11 May 2016 17:01:14 -0700 Subject: [PATCH 127/267] KAFKA-3704: Remove hard-coded block size in KafkaProducer Author: Guozhang Wang Reviewers: Ismael Juma Closes #1371 from guozhangwang/K3565-remove-compression-blocksize (cherry picked from commit 1182d61deb23b5cd86cbe462471f7df583a796e1) Signed-off-by: Guozhang Wang --- .../apache/kafka/common/record/Compressor.java | 17 ++++++----------- .../org/apache/kafka/common/record/Record.java | 2 +- docs/upgrade.html | 1 + 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java index 37d53b81dcba..60c15e60bff3 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java @@ -33,7 +33,6 @@ public class Compressor { static private final float COMPRESSION_RATE_DAMPING_FACTOR = 0.9f; static private final float COMPRESSION_RATE_ESTIMATION_FACTOR = 1.05f; - static private final int COMPRESSION_DEFAULT_BUFFER_SIZE = 1024; private static final float[] TYPE_TO_RATE; @@ -53,7 +52,7 @@ public class Compressor { @Override public Constructor get() throws ClassNotFoundException, NoSuchMethodException { return Class.forName("org.xerial.snappy.SnappyOutputStream") - .getConstructor(OutputStream.class, Integer.TYPE); + .getConstructor(OutputStream.class); } }); @@ -91,7 +90,7 @@ public Constructor get() throws ClassNotFoundException, NoSuchMethodException { public float compressionRate; public long maxTimestamp; - public Compressor(ByteBuffer buffer, CompressionType type, int blockSize) { + public Compressor(ByteBuffer buffer, CompressionType type) { this.type = type; this.initPos = buffer.position(); @@ -108,11 +107,7 @@ public Compressor(ByteBuffer buffer, CompressionType type, int blockSize) { // create the stream bufferStream = new ByteBufferOutputStream(buffer); - appendStream = wrapForOutput(bufferStream, type, blockSize); - } - - public Compressor(ByteBuffer buffer, 
CompressionType type) { - this(buffer, type, COMPRESSION_DEFAULT_BUFFER_SIZE); + appendStream = wrapForOutput(bufferStream, type); } public ByteBuffer buffer() { @@ -246,16 +241,16 @@ public long estimatedBytesWritten() { // the following two functions also need to be public since they are used in MemoryRecords.iteration - static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, CompressionType type, int bufferSize) { + static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, CompressionType type) { try { switch (type) { case NONE: return new DataOutputStream(buffer); case GZIP: - return new DataOutputStream(new GZIPOutputStream(buffer, bufferSize)); + return new DataOutputStream(new GZIPOutputStream(buffer)); case SNAPPY: try { - OutputStream stream = (OutputStream) snappyOutputStreamSupplier.get().newInstance(buffer, bufferSize); + OutputStream stream = (OutputStream) snappyOutputStreamSupplier.get().newInstance(buffer); return new DataOutputStream(stream); } catch (Exception e) { throw new KafkaException(e); diff --git a/clients/src/main/java/org/apache/kafka/common/record/Record.java b/clients/src/main/java/org/apache/kafka/common/record/Record.java index 147ad86986b2..baab9ab6f1c6 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Record.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Record.java @@ -146,7 +146,7 @@ public Record(long timestamp, byte[] value) { public static void write(ByteBuffer buffer, long timestamp, byte[] key, byte[] value, CompressionType type, int valueOffset, int valueSize) { // construct the compressor with compression type none since this function will not do any //compression according to the input type, it will just write the record's payload as is - Compressor compressor = new Compressor(buffer, CompressionType.NONE, buffer.capacity()); + Compressor compressor = new Compressor(buffer, CompressionType.NONE); compressor.putRecord(timestamp, key, value, type, valueOffset, valueSize); } diff --git a/docs/upgrade.html b/docs/upgrade.html index 4b8ec7eb9f08..3c98540ae07c 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -91,6 +91,7 @@
Notable changes in 0.10.0.0
      • Starting from Kafka 0.10.0.0, a new client library named Kafka Streams is available for stream processing on data stored in Kafka topics. This new client library only works with 0.10.x and upward versioned brokers due to message format changes mentioned above. For more information please read this section.
      • +
      • If compression with snappy or gzip is enabled, the new producer will use the compression scheme's default buffer size (this is already the case for LZ4) instead of 1 KB in order to improve the compression ratio. Note that the default buffer sizes for snappy, gzip and LZ4 are 0.5 KB, 32 KB and 64KB respectively. For the snappy case, a producer with 5000 partitions will require an additional 155 MB of JVM heap.
      • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer.
      • The new consumer now exposes the configuration parameter exclude.internal.topics to restrict internal topics (such as the consumer offsets topic) from accidentally being included in regular expression subscriptions. By default, it is enabled.
      • The old Scala producer has been deprecated. Users should migrate their code to the Java producer included in the kafka-clients JAR as soon as possible.
      • From f4dc90e9e277abd70e6156e4b7e51194154be92c Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Wed, 11 May 2016 17:19:11 -0700 Subject: [PATCH 128/267] KAFKA-3583: Add documentation for Connect status control APIs Author: Jason Gustafson Reviewers: Roger Hoover , Ismael Juma , Ewen Cheslack-Postava Closes #1358 from hachikuji/KAFKA-3583 (cherry picked from commit 4807dd1df945f6e66c6276f0a85e8abeb24b568a) Signed-off-by: Ewen Cheslack-Postava --- docs/connect.html | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/connect.html b/docs/connect.html index c3cf58303f3a..a362ddeb5945 100644 --- a/docs/connect.html +++ b/docs/connect.html @@ -98,6 +98,10 @@

        REST API

      • GET /connectors/{name}/status - get current status of the connector, including if it is running, failed, paused, etc., which worker it is assigned to, error information if it has failed, and the state of all its tasks
      • GET /connectors/{name}/tasks - get a list of tasks currently running for a connector
      • GET /connectors/{name}/tasks/{taskid}/status - get current status of the task, including if it is running, failed, paused, etc., which worker it is assigned to, and error information if it has failed
      • +
      • PUT /connectors/{name}/pause - pause the connector and its tasks, which stops message processing until the connector is resumed
      • +
      • PUT /connectors/{name}/resume - resume a paused connector (or do nothing if the connector is not paused)
      • +
      • POST /connectors/{name}/restart - restart a connector (typically because it has failed)
      • +
      • POST /connectors/{name}/tasks/{taskId}/restart - restart an individual task (typically because it has failed)
      • DELETE /connectors/{name} - delete a connector, halting all tasks and deleting its configuration
      @@ -338,3 +342,48 @@

      Working with Schemas

      Kafka Connect Administration
    + +

    +Kafka Connect's REST layer provides a set of APIs to enable administration of the cluster. This includes APIs to view the configuration of connectors and the status of their tasks, as well as to alter their current behavior (e.g. changing configuration and restarting tasks). +

    + +

    +When a connector is first submitted to the cluster, the workers rebalance the full set of connectors in the cluster and their tasks so that each worker has approximately the same amount of work. This same rebalancing procedure is also used when connectors increase or decrease the number of tasks they require, or when a connector's configuration is changed. You can use the REST API to view the current status of a connector and its tasks, including the id of the worker to which each was assigned. For example, querying the status of a file source (using GET /connectors/file-source/status) might produce output like the following: +

    + +
    +{
    +  "name": "file-source",
    +  "connector": {
    +    "state": "RUNNING",
    +    "worker_id": "192.168.1.208:8083"
    +  },
    +  "tasks": [
    +    {
    +      "id": 0,
    +      "state": "RUNNING",
    +      "worker_id": "192.168.1.209:8083"
    +    }
    +  ]
    +}
    +
    + +

    +Connectors and their tasks publish status updates to a shared topic (configured with status.storage.topic) which all workers in the cluster monitor. Because the workers consume this topic asynchronously, there is typically a (short) delay before a state change is visible through the status API. The following states are possible for a connector or one of its tasks: +

    + +
      +
    • UNASSIGNED: The connector/task has not yet been assigned to a worker.
    • +
    • RUNNING: The connector/task is running.
    • +
    • PAUSED: The connector/task has been administratively paused.
    • +
    • FAILED: The connector/task has failed (usually by raising an exception, which is reported in the status output).
    • +
    + +

    +In most cases, connector and task states will match, though they may be different for short periods of time when changes are occurring or if tasks have failed. For example, when a connector is first started, there may be a noticeable delay before the connector and its tasks have all transitioned to the RUNNING state. States will also diverge when tasks fail since Connect does not automatically restart failed tasks. To restart a connector/task manually, you can use the restart APIs listed above. Note that if you try to restart a task while a rebalance is taking place, Connect will return a 409 (Conflict) status code. You can retry after the rebalance completes, but it might not be necessary since rebalances effectively restart all the connectors and tasks in the cluster. +

    + +

    +It's sometimes useful to temporarily stop the message processing of a connector. For example, if the remote system is undergoing maintenance, it would be preferable for source connectors to stop polling it for new data instead of filling logs with exception spam. For this use case, Connect offers a pause/resume API. While a source connector is paused, Connect will stop polling it for additional records. While a sink connector is paused, Connect will stop pushing new messages to it. The pause state is persistent, so even if you restart the cluster, the connector will not begin message processing again until the task has been resumed. Note that there may be a delay before all of a connector's tasks have transitioned to the PAUSED state since it may take time for them to finish whatever processing they were in the middle of when being paused. Additionally, failed tasks will not transition to the PAUSED state until they have been restarted. +
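As a rough sketch of driving these administration endpoints from code (the worker address localhost:8083 and the connector name file-source are assumptions that match the status example earlier; any HTTP client works equally well, and the same pattern applies to the resume and restart endpoints listed above):

    import java.net.HttpURLConnection;
    import java.net.URL;

    public class ConnectPauseExample {
        public static void main(String[] args) throws Exception {
            // PUT /connectors/{name}/pause, as listed in the REST API section above.
            URL url = new URL("http://localhost:8083/connectors/file-source/pause");
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("PUT");
            int status = connection.getResponseCode();
            // A 2xx response means the request was accepted; tasks move to PAUSED asynchronously,
            // so poll GET /connectors/file-source/status to observe the transition.
            System.out.println("Pause request returned HTTP " + status);
            connection.disconnect();
        }
    }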

    From addbaefa6a254013853f05c938996919d5097da8 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Thu, 12 May 2016 01:38:50 +0100 Subject: [PATCH 129/267] MINOR: Fix order of compression algorithms in upgrade note Author: Ismael Juma Reviewers: Guozhang Wang , Jun Rao Closes #1373 from ijuma/fix-producer-buffer-size-upgrade-note (cherry picked from commit 84d17bdf220292dc9950566afe1de34b64be4746) Signed-off-by: Ismael Juma --- docs/upgrade.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/upgrade.html b/docs/upgrade.html index 3c98540ae07c..3e07ef89628b 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -91,7 +91,7 @@
Notable changes in 0.10.0.0
    • Starting from Kafka 0.10.0.0, a new client library named Kafka Streams is available for stream processing on data stored in Kafka topics. This new client library only works with 0.10.x and upward versioned brokers due to message format changes mentioned above. For more information please read this section.
    • -
    • If compression with snappy or gzip is enabled, the new producer will use the compression scheme's default buffer size (this is already the case for LZ4) instead of 1 KB in order to improve the compression ratio. Note that the default buffer sizes for snappy, gzip and LZ4 are 0.5 KB, 32 KB and 64KB respectively. For the snappy case, a producer with 5000 partitions will require an additional 155 MB of JVM heap.
    • +
    • If compression with snappy or gzip is enabled, the new producer will use the compression scheme's default buffer size (this is already the case for LZ4) instead of 1 KB in order to improve the compression ratio. Note that the default buffer sizes for gzip, snappy and LZ4 are 0.5 KB, 32 KB and 64KB respectively. For the snappy case, a producer with 5000 partitions will require an additional 155 MB of JVM heap.
    • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer.
    • The new consumer now exposes the configuration parameter exclude.internal.topics to restrict internal topics (such as the consumer offsets topic) from accidentally being included in regular expression subscriptions. By default, it is enabled.
    • The old Scala producer has been deprecated. Users should migrate their code to the Java producer included in the kafka-clients JAR as soon as possible.
    • From 553cc6487e7bb032351aade0232fa8537be76a64 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Thu, 12 May 2016 11:12:17 +0100 Subject: [PATCH 130/267] MINOR: Change type of StreamsConfig.BOOTSTRAP_SERVERS_CONFIG to List This is an improved version of https://github.com/apache/kafka/pull/1374, where we include a unit test. /cc ijuma and guozhangwang Author: Guozhang Wang Author: Michael G. Noll Reviewers: Michael G. Noll , Ismael Juma Closes #1377 from miguno/streamsconfig-multiple-bootstrap-servers (cherry picked from commit 5aedde5b29cc5a1adea4c65d5c44fb657f162eab) Signed-off-by: Ismael Juma --- .../apache/kafka/streams/StreamsConfig.java | 2 +- .../kafka/streams/StreamsConfigTest.java | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java index fac29141ffd9..efccd7aa842a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java +++ b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java @@ -120,7 +120,7 @@ public class StreamsConfig extends AbstractConfig { Importance.HIGH, StreamsConfig.APPLICATION_ID_DOC) .define(BOOTSTRAP_SERVERS_CONFIG, // required with no default value - Type.STRING, + Type.LIST, Importance.HIGH, CommonClientConfigs.BOOSTRAP_SERVERS_DOC) .define(CLIENT_ID_CONFIG, diff --git a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java index 81b406f60931..d7def7067e8f 100644 --- a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java @@ -20,14 +20,20 @@ import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.common.serialization.Serializer; +import org.apache.kafka.common.utils.Utils; import org.junit.Before; import org.junit.Test; +import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Properties; + +import static org.hamcrest.CoreMatchers.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; public class StreamsConfigTest { @@ -84,4 +90,18 @@ public void defaultSerdeShouldBeConfigured() { assertEquals("Should get the original string after serialization and deserialization with the configured encoding", str, streamsConfig.valueSerde().deserializer().deserialize(topic, serializer.serialize(topic, str))); } + + @Test + public void shouldSupportMultipleBootstrapServers() { + List expectedBootstrapServers = Arrays.asList("broker1:9092", "broker2:9092"); + String bootstrapServersString = Utils.mkString(expectedBootstrapServers, ",").toString(); + Properties props = new Properties(); + props.put(StreamsConfig.APPLICATION_ID_CONFIG, "irrelevant"); + props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServersString); + StreamsConfig config = new StreamsConfig(props); + + List actualBootstrapServers = config.getList(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG); + assertThat(actualBootstrapServers, equalTo(expectedBootstrapServers)); + } + } From 056a78dff96ee96e8c9481d16081645f511960ab Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Thu, 12 May 2016 20:36:47 +0100 Subject: [PATCH 131/267] MINOR: Fix checkstyle failure in `StreamsConfigTest` I removed the hamcrest matcher to 
unbreak the build, but we probably want to tweak the `import-control.xml` as it currently only allows it for ``, which is weird. Author: Ismael Juma Reviewers: Guozhang Wang Closes #1380 from ijuma/fix-streams-config-test-checkstyle (cherry picked from commit ac434a4ebdc36338ff707c38e189f9d1668ff423) Signed-off-by: Ismael Juma --- .../test/java/org/apache/kafka/streams/StreamsConfigTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java index d7def7067e8f..17d6b4bd4c1c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java @@ -30,10 +30,8 @@ import java.util.Map; import java.util.Properties; -import static org.hamcrest.CoreMatchers.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThat; public class StreamsConfigTest { @@ -101,7 +99,7 @@ public void shouldSupportMultipleBootstrapServers() { StreamsConfig config = new StreamsConfig(props); List actualBootstrapServers = config.getList(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG); - assertThat(actualBootstrapServers, equalTo(expectedBootstrapServers)); + assertEquals(expectedBootstrapServers, actualBootstrapServers); } } From c6e9717dfd3b9a9c16c32cb7ac64abc7ce3ebe6c Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Thu, 12 May 2016 18:14:37 -0700 Subject: [PATCH 132/267] KAFKA-3421: Connect developer guide update and several fixes This is a follow up of KAKFA-3421 to update the connect developer guide to include the configuration validation. Also includes a couple of minor fixes. Author: Liquan Pei Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1366 from Ishiihara/connect-dev-doc (cherry picked from commit 527b98d82f5142ab6a5efc26e84f6b0a21aec062) Signed-off-by: Ewen Cheslack-Postava --- config/connect-distributed.properties | 14 +++++-- docs/connect.html | 54 +++++++++++++++++++-------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/config/connect-distributed.properties b/config/connect-distributed.properties index b25339f83e91..931b85352889 100644 --- a/config/connect-distributed.properties +++ b/config/connect-distributed.properties @@ -18,6 +18,7 @@ # These are defaults. This file just demonstrates how to override some settings. bootstrap.servers=localhost:9092 +# unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs group.id=connect-cluster # The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will @@ -36,8 +37,15 @@ internal.value.converter=org.apache.kafka.connect.json.JsonConverter internal.key.converter.schemas.enable=false internal.value.converter.schemas.enable=false +# Topic to use for storing offsets. This topic should have many partitions and be replicated. offset.storage.topic=connect-offsets -# Flush much faster than normal, which is useful for testing/debugging -offset.flush.interval.ms=10000 + +# Topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated topic. +# You may need to manually create the topic to ensure single partition for the config topic as auto created topics may have multiple partitions. 
config.storage.topic=connect-configs -status.storage.topic=connect-status \ No newline at end of file + +# Topic to use for storing statuses. This topic can have multiple partitions and should be replicated. +status.storage.topic=connect-status + +# Flush much faster than normal, which is useful for testing/debugging +offset.flush.interval.ms=10000 \ No newline at end of file diff --git a/docs/connect.html b/docs/connect.html index a362ddeb5945..4ba406e25f53 100644 --- a/docs/connect.html +++ b/docs/connect.html @@ -53,15 +53,17 @@

Running Kafka Connect -The difference is in the class which is started and the configuration parameters which change how the Kafka Connect process decides where to store configurations, how to assign work, and where to store offsets. In particular, the following configuration parameters are critical to set before starting your cluster: +The difference is in the class which is started and the configuration parameters which change how the Kafka Connect process decides where to store configurations, how to assign work, and where to store offsets and task statuses. In the distributed mode, Kafka Connect stores the offsets, configs and task statuses in Kafka topics. It is recommended to manually create the topics for offset, configs and statuses in order to achieve the desired number of partitions and replication factors. If the topics are not yet created when starting Kafka Connect, the topics will be auto created with default number of partitions and replication factor, which may not be best suited for its usage. +In particular, the following configuration parameters are critical to set before starting your cluster:
      • group.id (default connect-cluster) - unique name for the cluster, used in forming the Connect cluster group; note that this must not conflict with consumer group IDs
      • -
      • config.storage.topic (default connect-configs) - topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated topic
      • -
      • offset.storage.topic (default connect-offsets) - topic to use for ; this topic should have many partitions and be replicated
      • +
      • config.storage.topic (default connect-configs) - topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated topic. You may need to manually create the topic to ensure single partition for the config topic as auto created topics may have multiple partitions.
      • +
      • offset.storage.topic (default connect-offsets) - topic to use for storing offsets; this topic should have many partitions and be replicated
      • +
      • status.storage.topic (default connect-status) - topic to use for storing statuses; this topic can have multiple partitions and should be replicated
      -Note that in distributed mode the connector configurations are not passed on the command line. Instead, use the REST API described below to create, modify, and destroy connectors. +Note that in distributed mode the connector configurations are not passed on the command line. Instead, use the REST API described below to create, modify, and destroy connectors.

      Configuring Connectors

      @@ -158,7 +160,7 @@
      Connector
       @Override
      -public Class getTaskClass() {
      +public Class<? extends Task> getTaskClass() {
           return FileStreamSourceTask.class;
       }
       
      @@ -179,7 +181,7 @@
      Connector } -Finally, the real core of the implementation is in getTaskConfigs(). In this case we're only +Finally, the real core of the implementation is in getTaskConfigs(). In this case we are only handling a single file, so even though we may be permitted to generate more tasks as per the maxTasks argument, we return a list with only one entry: @@ -225,7 +227,7 @@
      Task Example - Sourc @Override public synchronized void stop() { - stream.close() + stream.close(); } @@ -241,8 +243,8 @@
      Task Example - Sourc while (streamValid(stream) && records.isEmpty()) { LineAndOffset line = readToNextLine(stream); if (line != null) { - Map sourcePartition = Collections.singletonMap("filename", filename); - Map sourceOffset = Collections.singletonMap("position", streamOffset); + Map<String, Object> sourcePartition = Collections.singletonMap("filename", filename); + Map<String, Object> sourceOffset = Collections.singletonMap("position", streamOffset); records.add(new SourceRecord(sourcePartition, sourceOffset, topic, Schema.STRING_SCHEMA, line)); } else { Thread.sleep(1); @@ -267,11 +269,13 @@
      Sink Tasks
       public abstract class SinkTask implements Task {
      -public void initialize(SinkTaskContext context) { ... }
      -
      -public abstract void put(Collection<SinkRecord> records);
      +    public void initialize(SinkTaskContext context) {
      +        this.context = context;
      +    }
       
      -public abstract void flush(Map<TopicPartition, Long> offsets);
      +    public abstract void put(Collection<SinkRecord> records);
      +     
      +    public abstract void flush(Map<TopicPartition, Long> offsets);
       
      The SinkTask documentation contains full details, but this interface is nearly as simple as the SourceTask. The put() method should contain most of the implementation, accepting sets of SinkRecords, performing any required translation, and storing them in the destination system. This method does not need to ensure the data has been fully written to the destination system before returning. In fact, in many cases internal buffering will be useful so an entire batch of records can be sent at once, reducing the overhead of inserting events into the downstream data store. The SinkRecords contain essentially the same information as SourceRecords: Kafka topic, partition, offset and the event key and value. @@ -305,8 +309,8 @@

      Dynamic Input/Output Str Source connectors need to monitor the source system for changes, e.g. table additions/deletions in a database. When they pick up changes, they should notify the framework via the ConnectorContext object that reconfiguration is necessary. For example, in a SourceConnector:
      -if (inputsChanged())
      -    this.context.requestTaskReconfiguration();
      +    if (inputsChanged())
      +        this.context.requestTaskReconfiguration();
       
      The framework will promptly request new configuration information and update the tasks, allowing them to gracefully commit their progress before reconfiguring them. Note that in the SourceConnector this monitoring is currently left up to the connector implementation. If an extra thread is required to perform this monitoring, the connector must allocate it itself. @@ -315,6 +319,26 @@

      Dynamic Input/Output Str SinkConnectors usually only have to handle the addition of streams, which may translate to new entries in their outputs (e.g., a new database table). The framework manages any changes to the Kafka input, such as when the set of input topics changes because of a regex subscription. SinkTasks should expect new input streams, which may require creating new resources in the downstream system, such as a new table in a database. The trickiest situation to handle in these cases may be conflicts between multiple SinkTasks seeing a new input stream for the first time and simultaneously trying to create the new resource. SinkConnectors, on the other hand, will generally require no special code for handling a dynamic set of streams. +

      Connect Configuration Validation

      + +Kafka Connect allows you to validate connector configurations before submitting a connector to be executed and can provide feedback about errors and recommended values. To take advantage of this, connector developers need to provide an implementation of config() to expose the configuration definition to the framework. + +The following code in FileStreamSourceConnector defines the configuration and exposes it to the framework. + +
      +    private static final ConfigDef CONFIG_DEF = new ConfigDef()
      +        .define(FILE_CONFIG, Type.STRING, Importance.HIGH, "Source filename.")
      +        .define(TOPIC_CONFIG, Type.STRING, Importance.HIGH, "The topic to publish data to");
      +
      +    public ConfigDef config() {
      +        return CONFIG_DEF;
      +    }
      +
+ +ConfigDef class is used for specifying the set of expected configurations. For each configuration, you can specify the name, the type, the default value, the documentation, the group information, the order in the group, the width of the configuration value and the name suitable for display in the UI. Plus, you can provide special validation logic used for single configuration validation by overriding the Validator class. Moreover, there may be dependencies between configurations; for example, the valid values and visibility of a configuration may change according to the values of other configurations. To handle this, ConfigDef allows you to specify the dependents of a configuration and to provide an implementation of Recommender to get valid values and set visibility of a configuration given the current configuration values. + +Also, the validate() method in Connector provides a default validation implementation which returns a list of allowed configurations together with configuration errors and recommended values for each configuration. However, it does not use the recommended values for configuration validation. You may provide an override of the default implementation for customized configuration validation, which may use the recommended values. +
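As a small, hypothetical sketch of the Validator hook described above (the batch.size setting, its default and its bounds are invented for illustration and are not part of FileStreamSourceConnector):

    import org.apache.kafka.common.config.ConfigDef;
    import org.apache.kafka.common.config.ConfigDef.Importance;
    import org.apache.kafka.common.config.ConfigDef.Range;
    import org.apache.kafka.common.config.ConfigDef.Type;

    public class ValidatedConfigExample {
        // Same shape as the FileStreamSourceConnector definition above, plus one extra
        // setting that carries a default value and a built-in Range validator, so the
        // framework can reject non-positive values during configuration validation.
        static final ConfigDef CONFIG_DEF = new ConfigDef()
            .define("file", Type.STRING, Importance.HIGH, "Source filename.")
            .define("topic", Type.STRING, Importance.HIGH, "The topic to publish data to")
            .define("batch.size", Type.INT, 2000, Range.atLeast(1), Importance.LOW,
                    "Maximum number of lines to hand to the framework per poll.");

        public static void main(String[] args) {
            System.out.println(CONFIG_DEF.names()); // prints the defined configuration names
        }
    }

A Recommender can be attached in the same way through the longer define() overloads that also take group, order and dependents, which is how the dependency and visibility behaviour described above is wired up.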

      Working with Schemas

      The FileStream connectors are good examples because they are simple, but they also have trivially structured data -- each line is just a string. Almost all practical connectors will need schemas with more complex data formats. From d43ea744f4f2d02e5415b0669b1d7ecb5dcacf61 Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Thu, 12 May 2016 18:19:00 -0700 Subject: [PATCH 133/267] KAFKA-3520: Add system tests for REST APIs of list connector plugins and config validation ewen granders Ready for review. Author: Liquan Pei Reviewers: Ewen Cheslack-Postava Closes #1195 from Ishiihara/system-test (cherry picked from commit 81f76bde8565eaffd67e5adaa69ddfdb4f5cebaa) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/services/connect.py | 9 +++- .../tests/connect/connect_distributed_test.py | 3 ++ .../tests/connect/connect_rest_test.py | 42 +++++++++++++++---- tests/kafkatest/tests/connect/connect_test.py | 3 ++ .../templates/connect-file-sink.properties | 2 +- .../templates/connect-file-source.properties | 2 +- 6 files changed, 49 insertions(+), 12 deletions(-) diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index aad9ff3c009f..5371a72f6cdb 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -101,7 +101,8 @@ def restart_node(self, node, clean_shutdown=True): def clean_node(self, node): node.account.kill_process("connect", clean_shutdown=False, allow_fail=True) self.security_config.clean_node(node) - node.account.ssh("rm -rf " + " ".join([self.CONFIG_FILE, self.LOG4J_CONFIG_FILE, self.PID_FILE, self.LOG_FILE, self.STDOUT_FILE, self.STDERR_FILE] + self.config_filenames() + self.files), allow_fail=False) + all_files = " ".join([self.CONFIG_FILE, self.LOG4J_CONFIG_FILE, self.PID_FILE, self.LOG_FILE, self.STDOUT_FILE, self.STDERR_FILE] + self.config_filenames() + self.files) + node.account.ssh("rm -rf " + all_files, allow_fail=False) def config_filenames(self): return [os.path.join(self.PERSISTENT_ROOT, "connect-connector-" + str(idx) + ".properties") for idx, template in enumerate(self.connector_config_templates or [])] @@ -140,6 +141,12 @@ def pause_connector(self, name, node=None): def resume_connector(self, name, node=None): return self._rest('/connectors/' + name + '/resume', method="PUT") + def list_connector_plugins(self, node=None): + return self._rest('/connector-plugins/', node=node) + + def validate_config(self, connector_type, validate_request, node=None): + return self._rest('/connector-plugins/' + connector_type + '/config/validate', validate_request, node=node, method="PUT") + def _rest(self, path, body=None, node=None, method="GET"): if node is None: node = random.choice(self.nodes) diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py index d3ae2e169083..a4d68f39858c 100644 --- a/tests/kafkatest/tests/connect/connect_distributed_test.py +++ b/tests/kafkatest/tests/connect/connect_distributed_test.py @@ -32,6 +32,9 @@ class ConnectDistributedTest(Test): another, validating the total output is identical to the input. 
""" + FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector' + FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector' + INPUT_FILE = "/mnt/connect.input" OUTPUT_FILE = "/mnt/connect.output" diff --git a/tests/kafkatest/tests/connect/connect_rest_test.py b/tests/kafkatest/tests/connect/connect_rest_test.py index 63b9bb11a58e..c32b8e179c91 100644 --- a/tests/kafkatest/tests/connect/connect_rest_test.py +++ b/tests/kafkatest/tests/connect/connect_rest_test.py @@ -15,7 +15,6 @@ from kafkatest.tests.kafka_test import KafkaTest from kafkatest.services.connect import ConnectDistributedService, ConnectRestError -from kafkatest.utils.util import retry_on_exception from ducktape.utils.util import wait_until import subprocess import json @@ -27,6 +26,12 @@ class ConnectRestApiTest(KafkaTest): Test of Kafka Connect's REST API endpoints. """ + FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector' + FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector' + + FILE_SOURCE_CONFIGS = {'name', 'connector.class', 'tasks.max', 'topic', 'file'} + FILE_SINK_CONFIGS = {'name', 'connector.class', 'tasks.max', 'topics', 'file'} + INPUT_FILE = "/mnt/connect.input" INPUT_FILE2 = "/mnt/connect.input2" OUTPUT_FILE = "/mnt/connect.output" @@ -43,11 +48,11 @@ class ConnectRestApiTest(KafkaTest): LONGER_INPUT_LIST = ["foo", "bar", "baz", "razz", "ma", "tazz"] LONER_INPUTS = "\n".join(LONGER_INPUT_LIST) + "\n" - SCHEMA = { "type": "string", "optional": False } + SCHEMA = {"type": "string", "optional": False} def __init__(self, test_context): super(ConnectRestApiTest, self).__init__(test_context, num_zk=1, num_brokers=1, topics={ - 'test' : { 'partitions': 1, 'replication-factor': 1 } + 'test': {'partitions': 1, 'replication-factor': 1} }) self.cc = ConnectDistributedService(test_context, 2, self.kafka, [self.INPUT_FILE, self.INPUT_FILE2, self.OUTPUT_FILE]) @@ -64,12 +69,23 @@ def test_rest_api(self): assert self.cc.list_connectors() == [] - self.logger.info("Creating connectors") + assert set([connector_plugin['class'] for connector_plugin in self.cc.list_connector_plugins()]) == {self.FILE_SOURCE_CONNECTOR, self.FILE_SINK_CONNECTOR} + source_connector_props = self.render("connect-file-source.properties") sink_connector_props = self.render("connect-file-sink.properties") - for connector_props in [source_connector_props, sink_connector_props]: - connector_config = self._config_dict_from_props(connector_props) - self.cc.create_connector(connector_config, retries=120, retry_backoff=1) + + self.logger.info("Validating connector configurations") + source_connector_config = self._config_dict_from_props(source_connector_props) + configs = self.cc.validate_config(self.FILE_SOURCE_CONNECTOR, source_connector_config) + self.verify_config(self.FILE_SOURCE_CONNECTOR, self.FILE_SOURCE_CONFIGS, configs) + + sink_connector_config = self._config_dict_from_props(sink_connector_props) + configs = self.cc.validate_config(self.FILE_SINK_CONNECTOR, sink_connector_config) + self.verify_config(self.FILE_SINK_CONNECTOR, self.FILE_SINK_CONFIGS, configs) + + self.logger.info("Creating connectors") + self.cc.create_connector(source_connector_config, retries=120, retry_backoff=1) + self.cc.create_connector(sink_connector_config, retries=120, retry_backoff=1) # We should see the connectors appear wait_until(lambda: set(self.cc.list_connectors(retries=5, retry_backoff=1)) == set(["local-file-source", "local-file-sink"]), @@ -91,7 +107,7 @@ def 
test_rest_api(self): expected_source_info = { 'name': 'local-file-source', 'config': self._config_dict_from_props(source_connector_props), - 'tasks': [{ 'connector': 'local-file-source', 'task': 0 }] + 'tasks': [{'connector': 'local-file-source', 'task': 0}] } source_info = self.cc.get_connector("local-file-source") assert expected_source_info == source_info, "Incorrect info:" + json.dumps(source_info) @@ -100,7 +116,7 @@ def test_rest_api(self): expected_sink_info = { 'name': 'local-file-sink', 'config': self._config_dict_from_props(sink_connector_props), - 'tasks': [{'connector': 'local-file-sink', 'task': 0 }] + 'tasks': [{'connector': 'local-file-sink', 'task': 0}] } sink_info = self.cc.get_connector("local-file-sink") assert expected_sink_info == sink_info, "Incorrect info:" + json.dumps(sink_info) @@ -164,3 +180,11 @@ def file_contents(self, node, file): def _config_dict_from_props(self, connector_props): return dict([line.strip().split('=', 1) for line in connector_props.split('\n') if line.strip() and not line.strip().startswith('#')]) + def verify_config(self, name, config_def, configs): + # Should have zero errors + assert name == configs['name'] + # Should have zero errors + assert 0 == configs['error_count'] + # Should return all configuration + config_names = [config['definition']['name'] for config in configs['configs']] + assert config_def == set(config_names) diff --git a/tests/kafkatest/tests/connect/connect_test.py b/tests/kafkatest/tests/connect/connect_test.py index 7b57402bf7ce..91843900e73d 100644 --- a/tests/kafkatest/tests/connect/connect_test.py +++ b/tests/kafkatest/tests/connect/connect_test.py @@ -31,6 +31,9 @@ class ConnectStandaloneFileTest(Test): identical to the input. """ + FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector' + FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector' + INPUT_FILE = "/mnt/connect.input" OUTPUT_FILE = "/mnt/connect.output" diff --git a/tests/kafkatest/tests/connect/templates/connect-file-sink.properties b/tests/kafkatest/tests/connect/templates/connect-file-sink.properties index ad78bb366dbe..216dab55592b 100644 --- a/tests/kafkatest/tests/connect/templates/connect-file-sink.properties +++ b/tests/kafkatest/tests/connect/templates/connect-file-sink.properties @@ -14,7 +14,7 @@ # limitations under the License. name=local-file-sink -connector.class=FileStreamSink +connector.class={{ FILE_SINK_CONNECTOR }} tasks.max=1 file={{ OUTPUT_FILE }} topics={{ TOPIC }} \ No newline at end of file diff --git a/tests/kafkatest/tests/connect/templates/connect-file-source.properties b/tests/kafkatest/tests/connect/templates/connect-file-source.properties index d2d5e9747664..bff9720b8bfb 100644 --- a/tests/kafkatest/tests/connect/templates/connect-file-source.properties +++ b/tests/kafkatest/tests/connect/templates/connect-file-source.properties @@ -14,7 +14,7 @@ # limitations under the License. 
name=local-file-source -connector.class=FileStreamSource +connector.class={{ FILE_SOURCE_CONNECTOR }} tasks.max=1 file={{ INPUT_FILE }} topic={{ TOPIC }} \ No newline at end of file From f696bd2244f018365d487424bd72b6195e120fff Mon Sep 17 00:00:00 2001 From: Jeff Klukas Date: Thu, 12 May 2016 21:14:51 -0700 Subject: [PATCH 134/267] MINOR: Fix bugs in KafkaStreams.close() Initially proposed by ijuma in https://github.com/apache/kafka/pull/1362#issuecomment-218293662 mjsax commented: > StreamThread.close() should be extended to call metrics.close() (the class need a private member to reference the Metrics object, too) The `Metrics` instance is created in the `KafkaStreams` constructor and shared between all threads, so closing it within the threads doesn't seem like the right approach. This PR calls `Metrics.close()` in `KafkaStreams.close()` instead. cc guozhangwang Author: Jeff Klukas Reviewers: Ismael Juma, Guozhang Wang Closes #1379 from jklukas/close-streams-metrics (cherry picked from commit f34164eed53d791768f05df21f4dfeca89859b2e) Signed-off-by: Guozhang Wang --- .../apache/kafka/streams/KafkaStreams.java | 14 ++- .../kafka/streams/KafkaStreamsTest.java | 106 ++++++++++++++++++ 2 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java index b3e3f5d6f059..af6d973e3dc2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java @@ -91,6 +91,7 @@ public class KafkaStreams { private int state = CREATED; private final StreamThread[] threads; + private final Metrics metrics; // processId is expected to be unique across JVMs and to be used // in userData of the subscription request to allow assignor be aware @@ -147,7 +148,7 @@ public KafkaStreams(TopologyBuilder builder, StreamsConfig config, KafkaClientSu .timeWindow(config.getLong(StreamsConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS); - Metrics metrics = new Metrics(metricConfig, reporters, time); + this.metrics = new Metrics(metricConfig, reporters, time); this.threads = new StreamThread[config.getInt(StreamsConfig.NUM_STREAM_THREADS_CONFIG)]; for (int i = 0; i < this.threads.length; i++) { @@ -169,8 +170,10 @@ public synchronized void start() { state = RUNNING; log.info("Started Kafka Stream process"); - } else { + } else if (state == RUNNING) { throw new IllegalStateException("This process was already started."); + } else { + throw new IllegalStateException("Cannot restart after closing."); } } @@ -194,13 +197,14 @@ public synchronized void close() { Thread.interrupted(); } } + } + if (state != STOPPED) { + metrics.close(); state = STOPPED; - log.info("Stopped Kafka Stream process"); - } else { - throw new IllegalStateException("This process has not started yet."); } + } /** diff --git a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java new file mode 100644 index 000000000000..22d8bf2dd168 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kafka.streams; + +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.test.MockMetricsReporter; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Properties; + +public class KafkaStreamsTest { + + @Test + public void testStartAndClose() throws Exception { + Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testStartAndClose"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); + + final int oldInitCount = MockMetricsReporter.INIT_COUNT.get(); + final int oldCloseCount = MockMetricsReporter.CLOSE_COUNT.get(); + + KStreamBuilder builder = new KStreamBuilder(); + KafkaStreams streams = new KafkaStreams(builder, props); + + streams.start(); + final int newInitCount = MockMetricsReporter.INIT_COUNT.get(); + final int initCountDifference = newInitCount - oldInitCount; + Assert.assertTrue("some reporters should be initialized by calling start()", initCountDifference > 0); + + streams.close(); + Assert.assertEquals("each reporter initialized should also be closed", + oldCloseCount + initCountDifference, MockMetricsReporter.CLOSE_COUNT.get()); + } + + @Test + public void testCloseIsIdempotent() throws Exception { + Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCloseIsIdempotent"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); + + KStreamBuilder builder = new KStreamBuilder(); + KafkaStreams streams = new KafkaStreams(builder, props); + streams.close(); + final int closeCount = MockMetricsReporter.CLOSE_COUNT.get(); + + streams.close(); + Assert.assertEquals("subsequent close() calls should do nothing", + closeCount, MockMetricsReporter.CLOSE_COUNT.get()); + } + + @Test + public void testCannotStartOnceClosed() throws Exception { + Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartOnceClosed"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + + KStreamBuilder builder = new KStreamBuilder(); + KafkaStreams streams = new KafkaStreams(builder, props); + streams.close(); + + try { + streams.start(); + } catch (IllegalStateException e) { + Assert.assertEquals("Cannot restart after closing.", e.getMessage()); + return; + } + Assert.fail("should have caught an exception and returned"); + } + + @Test + public void testCannotStartTwice() throws Exception { + Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartTwice"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + + KStreamBuilder builder = new KStreamBuilder(); + 
KafkaStreams streams = new KafkaStreams(builder, props); + streams.start(); + + try { + streams.start(); + } catch (IllegalStateException e) { + Assert.assertEquals("This process was already started.", e.getMessage()); + return; + } + Assert.fail("should have caught an exception and returned"); + } +} From c6ba42c7069377c0c3d46e8b6c2e63a420dc874c Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Fri, 13 May 2016 12:10:09 +0100 Subject: [PATCH 135/267] KAFKA-3692; Add quotes to variables in kafka-run-class.sh Author: Liquan Pei Reviewers: Geoff Anderson , Jun Rao , Ismael Juma Closes #1364 from Ishiihara/add-quote-classpath (cherry picked from commit fb421dbcfe36c4a0d9df7a742c18c11515f095c5) Signed-off-by: Ismael Juma --- bin/kafka-run-class.sh | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh index 688cced28614..76d0b8862627 100755 --- a/bin/kafka-run-class.sh +++ b/bin/kafka-run-class.sh @@ -50,85 +50,85 @@ fi # run ./gradlew copyDependantLibs to get all dependant jars in a local dir shopt -s nullglob -for dir in $base_dir/core/build/dependant-libs-${SCALA_VERSION}*; +for dir in "$base_dir"/core/build/dependant-libs-${SCALA_VERSION}*; do - if [ -z $CLASSPATH ] ; then - CLASSPATH=$dir/* + if [ -z "$CLASSPATH" ] ; then + CLASSPATH="$dir/*" else - CLASSPATH=$CLASSPATH:$dir/* + CLASSPATH="$CLASSPATH:$dir/*" fi done -for file in $base_dir/examples/build/libs/kafka-examples*.jar; +for file in "$base_dir"/examples/build/libs/kafka-examples*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for file in $base_dir/clients/build/libs/kafka-clients*.jar; +for file in "$base_dir"/clients/build/libs/kafka-clients*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for file in $base_dir/streams/build/libs/kafka-streams*.jar; +for file in "$base_dir"/streams/build/libs/kafka-streams*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for file in $base_dir/streams/examples/build/libs/kafka-streams-examples*.jar; +for file in "$base_dir"/streams/examples/build/libs/kafka-streams-examples*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for file in $base_dir/streams/build/dependant-libs-${SCALA_VERSION}/rocksdb*.jar; +for file in "$base_dir"/streams/build/dependant-libs-${SCALA_VERSION}/rocksdb*.jar; do - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" done -for file in $base_dir/tools/build/libs/kafka-tools*.jar; +for file in "$base_dir"/tools/build/libs/kafka-tools*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for dir in $base_dir/tools/build/dependant-libs-${SCALA_VERSION}*; +for dir in "$base_dir"/tools/build/dependant-libs-${SCALA_VERSION}*; do - CLASSPATH=$CLASSPATH:$dir/* + CLASSPATH="$CLASSPATH:$dir/*" done for cc_pkg in "api" "runtime" "file" "json" "tools" do - for file in $base_dir/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar; + for file in "$base_dir"/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done if [ -d "$base_dir/connect/${cc_pkg}/build/dependant-libs" ] ; then - 
CLASSPATH=$CLASSPATH:$base_dir/connect/${cc_pkg}/build/dependant-libs/* + CLASSPATH="$CLASSPATH:$base_dir/connect/${cc_pkg}/build/dependant-libs/*" fi done # classpath addition for release -for file in $base_dir/libs/*; +for file in "$base_dir"/libs/*; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done -for file in $base_dir/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar; +for file in "$base_dir"/core/build/libs/kafka_${SCALA_BINARY_VERSION}*.jar; do if should_include_file "$file"; then - CLASSPATH=$CLASSPATH:$file + CLASSPATH="$CLASSPATH":"$file" fi done shopt -u nullglob From cceaaf9eee0f52af9ef2bf8cde9509e7cbfeca5f Mon Sep 17 00:00:00 2001 From: Manikumar reddy O Date: Fri, 13 May 2016 12:57:09 +0100 Subject: [PATCH 136/267] KAFKA-3584; Fix synchronization issue between deleteOldSegments() and delete() methods This PR is to fix synchronization issue between deleteOldSegments() and delete() method calls. log.deleteOldSegments() call throws NullPointerException after log.delete() method call. cc ijuma junrao Author: Manikumar reddy O Reviewers: Jun Rao , Ismael Juma Closes #1367 from omkreddy/KAFKA-3584 (cherry picked from commit 1c4b943f2d9cc90101026519769f142c07bc1785) Signed-off-by: Ismael Juma --- core/src/main/scala/kafka/log/Log.scala | 20 +++++++------ .../test/scala/unit/kafka/log/LogTest.scala | 28 +++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/kafka/log/Log.scala b/core/src/main/scala/kafka/log/Log.scala index e0ad73de9fb3..a7549dc134a7 100644 --- a/core/src/main/scala/kafka/log/Log.scala +++ b/core/src/main/scala/kafka/log/Log.scala @@ -563,21 +563,23 @@ class Log(val dir: File, * @return The number of segments deleted */ def deleteOldSegments(predicate: LogSegment => Boolean): Int = { - // find any segments that match the user-supplied predicate UNLESS it is the final segment - // and it is empty (since we would just end up re-creating it - val lastSegment = activeSegment - val deletable = logSegments.takeWhile(s => predicate(s) && (s.baseOffset != lastSegment.baseOffset || s.size > 0)) - val numToDelete = deletable.size - if(numToDelete > 0) { - lock synchronized { + lock synchronized { + //find any segments that match the user-supplied predicate UNLESS it is the final segment + //and it is empty (since we would just end up re-creating it) + val lastEntry = segments.lastEntry + val deletable = + if (lastEntry == null) Seq.empty + else logSegments.takeWhile(s => predicate(s) && (s.baseOffset != lastEntry.getValue.baseOffset || s.size > 0)) + val numToDelete = deletable.size + if (numToDelete > 0) { // we must always have at least one segment, so if we are going to delete all the segments, create a new one first - if(segments.size == numToDelete) + if (segments.size == numToDelete) roll() // remove the segments for lookups deletable.foreach(deleteSegment(_)) } + numToDelete } - numToDelete } /** diff --git a/core/src/test/scala/unit/kafka/log/LogTest.scala b/core/src/test/scala/unit/kafka/log/LogTest.scala index 796f5c35d878..f48f6b1e85fa 100755 --- a/core/src/test/scala/unit/kafka/log/LogTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogTest.scala @@ -930,4 +930,32 @@ class LogTest extends JUnitSuite { def topicPartitionName(topic: String, partition: String): String = File.separator + topic + "-" + partition + @Test + def testDeleteOldSegmentsMethod() { + val set = TestUtils.singleMessageSet("test".getBytes) + val logProps = new Properties() + 
logProps.put(LogConfig.SegmentBytesProp, set.sizeInBytes * 5: java.lang.Integer) + logProps.put(LogConfig.SegmentIndexBytesProp, 1000: java.lang.Integer) + val config = LogConfig(logProps) + val log = new Log(logDir, + config, + recoveryPoint = 0L, + time.scheduler, + time) + + // append some messages to create some segments + for (i <- 0 until 100) + log.append(set) + + log.deleteOldSegments(_ => true) + assertEquals("The deleted segments should be gone.", 1, log.numberOfSegments) + + // append some messages to create some segments + for (i <- 0 until 100) + log.append(set) + + log.delete() + assertEquals("The number of segments should be 0", 0, log.numberOfSegments) + assertEquals("The number of deleted segments shoud be zero.", 0, log.deleteOldSegments(_ => true)) + } } From 51b63c421460a00884e7f00a648c41ed31eb6dd5 Mon Sep 17 00:00:00 2001 From: Jeff Klukas Date: Fri, 13 May 2016 15:32:24 -0700 Subject: [PATCH 137/267] MINOR: Allow Serdes subclasses to access WrapperSerde This PR loosens access restrictions on `WrapperSerde` to `protected` so that users can define a `Serdes` subclass that provides additional custom serde members following the same pattern as the parent class. This is my own work and is compatible with Kafka's license. Author: Jeff Klukas Reviewers: Guozhang Wang Closes #1382 from jklukas/expose-wrapperserde (cherry picked from commit 08205de185090b0cd744844b58b1508c82459d28) Signed-off-by: Guozhang Wang --- .../main/java/org/apache/kafka/common/serialization/Serdes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java index 9075a932a5db..e5fb6c0a3b8a 100644 --- a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java +++ b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java @@ -23,7 +23,7 @@ */ public class Serdes { - static private class WrapperSerde implements Serde { + static protected class WrapperSerde implements Serde { final private Serializer serializer; final private Deserializer deserializer; From e0c741d4e7a8e996f8432d9cf803a0fddb76fc70 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 13 May 2016 20:02:54 -0700 Subject: [PATCH 138/267] Revert 51b63c421460a00884e7f00a648c41ed31eb6dd5 from 0.10.0 --- .../main/java/org/apache/kafka/common/serialization/Serdes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java index e5fb6c0a3b8a..9075a932a5db 100644 --- a/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java +++ b/clients/src/main/java/org/apache/kafka/common/serialization/Serdes.java @@ -23,7 +23,7 @@ */ public class Serdes { - static protected class WrapperSerde implements Serde { + static private class WrapperSerde implements Serde { final private Serializer serializer; final private Deserializer deserializer; From bca95cba96212dc3460d0a87fd4dceb6ce322ccc Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Fri, 13 May 2016 21:03:35 -0700 Subject: [PATCH 139/267] KAFKA-3713; Close `compressor` to fix memory leak This fixes test_producer_throughput with compression_type=snappy. Also: added heap dump on out of memory error to `producer_performance.py` and corrected the upgrade note related to the change in buffer size for compression streams. 
Author: Ismael Juma Reviewers: Gwen Shapira Closes #1385 from ijuma/kafka-3713-test_producer_throughput-snappy-fail and squashes the following commits: 54c7962 [Ismael Juma] Correct upgrade note about buffer size for compression stream 515040b [Ismael Juma] Call `compressor.close()` to fix memory leak 5311e5b [Ismael Juma] Dump heap on out of memory error when running `producer_performance.py` (cherry picked from commit 13130139ff70d0127e87d2c87dd5e62e6320fa45) Signed-off-by: Gwen Shapira --- .../main/java/org/apache/kafka/common/record/Record.java | 6 +++++- docs/upgrade.html | 2 +- .../kafkatest/services/performance/producer_performance.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/record/Record.java b/clients/src/main/java/org/apache/kafka/common/record/Record.java index baab9ab6f1c6..77e4f68e6497 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Record.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Record.java @@ -147,7 +147,11 @@ public static void write(ByteBuffer buffer, long timestamp, byte[] key, byte[] v // construct the compressor with compression type none since this function will not do any //compression according to the input type, it will just write the record's payload as is Compressor compressor = new Compressor(buffer, CompressionType.NONE); - compressor.putRecord(timestamp, key, value, type, valueOffset, valueSize); + try { + compressor.putRecord(timestamp, key, value, type, valueOffset, valueSize); + } finally { + compressor.close(); + } } public static void write(Compressor compressor, long crc, byte attributes, long timestamp, byte[] key, byte[] value, int valueOffset, int valueSize) { diff --git a/docs/upgrade.html b/docs/upgrade.html index 3e07ef89628b..a6754bcb6e91 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -91,7 +91,7 @@
      Notable changes in 0.10.0.0
      • Starting from Kafka 0.10.0.0, a new client library named Kafka Streams is available for stream processing on data stored in Kafka topics. This new client library only works with 0.10.x and upward versioned brokers due to message format changes mentioned above. For more information please read this section.
      - • If compression with snappy or gzip is enabled, the new producer will use the compression scheme's default buffer size (this is already the case for LZ4) instead of 1 KB in order to improve the compression ratio. Note that the default buffer sizes for gzip, snappy and LZ4 are 0.5 KB, 32 KB and 64KB respectively. For the snappy case, a producer with 5000 partitions will require an additional 155 MB of JVM heap.
      + • If compression with snappy or gzip is enabled, the new producer will use the compression scheme's default buffer size (this is already the case for LZ4) instead of 1 KB in order to improve the compression ratio. Note that the default buffer sizes for gzip, snappy and LZ4 are 0.5 KB, 2x32 KB and 2x64KB respectively. For the snappy case, a producer with 5000 partitions will require an additional 315 MB of JVM heap.
      • The default value of the configuration parameter receive.buffer.bytes is now 64K for the new consumer.
      • The new consumer now exposes the configuration parameter exclude.internal.topics to restrict internal topics (such as the consumer offsets topic) from accidentally being included in regular expression subscriptions. By default, it is enabled.
      • The old Scala producer has been deprecated. Users should migrate their code to the Java producer included in the kafka-clients JAR as soon as possible.
      • diff --git a/tests/kafkatest/services/performance/producer_performance.py b/tests/kafkatest/services/performance/producer_performance.py index 7131df14d417..d66efec3c8e0 100644 --- a/tests/kafkatest/services/performance/producer_performance.py +++ b/tests/kafkatest/services/performance/producer_performance.py @@ -100,7 +100,7 @@ def start_cmd(self, node): cmd += "export CLASSPATH; " cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % ProducerPerformanceService.LOG4J_CONFIG - cmd += "JMX_PORT=%(jmx_port)d KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s org.apache.kafka.tools.ProducerPerformance " \ + cmd += "JMX_PORT=%(jmx_port)d KAFKA_OPTS=%(kafka_opts)s KAFKA_HEAP_OPTS=\"-XX:+HeapDumpOnOutOfMemoryError\" %(kafka_run_class)s org.apache.kafka.tools.ProducerPerformance " \ "--topic %(topic)s --num-records %(num_records)d --record-size %(record_size)d --throughput %(throughput)d --producer-props bootstrap.servers=%(bootstrap_servers)s client.id=%(client_id)s" % args self.security_config.setup_node(node) From 5834a6a562d0fcdf5dbebb4371b5fe2c783740ca Mon Sep 17 00:00:00 2001 From: Jiangjie Qin Date: Sun, 15 May 2016 09:04:21 -0700 Subject: [PATCH 140/267] =?UTF-8?q?KAFKA-3565;=20Add=20documentation=20to?= =?UTF-8?q?=20warn=20user=20about=20the=20potential=20messag=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …e throughput drop due to the addition of timestamp field. Author: Jiangjie Qin Reviewers: Ismael Juma , Jun Rao Closes #1372 from becketqin/KAFKA-3565 (cherry picked from commit 7ded19a29ec140de93d57a9eb01722e6a8f2012a) Signed-off-by: Jun Rao --- docs/upgrade.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/upgrade.html b/docs/upgrade.html index a6754bcb6e91..c5431aa568af 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -63,6 +63,13 @@
        message format version. Otherwise consumers before 0.10.0.0 might break. In particular, after the message format is set to 0.10.0, one should not change it back to an earlier format as it may break consumers on versions before 0.10.0.0.

        +
        + Note: Due to the additional timestamp introduced in each message, producers sending small messages may see a
        + message throughput degradation because of the increased overhead. When receiving compressed messages, 0.10.0
        + brokers avoid recompressing the messages, which in general reduces the latency and improves the throughput. In
        + certain cases, this may reduce the batching size on the producer, which could lead to worse throughput. If this
        + happens, users can tune linger.ms and batch.size of the producer for better throughput.
        +

        Potential breaking changes in 0.10.0.0

        Upgrading from 0.8.1 to 0.8.2

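The documentation note added above points producers at linger.ms and batch.size as the knobs for recovering throughput lost to the per-message timestamp. A minimal client-side sketch of that tuning follows; the 20 ms and 64 KB values are arbitrary illustrations rather than recommendations, and only the config keys themselves come from the producer API.

    import java.util.Properties
    import org.apache.kafka.clients.producer.ProducerConfig

    val props = new Properties()
    // Let the producer wait a little longer so more of the (now slightly larger) records fill a batch.
    props.put(ProducerConfig.LINGER_MS_CONFIG, "20")
    // Raise the per-partition batch size above the 16 KB default to offset the added timestamp overhead.
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "65536")

Whether this helps depends on the workload; the note above only suggests it as a mitigation when batching shrinks.
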
        From a86ae26fcb18d307d5d54f7061df613ce148fc33 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Mon, 16 May 2016 14:37:09 +0100 Subject: [PATCH 153/267] KAFKA-3258; Delete broker topic metrics of deleted topics Delete per-topic metrics when there are no replicas of any partitions of the topic on a broker. Author: Rajini Sivaram Reviewers: Joel Koshy , Manikumar reddy O , Ashish Singh , Ismael Juma Closes #944 from rajinisivaram/KAFKA-3258 --- .../kafka/server/KafkaRequestHandler.scala | 42 +++++++++++++++---- .../scala/kafka/server/ReplicaManager.scala | 17 ++++---- .../unit/kafka/metrics/MetricsTest.scala | 15 ++++++- 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/kafka/server/KafkaRequestHandler.scala b/core/src/main/scala/kafka/server/KafkaRequestHandler.scala index a1558afed20b..f70955d3e8a7 100755 --- a/core/src/main/scala/kafka/server/KafkaRequestHandler.scala +++ b/core/src/main/scala/kafka/server/KafkaRequestHandler.scala @@ -100,17 +100,37 @@ class BrokerTopicMetrics(name: Option[String]) extends KafkaMetricsGroup { case Some(topic) => Map("topic" -> topic) } - val messagesInRate = newMeter("MessagesInPerSec", "messages", TimeUnit.SECONDS, tags) - val bytesInRate = newMeter("BytesInPerSec", "bytes", TimeUnit.SECONDS, tags) - val bytesOutRate = newMeter("BytesOutPerSec", "bytes", TimeUnit.SECONDS, tags) - val bytesRejectedRate = newMeter("BytesRejectedPerSec", "bytes", TimeUnit.SECONDS, tags) - val failedProduceRequestRate = newMeter("FailedProduceRequestsPerSec", "requests", TimeUnit.SECONDS, tags) - val failedFetchRequestRate = newMeter("FailedFetchRequestsPerSec", "requests", TimeUnit.SECONDS, tags) - val totalProduceRequestRate = newMeter("TotalProduceRequestsPerSec", "requests", TimeUnit.SECONDS, tags) - val totalFetchRequestRate = newMeter("TotalFetchRequestsPerSec", "requests", TimeUnit.SECONDS, tags) + val messagesInRate = newMeter(BrokerTopicStats.MessagesInPerSec, "messages", TimeUnit.SECONDS, tags) + val bytesInRate = newMeter(BrokerTopicStats.BytesInPerSec, "bytes", TimeUnit.SECONDS, tags) + val bytesOutRate = newMeter(BrokerTopicStats.BytesOutPerSec, "bytes", TimeUnit.SECONDS, tags) + val bytesRejectedRate = newMeter(BrokerTopicStats.BytesRejectedPerSec, "bytes", TimeUnit.SECONDS, tags) + val failedProduceRequestRate = newMeter(BrokerTopicStats.FailedProduceRequestsPerSec, "requests", TimeUnit.SECONDS, tags) + val failedFetchRequestRate = newMeter(BrokerTopicStats.FailedFetchRequestsPerSec, "requests", TimeUnit.SECONDS, tags) + val totalProduceRequestRate = newMeter(BrokerTopicStats.TotalProduceRequestsPerSec, "requests", TimeUnit.SECONDS, tags) + val totalFetchRequestRate = newMeter(BrokerTopicStats.TotalFetchRequestsPerSec, "requests", TimeUnit.SECONDS, tags) + + def close() { + removeMetric(BrokerTopicStats.MessagesInPerSec, tags) + removeMetric(BrokerTopicStats.BytesInPerSec, tags) + removeMetric(BrokerTopicStats.BytesOutPerSec, tags) + removeMetric(BrokerTopicStats.BytesRejectedPerSec, tags) + removeMetric(BrokerTopicStats.FailedProduceRequestsPerSec, tags) + removeMetric(BrokerTopicStats.FailedFetchRequestsPerSec, tags) + removeMetric(BrokerTopicStats.TotalProduceRequestsPerSec, tags) + removeMetric(BrokerTopicStats.TotalFetchRequestsPerSec, tags) + } } object BrokerTopicStats extends Logging { + val MessagesInPerSec = "MessagesInPerSec" + val BytesInPerSec = "BytesInPerSec" + val BytesOutPerSec = "BytesOutPerSec" + val BytesRejectedPerSec = "BytesRejectedPerSec" + val FailedProduceRequestsPerSec = 
"FailedProduceRequestsPerSec" + val FailedFetchRequestsPerSec = "FailedFetchRequestsPerSec" + val TotalProduceRequestsPerSec = "TotalProduceRequestsPerSec" + val TotalFetchRequestsPerSec = "TotalFetchRequestsPerSec" + private val valueFactory = (k: String) => new BrokerTopicMetrics(Some(k)) private val stats = new Pool[String, BrokerTopicMetrics](Some(valueFactory)) private val allTopicsStats = new BrokerTopicMetrics(None) @@ -120,4 +140,10 @@ object BrokerTopicStats extends Logging { def getBrokerTopicStats(topic: String): BrokerTopicMetrics = { stats.getAndMaybePut(topic) } + + def removeMetrics(topic: String) { + val metrics = stats.remove(topic) + if (metrics != null) + metrics.close() + } } diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 534de271bfb7..68f23859ee77 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -110,7 +110,9 @@ class ReplicaManager(val config: KafkaConfig, /* epoch of the controller that last changed the leader */ @volatile var controllerEpoch: Int = KafkaController.InitialControllerEpoch - 1 private val localBrokerId = config.brokerId - private val allPartitions = new Pool[(String, Int), Partition] + private val allPartitions = new Pool[(String, Int), Partition](valueFactory = Some { case (t, p) => + new Partition(t, p, time, this) + }) private val replicaStateChangeLock = new Object val replicaFetcherManager = new ReplicaFetcherManager(config, this, metrics, jTime, threadNamePrefix) private val highWatermarkCheckPointThreadStarted = new AtomicBoolean(false) @@ -223,8 +225,12 @@ class ReplicaManager(val config: KafkaConfig, case Some(partition) => if(deletePartition) { val removedPartition = allPartitions.remove((topic, partitionId)) - if (removedPartition != null) + if (removedPartition != null) { removedPartition.delete() // this will delete the local log + val topicHasPartitions = allPartitions.keys.exists { case (t, _) => topic == t } + if (!topicHasPartitions) + BrokerTopicStats.removeMetrics(topic) + } } case None => // Delete log and corresponding folders in case replica manager doesn't hold them anymore. @@ -266,12 +272,7 @@ class ReplicaManager(val config: KafkaConfig, } def getOrCreatePartition(topic: String, partitionId: Int): Partition = { - var partition = allPartitions.get((topic, partitionId)) - if (partition == null) { - allPartitions.putIfNotExists((topic, partitionId), new Partition(topic, partitionId, time, this)) - partition = allPartitions.get((topic, partitionId)) - } - partition + allPartitions.getAndMaybePut((topic, partitionId)) } def getPartition(topic: String, partitionId: Int): Option[Partition] = { diff --git a/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala index 3707deb3a2df..1980e8aaf920 100644 --- a/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala +++ b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala @@ -15,10 +15,9 @@ * limitations under the License. 
*/ -package kafka.consumer +package kafka.metrics import java.util.Properties - import com.yammer.metrics.Metrics import com.yammer.metrics.core.MetricPredicate import org.junit.{After, Test} @@ -32,6 +31,7 @@ import kafka.utils.TestUtils._ import scala.collection._ import scala.collection.JavaConversions._ import scala.util.matching.Regex +import kafka.consumer.{ConsumerConfig, ZookeeperConsumerConnector} class MetricsTest extends KafkaServerTestHarness with Logging { val numNodes = 2 @@ -79,6 +79,17 @@ class MetricsTest extends KafkaServerTestHarness with Logging { assertFalse("Topic metrics exists after deleteTopic", checkTopicMetricsExists(topic)) } + @Test + def testBrokerTopicMetricsUnregisteredAfterDeletingTopic() { + val topic = "test-broker-topic-metric" + AdminUtils.createTopic(zkUtils, topic, 2, 1) + createAndShutdownStep("group0", "consumer0", "producer0") + assertNotNull(BrokerTopicStats.getBrokerTopicStats(topic)) + AdminUtils.deleteTopic(zkUtils, topic) + TestUtils.verifyTopicDeletion(zkUtils, topic, 1, servers) + assertFalse("Topic metrics exists after deleteTopic", checkTopicMetricsExists(topic)) + } + @deprecated("This test has been deprecated and it will be removed in a future release", "0.10.0.0") def createAndShutdownStep(group: String, consumerId: String, producerId: String): Unit = { sendMessages(servers, topic, nMessages) From d1e24000c8770d6c207dca265f4cdafe33690325 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 24 May 2016 09:13:40 +0100 Subject: [PATCH 154/267] KAFKA-3747; Close `RecordBatch.records` when append to batch fails With this change, `test_producer_throughput` with message_size=10000, compression_type=snappy and a snappy buffer size of 32k can be executed in a heap of 192m in a local environment (768m is needed without this change). 
Author: Ismael Juma Reviewers: Guozhang Wang Closes #1418 from ijuma/kafka-3747-close-record-batch-when-append-fails (cherry picked from commit fe27d8f787f38428e0add36edeac9d694f16af53) Signed-off-by: Ismael Juma --- .../producer/internals/RecordAccumulator.java | 63 ++++++++++++------- .../kafka/common/record/MemoryRecords.java | 5 ++ .../internals/RecordAccumulatorTest.java | 18 +++++- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java index 5339096efa75..a73d88222656 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java @@ -74,7 +74,6 @@ public final class RecordAccumulator { private final Set muted; private int drainIndex; - /** * Create a new record accumulator * @@ -104,11 +103,11 @@ public RecordAccumulator(int batchSize, this.compression = compression; this.lingerMs = lingerMs; this.retryBackoffMs = retryBackoffMs; - this.batches = new CopyOnWriteMap>(); + this.batches = new CopyOnWriteMap<>(); String metricGrpName = "producer-metrics"; this.free = new BufferPool(totalSize, batchSize, metrics, time, metricGrpName); this.incomplete = new IncompleteRecordBatches(); - this.muted = new HashSet(); + this.muted = new HashSet<>(); this.time = time; registerMetrics(metrics, metricGrpName); } @@ -171,12 +170,9 @@ public RecordAppendResult append(TopicPartition tp, synchronized (dq) { if (closed) throw new IllegalStateException("Cannot send after the producer is closed."); - RecordBatch last = dq.peekLast(); - if (last != null) { - FutureRecordMetadata future = last.tryAppend(timestamp, key, value, callback, time.milliseconds()); - if (future != null) - return new RecordAppendResult(future, dq.size() > 1 || last.records.isFull(), false); - } + RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq); + if (appendResult != null) + return appendResult; } // we don't have an in-progress record batch try to allocate a new batch @@ -187,14 +183,12 @@ public RecordAppendResult append(TopicPartition tp, // Need to check if producer is closed again after grabbing the dequeue lock. if (closed) throw new IllegalStateException("Cannot send after the producer is closed."); - RecordBatch last = dq.peekLast(); - if (last != null) { - FutureRecordMetadata future = last.tryAppend(timestamp, key, value, callback, time.milliseconds()); - if (future != null) { - // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often... - free.deallocate(buffer); - return new RecordAppendResult(future, dq.size() > 1 || last.records.isFull(), false); - } + + RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq); + if (appendResult != null) { + // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often... + free.deallocate(buffer); + return appendResult; } MemoryRecords records = MemoryRecords.emptyRecords(buffer, compression, this.batchSize); RecordBatch batch = new RecordBatch(tp, records, time.milliseconds()); @@ -209,12 +203,28 @@ public RecordAppendResult append(TopicPartition tp, } } + /** + * If `RecordBatch.tryAppend` fails (i.e. the record batch is full), close its memory records to release temporary + * resources (like compression streams buffers). 
+ */ + private RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Callback callback, Deque deque) { + RecordBatch last = deque.peekLast(); + if (last != null) { + FutureRecordMetadata future = last.tryAppend(timestamp, key, value, callback, time.milliseconds()); + if (future == null) + last.records.close(); + else + return new RecordAppendResult(future, deque.size() > 1 || last.records.isFull(), false); + } + return null; + } + /** * Abort the batches that have been sitting in RecordAccumulator for more than the configured requestTimeout * due to metadata being unavailable */ public List abortExpiredBatches(int requestTimeout, long now) { - List expiredBatches = new ArrayList(); + List expiredBatches = new ArrayList<>(); int count = 0; for (Map.Entry> entry : this.batches.entrySet()) { Deque dq = entry.getValue(); @@ -245,7 +255,7 @@ public List abortExpiredBatches(int requestTimeout, long now) { } } } - if (expiredBatches.size() > 0) + if (!expiredBatches.isEmpty()) log.trace("Expired {} batches in accumulator", count); return expiredBatches; @@ -287,7 +297,7 @@ public void reenqueue(RecordBatch batch, long now) { * */ public ReadyCheckResult ready(Cluster cluster, long nowMs) { - Set readyNodes = new HashSet(); + Set readyNodes = new HashSet<>(); long nextReadyCheckDelayMs = Long.MAX_VALUE; boolean unknownLeadersExist = false; @@ -333,7 +343,7 @@ public boolean hasUnsent() { for (Map.Entry> entry : this.batches.entrySet()) { Deque deque = entry.getValue(); synchronized (deque) { - if (deque.size() > 0) + if (!deque.isEmpty()) return true; } } @@ -357,11 +367,11 @@ public Map> drain(Cluster cluster, if (nodes.isEmpty()) return Collections.emptyMap(); - Map> batches = new HashMap>(); + Map> batches = new HashMap<>(); for (Node node : nodes) { int size = 0; List parts = cluster.partitionsForNode(node.id()); - List ready = new ArrayList(); + List ready = new ArrayList<>(); /* to make starvation less likely this loop doesn't start at 0 */ int start = drainIndex = drainIndex % parts.size(); do { @@ -436,6 +446,11 @@ public void deallocate(RecordBatch batch) { boolean flushInProgress() { return flushesInProgress.get() > 0; } + + /* Visible for testing */ + Map> batches() { + return Collections.unmodifiableMap(batches); + } /** * Initiate the flushing of data from the accumulator...this makes all requests immediately ready @@ -569,7 +584,7 @@ public void remove(RecordBatch batch) { public Iterable all() { synchronized (incomplete) { - return new ArrayList(this.incomplete); + return new ArrayList<>(this.incomplete); } } } diff --git a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java index fcf7f446a459..603f74b18e0e 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java +++ b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java @@ -213,6 +213,11 @@ public String toString() { return builder.toString(); } + /** Visible for testing */ + public boolean isWritable() { + return writable; + } + public static class RecordsIterator extends AbstractIterator { private final ByteBuffer buffer; private final DataInputStream stream; diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java index b3a5a049a82d..43ac15a09a4a 100644 --- 
a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Deque; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -75,14 +76,27 @@ public void teardown() { @Test public void testFull() throws Exception { long now = time.milliseconds(); - RecordAccumulator accum = new RecordAccumulator(1024, 10 * 1024, CompressionType.NONE, 10L, 100L, metrics, time); - int appends = 1024 / msgSize; + int batchSize = 1024; + RecordAccumulator accum = new RecordAccumulator(batchSize, 10 * batchSize, CompressionType.NONE, 10L, 100L, metrics, time); + int appends = batchSize / msgSize; for (int i = 0; i < appends; i++) { + // append to the first batch accum.append(tp1, 0L, key, value, null, maxBlockTimeMs); + Deque partitionBatches = accum.batches().get(tp1); + assertEquals(1, partitionBatches.size()); + assertTrue(partitionBatches.peekFirst().records.isWritable()); assertEquals("No partitions should be ready.", 0, accum.ready(cluster, now).readyNodes.size()); } + + // this append doesn't fit in the first batch, so a new batch is created and the first batch is closed accum.append(tp1, 0L, key, value, null, maxBlockTimeMs); + Deque partitionBatches = accum.batches().get(tp1); + assertEquals(2, partitionBatches.size()); + Iterator partitionBatchesIterator = partitionBatches.iterator(); + assertFalse(partitionBatchesIterator.next().records.isWritable()); + assertTrue(partitionBatchesIterator.next().records.isWritable()); assertEquals("Our partition's leader should be ready", Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes); + List batches = accum.drain(cluster, Collections.singleton(node1), Integer.MAX_VALUE, 0).get(node1.id()); assertEquals(1, batches.size()); RecordBatch batch = batches.get(0); From 11a50ff1031b6a44900f876bb89593352d9cf41b Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Tue, 24 May 2016 12:53:24 -0700 Subject: [PATCH 155/267] MINOR: Fix documentation table of contents and `BLOCK_ON_BUFFER_FULL_DOC` Author: Ismael Juma Reviewers: Gwen Shapira Closes #1423 from ijuma/minor-doc-fixes --- .../org/apache/kafka/clients/producer/ProducerConfig.java | 7 +++---- docs/documentation.html | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java index 4ed083b2e79a..4d121b90948d 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java @@ -144,11 +144,10 @@ public class ProducerConfig extends AbstractConfig { @Deprecated public static final String BLOCK_ON_BUFFER_FULL_CONFIG = "block.on.buffer.full"; private static final String BLOCK_ON_BUFFER_FULL_DOC = "When our memory buffer is exhausted we must either stop accepting new records (block) or throw errors. " - + "By default this setting is false and the producer will no longer throw a BufferExhaustException but instead will use the {@link #MAX_BLOCK_MS_CONFIG} " - + "value to block, after which it will throw a TimeoutException. Setting this property to true will set the " + MAX_BLOCK_MS_CONFIG + " to Long.MAX_VALUE." 
+ + "By default this setting is false and the producer will no longer throw a BufferExhaustException but instead will use the " + MAX_BLOCK_MS_CONFIG + " " + + "value to block, after which it will throw a TimeoutException. Setting this property to true will set the " + MAX_BLOCK_MS_CONFIG + " to Long.MAX_VALUE. " + "Also if this property is set to true, parameter " + METADATA_FETCH_TIMEOUT_CONFIG + " is not longer honored." - + "

        " - + "This parameter is deprecated and will be removed in a future release. " + + "

        This parameter is deprecated and will be removed in a future release. " + "Parameter " + MAX_BLOCK_MS_CONFIG + " should be used instead."; /** buffer.memory */ diff --git a/docs/documentation.html b/docs/documentation.html index ddc310218018..31dc03960c4f 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -123,7 +123,8 @@

        Kafka 0.10.0 Documentation

      • 7.2 Encryption and Authentication using SSL
      • 7.3 Authentication using SASL
      • 7.4 Authorization and ACLs
      • -
      • 7.5 ZooKeeper Authentication
      • +
      • 7.5 Incorporating Security Features in a Running Cluster
      • +
      • 7.6 ZooKeeper Authentication
        • New Clusters
        • Migrating Clusters
        • From 6193357d87f4dce4c6832ac4f9a208d2a7a0539c Mon Sep 17 00:00:00 2001 From: Dustin Cote Date: Tue, 24 May 2016 17:26:54 -0700 Subject: [PATCH 156/267] KAFKA-3683; Add file descriptor recommendation to ops guide Adding sizing recommendations for file descriptors to the ops guide. Author: Dustin Cote Author: Dustin Cote Reviewers: Gwen Shapira Closes #1353 from cotedm/KAFKA-3683 and squashes the following commits: 8120318 [Dustin Cote] Adding file descriptor sizing recommendations 0908aa9 [Dustin Cote] Merge https://github.com/apache/kafka into trunk 32315e4 [Dustin Cote] Merge branch 'trunk' of https://github.com/cotedm/kafka into trunk 13309ed [Dustin Cote] Update links for new consumer API 4dcffc1 [Dustin Cote] Update links for new consumer API (cherry picked from commit 0e1c012fb551f32cf27b6b7367749047c374ee97) Signed-off-by: Gwen Shapira --- docs/ops.html | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/ops.html b/docs/ops.html index faf545358602..741312934554 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -468,13 +468,12 @@

          OS

          We have seen a few issues running on Windows and Windows is not currently a well supported platform though we would be happy to change that.

          -You likely don't need to do much OS-level tuning though there are a few things that will help performance.
          -
          -Two configurations that may be important:
          +It is unlikely to require much OS-level tuning, but there are two potentially important OS-level configurations:

          - • We upped the number of file descriptors since we have lots of topics and lots of connections.
          - • We upped the max socket buffer size to enable high-performance data transfer between data centers described here.
          + • File descriptor limits: Kafka uses file descriptors for log segments and open connections. If a broker hosts many partitions, consider that the broker needs at least (number_of_partitions)*(partition_size/segment_size) to track all log segments in addition to the number of connections the broker makes. We recommend at least 100000 allowed file descriptors for the broker processes as a starting point.
          + • Max socket buffer size: can be increased to enable high-performance data transfer between data centers as described here.
          +

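To make the file descriptor sizing above concrete, here is a back-of-the-envelope estimate in the spirit of the (number_of_partitions)*(partition_size/segment_size) formula; the partition count, retained bytes per partition and connection count are assumptions picked for illustration, and only the 1 GB segment size mirrors the log.segment.bytes default.

    // Rough per-broker descriptor estimate: one descriptor per live log segment plus one per open socket.
    val partitionsPerBroker = 2000                       // assumption
    val partitionSizeBytes  = 50L * 1024 * 1024 * 1024   // assumption: ~50 GB retained per partition
    val segmentSizeBytes    = 1L * 1024 * 1024 * 1024    // broker default log.segment.bytes
    val connections         = 5000                       // assumption: client plus replication sockets

    val segmentDescriptors  = partitionsPerBroker * (partitionSizeBytes / segmentSizeBytes)
    val descriptorEstimate  = segmentDescriptors + connections   // 105000 here, above the 100000 starting point

The limit itself is raised at the OS level (for example with ulimit -n or the distribution's limits configuration), not through any Kafka setting.
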
          Disks and Filesystem

          We recommend using multiple drives to get good throughput and not sharing the same drives used for Kafka data with application logs or other OS filesystem activity to ensure good latency. You can either RAID these drives together into a single volume or format and mount each drive as its own directory. Since Kafka has replication the redundancy provided by RAID can also be provided at the application level. This choice has several tradeoffs. From cb8b6c07140b9d0cca2f17c2fdcdd0f1c7248b99 Mon Sep 17 00:00:00 2001 From: Eno Thereska Date: Wed, 25 May 2016 13:08:57 +0100 Subject: [PATCH 157/267] MINOR: Removed 1/2 of the hardcoded sleeps in Streams Author: Eno Thereska Reviewers: Guozhang Wang , Ismael Juma Closes #1422 from enothereska/minor-integration-timeout2 (cherry picked from commit fee6f6f927b36ac74bc4a8b233711234558f3b51) Signed-off-by: Ismael Juma --- .../integration/FanoutIntegrationTest.java | 11 ++- .../InternalTopicIntegrationTest.java | 4 + .../integration/JoinIntegrationTest.java | 10 +-- .../MapFunctionIntegrationTest.java | 8 +- .../PassThroughIntegrationTest.java | 8 +- .../integration/WordCountIntegrationTest.java | 6 +- .../utils/IntegrationTestUtils.java | 80 ++++++++++++++++++- 7 files changed, 98 insertions(+), 29 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java index 2e11cd23e1e3..5199caa0a552 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/FanoutIntegrationTest.java @@ -134,10 +134,6 @@ public String apply(String value) { producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); IntegrationTestUtils.produceValuesSynchronously(INPUT_TOPIC_A, inputValues, producerConfig); - // Give the stream processing application some time to do its work. - Thread.sleep(10000); - streams.close(); - // // Step 3: Verify the application's output data. 
// @@ -149,7 +145,8 @@ public String apply(String value) { consumerConfigB.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfigB.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); consumerConfigB.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); - List actualValuesForB = IntegrationTestUtils.readValues(OUTPUT_TOPIC_B, consumerConfigB, inputValues.size()); + List actualValuesForB = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigB, + OUTPUT_TOPIC_B, inputValues.size()); assertThat(actualValuesForB, equalTo(expectedValuesForB)); // Verify output topic C @@ -159,7 +156,9 @@ public String apply(String value) { consumerConfigC.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfigC.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); consumerConfigC.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); - List actualValuesForC = IntegrationTestUtils.readValues(OUTPUT_TOPIC_C, consumerConfigC, inputValues.size()); + List actualValuesForC = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigC, + OUTPUT_TOPIC_C, inputValues.size()); + streams.close(); assertThat(actualValuesForC, equalTo(expectedValuesForC)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java index 66111c4279cd..e431b57ad8d3 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java @@ -149,6 +149,10 @@ public KeyValue apply(String key, String value) { KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); streams.start(); + // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all + // of the input data we produce below). + Thread.sleep(5000); + // // Step 2: Produce some input data to the input topic. // diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java index 93e31e22652c..4f318ec37392 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java @@ -219,7 +219,7 @@ public Long apply(Long value1, Long value2) { // Wait briefly for the topology to be fully up and running (otherwise it might miss some or all // of the input data we produce below). - Thread.sleep(5000); + Thread.sleep(10000); // // Step 2: Publish user-region information. @@ -246,10 +246,6 @@ public Long apply(Long value1, Long value2) { userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class); IntegrationTestUtils.produceKeyValuesSynchronously(USER_CLICKS_TOPIC, userClicks, userClicksProducerConfig); - // Give the stream processing application some time to do its work. - Thread.sleep(10000); - streams.close(); - // // Step 4: Verify the application's output data. 
// @@ -259,7 +255,9 @@ public Long apply(Long value1, Long value2) { consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); - List> actualClicksPerRegion = IntegrationTestUtils.readKeyValues(OUTPUT_TOPIC, consumerConfig); + List> actualClicksPerRegion = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, + OUTPUT_TOPIC, expectedClicksPerRegion.size()); + streams.close(); assertThat(actualClicksPerRegion, equalTo(expectedClicksPerRegion)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java index 31ac4006aa25..3c37aa183d39 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/MapFunctionIntegrationTest.java @@ -107,10 +107,6 @@ public String apply(String value) { producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); - // Give the stream processing application some time to do its work. - Thread.sleep(10000); - streams.close(); - // // Step 3: Verify the application's output data. // @@ -120,7 +116,9 @@ public String apply(String value) { consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class); consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); - List actualValues = IntegrationTestUtils.readValues(DEFAULT_OUTPUT_TOPIC, consumerConfig, inputValues.size()); + List actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, + DEFAULT_OUTPUT_TOPIC, inputValues.size()); + streams.close(); assertThat(actualValues, equalTo(expectedValues)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java index e126ed8cb92e..e81d21ca9ef6 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/PassThroughIntegrationTest.java @@ -94,10 +94,6 @@ public void shouldWriteTheInputDataAsIsToTheOutputTopic() throws Exception { producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); IntegrationTestUtils.produceValuesSynchronously(DEFAULT_INPUT_TOPIC, inputValues, producerConfig); - // Give the stream processing application some time to do its work. - Thread.sleep(10000); - streams.close(); - // // Step 3: Verify the application's output data. 
// @@ -107,7 +103,9 @@ public void shouldWriteTheInputDataAsIsToTheOutputTopic() throws Exception { consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); - List actualValues = IntegrationTestUtils.readValues(DEFAULT_OUTPUT_TOPIC, consumerConfig, inputValues.size()); + List actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, + DEFAULT_OUTPUT_TOPIC, inputValues.size()); + streams.close(); assertThat(actualValues, equalTo(inputValues)); } } \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java index c8583d1da0ed..c86409a97060 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java @@ -134,15 +134,15 @@ public KeyValue apply(String key, String value) { // // Step 3: Verify the application's output data. // - Thread.sleep(10000); - streams.close(); Properties consumerConfig = new Properties(); consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-integration-test-standard-consumer"); consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); - List> actualWordCounts = IntegrationTestUtils.readKeyValues(DEFAULT_OUTPUT_TOPIC, consumerConfig); + List> actualWordCounts = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, + DEFAULT_OUTPUT_TOPIC, expectedWordCounts.size()); + streams.close(); assertThat(actualWordCounts, equalTo(expectedWordCounts)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java index 89fe0c4ef943..c3f90897fcc2 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java @@ -44,7 +44,8 @@ */ public class IntegrationTestUtils { - private static final int UNLIMITED_MESSAGES = -1; + public static final int UNLIMITED_MESSAGES = -1; + public static final long DEFAULT_TIMEOUT = 30 * 1000L; /** * Returns up to `maxMessages` message-values from the topic. @@ -54,10 +55,10 @@ public class IntegrationTestUtils { * @param maxMessages Maximum number of messages to read via the consumer. * @return The values retrieved via the consumer. 
*/ - public static List readValues(String topic, Properties consumerConfig, int maxMessages) { + public static List readValues(String topic, Properties consumerConfig, int maxMessages) { List returnList = new ArrayList<>(); - List> kvs = readKeyValues(topic, consumerConfig, maxMessages); - for (KeyValue kv : kvs) { + List> kvs = readKeyValues(topic, consumerConfig, maxMessages); + for (KeyValue kv : kvs) { returnList.add(kv.value); } return returnList; @@ -154,4 +155,75 @@ public static void produceValuesSynchronously( produceKeyValuesSynchronously(topic, keyedRecords, producerConfig); } + public static List> waitUntilMinKeyValueRecordsReceived(Properties consumerConfig, + String topic, + int expectedNumRecords) throws InterruptedException { + + return waitUntilMinKeyValueRecordsReceived(consumerConfig, topic, expectedNumRecords, DEFAULT_TIMEOUT); + } + + /** + * Wait until enough data (key-value records) has been consumed. + * @param consumerConfig Kafka Consumer configuration + * @param topic Topic to consume from + * @param expectedNumRecords Minimum number of expected records + * @param waitTime Upper bound in waiting time in milliseconds + * @return All the records consumed, or null if no records are consumed + * @throws InterruptedException + * @throws AssertionError if the given wait time elapses + */ + public static List> waitUntilMinKeyValueRecordsReceived(Properties consumerConfig, + String topic, + int expectedNumRecords, + long waitTime) throws InterruptedException { + List> accumData = new ArrayList<>(); + long startTime = System.currentTimeMillis(); + while (true) { + List> readData = readKeyValues(topic, consumerConfig); + accumData.addAll(readData); + if (accumData.size() >= expectedNumRecords) + return accumData; + if (System.currentTimeMillis() > startTime + waitTime) + throw new AssertionError("Expected " + expectedNumRecords + + " but received only " + accumData.size() + + " records before timeout " + waitTime + " ms"); + Thread.sleep(Math.min(waitTime, 100L)); + } + } + + public static List waitUntilMinValuesRecordsReceived(Properties consumerConfig, + String topic, + int expectedNumRecords) throws InterruptedException { + + return waitUntilMinValuesRecordsReceived(consumerConfig, topic, expectedNumRecords, DEFAULT_TIMEOUT); + } + + /** + * Wait until enough data (value records) has been consumed. 
+ * @param consumerConfig Kafka Consumer configuration + * @param topic Topic to consume from + * @param expectedNumRecords Minimum number of expected records + * @param waitTime Upper bound in waiting time in milliseconds + * @return All the records consumed, or null if no records are consumed + * @throws InterruptedException + * @throws AssertionError if the given wait time elapses + */ + public static List waitUntilMinValuesRecordsReceived(Properties consumerConfig, + String topic, + int expectedNumRecords, + long waitTime) throws InterruptedException { + List accumData = new ArrayList<>(); + long startTime = System.currentTimeMillis(); + while (true) { + List readData = readValues(topic, consumerConfig, expectedNumRecords); + accumData.addAll(readData); + if (accumData.size() >= expectedNumRecords) + return accumData; + if (System.currentTimeMillis() > startTime + waitTime) + throw new AssertionError("Expected " + expectedNumRecords + + " but received only " + accumData.size() + + " records before timeout " + waitTime + " ms"); + Thread.sleep(Math.min(waitTime, 100L)); + } + } } \ No newline at end of file From 5f9446498e24afdb1149af53583c0ab23345d965 Mon Sep 17 00:00:00 2001 From: Onur Karaman Date: Thu, 26 May 2016 09:17:31 +0100 Subject: [PATCH 158/267] KAFKA-3718; propagate all KafkaConfig __consumer_offsets configs to OffsetConfig instantiation Kafka has two configurable compression codecs: the one used by the client (source codec) and the one finally used when storing into the log (target codec). The target codec defaults to KafkaConfig.compressionType and can be dynamically configured through zookeeper. The GroupCoordinator appends group membership information into the __consumer_offsets topic by: 1. making a message with group membership information 2. making a MessageSet with the single message compressed with the source codec 3. doing a log.append on the MessageSet Without this patch, KafkaConfig.offsetsTopicCompressionCodec doesn't get propagated to OffsetConfig instantiation, so GroupMetadataManager uses a source codec of NoCompressionCodec when making the MessageSet. Let's say we have enough group information such that the message formed exceeds KafkaConfig.messageMaxBytes before compression but would fall below the threshold after compression using our source codec. Even if we had dynamically configured __consumer_offsets with our favorite compression codec, the log.append will throw RecordTooLargeException during analyzeAndValidateMessageSet since the message was unexpectedly uncompressed instead of having been compressed with the source codec defined by KafkaConfig.offsetsTopicCompressionCodec. 
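To make the size arithmetic above concrete: the commit describes a message that exceeds KafkaConfig.messageMaxBytes before compression but would fall below it afterwards. The standalone Java sketch below illustrates that effect; the 1,000,000-byte limit and the repetitive 2 MB payload are made-up stand-ins, not values taken from Kafka.

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.util.Arrays;
    import java.util.zip.GZIPOutputStream;

    public class CompressionSizeSketch {
        // Made-up stand-in for the broker's message size limit (KafkaConfig.messageMaxBytes)
        private static final int MESSAGE_MAX_BYTES = 1_000_000;

        public static void main(String[] args) throws IOException {
            // Hypothetical group-metadata payload: large and repetitive, so it compresses well
            byte[] uncompressed = new byte[2 * 1024 * 1024];
            Arrays.fill(uncompressed, (byte) 'a');

            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            try (GZIPOutputStream gzip = new GZIPOutputStream(buffer)) {
                gzip.write(uncompressed);
            }
            byte[] compressed = buffer.toByteArray();

            System.out.println("uncompressed: " + uncompressed.length + " bytes, over limit: "
                    + (uncompressed.length > MESSAGE_MAX_BYTES));
            System.out.println("compressed:   " + compressed.length + " bytes, over limit: "
                    + (compressed.length > MESSAGE_MAX_BYTES));
        }
    }

With the source codec properly propagated, the broker validates the compressed form and the append stays under the limit; without it, the uncompressed form is validated and RecordTooLargeException is thrown as described above.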
Author: Onur Karaman Reviewers: Manikumar Reddy , Jason Gustafson , Ismael Juma Closes #1394 from onurkaraman/KAFKA-3718 (cherry picked from commit 62dc1afb69369c64207991ba59bcd203505d37ea) Signed-off-by: Ismael Juma --- .../kafka/coordinator/GroupCoordinator.scala | 10 +-- .../main/scala/kafka/server/KafkaServer.scala | 14 ++--- .../kafka/api/AuthorizerIntegrationTest.scala | 2 +- .../api/GroupCoordinatorIntegrationTest.scala | 63 +++++++++++++++++++ .../kafka/api/IntegrationTestHarness.scala | 2 +- 5 files changed, 78 insertions(+), 13 deletions(-) create mode 100644 core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala diff --git a/core/src/main/scala/kafka/coordinator/GroupCoordinator.scala b/core/src/main/scala/kafka/coordinator/GroupCoordinator.scala index fb712541ec87..f4457647ece8 100644 --- a/core/src/main/scala/kafka/coordinator/GroupCoordinator.scala +++ b/core/src/main/scala/kafka/coordinator/GroupCoordinator.scala @@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicBoolean import kafka.common.{OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition} import kafka.log.LogConfig -import kafka.message.UncompressedCodec +import kafka.message.ProducerCompressionCodec import kafka.server._ import kafka.utils._ import org.apache.kafka.common.TopicPartition @@ -65,7 +65,7 @@ class GroupCoordinator(val brokerId: Int, val props = new Properties props.put(LogConfig.CleanupPolicyProp, LogConfig.Compact) props.put(LogConfig.SegmentBytesProp, offsetConfig.offsetsTopicSegmentBytes.toString) - props.put(LogConfig.CompressionTypeProp, UncompressedCodec.name) + props.put(LogConfig.CompressionTypeProp, ProducerCompressionCodec.name) props } @@ -744,14 +744,16 @@ object GroupCoordinator { offsetsRetentionMs = config.offsetsRetentionMinutes * 60 * 1000L, offsetsRetentionCheckIntervalMs = config.offsetsRetentionCheckIntervalMs, offsetsTopicNumPartitions = config.offsetsTopicPartitions, + offsetsTopicSegmentBytes = config.offsetsTopicSegmentBytes, offsetsTopicReplicationFactor = config.offsetsTopicReplicationFactor, + offsetsTopicCompressionCodec = config.offsetsTopicCompressionCodec, offsetCommitTimeoutMs = config.offsetCommitTimeoutMs, offsetCommitRequiredAcks = config.offsetCommitRequiredAcks) val groupConfig = GroupConfig(groupMinSessionTimeoutMs = config.groupMinSessionTimeoutMs, groupMaxSessionTimeoutMs = config.groupMaxSessionTimeoutMs) - val groupManager = new GroupMetadataManager(config.brokerId, offsetConfig, replicaManager, zkUtils, time) - new GroupCoordinator(config.brokerId, groupConfig, offsetConfig, groupManager, heartbeatPurgatory, joinPurgatory, time) + val groupMetadataManager = new GroupMetadataManager(config.brokerId, offsetConfig, replicaManager, zkUtils, time) + new GroupCoordinator(config.brokerId, groupConfig, offsetConfig, groupMetadataManager, heartbeatPurgatory, joinPurgatory, time) } } diff --git a/core/src/main/scala/kafka/server/KafkaServer.scala b/core/src/main/scala/kafka/server/KafkaServer.scala index 2832ebc6266e..de3054a97d86 100755 --- a/core/src/main/scala/kafka/server/KafkaServer.scala +++ b/core/src/main/scala/kafka/server/KafkaServer.scala @@ -123,7 +123,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr var dynamicConfigHandlers: Map[String, ConfigHandler] = null var dynamicConfigManager: DynamicConfigManager = null - var consumerCoordinator: GroupCoordinator = null + var groupCoordinator: GroupCoordinator = null var kafkaController: KafkaController = null @@ -199,9 +199,9 @@ class 
KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr kafkaController = new KafkaController(config, zkUtils, brokerState, kafkaMetricsTime, metrics, threadNamePrefix) kafkaController.startup() - /* start kafka coordinator */ - consumerCoordinator = GroupCoordinator(config, zkUtils, replicaManager, kafkaMetricsTime) - consumerCoordinator.startup() + /* start group coordinator */ + groupCoordinator = GroupCoordinator(config, zkUtils, replicaManager, kafkaMetricsTime) + groupCoordinator.startup() /* Get the authorizer and initialize it if one is specified.*/ authorizer = Option(config.authorizerClassName).filter(_.nonEmpty).map { authorizerClassName => @@ -211,7 +211,7 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr } /* start processing requests */ - apis = new KafkaApis(socketServer.requestChannel, replicaManager, consumerCoordinator, + apis = new KafkaApis(socketServer.requestChannel, replicaManager, groupCoordinator, kafkaController, zkUtils, config.brokerId, config, metadataCache, metrics, authorizer) requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads) brokerState.newState(RunningAsBroker) @@ -555,8 +555,8 @@ class KafkaServer(val config: KafkaConfig, time: Time = SystemTime, threadNamePr CoreUtils.swallow(replicaManager.shutdown()) if(logManager != null) CoreUtils.swallow(logManager.shutdown()) - if(consumerCoordinator != null) - CoreUtils.swallow(consumerCoordinator.shutdown()) + if(groupCoordinator != null) + CoreUtils.swallow(groupCoordinator.shutdown()) if(kafkaController != null) CoreUtils.swallow(kafkaController.shutdown()) if(zkUtils != null) diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala index bc705f13f10d..2d5900f8364a 100644 --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala @@ -148,7 +148,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { 1, 1, servers, - servers.head.consumerCoordinator.offsetsTopicConfigs) + servers.head.groupCoordinator.offsetsTopicConfigs) // create the test topic with all the brokers as replicas TestUtils.createTopic(zkUtils, topic, 1, 1, this.servers) } diff --git a/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala new file mode 100644 index 000000000000..9183d0f36e28 --- /dev/null +++ b/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ +package integration.kafka.api + +import kafka.common.TopicAndPartition +import kafka.integration.KafkaServerTestHarness +import kafka.log.Log +import kafka.message.GZIPCompressionCodec +import kafka.server.KafkaConfig +import kafka.utils.TestUtils +import org.apache.kafka.clients.consumer.OffsetAndMetadata +import org.apache.kafka.common.TopicPartition +import org.apache.kafka.common.internals.TopicConstants +import org.apache.kafka.common.protocol.SecurityProtocol +import org.junit.Test +import org.junit.Assert._ + +import scala.collection.JavaConverters._ +import java.util.Properties + +class GroupCoordinatorIntegrationTest extends KafkaServerTestHarness { + val offsetsTopicCompressionCodec = GZIPCompressionCodec + val overridingProps = new Properties() + overridingProps.put(KafkaConfig.OffsetsTopicPartitionsProp, "1") + overridingProps.put(KafkaConfig.OffsetsTopicCompressionCodecProp, offsetsTopicCompressionCodec.codec.toString) + + override def generateConfigs = TestUtils.createBrokerConfigs(1, zkConnect, enableControlledShutdown = false).map { + KafkaConfig.fromProps(_, overridingProps) + } + + @Test + def testGroupCoordinatorPropagatesOfffsetsTopicCompressionCodec() { + val consumer = TestUtils.createNewConsumer(TestUtils.getBrokerListStrFromServers(servers), + securityProtocol = SecurityProtocol.PLAINTEXT) + val offsetMap = Map( + new TopicPartition(TopicConstants.GROUP_METADATA_TOPIC_NAME, 0) -> new OffsetAndMetadata(10, "") + ).asJava + consumer.commitSync(offsetMap) + val logManager = servers.head.getLogManager + + def getGroupMetadataLogOpt: Option[Log] = + logManager.getLog(TopicAndPartition(TopicConstants.GROUP_METADATA_TOPIC_NAME, 0)) + + TestUtils.waitUntilTrue(() => getGroupMetadataLogOpt.exists(_.logSegments.exists(_.log.nonEmpty)), + "Commit message not appended in time") + + val logSegments = getGroupMetadataLogOpt.get.logSegments + val incorrectCompressionCodecs = logSegments.flatMap(_.log.map(_.message.compressionCodec)).filter(_ != offsetsTopicCompressionCodec) + assertEquals("Incorrect compression codecs should be empty", Seq.empty, incorrectCompressionCodecs) + + consumer.close() + } +} diff --git a/core/src/test/scala/integration/kafka/api/IntegrationTestHarness.scala b/core/src/test/scala/integration/kafka/api/IntegrationTestHarness.scala index de05c9c6ea4b..6e76f90b21a3 100644 --- a/core/src/test/scala/integration/kafka/api/IntegrationTestHarness.scala +++ b/core/src/test/scala/integration/kafka/api/IntegrationTestHarness.scala @@ -81,7 +81,7 @@ trait IntegrationTestHarness extends KafkaServerTestHarness { serverConfig.getProperty(KafkaConfig.OffsetsTopicPartitionsProp).toInt, serverConfig.getProperty(KafkaConfig.OffsetsTopicReplicationFactorProp).toInt, servers, - servers(0).consumerCoordinator.offsetsTopicConfigs) + servers(0).groupCoordinator.offsetsTopicConfigs) } @After From 5aa20fb578dc8f20230a76ae8223fa013e590622 Mon Sep 17 00:00:00 2001 From: Mickael Maison Date: Fri, 27 May 2016 21:35:08 +0100 Subject: [PATCH 159/267] MINOR: Fix tracing in KafkaApis.handle() requestObj() returns null for the o.a.k.c.requests objects so use header() for these. Once all the requests will have been replaced by o.a.k.c.requests objects, we should be able to clean that up, but in the meantime it's useful to trace both. 
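The workaround amounts to a null-check fallback when building the trace message: old-style requests describe themselves via requestObj, while for o.a.k.c.requests objects requestObj is null and the header (plus body) is logged instead. A minimal illustrative Java sketch of that pattern, with stand-in names rather than the actual RequestChannel types:

    // Illustrative only: prefer the old-style request object's description, otherwise
    // fall back to the header/body pair of the new o.a.k.c.requests representation.
    final class RequestTraceSketch {
        static String requestDesc(Object requestObj, Object header, Object body) {
            if (requestObj != null)
                return requestObj.toString();
            return header + " -- " + body;
        }
    }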
Author: Mickael Maison Reviewers: Ismael Juma Closes #1435 from mimaison/kafkaapis_trace (cherry picked from commit 936e26aa8db991b51141b21df234d702bc05408f) Signed-off-by: Ismael Juma --- core/src/main/scala/kafka/network/RequestChannel.scala | 2 +- core/src/main/scala/kafka/server/KafkaApis.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/network/RequestChannel.scala b/core/src/main/scala/kafka/network/RequestChannel.scala index e2000dbfd04c..53a2346b42a3 100644 --- a/core/src/main/scala/kafka/network/RequestChannel.scala +++ b/core/src/main/scala/kafka/network/RequestChannel.scala @@ -100,7 +100,7 @@ object RequestChannel extends Logging { buffer = null private val requestLogger = Logger.getLogger("kafka.request.logger") - private def requestDesc(details: Boolean): String = { + def requestDesc(details: Boolean): String = { if (requestObj != null) requestObj.describe(details) else diff --git a/core/src/main/scala/kafka/server/KafkaApis.scala b/core/src/main/scala/kafka/server/KafkaApis.scala index 086bd4b893db..1edc16242db5 100644 --- a/core/src/main/scala/kafka/server/KafkaApis.scala +++ b/core/src/main/scala/kafka/server/KafkaApis.scala @@ -71,7 +71,7 @@ class KafkaApis(val requestChannel: RequestChannel, def handle(request: RequestChannel.Request) { try { trace("Handling request:%s from connection %s;securityProtocol:%s,principal:%s". - format(request.requestObj, request.connectionId, request.securityProtocol, request.session.principal)) + format(request.requestDesc(true), request.connectionId, request.securityProtocol, request.session.principal)) ApiKeys.forId(request.requestId) match { case ApiKeys.PRODUCE => handleProducerRequest(request) case ApiKeys.FETCH => handleFetchRequest(request) From 946ae60a4c8b694bfad65f20348d8081f103830b Mon Sep 17 00:00:00 2001 From: Flavio Junqueira Date: Wed, 1 Jun 2016 21:03:40 -0700 Subject: [PATCH 160/267] KAFKA-3660: Log exception message in ControllerBrokerRequestBatch Author: Flavio Junqueira Reviewers: Grant Henke , Ismael Juma , Ewen Cheslack-Postava Closes #1325 from fpj/KAFKA-3660 (cherry picked from commit ab7325928164b0cc7c26c695ecdf431ce1d89b8d) Signed-off-by: Ewen Cheslack-Postava --- .../scala/kafka/controller/ControllerChannelManager.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala index 65b7096286a6..b4059a496471 100755 --- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala +++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala @@ -426,15 +426,15 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends Logging case e : Throwable => { if (leaderAndIsrRequestMap.size > 0) { error("Haven't been able to send leader and isr requests, current state of " + - s"the map is $leaderAndIsrRequestMap") + s"the map is $leaderAndIsrRequestMap. Exception message: $e") } if (updateMetadataRequestMap.size > 0) { error("Haven't been able to send metadata update requests, current state of " + - s"the map is $updateMetadataRequestMap") + s"the map is $updateMetadataRequestMap. Exception message: $e") } if (stopReplicaRequestMap.size > 0) { error("Haven't been able to send stop replica requests, current state of " + - s"the map is $stopReplicaRequestMap") + s"the map is $stopReplicaRequestMap. 
Exception message: $e") } throw new IllegalStateException(e) } From de23c6376bead90249e1e4344d8b7a5ed148fba3 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Fri, 3 Jun 2016 01:19:46 +0100 Subject: [PATCH 161/267] MINOR: Fix setting of ACLs and ZK shutdown in test harnesses I found both issues while investigating the issue described in PR #1425. Author: Ismael Juma Reviewers: Sriharsha Chintalapani , Jun Rao Closes #1455 from ijuma/fix-integration-test-harness-and-zk-test-harness (cherry picked from commit 1029030466f01937d416e11f93562bcaaecce253) Signed-off-by: Ismael Juma --- .../kafka/api/EndToEndAuthorizationTest.scala | 7 ++-- .../integration/KafkaServerTestHarness.scala | 34 +++++++++++-------- .../unit/kafka/zk/EmbeddedZookeeper.scala | 10 ++++++ .../unit/kafka/zk/ZooKeeperTestHarness.scala | 13 ------- 4 files changed, 33 insertions(+), 31 deletions(-) diff --git a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala index fec96cda0bdd..e13f160a3e3f 100644 --- a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala +++ b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala @@ -61,12 +61,11 @@ trait EndToEndAuthorizationTest extends IntegrationTestHarness with SaslSetup { override val producerCount = 1 override val consumerCount = 2 override val serverCount = 3 - override val setClusterAcl = Some { () => + + override def setAclsBeforeServersStart() { AclCommand.main(clusterAclArgs) - servers.foreach(s => - TestUtils.waitAndVerifyAcls(ClusterActionAcl, s.apis.authorizer.get, clusterResource) - ) } + val numRecords = 1 val group = "group" val topic = "e2etopic" diff --git a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala index 8e8ae8b3266c..7059d177ad47 100755 --- a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala +++ b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala @@ -38,7 +38,6 @@ trait KafkaServerTestHarness extends ZooKeeperTestHarness { var brokerList: String = null var alive: Array[Boolean] = null val kafkaPrincipalType = KafkaPrincipal.USER_TYPE - val setClusterAcl: Option[() => Unit] = None /** * Implementations must override this method to return a set of KafkaConfigs. This method will be invoked for every @@ -46,13 +45,26 @@ trait KafkaServerTestHarness extends ZooKeeperTestHarness { */ def generateConfigs(): Seq[KafkaConfig] + /** + * Override this in case ACLs must be set before `servers` are started. + * + * This is required in some cases because of the topic creation in the setup of `IntegrationTestHarness`. If the ACLs + * are only set later, tests may fail. The failure could manifest itself as a cluster action + * authorization exception when processing an update metadata request (controller -> broker) or in more obscure + * ways (e.g. __consumer_offsets topic replication fails because the metadata cache has no brokers as a previous + * update metadata request failed due to an authorization exception). + * + * The default implementation of this method is a no-op. 
+ */ + def setAclsBeforeServersStart() {} + def configs: Seq[KafkaConfig] = { if (instanceConfigs == null) instanceConfigs = generateConfigs() instanceConfigs } - def serverForId(id: Int) = servers.find(s => s.config.brokerId == id) + def serverForId(id: Int): Option[KafkaServer] = servers.find(s => s.config.brokerId == id) protected def securityProtocol: SecurityProtocol = SecurityProtocol.PLAINTEXT protected def trustStoreFile: Option[File] = None @@ -61,23 +73,17 @@ trait KafkaServerTestHarness extends ZooKeeperTestHarness { @Before override def setUp() { super.setUp - if (configs.size <= 0) + + if (configs.isEmpty) throw new KafkaException("Must supply at least one server config.") + + // default implementation is a no-op, it is overridden by subclasses if required + setAclsBeforeServersStart() + servers = configs.map(TestUtils.createServer(_)).toBuffer brokerList = TestUtils.getBrokerListStrFromServers(servers, securityProtocol) alive = new Array[Boolean](servers.length) Arrays.fill(alive, true) - // We need to set a cluster ACL in some cases here - // because of the topic creation in the setup of - // IntegrationTestHarness. If we don't, then tests - // fail with a cluster action authorization exception - // when processing an update metadata request - // (controller -> broker). - // - // The following method does nothing by default, but - // if the test case requires setting up a cluster ACL, - // then it needs to be implemented. - setClusterAcl.foreach(_.apply) } @After diff --git a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala index 1030c46d93bd..22465ea23d2a 100755 --- a/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala +++ b/core/src/test/scala/unit/kafka/zk/EmbeddedZookeeper.scala @@ -39,6 +39,16 @@ class EmbeddedZookeeper() { def shutdown() { CoreUtils.swallow(zookeeper.shutdown()) CoreUtils.swallow(factory.shutdown()) + + def isDown(): Boolean = { + try { + ZkFourLetterWords.sendStat("127.0.0.1", port, 3000) + false + } catch { case _: Throwable => true } + } + + Iterator.continually(isDown()).exists(identity) + Utils.delete(logDir) Utils.delete(snapshotDir) } diff --git a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala index 0de11cdbe12f..305e0749a309 100755 --- a/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala +++ b/core/src/test/scala/unit/kafka/zk/ZooKeeperTestHarness.scala @@ -46,19 +46,6 @@ trait ZooKeeperTestHarness extends JUnitSuite with Logging { CoreUtils.swallow(zkUtils.close()) if (zookeeper != null) CoreUtils.swallow(zookeeper.shutdown()) - - def isDown(): Boolean = { - try { - ZkFourLetterWords.sendStat("127.0.0.1", zkPort, 3000) - false - } catch { case _: Throwable => - debug("Server is down") - true - } - } - - Iterator.continually(isDown()).exists(identity) - Configuration.setConfiguration(null) } From c91f742deb8b83d59847f2afb7a786f18b34eb1d Mon Sep 17 00:00:00 2001 From: Greg Fodor Date: Fri, 3 Jun 2016 09:41:26 +0100 Subject: [PATCH 162/267] KAFKA-3785; Fetcher spending unnecessary time during metrics recording Author: Greg Fodor Reviewers: Ismael Juma Closes #1464 from gfodor/gfodor/kafka-3785 (cherry picked from commit 7699b91f870f58e71dbdeddba79b58f009f9c0a2) Signed-off-by: Ismael Juma --- .../apache/kafka/clients/consumer/internals/Fetcher.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index 0256fe7f536b..ddfb5841e3f9 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -770,13 +770,13 @@ public FetchManagerMetrics(Metrics metrics, String metricGrpPrefix) { } public void recordTopicFetchMetrics(String topic, int bytes, int records) { - Map metricTags = new HashMap<>(); - metricTags.put("topic", topic.replace(".", "_")); - // record bytes fetched String name = "topic." + topic + ".bytes-fetched"; Sensor bytesFetched = this.metrics.getSensor(name); if (bytesFetched == null) { + Map metricTags = new HashMap<>(1); + metricTags.put("topic", topic.replace('.', '_')); + bytesFetched = this.metrics.sensor(name); bytesFetched.add(this.metrics.metricName("fetch-size-avg", this.metricGrpName, @@ -797,6 +797,9 @@ public void recordTopicFetchMetrics(String topic, int bytes, int records) { name = "topic." + topic + ".records-fetched"; Sensor recordsFetched = this.metrics.getSensor(name); if (recordsFetched == null) { + Map metricTags = new HashMap<>(1); + metricTags.put("topic", topic.replace('.', '_')); + recordsFetched = this.metrics.sensor(name); recordsFetched.add(this.metrics.metricName("records-per-request-avg", this.metricGrpName, From 419e6517c75822d94974628151d399ae93e4a9f0 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 3 Jun 2016 10:30:01 +0100 Subject: [PATCH 163/267] MINOR: Add user overridden test logging events Author: Guozhang Wang Reviewers: Ismael Juma Closes #1456 from guozhangwang/Kminor-test-logging (cherry picked from commit 76ca6f8cc1a6533866a2bd7ca6a7d32314d21dba) Signed-off-by: Ismael Juma --- README.md | 1 + build.gradle | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e9322b47e840..91c09805e749 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ The following options should be set with a `-D` switch, for example `./gradlew - * `org.gradle.project.maxParallelForks`: limits the maximum number of processes for each task. * `org.gradle.project.showStandardStreams`: shows standard out and standard error of the test JVM(s) on the console. * `org.gradle.project.skipSigning`: skips signing of artifacts. +* `org.gradle.project.testLoggingEvents`: unit test events to be logged, separated by comma. For example `./gradlew -Dorg.gradle.project.testLoggingEvents=started,passed,skipped,failed test` ### Running in Vagrant ### diff --git a/build.gradle b/build.gradle index 4864d5a0a3dd..d076c294145b 100644 --- a/build.gradle +++ b/build.gradle @@ -82,6 +82,8 @@ ext { userShowStandardStreams = project.hasProperty("showStandardStreams") ? showStandardStreams : null + userTestLoggingEvents = project.hasProperty("testLoggingEvents") ? 
Arrays.asList(testLoggingEvents.split(",")) : null + generatedDocsDir = new File("${project.rootDir}/docs/generated") } @@ -156,7 +158,7 @@ subprojects { test { maxParallelForks = userMaxForks ?: Runtime.runtime.availableProcessors() testLogging { - events "passed", "skipped", "failed" + events = userTestLoggingEvents ?: ["passed", "skipped", "failed"] showStandardStreams = userShowStandardStreams ?: false exceptionFormat = 'full' } From c0537b5f059e025fa268d92a58c27d98540b7c5a Mon Sep 17 00:00:00 2001 From: Tom Rybak Date: Fri, 3 Jun 2016 13:21:40 -0700 Subject: [PATCH 164/267] KAFKA-3784: TimeWindows#windowsFor calculation is incorrect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed the logic calculating the windows that are affected by a new …event in the case of hopping windows and a small overlap. - Added a unit test that tests for the issue Author: Tom Rybak Reviewers: Michael G. Noll, Matthias J. Sax, Guozhang Wang Closes #1462 from trybak/bugfix/KAFKA-3784-TimeWindows#windowsFor-false-positives (cherry picked from commit 234fa5a6949c9a5bfb4f543989c2ece84fcce033) Signed-off-by: Guozhang Wang --- .../org/apache/kafka/streams/kstream/TimeWindows.java | 4 +--- .../org/apache/kafka/streams/kstream/TimeWindowsTest.java | 8 ++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindows.java index e4ce88332ef3..001e92eaf6eb 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindows.java @@ -99,9 +99,7 @@ public TimeWindows advanceBy(long interval) { @Override public Map windowsFor(long timestamp) { - long enclosed = (size - 1) / advance; - long windowStart = Math.max(0, timestamp - timestamp % advance - enclosed * advance); - + long windowStart = (Math.max(0, timestamp - this.size + this.advance) / this.advance) * this.advance; Map windows = new HashMap<>(); while (windowStart <= timestamp) { TimeWindow window = new TimeWindow(windowStart, windowStart + this.size); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java index e9ff235d48c8..62b12a9ff409 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java @@ -112,6 +112,14 @@ public void windowsForHoppingWindows() { assertEquals(new TimeWindow(20L, 32L), matched.get(20L)); } + @Test + public void windowsForBarelyOverlappingHoppingWindows() { + TimeWindows windows = TimeWindows.of(anyName, 6L).advanceBy(5L); + Map matched = windows.windowsFor(7L); + assertEquals(1, matched.size()); + assertEquals(new TimeWindow(5L, 11L), matched.get(5L)); + } + @Test public void windowsForTumblingWindows() { TimeWindows windows = TimeWindows.of(anyName, 12L); From 717c7bbc39b56a17a413b3bd02c776d9b79e9b3e Mon Sep 17 00:00:00 2001 From: Edoardo Comar Date: Fri, 3 Jun 2016 21:31:27 +0100 Subject: [PATCH 165/267] KAFKA-3728; EndToEndAuthorizationTest offsets_topic misconfigured Set OffsetsTopicReplicationFactorProp to 3 like MinInSyncReplicasProp Else a consumer was able to consume via assign but not via subscribe, so the testProduceAndConsume is now duplicated to check both paths Author: Edoardo Comar Reviewers: Ismael Juma Closes #1425 from 
edoardocomar/KAFKA-3728 (cherry picked from commit 49ddc897b8feda9c4786d5bcd03814b91ede7124) Signed-off-by: Ismael Juma --- .../kafka/api/EndToEndAuthorizationTest.scala | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala index e13f160a3e3f..05d8dc54fd5f 100644 --- a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala +++ b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala @@ -132,7 +132,7 @@ trait EndToEndAuthorizationTest extends IntegrationTestHarness with SaslSetup { this.serverConfig.setProperty(KafkaConfig.AuthorizerClassNameProp, classOf[SimpleAclAuthorizer].getName) // Some needed configuration for brokers, producers, and consumers this.serverConfig.setProperty(KafkaConfig.OffsetsTopicPartitionsProp, "1") - this.serverConfig.setProperty(KafkaConfig.OffsetsTopicReplicationFactorProp, "1") + this.serverConfig.setProperty(KafkaConfig.OffsetsTopicReplicationFactorProp, "3") this.serverConfig.setProperty(KafkaConfig.MinInSyncReplicasProp, "3") this.consumerConfig.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group") @@ -169,7 +169,22 @@ trait EndToEndAuthorizationTest extends IntegrationTestHarness with SaslSetup { * Tests the ability of producing and consuming with the appropriate ACLs set. */ @Test - def testProduceConsume { + def testProduceConsumeViaAssign { + setAclsAndProduce() + consumers.head.assign(List(tp).asJava) + consumeRecords(this.consumers.head, numRecords) + debug("Finished consuming") + } + + @Test + def testProduceConsumeViaSubscribe { + setAclsAndProduce() + consumers.head.subscribe(List(topic).asJava) + consumeRecords(this.consumers.head, numRecords) + debug("Finished consuming") + } + + private def setAclsAndProduce() { AclCommand.main(produceAclArgs) AclCommand.main(consumeAclArgs) servers.foreach(s => { @@ -181,9 +196,6 @@ trait EndToEndAuthorizationTest extends IntegrationTestHarness with SaslSetup { sendRecords(numRecords, tp) //Consume records debug("Finished sending and starting to consume records") - consumers.head.assign(List(tp).asJava) - consumeRecords(this.consumers.head, numRecords) - debug("Finished consuming") } /** From 625e9400108b15e86b060768b082e196127d48ee Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Sat, 4 Jun 2016 22:43:54 +0100 Subject: [PATCH 166/267] KAFKA-3789; Upgrade Snappy to fix snappy decompression errors Author: Grant Henke Reviewers: Ismael Juma Closes #1467 from granthenke/snappy-fix (cherry picked from commit 27cb6686fd678a1625fe3bb114e7ff0afb4f6448) Signed-off-by: Ismael Juma --- gradle/dependencies.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 8b292e8f9c74..c97af87aebfa 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -42,7 +42,7 @@ versions += [ scalaParserCombinators: "1.0.4", scoverage: "1.1.1", slf4j: "1.7.21", - snappy: "1.1.2.4", + snappy: "1.1.2.6", zkclient: "0.8", zookeeper: "3.4.6", ] From 6500b53c75495296275e83e4e4beb224d7ac0c4f Mon Sep 17 00:00:00 2001 From: Tao Xiao Date: Sun, 5 Jun 2016 08:42:42 +0100 Subject: [PATCH 167/267] KAFKA-3787; Preserve the message timestamp in mirror maker The timestamp of messages consumed by mirror maker is not preserved after sending to target cluster. 
The correct behavior is to keep create timestamp the same in both source and target clusters. Author: Tao Xiao Reviewers: Jun Rao , Ismael Juma Closes #1466 from xiaotao183/KAFKA-3787 (cherry picked from commit f4a263b5a89e028ad3b658dca2259b6da0197214) Signed-off-by: Ismael Juma --- .../main/scala/kafka/tools/MirrorMaker.scala | 4 +- .../unit/kafka/tools/MirrorMakerTest.scala | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala diff --git a/core/src/main/scala/kafka/tools/MirrorMaker.scala b/core/src/main/scala/kafka/tools/MirrorMaker.scala index 87f3cc53bafb..7d6b5fbae527 100755 --- a/core/src/main/scala/kafka/tools/MirrorMaker.scala +++ b/core/src/main/scala/kafka/tools/MirrorMaker.scala @@ -673,9 +673,9 @@ object MirrorMaker extends Logging with KafkaMetricsGroup { def handle(record: BaseConsumerRecord): util.List[ProducerRecord[Array[Byte], Array[Byte]]] } - private object defaultMirrorMakerMessageHandler extends MirrorMakerMessageHandler { + private[tools] object defaultMirrorMakerMessageHandler extends MirrorMakerMessageHandler { override def handle(record: BaseConsumerRecord): util.List[ProducerRecord[Array[Byte], Array[Byte]]] = { - Collections.singletonList(new ProducerRecord[Array[Byte], Array[Byte]](record.topic, record.key, record.value)) + Collections.singletonList(new ProducerRecord[Array[Byte], Array[Byte]](record.topic, null, record.timestamp, record.key, record.value)) } } diff --git a/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala b/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala new file mode 100644 index 000000000000..39a0ac9d6d12 --- /dev/null +++ b/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package kafka.tools + +import kafka.consumer.BaseConsumerRecord +import org.apache.kafka.common.record.TimestampType +import org.junit.Assert._ +import org.junit.Test + +class MirrorMakerTest { + + @Test + def testDefaultMirrorMakerMessageHandler() { + val now = 12345L + val consumerRecord = BaseConsumerRecord("topic", 0, 1L, now, TimestampType.CREATE_TIME, "key".getBytes, "value".getBytes) + + val result = MirrorMaker.defaultMirrorMakerMessageHandler.handle(consumerRecord) + assertEquals(1, result.size) + + val producerRecord = result.get(0) + assertEquals(now, producerRecord.timestamp) + assertEquals("topic", producerRecord.topic) + assertNull(producerRecord.partition) + assertEquals("key", new String(producerRecord.key)) + assertEquals("value", new String(producerRecord.value)) + } +} From 780bc646d543eab86faaa215a15f51fba7c1c4f8 Mon Sep 17 00:00:00 2001 From: Sebastien Launay Date: Tue, 7 Jun 2016 01:22:58 +0100 Subject: [PATCH 168/267] KAFKA-3783; Catch proper exception on path delete - ZkClient is used for conditional path deletion and wraps `KeeperException.BadVersionException` into `ZkBadVersionException` - add unit test to `SimpleAclAuthorizerTest` to reproduce the issue and catch potential future regression Author: Sebastien Launay Reviewers: Ismael Juma Closes #1461 from slaunay/bugfix/KAFKA-3783-zk-conditional-delete-path (cherry picked from commit f643d1b75d17bb27a378c7e66fcc49607454e445) Signed-off-by: Ismael Juma --- core/src/main/scala/kafka/utils/ZkUtils.scala | 4 +- .../auth/SimpleAclAuthorizerTest.scala | 20 ++++++- .../scala/unit/kafka/utils/ZkUtilsTest.scala | 55 +++++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) create mode 100755 core/src/test/scala/unit/kafka/utils/ZkUtilsTest.scala diff --git a/core/src/main/scala/kafka/utils/ZkUtils.scala b/core/src/main/scala/kafka/utils/ZkUtils.scala index fad2c9c724e3..de4a9772918a 100644 --- a/core/src/main/scala/kafka/utils/ZkUtils.scala +++ b/core/src/main/scala/kafka/utils/ZkUtils.scala @@ -515,14 +515,14 @@ class ZkUtils(val zkClient: ZkClient, /** * Conditional delete the persistent path data, return true if it succeeds, - * otherwise (the current version is not the expected version) + * false otherwise (the current version is not the expected version) */ def conditionalDeletePath(path: String, expectedVersion: Int): Boolean = { try { zkClient.delete(path, expectedVersion) true } catch { - case e: KeeperException.BadVersionException => false + case e: ZkBadVersionException => false } } diff --git a/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala b/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala index 7fcc33dd8c1b..1f52af48f65f 100644 --- a/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala +++ b/core/src/test/scala/unit/kafka/security/auth/SimpleAclAuthorizerTest.scala @@ -336,12 +336,30 @@ class SimpleAclAuthorizerTest extends ZooKeeperTestHarness { aclId % 10 != 0 }.toSet - TestUtils.assertConcurrent("Should support many concurrent calls", concurrentFuctions, 15000) + TestUtils.assertConcurrent("Should support many concurrent calls", concurrentFuctions, 30 * 1000) TestUtils.waitAndVerifyAcls(expectedAcls, simpleAclAuthorizer, commonResource) TestUtils.waitAndVerifyAcls(expectedAcls, simpleAclAuthorizer2, commonResource) } + @Test + def testHighConcurrencyDeletionOfResourceAcls() { + val acl = new Acl(new KafkaPrincipal(KafkaPrincipal.USER_TYPE, username), Allow, WildCardHost, All) + + // Alternate authorizer to keep 
adding and removing zookeeper path + val concurrentFuctions = (0 to 50).map { i => + () => { + simpleAclAuthorizer.addAcls(Set(acl), resource) + simpleAclAuthorizer2.removeAcls(Set(acl), resource) + } + } + + TestUtils.assertConcurrent("Should support many concurrent calls", concurrentFuctions, 30 * 1000) + + TestUtils.waitAndVerifyAcls(Set.empty[Acl], simpleAclAuthorizer, resource) + TestUtils.waitAndVerifyAcls(Set.empty[Acl], simpleAclAuthorizer2, resource) + } + private def changeAclAndVerify(originalAcls: Set[Acl], addedAcls: Set[Acl], removedAcls: Set[Acl], resource: Resource = resource): Set[Acl] = { var acls = originalAcls diff --git a/core/src/test/scala/unit/kafka/utils/ZkUtilsTest.scala b/core/src/test/scala/unit/kafka/utils/ZkUtilsTest.scala new file mode 100755 index 000000000000..2d81ed987f6f --- /dev/null +++ b/core/src/test/scala/unit/kafka/utils/ZkUtilsTest.scala @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package kafka.utils + +import kafka.zk.ZooKeeperTestHarness +import org.junit.Assert._ +import org.junit.Test + +class ZkUtilsTest extends ZooKeeperTestHarness { + + val path = "/path" + + @Test + def testSuccessfulConditionalDeletePath() { + // Given an existing path + zkUtils.createPersistentPath(path) + val (_, statAfterCreation) = zkUtils.readData(path) + + // Deletion is successful when the version number matches + assertTrue("Deletion should be successful", zkUtils.conditionalDeletePath(path, statAfterCreation.getVersion)) + val (optionalData, _) = zkUtils.readDataMaybeNull(path) + assertTrue("Node should be deleted", optionalData.isEmpty) + + // Deletion is successful when the node does not exist too + assertTrue("Deletion should be successful", zkUtils.conditionalDeletePath(path, 0)) + } + + @Test + def testAbortedConditionalDeletePath() { + // Given an existing path that gets updated + zkUtils.createPersistentPath(path) + val (_, statAfterCreation) = zkUtils.readData(path) + zkUtils.updatePersistentPath(path, "data") + + // Deletion is aborted when the version number does not match + assertFalse("Deletion should be aborted", zkUtils.conditionalDeletePath(path, statAfterCreation.getVersion)) + val (optionalData, _) = zkUtils.readDataMaybeNull(path) + assertTrue("Node should still be there", optionalData.isDefined) + } +} From b372c4bf4a65c25086fc7d8e60f1970d5486ce74 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 6 Jun 2016 20:28:31 -0700 Subject: [PATCH 169/267] KAFKA-3781; Errors.exceptionName() can throw NPE Author: Ismael Juma Reviewers: Grant Henke , Ewen Cheslack-Postava Closes #1476 from ijuma/kafka-3781-exception-name-npe (cherry picked from commit feab5a374a33a7b7b8e96c6a88b872c4db33dcf1) Signed-off-by: Ewen Cheslack-Postava --- 
.../java/org/apache/kafka/common/protocol/Errors.java | 4 ++-- .../org/apache/kafka/common/protocol/ErrorsTest.java | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java index 64a709e5eeaa..bd7310ba4551 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java @@ -170,10 +170,10 @@ public ApiException exception() { } /** - * Returns the class name of the exception + * Returns the class name of the exception or null if this is {@code Errors.NONE}. */ public String exceptionName() { - return exception.getClass().getName(); + return exception == null ? null : exception.getClass().getName(); } /** diff --git a/clients/src/test/java/org/apache/kafka/common/protocol/ErrorsTest.java b/clients/src/test/java/org/apache/kafka/common/protocol/ErrorsTest.java index 2d96e587a607..e198e739ac96 100644 --- a/clients/src/test/java/org/apache/kafka/common/protocol/ErrorsTest.java +++ b/clients/src/test/java/org/apache/kafka/common/protocol/ErrorsTest.java @@ -77,4 +77,14 @@ public void testForExceptionDefault() { assertEquals("forException should default to unknown", Errors.UNKNOWN, error); } + @Test + public void testExceptionName() { + String exceptionName = Errors.UNKNOWN.exceptionName(); + assertEquals("org.apache.kafka.common.errors.UnknownServerException", exceptionName); + exceptionName = Errors.NONE.exceptionName(); + assertNull(exceptionName); + exceptionName = Errors.INVALID_TOPIC_EXCEPTION.exceptionName(); + assertEquals("org.apache.kafka.common.errors.InvalidTopicException", exceptionName); + } + } From 86cce341b8fd5439501a1451d7f2e90691494922 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 10 Jun 2016 13:14:05 -0700 Subject: [PATCH 170/267] KAFKA-3817: handle null keys in KTableRepartitionMap Author: Guozhang Wang Reviewers: Jeff Klukas Closes #1488 from guozhangwang/K3817-handle-null-groupedkey (cherry picked from commit da8517182d2f30c4e03b33b38d41d2fa33621e24) Signed-off-by: Guozhang Wang --- .../internals/KTableRepartitionMap.java | 31 ++++-------- .../internals/KTableAggregateTest.java | 50 +++++++++++++++++++ 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java index 2a7cf1b56f05..bba185719aaa 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java @@ -53,7 +53,6 @@ public KTableValueGetterSupplier> view() { public KTableValueGetter> get() { return new KTableMapValueGetter(parentValueGetterSupplier.get()); } - }; } @@ -66,15 +65,6 @@ public void enableSendingOldValues() { throw new IllegalStateException("KTableRepartitionMap should always require sending old values."); } - private KeyValue computeValue(K key, V value) { - KeyValue newValue = null; - - if (key != null || value != null) - newValue = mapper.apply(key, value); - - return newValue; - } - private class KTableMapProcessor extends AbstractProcessor> { /** @@ -82,16 +72,18 @@ private class KTableMapProcessor extends AbstractProcessor> { */ @Override public void process(K key, Change change) { - KeyValue newPair = computeValue(key, 
change.newValue); - - // the selected repartition key should never be null - if (newPair.key == null) - throw new StreamsException("Record key for KTable repartition operator should not be null."); + // the original key should never be null + if (key == null) + throw new StreamsException("Record key for the grouping KTable should not be null."); - context().forward(newPair.key, new Change<>(newPair.value, null)); + KeyValue newPair = mapper.apply(key, change.newValue); + KeyValue oldPair = mapper.apply(key, change.oldValue); - if (change.oldValue != null) { - KeyValue oldPair = computeValue(key, change.oldValue); + // if the selected repartition key or value is null, skip + if (newPair != null && newPair.key != null && newPair.value != null) { + context().forward(newPair.key, new Change<>(newPair.value, null)); + } + if (oldPair != null && oldPair.key != null && oldPair.value != null) { context().forward(oldPair.key, new Change<>(null, oldPair.value)); } } @@ -112,9 +104,8 @@ public void init(ProcessorContext context) { @Override public KeyValue get(K key) { - return computeValue(key, parentGetter.get(key)); + return mapper.apply(key, parentGetter.get(key)); } - } } diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java index a6144796490f..75e007dc62d8 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java @@ -20,8 +20,10 @@ import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.common.serialization.Serdes; import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.kstream.KStreamBuilder; import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.KeyValueMapper; import org.apache.kafka.test.KStreamTestDriver; import org.apache.kafka.test.MockAggregator; import org.apache.kafka.test.MockInitializer; @@ -96,4 +98,52 @@ public void testAggBasic() throws Exception { "B:0+2+4-2+7", "B:0+2+4-2+7-4", "C:0+5+8", "C:0+5+8-5"), proc2.processed); } + + @Test + public void testAggRepartition() throws Exception { + final KStreamBuilder builder = new KStreamBuilder(); + String topic1 = "topic1"; + + KTable table1 = builder.table(stringSerde, stringSerde, topic1); + KTable table2 = table1.groupBy(new KeyValueMapper>() { + @Override + public KeyValue apply(String key, String value) { + if (key.equals("null")) { + return KeyValue.pair(null, value + "s"); + } else if (key.equals("NULL")) { + return null; + } else { + return KeyValue.pair(value, value + "s"); + } + } + }, + stringSerde, + stringSerde + ) + .aggregate(MockInitializer.STRING_INIT, + MockAggregator.STRING_ADDER, + MockAggregator.STRING_REMOVER, + stringSerde, + "topic1-Canonized"); + + MockProcessorSupplier proc2 = new MockProcessorSupplier<>(); + table2.toStream().process(proc2); + + driver = new KStreamTestDriver(builder, stateDir); + + driver.process(topic1, "A", "1"); + driver.process(topic1, "B", "2"); + driver.process(topic1, "null", "3"); + driver.process(topic1, "B", "4"); + driver.process(topic1, "NULL", "5"); + driver.process(topic1, "B", "7"); + + assertEquals(Utils.mkList( + "1:0+1s", + "2:0+2s", + "4:0+4s", + "2:0+2s-2s", + "7:0+7s", + "4:0+4s-4s"), proc2.processed); + } } From 9e2f067d11be908b86e50a79c6cb2b5138168db0 Mon Sep 17 00:00:00 2001 
From: Ashish Singh Date: Tue, 14 Jun 2016 09:49:54 -0700 Subject: [PATCH 171/267] KAFKA-3699: Update protocol page on website to explain how KIP-35 should be used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …uld be used Author: Ashish Singh Reviewers: Oleksiy Krivoshey , Gwen Shapira , Magnus Edenhill , Dana Powers , Ewen Cheslack-Postava Closes #1395 from SinghAsDev/KAFKA-3699 (cherry picked from commit b1ba54025fc872e14e1ff97fde018637826a3a5e) Signed-off-by: Ewen Cheslack-Postava --- docs/protocol.html | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/protocol.html b/docs/protocol.html index c26f16b464cd..e28b0a8a6172 100644 --- a/docs/protocol.html +++ b/docs/protocol.html @@ -114,6 +114,32 @@
          Versioning and Compatibility

          Currently all versions are baselined at 0, as we evolve these APIs we will indicate the format for each version individually.

+         Retrieving Supported API versions
+
+         In order for a client to successfully talk to a broker, it must use request versions supported by the broker. Clients
+         may work against multiple broker versions; however, to do so the clients need to know what versions of the various APIs a
+         broker supports. Starting from 0.10.0.0, brokers provide information on the versions of APIs they support. Details of
+         this new capability can be found here. Clients may use the supported API versions information to take appropriate
+         actions, such as propagating an unsupported API version error to the application or choosing an API request/response
+         version supported by both the client and the broker. The following sequence may be used by a client to obtain supported
+         API versions from a broker (a minimal wire-level sketch of this exchange follows after the list).
+
+         1. Client sends ApiVersionsRequest to a broker after the connection has been established with the broker. If SSL is
+            enabled, this happens after the SSL connection has been established.
+         2. On receiving ApiVersionsRequest, a broker returns its full list of supported ApiKeys and versions regardless of the
+            current authentication state (e.g., before SASL authentication on a SASL listener; do note that no Kafka protocol
+            requests may take place on an SSL listener before the SSL handshake is finished). If this is considered to leak
+            information about the broker version, a workaround is to use SSL with client authentication, which is performed at
+            an earlier stage of the connection where the ApiVersionsRequest is not available. Also note that broker versions
+            older than 0.10.0.0 do not support this API and will either ignore the request or close the connection in response.
+         3. If multiple versions of an API are supported by both broker and client, clients are recommended to use the latest
+            version supported by both.
+         4. Deprecation of a protocol version is done by marking an API version as deprecated in the protocol documentation.
+         5. Supported API versions obtained from a broker are valid only for the current connection on which that information is
+            obtained. In the event of disconnection, the client should obtain the information from the broker again, as the
+            broker might have been upgraded/downgraded in the meantime.
+
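The standalone Java sketch below walks through step 1 of the sequence: it hand-frames an ApiVersionsRequest (api key 18, version 0) and prints the api key / version ranges from the v0 response. It is illustrative only; the broker host, port and client id are placeholders, error handling is omitted, and a real client would use its protocol layer rather than raw sockets.

    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.net.Socket;
    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    public class ApiVersionsProbe {
        public static void main(String[] args) throws Exception {
            String host = "localhost";                 // placeholder broker host
            int port = 9092;                           // placeholder broker port
            byte[] clientId = "api-versions-probe".getBytes(StandardCharsets.UTF_8);

            // Request header: api_key=18 (ApiVersions), api_version=0, correlation_id, client_id.
            // The ApiVersionsRequest v0 body itself is empty.
            ByteBuffer request = ByteBuffer.allocate(2 + 2 + 4 + 2 + clientId.length);
            request.putShort((short) 18).putShort((short) 0).putInt(1)
                   .putShort((short) clientId.length).put(clientId);

            try (Socket socket = new Socket(host, port)) {
                DataOutputStream out = new DataOutputStream(socket.getOutputStream());
                DataInputStream in = new DataInputStream(socket.getInputStream());

                out.writeInt(request.position());      // 4-byte size prefix
                out.write(request.array(), 0, request.position());
                out.flush();

                int size = in.readInt();
                byte[] payload = new byte[size];
                in.readFully(payload);
                ByteBuffer response = ByteBuffer.wrap(payload);

                response.getInt();                     // correlation id, echoes the 1 sent above
                short errorCode = response.getShort();
                int apiCount = response.getInt();
                System.out.println("error_code=" + errorCode + ", apis=" + apiCount);
                for (int i = 0; i < apiCount; i++) {
                    System.out.printf("api_key=%d min_version=%d max_version=%d%n",
                            response.getShort(), response.getShort(), response.getShort());
                }
            }
        }
    }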
          SASL Authentication Sequence

          The following sequence is used for SASL authentication:

            From 78ac7b6b09b6c2c9062bcb98d7683c1f9a427b65 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Wed, 15 Jun 2016 09:32:40 -0700 Subject: [PATCH 172/267] KAFKA-3830; getTGT() debug logging exposes confidential information Only log the client and server principals, which is what ZooKeeper does after ZOOKEEPER-2405. Author: Ismael Juma Reviewers: Grant Henke , Sriharsha Chintalapani Closes #1498 from ijuma/kafka-3830-get-tgt-debug-confidential (cherry picked from commit 84ca887295efbd99a6a7d7363f77d59b7a42b642) Signed-off-by: Sriharsha Chintalapani --- .../apache/kafka/common/security/kerberos/KerberosLogin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/common/security/kerberos/KerberosLogin.java b/clients/src/main/java/org/apache/kafka/common/security/kerberos/KerberosLogin.java index 58becdf67fd7..74b4ff288856 100644 --- a/clients/src/main/java/org/apache/kafka/common/security/kerberos/KerberosLogin.java +++ b/clients/src/main/java/org/apache/kafka/common/security/kerberos/KerberosLogin.java @@ -334,7 +334,8 @@ private synchronized KerberosTicket getTGT() { for (KerberosTicket ticket : tickets) { KerberosPrincipal server = ticket.getServer(); if (server.getName().equals("krbtgt/" + server.getRealm() + "@" + server.getRealm())) { - log.debug("Found TGT {}.", ticket); + log.debug("Found TGT with client principal '{}' and server principal '{}'.", ticket.getClient().getName(), + ticket.getServer().getName()); return ticket; } } From 9ba2fdf8b70da85668966026d41b6fd4b8679108 Mon Sep 17 00:00:00 2001 From: Rajini Sivaram Date: Thu, 16 Jun 2016 09:10:59 +0200 Subject: [PATCH 173/267] MINOR: Fix quota violation exception message Author: Rajini Sivaram Reviewers: Ismael Juma Closes #1508 from rajinisivaram/MINOR-quota-exception (cherry picked from commit eb2619cac0849dd54db956e87781d143a57b2477) Signed-off-by: Ismael Juma --- .../main/java/org/apache/kafka/common/metrics/Sensor.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java index 098bfa85e5d1..3e500d54e9ea 100644 --- a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java +++ b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java @@ -121,10 +121,10 @@ private void checkQuotas(long timeMs) { double value = metric.value(timeMs); if (!quota.acceptable(value)) { throw new QuotaViolationException(String.format( - "(%s) violated quota. Actual: (%f), Threshold: (%f)", + "'%s' violated quota. Actual: %f, Threshold: %f", metric.metricName(), - quota.bound(), - value)); + value, + quota.bound())); } } } From 8d38c115ab6e61496eee84f790220f1643a1a804 Mon Sep 17 00:00:00 2001 From: Eno Thereska Date: Thu, 16 Jun 2016 16:18:02 -0700 Subject: [PATCH 174/267] KAFKA-3805: Check if DB is null. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check if DB is null before flushing or closing. In some cases, a state store is closed twice. This happens in `StreamTask.close()` where both `node.close()` and `super.close` (in `ProcessorManager`) are called in a sequence. If the user's processor defines a `close` that closes the underlying state store, then the second close will be redundant. 
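The fix boils down to making flush() and close() idempotent by guarding on the store handle, so that a second close (e.g. from the surrounding task cleanup described above) becomes a no-op. A minimal Java sketch of that guard, with stand-in names rather than the actual RocksDBStore internals:

    // Illustrative sketch of an idempotent state store: flush() and close() are safe to
    // call repeatedly because both bail out once the underlying handle has been released.
    public class GuardedStore {
        private Object db = new Object();   // stand-in for the RocksDB handle

        public void flush() {
            if (db == null)
                return;                     // already closed, nothing to flush
            // ... write cached entries through to the underlying store ...
        }

        public void close() {
            if (db == null)
                return;                     // a second close is a no-op
            flush();
            // ... release the underlying handle ...
            db = null;
        }
    }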
Author: Eno Thereska Reviewers: Andrés Gómez, Ismael Juma, Guozhang Wang Closes #1485 from enothereska/KAFKA-3805-locks (cherry picked from commit 751fe9309011b99f60c1cb03c23a47d0444dce05) Signed-off-by: Guozhang Wang --- .../org/apache/kafka/streams/processor/StateStore.java | 4 +++- .../processor/internals/ProcessorStateManager.java | 4 ++++ .../kafka/streams/state/internals/RocksDBStore.java | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/StateStore.java b/streams/src/main/java/org/apache/kafka/streams/processor/StateStore.java index f79e6f68f5ba..68f3644a97bd 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/StateStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/StateStore.java @@ -46,7 +46,9 @@ public interface StateStore { void flush(); /** - * Close the storage engine + * Close the storage engine. + * Note that this function needs to be idempotent since it may be called + * several times on the same state store. */ void close(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java index 1d97384a9bf5..92b1069936e8 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java @@ -134,6 +134,8 @@ private static FileLock lockStateDirectory(File stateDir, int retry) throws IOEx retry--; lock = lockStateDirectory(channel); } + // TODO: closing the channel here risks releasing all locks on the file + // see {@link https://issues.apache.org/jira/browse/KAFKA-3812} if (lock == null) { channel.close(); } @@ -336,6 +338,8 @@ public void flush() { */ public void close(Map ackedOffsets) throws IOException { try { + // attempting to flush and close the stores, just in case they + // are not closed by a ProcessorNode yet if (!stores.isEmpty()) { log.debug("Closing stores."); for (Map.Entry entry : stores.entrySet()) { diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index 37609a0d28b3..a6dc8818c01f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -404,6 +404,10 @@ private void flushCache() { @Override public void flush() { + if (db == null) { + return; + } + // flush of the cache entries if necessary flushCache(); @@ -424,6 +428,11 @@ public void flushInternal() { @Override public void close() { + + if (db == null) { + return; + } + flush(); db.close(); } From c052002e75bfa6c835ca3d0654e806c998e0fce3 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Mon, 16 May 2016 14:44:23 -0700 Subject: [PATCH 175/267] MINOR: Add INFO logging if ZK config is not specified Author: Guozhang Wang Reviewers: Ismael Juma Closes #1392 from guozhangwang/Kminor-warn-no-zk-config --- .../streams/processor/internals/StreamPartitionAssignor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java index f2eea36c11c1..085ff94aa0dd 100644 --- 
a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java @@ -124,6 +124,8 @@ public void configure(Map configs) { internalTopicManager = new InternalTopicManager( (String) configs.get(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG), configs.containsKey(StreamsConfig.REPLICATION_FACTOR_CONFIG) ? (Integer) configs.get(StreamsConfig.REPLICATION_FACTOR_CONFIG) : 1); + } else { + log.info("Config '{}' isn't supplied and hence no internal topics will be created.", StreamsConfig.ZOOKEEPER_CONNECT_CONFIG); } } From 73949c288d43ed848550c1b5ba6cbbf297b110b8 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Tue, 17 May 2016 11:25:49 -0700 Subject: [PATCH 176/267] KAFKA-3716; Validate all timestamps are not negative Author: Guozhang Wang Reviewers: Eno Thereska, Ismael Juma Closes #1393 from guozhangwang/K3716-check-non-negative-timestamps --- .../java/org/apache/kafka/streams/kstream/Windows.java | 6 +----- .../kafka/streams/processor/internals/RecordQueue.java | 5 +++++ .../kafka/streams/processor/internals/StreamTask.java | 7 ++++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java index 06cacb4fa128..c64a80f2a7e5 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java @@ -17,9 +17,7 @@ package org.apache.kafka.streams.kstream; - import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; /** * The window specification interface that can be extended for windowing operation in joins and aggregations. @@ -32,8 +30,6 @@ public abstract class Windows { private static final long DEFAULT_MAINTAIN_DURATION = 24 * 60 * 60 * 1000L; // one day - private static final AtomicInteger NAME_INDEX = new AtomicInteger(0); - protected String name; private long maintainDurationMs; @@ -86,7 +82,7 @@ public long maintainMs() { } /** - * Creates all windows that contain the provided timestamp. + * Creates all windows that contain the provided timestamp, indexed by non-negative window start timestamps. 
* * @param timestamp the timestamp window should get created for * @return a map of {@code windowStartTimestamp -> Window} entries diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java index 6911a4546efe..7e5baf30679b 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java @@ -20,6 +20,7 @@ import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.record.TimestampType; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.processor.TimestampExtractor; import java.util.ArrayDeque; @@ -84,6 +85,10 @@ public int addRawRecords(Iterable> rawRecords, Ti rawRecord.serializedValueSize(), key, value); long timestamp = timestampExtractor.extract(record); + // validate that timestamp must be non-negative + if (timestamp < 0) + throw new StreamsException("Extracted timestamp value is negative, which is not allowed."); + StampedRecord stampedRecord = new StampedRecord(record, timestamp); fifoQueue.addLast(stampedRecord); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java index d9efb6debb4b..e7e24fb6593f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java @@ -209,7 +209,12 @@ public boolean requiresPoll() { public boolean maybePunctuate() { long timestamp = partitionGroup.timestamp(); - return punctuationQueue.mayPunctuate(timestamp, this); + // if the timestamp is not known yet, meaning there is not enough data accumulated + // to reason stream partition time, then skip. 
+ if (timestamp == TimestampTracker.NOT_KNOWN) + return false; + else + return punctuationQueue.mayPunctuate(timestamp, this); } /** From bef359ef2e53920d91dfac037c0fceefb51954d1 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 20 May 2016 11:52:36 -0700 Subject: [PATCH 177/267] KAFKA-3735: Dispose all RocksObejcts upon completeness Author: Guozhang Wang Reviewers: Roger Hoover, Eno Thereska, Ismael Juma Closes #1411 from guozhangwang/K3735-dispose-rocksobject --- .../wordcount/WordCountProcessorDemo.java | 16 +++--- .../kstream/internals/KStreamKStreamJoin.java | 19 +++---- .../internals/KStreamWindowAggregate.java | 49 ++++++++-------- .../internals/KStreamWindowReduce.java | 56 +++++++++---------- .../kafka/streams/state/KeyValueIterator.java | 3 + .../streams/state/WindowStoreIterator.java | 8 ++- .../streams/state/internals/RocksDBStore.java | 31 ++++++---- .../internals/ProcessorTopologyTest.java | 8 ++- .../state/KeyValueStoreTestDriver.java | 8 ++- .../internals/RocksDBWindowStoreTest.java | 7 ++- 10 files changed, 110 insertions(+), 95 deletions(-) diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java index 34c35b7c2fc2..1ee6928e98f2 100644 --- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java +++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountProcessorDemo.java @@ -81,19 +81,17 @@ public void process(String dummy, String line) { @Override public void punctuate(long timestamp) { - KeyValueIterator iter = this.kvStore.all(); + try (KeyValueIterator iter = this.kvStore.all()) { + System.out.println("----------- " + timestamp + " ----------- "); - System.out.println("----------- " + timestamp + " ----------- "); + while (iter.hasNext()) { + KeyValue entry = iter.next(); - while (iter.hasNext()) { - KeyValue entry = iter.next(); + System.out.println("[" + entry.key + ", " + entry.value + "]"); - System.out.println("[" + entry.key + ", " + entry.value + "]"); - - context.forward(entry.key, entry.value.toString()); + context.forward(entry.key, entry.value.toString()); + } } - - iter.close(); } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java index d13d11208d91..72029a8c2409 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java @@ -17,7 +17,6 @@ package org.apache.kafka.streams.kstream.internals; -import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.kstream.ValueJoiner; import org.apache.kafka.streams.processor.AbstractProcessor; @@ -25,8 +24,8 @@ import org.apache.kafka.streams.processor.ProcessorContext; import org.apache.kafka.streams.processor.ProcessorSupplier; import org.apache.kafka.streams.state.WindowStore; +import org.apache.kafka.streams.state.WindowStoreIterator; -import java.util.Iterator; class KStreamKStreamJoin implements ProcessorSupplier { @@ -76,15 +75,15 @@ public void process(K key, V1 value) { long timeFrom = Math.max(0L, context().timestamp() - joinBeforeMs); long timeTo = Math.max(0L, context().timestamp() + joinAfterMs); - Iterator> 
iter = otherWindow.fetch(key, timeFrom, timeTo); - while (iter.hasNext()) { - needOuterJoin = false; - context().forward(key, joiner.apply(value, iter.next().value)); - } + try (WindowStoreIterator iter = otherWindow.fetch(key, timeFrom, timeTo)) { + while (iter.hasNext()) { + needOuterJoin = false; + context().forward(key, joiner.apply(value, iter.next().value)); + } - if (needOuterJoin) - context().forward(key, joiner.apply(value, null)); + if (needOuterJoin) + context().forward(key, joiner.apply(value, null)); + } } } - } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java index b4272f89a827..125c7fcc25d0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java @@ -29,7 +29,6 @@ import org.apache.kafka.streams.state.WindowStore; import org.apache.kafka.streams.state.WindowStoreIterator; -import java.util.Iterator; import java.util.Map; public class KStreamWindowAggregate implements KStreamAggProcessorSupplier, V, T> { @@ -90,38 +89,37 @@ public void process(K key, V value) { timeTo = windowStartMs > timeTo ? windowStartMs : timeTo; } - WindowStoreIterator iter = windowStore.fetch(key, timeFrom, timeTo); + try (WindowStoreIterator iter = windowStore.fetch(key, timeFrom, timeTo)) { - // for each matching window, try to update the corresponding key and send to the downstream - while (iter.hasNext()) { - KeyValue entry = iter.next(); - W window = matchedWindows.get(entry.key); + // for each matching window, try to update the corresponding key and send to the downstream + while (iter.hasNext()) { + KeyValue entry = iter.next(); + W window = matchedWindows.get(entry.key); - if (window != null) { + if (window != null) { - T oldAgg = entry.value; + T oldAgg = entry.value; - if (oldAgg == null) - oldAgg = initializer.apply(); + if (oldAgg == null) + oldAgg = initializer.apply(); - // try to add the new new value (there will never be old value) - T newAgg = aggregator.apply(key, value, oldAgg); + // try to add the new new value (there will never be old value) + T newAgg = aggregator.apply(key, value, oldAgg); - // update the store with the new value - windowStore.put(key, newAgg, window.start()); + // update the store with the new value + windowStore.put(key, newAgg, window.start()); - // forward the aggregated change pair - if (sendOldValues) - context().forward(new Windowed<>(key, window), new Change<>(newAgg, oldAgg)); - else - context().forward(new Windowed<>(key, window), new Change<>(newAgg, null)); + // forward the aggregated change pair + if (sendOldValues) + context().forward(new Windowed<>(key, window), new Change<>(newAgg, oldAgg)); + else + context().forward(new Windowed<>(key, window), new Change<>(newAgg, null)); - matchedWindows.remove(entry.key); + matchedWindows.remove(entry.key); + } } } - iter.close(); - // create the new window for the rest of unmatched window that do not exist yet for (long windowStartMs : matchedWindows.keySet()) { T oldAgg = initializer.apply(); @@ -167,10 +165,9 @@ public T get(Windowed windowedKey) { W window = (W) windowedKey.window(); // this iterator should contain at most one element - Iterator> iter = windowStore.fetch(key, window.start(), window.start()); - - return iter.hasNext() ? 
iter.next().value : null; + try (WindowStoreIterator iter = windowStore.fetch(key, window.start(), window.start())) { + return iter.hasNext() ? iter.next().value : null; + } } - } } diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java index 3ed1499f658a..a526506c1793 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java @@ -28,7 +28,6 @@ import org.apache.kafka.streams.state.WindowStore; import org.apache.kafka.streams.state.WindowStoreIterator; -import java.util.Iterator; import java.util.Map; public class KStreamWindowReduce implements KStreamAggProcessorSupplier, V, V> { @@ -88,40 +87,38 @@ public void process(K key, V value) { timeTo = windowStartMs > timeTo ? windowStartMs : timeTo; } - WindowStoreIterator iter = windowStore.fetch(key, timeFrom, timeTo); + try (WindowStoreIterator iter = windowStore.fetch(key, timeFrom, timeTo)) { + // for each matching window, try to update the corresponding key and send to the downstream + while (iter.hasNext()) { + KeyValue entry = iter.next(); + W window = matchedWindows.get(entry.key); - // for each matching window, try to update the corresponding key and send to the downstream - while (iter.hasNext()) { - KeyValue entry = iter.next(); - W window = matchedWindows.get(entry.key); + if (window != null) { - if (window != null) { + V oldAgg = entry.value; + V newAgg = oldAgg; - V oldAgg = entry.value; - V newAgg = oldAgg; + // try to add the new new value (there will never be old value) + if (newAgg == null) { + newAgg = value; + } else { + newAgg = reducer.apply(newAgg, value); + } - // try to add the new new value (there will never be old value) - if (newAgg == null) { - newAgg = value; - } else { - newAgg = reducer.apply(newAgg, value); - } - - // update the store with the new value - windowStore.put(key, newAgg, window.start()); + // update the store with the new value + windowStore.put(key, newAgg, window.start()); - // forward the aggregated change pair - if (sendOldValues) - context().forward(new Windowed<>(key, window), new Change<>(newAgg, oldAgg)); - else - context().forward(new Windowed<>(key, window), new Change<>(newAgg, null)); + // forward the aggregated change pair + if (sendOldValues) + context().forward(new Windowed<>(key, window), new Change<>(newAgg, oldAgg)); + else + context().forward(new Windowed<>(key, window), new Change<>(newAgg, null)); - matchedWindows.remove(entry.key); + matchedWindows.remove(entry.key); + } } } - iter.close(); - // create the new window for the rest of unmatched window that do not exist yet for (long windowStartMs : matchedWindows.keySet()) { windowStore.put(key, value, windowStartMs); @@ -161,10 +158,9 @@ public V get(Windowed windowedKey) { W window = (W) windowedKey.window(); // this iterator should only contain one element - Iterator> iter = windowStore.fetch(key, window.start(), window.start()); - - return iter.next().value; + try (WindowStoreIterator iter = windowStore.fetch(key, window.start(), window.start())) { + return iter.next().value; + } } - } } diff --git a/streams/src/main/java/org/apache/kafka/streams/state/KeyValueIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/KeyValueIterator.java index cdb3de5f90a2..ddbc7b333b6b 100644 --- 
a/streams/src/main/java/org/apache/kafka/streams/state/KeyValueIterator.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/KeyValueIterator.java @@ -27,6 +27,9 @@ /** * Iterator interface of {@link KeyValue}. * + * Users need to call its {@code close} method explicitly upon completeness to release resources, + * or use try-with-resources statement (available since JDK7) for this {@link Closeable} class. + * * @param Type of keys * @param Type of values */ diff --git a/streams/src/main/java/org/apache/kafka/streams/state/WindowStoreIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/WindowStoreIterator.java index 7c474dd60bf8..b6e6d0c2df38 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/WindowStoreIterator.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/WindowStoreIterator.java @@ -21,13 +21,19 @@ import org.apache.kafka.streams.KeyValue; +import java.io.Closeable; import java.util.Iterator; /** * Iterator interface of {@link KeyValue} with key typed {@link Long} used for {@link WindowStore#fetch(Object, long, long)}. * + * Users need to call its {@code close} method explicitly upon completeness to release resources, + * or use try-with-resources statement (available since JDK7) for this {@link Closeable} class. + * * @param Type of values */ -public interface WindowStoreIterator extends Iterator> { +public interface WindowStoreIterator extends Iterator>, Closeable { + + @Override void close(); } diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index 37609a0d28b3..a00de19926fc 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -77,17 +77,18 @@ public class RocksDBStore implements KeyValueStore { private final String name; private final String parentDir; - private final Options options; - private final WriteOptions wOptions; - private final FlushOptions fOptions; - + protected File dbDir; + private StateSerdes serdes; private final Serde keySerde; private final Serde valueSerde; - private StateSerdes serdes; - protected File dbDir; private RocksDB db; + // the following option objects will be created at constructor and disposed at close() + private Options options; + private WriteOptions wOptions; + private FlushOptions fOptions; + private boolean loggingEnabled = false; private int cacheSize = DEFAULT_UNENCODED_CACHE_SIZE; @@ -313,14 +314,16 @@ public void putAll(List> entries) { private void putAllInternal(List> entries) { WriteBatch batch = new WriteBatch(); - for (KeyValue entry : entries) { - batch.put(entry.key, entry.value); - } - try { + for (KeyValue entry : entries) { + batch.put(entry.key, entry.value); + } + db.write(wOptions, batch); } catch (RocksDBException e) { throw new ProcessorStateException("Error while batch writing to store " + this.name, e); + } finally { + batch.dispose(); } } @@ -425,7 +428,15 @@ public void flushInternal() { @Override public void close() { flush(); + options.dispose(); + wOptions.dispose(); + fOptions.dispose(); db.close(); + + options = null; + wOptions = null; + fOptions = null; + db = null; } private static class RocksDbIterator implements KeyValueIterator { diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java 
b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java index 1095fcf513ff..62b283aefd94 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java @@ -351,9 +351,11 @@ public void process(String key, String value) { @Override public void punctuate(long streamTime) { int count = 0; - for (KeyValueIterator iter = store.all(); iter.hasNext();) { - iter.next(); - ++count; + try (KeyValueIterator iter = store.all()) { + while (iter.hasNext()) { + iter.next(); + ++count; + } } context().forward(Long.toString(streamTime), count); } diff --git a/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java b/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java index 3a35d7542fce..be5596d05362 100644 --- a/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java +++ b/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java @@ -362,9 +362,11 @@ public int checkForRestoredEntries(KeyValueStore store) { */ public int sizeOf(KeyValueStore store) { int size = 0; - for (KeyValueIterator iterator = store.all(); iterator.hasNext();) { - iterator.next(); - ++size; + try (KeyValueIterator iterator = store.all()) { + while (iterator.hasNext()) { + iterator.next(); + ++size; + } } return size; } diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java index e9888ada6be6..d889e7b32342 100644 --- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java @@ -785,9 +785,10 @@ public void send(ProducerRecord record, Serializer keySeria segmentDirs(baseDir) ); - WindowStoreIterator iter = store.fetch(0, 0L, 1000000L); - while (iter.hasNext()) { - iter.next(); + try (WindowStoreIterator iter = store.fetch(0, 0L, 1000000L)) { + while (iter.hasNext()) { + iter.next(); + } } assertEquals( From 175fbb559073f21d9ae2b65be22fddbae4e152ea Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Sat, 18 Jun 2016 11:25:33 -0700 Subject: [PATCH 178/267] HOTFIX: Check hasNext in KStreamWindowReduce Author: Guozhang Wang Reviewers: Damian Guy, Matthias J. 
Sax Closes #1520 from guozhangwang/KHotfix-iter-hasNext-window-value-getter --- .../streams/kstream/internals/KStreamWindowReduce.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java index a526506c1793..510c138990f0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowReduce.java @@ -135,13 +135,13 @@ public KTableValueGetterSupplier, V> view() { return new KTableValueGetterSupplier, V>() { public KTableValueGetter, V> get() { - return new KStreamAggregateValueGetter(); + return new KStreamWindowReduceValueGetter(); } }; } - private class KStreamAggregateValueGetter implements KTableValueGetter, V> { + private class KStreamWindowReduceValueGetter implements KTableValueGetter, V> { private WindowStore windowStore; @@ -159,7 +159,7 @@ public V get(Windowed windowedKey) { // this iterator should only contain one element try (WindowStoreIterator iter = windowStore.fetch(key, window.start(), window.start())) { - return iter.next().value; + return iter.hasNext() ? iter.next().value : null; } } } From 84c995d8d55cb920c0fb139107fb08c766d9476e Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Sat, 18 Jun 2016 12:02:17 -0700 Subject: [PATCH 179/267] MINOR: Check null keys in KTableSource Author: Guozhang Wang Reviewers: Damian Guy, Matthias J. Sax Closes #1521 from guozhangwang/Kminor-check-nullkey-ktable-source (cherry picked from commit 91135ea33a7231e9b17cfc74c78dc5223475834b) Signed-off-by: Guozhang Wang --- .../apache/kafka/streams/kstream/KStreamBuilder.java | 2 ++ .../kafka/streams/kstream/internals/KTableSource.java | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java index 9d90ba053cce..9f7f9c4bd0f5 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStreamBuilder.java @@ -77,6 +77,7 @@ public KStream stream(Serde keySerde, Serde valSerde, String. /** * Create a {@link KTable} instance for the specified topic. + * Record keys of the topic should never by null, otherwise an exception will be thrown at runtime. * The default deserializers specified in the config are used. * * @param topic the topic name; cannot be null @@ -88,6 +89,7 @@ public KTable table(String topic) { /** * Create a {@link KTable} instance for the specified topic. + * Record keys of the topic should never by null, otherwise an exception will be thrown at runtime. 
* * @param keySerde key serde used to send key-value pairs, * if not specified the default key serde defined in the configuration will be used diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java index 8010b3a3a348..5aafc026398a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java @@ -17,6 +17,7 @@ package org.apache.kafka.streams.kstream.internals; +import org.apache.kafka.streams.errors.StreamsException; import org.apache.kafka.streams.processor.AbstractProcessor; import org.apache.kafka.streams.processor.Processor; import org.apache.kafka.streams.processor.ProcessorContext; @@ -54,6 +55,10 @@ public void enableSendingOldValues() { private class KTableSourceProcessor extends AbstractProcessor { @Override public void process(K key, V value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for the source KTable from topic " + topic + " should not be null."); + context().forward(key, new Change<>(value, null)); } } @@ -71,11 +76,14 @@ public void init(ProcessorContext context) { @Override public void process(K key, V value) { + // the keys should never be null + if (key == null) + throw new StreamsException("Record key for the source KTable from topic " + topic + " should not be null."); + V oldValue = sendOldValues ? store.get(key) : null; store.put(key, value); context().forward(key, new Change<>(value, oldValue)); } } - } From c179dee9d10b87f479bf1d90c57fff62bd5e585b Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Sun, 19 Jun 2016 10:42:30 +0200 Subject: [PATCH 180/267] MINOR: Fix javadoc typos in ConsumerRebalanceListener Author: Vahid Hashemian Reviewers: Ismael Juma Closes #1500 from vahidhashemian/typo07/fix_javadoc_typos_consumerrebalancelistener (cherry picked from commit 7fd4fe4487d328da0aab80ac658ba783ff1ebf9b) Signed-off-by: Ismael Juma --- .../consumer/ConsumerRebalanceListener.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.java index 8af405cedd3b..938d22bbbece 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.java @@ -3,9 +3,9 @@ * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the * License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. @@ -20,23 +20,23 @@ * A callback interface that the user can implement to trigger custom actions when the set of partitions assigned to the * consumer changes. *

            - * This is applicable when the consumer is having Kafka auto-manage group membership. If the consumer's directly assign partitions, + * This is applicable when the consumer is having Kafka auto-manage group membership. If the consumer directly assigns partitions, * those partitions will never be reassigned and this callback is not applicable. *

            - * When Kafka is managing the group membership, a partition re-assignment will be triggered any time the members of the group changes or the subscription + * When Kafka is managing the group membership, a partition re-assignment will be triggered any time the members of the group change or the subscription * of the members changes. This can occur when processes die, new process instances are added or old instances come back to life after failure. - * Rebalances can also be triggered by changes affecting the subscribed topics (e.g. when then number of partitions is + * Rebalances can also be triggered by changes affecting the subscribed topics (e.g. when the number of partitions is * administratively adjusted). *

            * There are many uses for this functionality. One common use is saving offsets in a custom store. By saving offsets in - * the {@link #onPartitionsRevoked(Collection)}, call we can ensure that any time partition assignment changes + * the {@link #onPartitionsRevoked(Collection)} call we can ensure that any time partition assignment changes * the offset gets saved. *

            * Another use is flushing out any kind of cache of intermediate results the consumer may be keeping. For example, * consider a case where the consumer is subscribed to a topic containing user page views, and the goal is to count the - * number of page views per users for each five minute window. Let's say the topic is partitioned by the user id so that - * all events for a particular user will go to a single consumer instance. The consumer can keep in memory a running - * tally of actions per user and only flush these out to a remote data store when its cache gets to big. However if a + * number of page views per user for each five minute window. Let's say the topic is partitioned by the user id so that + * all events for a particular user go to a single consumer instance. The consumer can keep in memory a running + * tally of actions per user and only flush these out to a remote data store when its cache gets too big. However if a * partition is reassigned it may want to automatically trigger a flush of this cache, before the new owner takes over * consumption. *

            From 968e44cac5919b28ebf5fad14a7946e4db943dc0 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Sun, 19 Jun 2016 15:31:04 -0700 Subject: [PATCH 181/267] KAFKA-3850: WorkerSinkTask commit prior to rebalance should be retried on wakeup Author: Jason Gustafson Reviewers: Liquan Pei , Ewen Cheslack-Postava Closes #1511 from hachikuji/retry-commit-on-wakeup-in-sinks (cherry picked from commit 2c9796114d0a9638be79b4165d0096c7a63babe7) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/WorkerSinkTask.java | 27 +++++++--- .../connect/runtime/WorkerSinkTaskTest.java | 54 +++++++++++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java index f5eaac4aa328..1aef3bb04a76 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java @@ -228,6 +228,9 @@ protected void poll(long timeoutMs) { } catch (WakeupException we) { log.trace("{} consumer woken up", id); + if (isStopping()) + return; + if (shouldPause()) { pauseAll(); } else if (!pausedForRedelivery) { @@ -236,6 +239,20 @@ protected void poll(long timeoutMs) { } } + private void doCommitSync(Map offsets, int seqno) { + try { + consumer.commitSync(offsets); + lastCommittedOffsets = offsets; + onCommitCompleted(null, seqno); + } catch (WakeupException e) { + // retry the commit to ensure offsets get pushed, then propagate the wakeup up to poll + doCommitSync(offsets, seqno); + throw e; + } catch (KafkaException e) { + onCommitCompleted(e, seqno); + } + } + /** * Starts an offset commit by flushing outstanding messages from the task and then starting * the write commit. @@ -243,13 +260,7 @@ protected void poll(long timeoutMs) { private void doCommit(Map offsets, boolean closing, final int seqno) { log.info("{} Committing offsets", this); if (closing) { - try { - consumer.commitSync(offsets); - lastCommittedOffsets = offsets; - onCommitCompleted(null, seqno); - } catch (KafkaException e) { - onCommitCompleted(e, seqno); - } + doCommitSync(offsets, seqno); } else { OffsetCommitCallback cb = new OffsetCommitCallback() { @Override @@ -448,7 +459,7 @@ else if (!context.pausedPartitions().isEmpty()) // Instead of invoking the assignment callback on initialization, we guarantee the consumer is ready upon // task start. Since this callback gets invoked during that initial setup before we've started the task, we // need to guard against invoking the user's callback method during that period. 
- if (rebalanceException == null) { + if (rebalanceException == null || rebalanceException instanceof WakeupException) { try { openPartitions(partitions); } catch (RuntimeException e) { diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java index 835e30f2e599..6a1407489403 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java @@ -293,6 +293,60 @@ public void testErrorInRebalancePartitionAssignment() throws Exception { PowerMock.verifyAll(); } + @Test + public void testWakeupInCommitSyncCausesRetry() throws Exception { + expectInitializeTask(); + expectPollInitialAssignment(); + + final List partitions = asList(TOPIC_PARTITION, TOPIC_PARTITION2); + + sinkTask.close(new HashSet<>(partitions)); + EasyMock.expectLastCall(); + + sinkTask.flush(EasyMock.>anyObject()); + EasyMock.expectLastCall(); + + // first one raises wakeup + consumer.commitSync(EasyMock.>anyObject()); + EasyMock.expectLastCall().andThrow(new WakeupException()); + + // we should retry and complete the commit + consumer.commitSync(EasyMock.>anyObject()); + EasyMock.expectLastCall(); + + EasyMock.expect(consumer.position(TOPIC_PARTITION)).andReturn(FIRST_OFFSET); + EasyMock.expect(consumer.position(TOPIC_PARTITION2)).andReturn(FIRST_OFFSET); + + sinkTask.open(partitions); + EasyMock.expectLastCall(); + + EasyMock.expect(consumer.poll(EasyMock.anyLong())).andAnswer( + new IAnswer>() { + @Override + public ConsumerRecords answer() throws Throwable { + rebalanceListener.getValue().onPartitionsRevoked(partitions); + rebalanceListener.getValue().onPartitionsAssigned(partitions); + return ConsumerRecords.empty(); + } + }); + + EasyMock.expect(consumer.assignment()).andReturn(new HashSet<>(partitions)); + + consumer.resume(Collections.singleton(TOPIC_PARTITION)); + EasyMock.expectLastCall(); + + consumer.resume(Collections.singleton(TOPIC_PARTITION2)); + EasyMock.expectLastCall(); + + PowerMock.replayAll(); + + workerTask.initialize(TASK_CONFIG); + workerTask.initializeAndStart(); + workerTask.poll(Long.MAX_VALUE); // poll for initial assignment + workerTask.poll(Long.MAX_VALUE); // now rebalance with the wakeup triggered + + PowerMock.verifyAll(); + } private void expectInitializeTask() throws Exception { PowerMock.expectPrivate(workerTask, "createConsumer").andReturn(consumer); From 412d2da29a7213faad4689758ecccbb55d53d415 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Sun, 19 Jun 2016 16:03:40 -0700 Subject: [PATCH 182/267] MINOR: Mention `log.message.format.version=0.10.0` in rolling upgrade section We had mentioned this step in the performance impact section in the middle of a long paragraph, which made it easy to miss. I also tweaked the reason for setting `log.message.format.version` as it could be misinterpreted previously. Author: Ismael Juma Reviewers: Ewen Cheslack-Postava Closes #1514 from ijuma/tweak-upgrade-notes (cherry picked from commit b9f1c60328da8a494424e30e7ebcb75d31c6fe85) Signed-off-by: Ewen Cheslack-Postava --- docs/upgrade.html | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/upgrade.html b/docs/upgrade.html index dec0808c2e3d..a9a14433be63 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -31,12 +31,15 @@

            Upgrading from 0.8.x or 0.9.x to 0.10.
            1. Update server.properties file on all brokers and add the following property: inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. 0.8.2 or 0.9.0.0). - We recommend that users set log.message.format.version=CURRENT_KAFKA_VERSION as well to avoid a performance regression - during upgrade. See potential performance impact during upgrade for the details. + We recommend that users set log.message.format.version=CURRENT_KAFKA_VERSION as well to ensure that performance of 0.8 and 0.9 consumers is not affected + during the upgrade. See potential performance impact during upgrade for the details.
            2. Upgrade the brokers. This can be done a broker at a time by simply bringing it down, updating the code, and restarting it.
            3. Once the entire cluster is upgraded, bump the protocol version by editing inter.broker.protocol.version and setting it to 0.10.0.0.
            4. Restart the brokers one by one for the new protocol version to take effect.
            5. +
            6. Once most consumers have been upgraded to 0.10.0 and if you followed the recommendation to set log.message.format.version=CURRENT_KAFKA_VERSION, change + log.message.format.version to 0.10.0 on each broker and restart them one by one. +
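          As an editorial illustration of the sequence above (the values assume an upgrade from 0.9.0.0; substitute the version actually being upgraded from), the relevant server.properties entries change roughly as follows:

              # Step 1: before upgrading the broker code (example: cluster currently on 0.9.0.0)
              inter.broker.protocol.version=0.9.0.0
              log.message.format.version=0.9.0.0

              # Steps 3-4: after the whole cluster is running the new code
              inter.broker.protocol.version=0.10.0.0
              log.message.format.version=0.9.0.0

              # Step 6: once most consumers have moved to 0.10.0
              inter.broker.protocol.version=0.10.0.0
              log.message.format.version=0.10.0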

            Note: If you are willing to accept downtime, you can simply take all the brokers down, update the code and start all of them. They will start with the new protocol by default. From 8d63690ef5b3cbf040b1eeb091f2949581ae1aa6 Mon Sep 17 00:00:00 2001 From: Rollulus Date: Mon, 20 Jun 2016 12:30:27 -0700 Subject: [PATCH 183/267] KAFKA-3864: make field.get return field's default value when needed And not the containing struct's default value. The contribution is my original work and that I license the work to the project under the project's open source license. ewencp Author: Rollulus Reviewers: Ewen Cheslack-Postava Closes #1528 from rollulus/kafka-3864 (cherry picked from commit 4544ee448703b8c1900adcb7a605380eb99a00a2) Signed-off-by: Ewen Cheslack-Postava --- .../org/apache/kafka/connect/data/Struct.java | 4 ++-- .../org/apache/kafka/connect/data/StructTest.java | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/connect/api/src/main/java/org/apache/kafka/connect/data/Struct.java b/connect/api/src/main/java/org/apache/kafka/connect/data/Struct.java index a598259128c9..f0bf86543385 100644 --- a/connect/api/src/main/java/org/apache/kafka/connect/data/Struct.java +++ b/connect/api/src/main/java/org/apache/kafka/connect/data/Struct.java @@ -85,8 +85,8 @@ public Object get(String fieldName) { */ public Object get(Field field) { Object val = values[field.index()]; - if (val == null && schema.defaultValue() != null) { - val = schema.defaultValue(); + if (val == null && field.schema().defaultValue() != null) { + val = field.schema().defaultValue(); } return val; } diff --git a/connect/api/src/test/java/org/apache/kafka/connect/data/StructTest.java b/connect/api/src/test/java/org/apache/kafka/connect/data/StructTest.java index c73992b087c7..11c9fb08fc3b 100644 --- a/connect/api/src/test/java/org/apache/kafka/connect/data/StructTest.java +++ b/connect/api/src/test/java/org/apache/kafka/connect/data/StructTest.java @@ -28,6 +28,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNull; public class StructTest { @@ -160,6 +161,20 @@ public void testMissingFieldWithDefaultValidation() { struct.validate(); } + @Test + public void testMissingFieldWithDefaultValue() { + Schema schema = SchemaBuilder.struct().field("field", DEFAULT_FIELD_SCHEMA).build(); + Struct struct = new Struct(schema); + assertEquals((byte) 0, struct.get("field")); + } + + @Test + public void testMissingFieldWithoutDefaultValue() { + Schema schema = SchemaBuilder.struct().field("field", REQUIRED_FIELD_SCHEMA).build(); + Struct struct = new Struct(schema); + assertNull(struct.get("field")); + } + @Test public void testEquals() { From 280f09b8dc72d41f7b7efbce4fc4e3620a44cf4b Mon Sep 17 00:00:00 2001 From: Liquan Pei Date: Tue, 14 Jun 2016 13:21:30 -0700 Subject: [PATCH 184/267] MINOR: Catch Throwable in commitSourceTask() Author: Liquan Pei Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1402 from Ishiihara/source-task-commit-record --- .../connect/runtime/WorkerSourceTask.java | 6 +- .../connect/runtime/WorkerSourceTaskTest.java | 79 +++++++++++++++---- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java index fd551abd3f7b..83d1c8489e63 100644 --- 
a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java @@ -243,6 +243,8 @@ private void commitTaskRecord(SourceRecord record) { task.commitRecord(record); } catch (InterruptedException e) { log.error("Exception thrown", e); + } catch (Throwable t) { + log.error("Exception thrown while calling task.commitRecord()", t); } } @@ -366,8 +368,8 @@ private void commitSourceTask() { this.task.commit(); } catch (InterruptedException ex) { log.warn("Commit interrupted", ex); - } catch (Throwable ex) { - log.error("Exception thrown while calling task.commit()", ex); + } catch (Throwable t) { + log.error("Exception thrown while calling task.commit()", t); } } diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java index 076878132932..076124514233 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java @@ -21,6 +21,7 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.clients.producer.RecordMetadata; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.InterruptException; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.common.utils.Utils; import org.apache.kafka.connect.data.Schema; @@ -52,6 +53,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -66,6 +68,7 @@ @RunWith(PowerMockRunner.class) public class WorkerSourceTaskTest extends ThreadedTest { + private final Random random = new Random(); private static final String TOPIC = "topic"; private static final Map PARTITION = Collections.singletonMap("key", "partition".getBytes()); private static final Map OFFSET = Collections.singletonMap("key", 12); @@ -197,7 +200,7 @@ public void testPause() throws Exception { workerTask.initialize(TASK_CONFIG); executor.submit(workerTask); - awaitPolls(pollLatch); + awaitLatch(pollLatch); workerTask.transitionTo(TargetState.PAUSED); @@ -238,7 +241,7 @@ public void testPollsInBackground() throws Exception { workerTask.initialize(TASK_CONFIG); executor.submit(workerTask); - awaitPolls(pollLatch); + awaitLatch(pollLatch); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); @@ -271,7 +274,7 @@ public void testFailureInPoll() throws Exception { workerTask.initialize(TASK_CONFIG); executor.submit(workerTask); - awaitPolls(pollLatch); + awaitLatch(pollLatch); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); @@ -306,7 +309,7 @@ public void testCommit() throws Exception { workerTask.initialize(TASK_CONFIG); executor.submit(workerTask); - awaitPolls(pollLatch); + awaitLatch(pollLatch); assertTrue(workerTask.commitOffsets()); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); @@ -341,7 +344,7 @@ public void testCommitFailure() throws Exception { workerTask.initialize(TASK_CONFIG); executor.submit(workerTask); - awaitPolls(pollLatch); + awaitLatch(pollLatch); assertTrue(workerTask.commitOffsets()); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); @@ -403,6 +406,30 @@ public void testSendRecordsRetries() throws Exception { 
PowerMock.verifyAll(); } + @Test + public void testSendRecordsTaskCommitRecordFail() throws Exception { + createWorkerTask(); + + // Differentiate only by Kafka partition so we can reuse conversion expectations + SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, "topic", 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD); + SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, "topic", 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD); + SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, "topic", 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD); + + // Source task commit record failure will not cause the task to abort + expectSendRecordOnce(false); + expectSendRecordTaskCommitRecordFail(false, false); + expectSendRecordOnce(false); + + PowerMock.replayAll(); + + Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2, record3)); + Whitebox.invokeMethod(workerTask, "sendRecords"); + assertEquals(false, Whitebox.getInternalState(workerTask, "lastSendFailed")); + assertNull(Whitebox.getInternalState(workerTask, "toSend")); + + PowerMock.verifyAll(); + } + @Test public void testSlowTaskStart() throws Exception { final CountDownLatch startupLatch = new CountDownLatch(1); @@ -435,7 +462,7 @@ public Object answer() throws Throwable { // Stopping immediately while the other thread has work to do should result in no polling, no offset commits, // exiting the work thread immediately, and the stop() method will be invoked in the background thread since it // cannot be invoked immediately in the thread trying to stop the task. - startupLatch.await(1000, TimeUnit.MILLISECONDS); + awaitLatch(startupLatch); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); @@ -479,14 +506,22 @@ private void expectSendRecordSyncFailure(Throwable error) throws InterruptedExce } private Capture> expectSendRecordAnyTimes() throws InterruptedException { - return expectSendRecord(true, false); + return expectSendRecordTaskCommitRecordSucceed(true, false); } private Capture> expectSendRecordOnce(boolean isRetry) throws InterruptedException { - return expectSendRecord(false, isRetry); + return expectSendRecordTaskCommitRecordSucceed(false, isRetry); + } + + private Capture> expectSendRecordTaskCommitRecordSucceed(boolean anyTimes, boolean isRetry) throws InterruptedException { + return expectSendRecord(anyTimes, isRetry, true); } - private Capture> expectSendRecord(boolean anyTimes, boolean isRetry) throws InterruptedException { + private Capture> expectSendRecordTaskCommitRecordFail(boolean anyTimes, boolean isRetry) throws InterruptedException { + return expectSendRecord(anyTimes, isRetry, false); + } + + private Capture> expectSendRecord(boolean anyTimes, boolean isRetry, boolean succeed) throws InterruptedException { expectConvertKeyValue(anyTimes); Capture> sent = EasyMock.newCapture(); @@ -523,11 +558,7 @@ public Future answer() throws Throwable { expect.andAnswer(expectResponse); // 3. 
As a result of a successful producer send callback, we'll notify the source task of the record commit - sourceTask.commitRecord(EasyMock.anyObject(SourceRecord.class)); - if (anyTimes) - EasyMock.expectLastCall().anyTimes(); - else - EasyMock.expectLastCall(); + expectTaskCommitRecord(anyTimes, succeed); return sent; } @@ -545,8 +576,24 @@ private void expectConvertKeyValue(boolean anyTimes) { convertValueExpect.andReturn(SERIALIZED_RECORD); } - private boolean awaitPolls(CountDownLatch latch) throws InterruptedException { - return latch.await(1000, TimeUnit.MILLISECONDS); + private void expectTaskCommitRecord(boolean anyTimes, boolean succeed) throws InterruptedException { + sourceTask.commitRecord(EasyMock.anyObject(SourceRecord.class)); + IExpectationSetters expect = EasyMock.expectLastCall(); + if (!succeed) { + expect = expect.andThrow(new InterruptException("Error committing record in source task")); + } + if (anyTimes) { + expect.anyTimes(); + } + } + + private boolean awaitLatch(CountDownLatch latch) { + try { + return latch.await(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // ignore + } + return false; } @SuppressWarnings("unchecked") From 37244881efefd593169b15f702b74ab17a4394a7 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Mon, 20 Jun 2016 20:32:09 -0700 Subject: [PATCH 185/267] KAFKA-3865: Fix transient failure in WorkerSourceTaskTest.testSlowTaskStart Author: Jason Gustafson Reviewers: Liquan Pei , Ewen Cheslack-Postava Closes #1531 from hachikuji/KAFKA-3865 --- .../connect/runtime/WorkerSourceTaskTest.java | 79 +++++++++++++------ 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java index 076124514233..ab9863c03f4e 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java @@ -21,9 +21,7 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.clients.producer.RecordMetadata; import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.InterruptException; import org.apache.kafka.common.utils.SystemTime; -import org.apache.kafka.common.utils.Utils; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.runtime.standalone.StandaloneConfig; import org.apache.kafka.connect.source.SourceRecord; @@ -53,7 +51,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -68,7 +65,6 @@ @RunWith(PowerMockRunner.class) public class WorkerSourceTaskTest extends ThreadedTest { - private final Random random = new Random(); private static final String TOPIC = "topic"; private static final Map PARTITION = Collections.singletonMap("key", "partition".getBytes()); private static final Map OFFSET = Collections.singletonMap("key", 12); @@ -163,11 +159,14 @@ public Void answer() throws Throwable { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); + Future taskFuture = executor.submit(workerTask); + assertTrue(startupLatch.await(5, TimeUnit.SECONDS)); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -199,8 
+198,8 @@ public void testPause() throws Exception { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); - awaitLatch(pollLatch); + Future taskFuture = executor.submit(workerTask); + assertTrue(awaitLatch(pollLatch)); workerTask.transitionTo(TargetState.PAUSED); @@ -213,6 +212,8 @@ public void testPause() throws Exception { workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -240,11 +241,14 @@ public void testPollsInBackground() throws Exception { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); - awaitLatch(pollLatch); + Future taskFuture = executor.submit(workerTask); + + assertTrue(awaitLatch(pollLatch)); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -259,9 +263,15 @@ public void testFailureInPoll() throws Exception { statusListener.onStartup(taskId); EasyMock.expectLastCall(); - final CountDownLatch pollLatch = expectPolls(1); - RuntimeException exception = new RuntimeException(); - EasyMock.expect(sourceTask.poll()).andThrow(exception); + final CountDownLatch pollLatch = new CountDownLatch(1); + final RuntimeException exception = new RuntimeException(); + EasyMock.expect(sourceTask.poll()).andAnswer(new IAnswer>() { + @Override + public List answer() throws Throwable { + pollLatch.countDown(); + throw exception; + } + }); statusListener.onFailure(taskId, exception); EasyMock.expectLastCall(); @@ -273,11 +283,14 @@ public void testFailureInPoll() throws Exception { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); - awaitLatch(pollLatch); + Future taskFuture = executor.submit(workerTask); + + assertTrue(awaitLatch(pollLatch)); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -308,12 +321,15 @@ public void testCommit() throws Exception { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); - awaitLatch(pollLatch); + Future taskFuture = executor.submit(workerTask); + + assertTrue(awaitLatch(pollLatch)); assertTrue(workerTask.commitOffsets()); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -343,12 +359,15 @@ public void testCommitFailure() throws Exception { PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); - awaitLatch(pollLatch); + Future taskFuture = executor.submit(workerTask); + + assertTrue(awaitLatch(pollLatch)); assertTrue(workerTask.commitOffsets()); workerTask.stop(); assertTrue(workerTask.awaitStop(1000)); + taskFuture.get(); + PowerMock.verifyAll(); } @@ -433,39 +452,47 @@ public void testSendRecordsTaskCommitRecordFail() throws Exception { @Test public void testSlowTaskStart() throws Exception { final CountDownLatch startupLatch = new CountDownLatch(1); + final CountDownLatch finishStartupLatch = new CountDownLatch(1); createWorkerTask(); sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class)); EasyMock.expectLastCall(); sourceTask.start(TASK_PROPS); - EasyMock.expectLastCall(); - - statusListener.onStartup(taskId); EasyMock.expectLastCall().andAnswer(new IAnswer() { @Override public Object answer() throws Throwable { startupLatch.countDown(); - Utils.sleep(100); + assertTrue(awaitLatch(finishStartupLatch)); return null; } }); + statusListener.onStartup(taskId); + EasyMock.expectLastCall(); + sourceTask.stop(); 
EasyMock.expectLastCall(); expectOffsetFlush(true); + statusListener.onShutdown(taskId); + EasyMock.expectLastCall(); + PowerMock.replayAll(); workerTask.initialize(TASK_CONFIG); - executor.submit(workerTask); + Future workerTaskFuture = executor.submit(workerTask); + // Stopping immediately while the other thread has work to do should result in no polling, no offset commits, // exiting the work thread immediately, and the stop() method will be invoked in the background thread since it // cannot be invoked immediately in the thread trying to stop the task. - awaitLatch(startupLatch); + assertTrue(awaitLatch(startupLatch)); workerTask.stop(); + finishStartupLatch.countDown(); assertTrue(workerTask.awaitStop(1000)); + workerTaskFuture.get(); + PowerMock.verifyAll(); } @@ -580,7 +607,7 @@ private void expectTaskCommitRecord(boolean anyTimes, boolean succeed) throws In sourceTask.commitRecord(EasyMock.anyObject(SourceRecord.class)); IExpectationSetters expect = EasyMock.expectLastCall(); if (!succeed) { - expect = expect.andThrow(new InterruptException("Error committing record in source task")); + expect = expect.andThrow(new RuntimeException("Error committing record in source task")); } if (anyTimes) { expect.anyTimes(); @@ -589,7 +616,7 @@ private void expectTaskCommitRecord(boolean anyTimes, boolean succeed) throws In private boolean awaitLatch(CountDownLatch latch) { try { - return latch.await(1000, TimeUnit.MILLISECONDS); + return latch.await(5000, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { // ignore } From 36f8de32414585c95aa6e8ff159cc990e1f3c8f4 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Wed, 22 Jun 2016 17:06:49 -0700 Subject: [PATCH 186/267] KAFKA-3863: System tests covering connector/task failure and restart Author: Jason Gustafson Reviewers: Ewen Cheslack-Postava Closes #1519 from hachikuji/KAFKA-3863 (cherry picked from commit 36cab7dbdff6981d0df4b355dadee3fac35508a6) Signed-off-by: Ewen Cheslack-Postava --- .../kafka/connect/runtime/AbstractHerder.java | 7 +- .../apache/kafka/connect/runtime/Worker.java | 1 + .../connect/runtime/WorkerConnector.java | 3 +- .../kafka/connect/tools/MockConnector.java | 111 ++++++++++++++++++ .../connect/tools/MockSinkConnector.java | 84 +++++++++++++ .../kafka/connect/tools/MockSinkTask.java | 71 +++++++++++ .../connect/tools/MockSourceConnector.java | 84 +++++++++++++ .../kafka/connect/tools/MockSourceTask.java | 66 +++++++++++ .../ConnectorPluginsResourceTest.java | 6 + tests/kafkatest/services/connect.py | 49 ++++++++ .../tests/connect/connect_distributed_test.py | 79 ++++++++++++- 11 files changed, 554 insertions(+), 7 deletions(-) create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkConnector.java create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceConnector.java create mode 100644 connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java index a29d216d2de9..113026890736 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java 
@@ -32,6 +32,9 @@ import org.apache.kafka.connect.source.SourceConnector; import org.apache.kafka.connect.storage.ConfigBackingStore; import org.apache.kafka.connect.storage.StatusBackingStore; +import org.apache.kafka.connect.tools.MockConnector; +import org.apache.kafka.connect.tools.MockSinkConnector; +import org.apache.kafka.connect.tools.MockSourceConnector; import org.apache.kafka.connect.tools.VerifiableSinkConnector; import org.apache.kafka.connect.tools.VerifiableSourceConnector; import org.apache.kafka.connect.util.ConnectorTaskId; @@ -87,7 +90,9 @@ public abstract class AbstractHerder implements Herder, TaskStatus.Listener, Con private static List validConnectorPlugins; private static final Object LOCK = new Object(); private Thread classPathTraverser; - private static final List> EXCLUDES = Arrays.>asList(VerifiableSourceConnector.class, VerifiableSinkConnector.class); + private static final List> EXCLUDES = Arrays.asList( + VerifiableSourceConnector.class, VerifiableSinkConnector.class, + MockConnector.class, MockSourceConnector.class, MockSinkConnector.class); public AbstractHerder(Worker worker, String workerId, diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java index a88d0f928099..e39a7e2b4b38 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java @@ -401,6 +401,7 @@ public void stopAndAwaitTask(ConnectorTaskId id) { WorkerTask task = getTask(id); stopTask(task); awaitStopTask(task, config.getLong(WorkerConfig.TASK_SHUTDOWN_GRACEFUL_TIMEOUT_MS_CONFIG)); + log.info("Task {} completed shutdown.", task.id()); } /** diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java index 788009572791..b96976dbb77a 100644 --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java @@ -66,10 +66,9 @@ public WorkerConnector(String connName, } public void initialize(ConnectorConfig connectorConfig) { - log.debug("Initializing connector {} with config {}", connName, config); - try { this.config = connectorConfig.originalsStrings(); + log.debug("Initializing connector {} with config {}", connName, config); connector.initialize(new ConnectorContext() { @Override diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java new file mode 100644 index 000000000000..919e89613217 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.connect.tools; + +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.utils.AppInfoParser; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.connector.Task; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +/** + * This connector provides support for mocking certain connector behaviors. For example, + * this can be used to simulate connector or task failures. It works by passing a "mock mode" + * through configuration from the system test. New mock behavior can be implemented either + * in the connector or in the task by providing a new mode implementation. + * + * At the moment, this connector only supports a single task and shares configuration between + * the connector and its tasks. + * + * @see MockSinkConnector + * @see MockSourceConnector + */ +public class MockConnector extends Connector { + public static final String MOCK_MODE_KEY = "mock_mode"; + public static final String DELAY_MS_KEY = "delay_ms"; + + public static final String CONNECTOR_FAILURE = "connector-failure"; + public static final String TASK_FAILURE = "task-failure"; + + public static final long DEFAULT_FAILURE_DELAY_MS = 15000; + + private Map config; + private ScheduledExecutorService executor; + + @Override + public String version() { + return AppInfoParser.getVersion(); + } + + @Override + public void start(Map config) { + this.config = config; + + if (CONNECTOR_FAILURE.equals(config.get(MOCK_MODE_KEY))) { + // Schedule this connector to raise an exception after some delay + + String delayMsString = config.get(DELAY_MS_KEY); + long delayMs = DEFAULT_FAILURE_DELAY_MS; + if (delayMsString != null) + delayMs = Long.parseLong(delayMsString); + + executor = Executors.newSingleThreadScheduledExecutor(); + executor.schedule(new Runnable() { + @Override + public void run() { + context.raiseError(new RuntimeException()); + } + }, delayMs, TimeUnit.MILLISECONDS); + } + } + + @Override + public Class taskClass() { + throw new UnsupportedOperationException(); + } + + @Override + public List> taskConfigs(int maxTasks) { + return Collections.singletonList(config); + } + + @Override + public void stop() { + if (executor != null) { + executor.shutdownNow(); + + try { + if (!executor.awaitTermination(20, TimeUnit.SECONDS)) + throw new RuntimeException("Failed timely termination of scheduler"); + } catch (InterruptedException e) { + throw new RuntimeException("Task was interrupted during shutdown"); + } + } + } + + @Override + public ConfigDef config() { + return new ConfigDef(); + } + +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkConnector.java new file mode 100644 index 000000000000..67fca66566f2 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkConnector.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.connect.tools; + +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.connect.connector.ConnectorContext; +import org.apache.kafka.connect.connector.Task; +import org.apache.kafka.connect.sink.SinkConnector; + +import java.util.List; +import java.util.Map; + +/** + * Mock sink implementation which delegates to {@link MockConnector}. + */ +public class MockSinkConnector extends SinkConnector { + + private MockConnector delegate = new MockConnector(); + + @Override + public void initialize(ConnectorContext ctx) { + delegate.initialize(ctx); + } + + @Override + public void initialize(ConnectorContext ctx, List> taskConfigs) { + delegate.initialize(ctx, taskConfigs); + } + + @Override + public void reconfigure(Map props) { + delegate.reconfigure(props); + } + + @Override + public Config validate(Map connectorConfigs) { + return delegate.validate(connectorConfigs); + } + + @Override + public String version() { + return delegate.version(); + } + + @Override + public void start(Map props) { + delegate.start(props); + } + + @Override + public Class taskClass() { + return MockSinkTask.class; + } + + @Override + public List> taskConfigs(int maxTasks) { + return delegate.taskConfigs(maxTasks); + } + + @Override + public void stop() { + delegate.stop(); + } + + @Override + public ConfigDef config() { + return delegate.config(); + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java new file mode 100644 index 000000000000..2e4b35eeb8b6 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.connect.tools; + +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.utils.AppInfoParser; +import org.apache.kafka.connect.sink.SinkRecord; +import org.apache.kafka.connect.sink.SinkTask; + +import java.util.Collection; +import java.util.Map; + +public class MockSinkTask extends SinkTask { + + private String mockMode; + private long startTimeMs; + private long failureDelayMs; + + @Override + public String version() { + return AppInfoParser.getVersion(); + } + + @Override + public void start(Map config) { + this.mockMode = config.get(MockConnector.MOCK_MODE_KEY); + + if (MockConnector.TASK_FAILURE.equals(mockMode)) { + this.startTimeMs = System.currentTimeMillis(); + + String delayMsString = config.get(MockConnector.DELAY_MS_KEY); + this.failureDelayMs = MockConnector.DEFAULT_FAILURE_DELAY_MS; + if (delayMsString != null) + failureDelayMs = Long.parseLong(delayMsString); + } + } + + @Override + public void put(Collection records) { + if (MockConnector.TASK_FAILURE.equals(mockMode)) { + long now = System.currentTimeMillis(); + if (now > startTimeMs + failureDelayMs) + throw new RuntimeException(); + } + } + + @Override + public void flush(Map offsets) { + + } + + @Override + public void stop() { + + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceConnector.java new file mode 100644 index 000000000000..d69e35559f64 --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceConnector.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.connect.tools; + +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.connect.connector.ConnectorContext; +import org.apache.kafka.connect.connector.Task; +import org.apache.kafka.connect.source.SourceConnector; + +import java.util.List; +import java.util.Map; + +/** + * Mock source implementation which delegates to {@link MockConnector}. + */ +public class MockSourceConnector extends SourceConnector { + + private MockConnector delegate = new MockConnector(); + + @Override + public void initialize(ConnectorContext ctx) { + delegate.initialize(ctx); + } + + @Override + public void initialize(ConnectorContext ctx, List> taskConfigs) { + delegate.initialize(ctx, taskConfigs); + } + + @Override + public void reconfigure(Map props) { + delegate.reconfigure(props); + } + + @Override + public Config validate(Map connectorConfigs) { + return delegate.validate(connectorConfigs); + } + + @Override + public String version() { + return delegate.version(); + } + + @Override + public void start(Map props) { + delegate.start(props); + } + + @Override + public Class taskClass() { + return MockSourceTask.class; + } + + @Override + public List> taskConfigs(int maxTasks) { + return delegate.taskConfigs(maxTasks); + } + + @Override + public void stop() { + delegate.stop(); + } + + @Override + public ConfigDef config() { + return delegate.config(); + } +} diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java new file mode 100644 index 000000000000..eb896af08b8a --- /dev/null +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.kafka.connect.tools; + +import org.apache.kafka.common.utils.AppInfoParser; +import org.apache.kafka.connect.source.SourceRecord; +import org.apache.kafka.connect.source.SourceTask; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class MockSourceTask extends SourceTask { + + private String mockMode; + private long startTimeMs; + private long failureDelayMs; + + @Override + public String version() { + return AppInfoParser.getVersion(); + } + + @Override + public void start(Map config) { + this.mockMode = config.get(MockConnector.MOCK_MODE_KEY); + + if (MockConnector.TASK_FAILURE.equals(mockMode)) { + this.startTimeMs = System.currentTimeMillis(); + + String delayMsString = config.get(MockConnector.DELAY_MS_KEY); + this.failureDelayMs = MockConnector.DEFAULT_FAILURE_DELAY_MS; + if (delayMsString != null) + failureDelayMs = Long.parseLong(delayMsString); + } + } + + @Override + public List poll() throws InterruptedException { + if (MockConnector.TASK_FAILURE.equals(mockMode)) { + long now = System.currentTimeMillis(); + if (now > startTimeMs + failureDelayMs) + throw new RuntimeException(); + } + return Collections.emptyList(); + } + + @Override + public void stop() { + + } +} diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java index ddf30c712a84..c7f532bf5ed5 100644 --- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResourceTest.java @@ -39,6 +39,9 @@ import org.apache.kafka.connect.runtime.rest.entities.ConnectorPluginInfo; import org.apache.kafka.connect.sink.SinkConnector; import org.apache.kafka.connect.source.SourceConnector; +import org.apache.kafka.connect.tools.MockConnector; +import org.apache.kafka.connect.tools.MockSinkConnector; +import org.apache.kafka.connect.tools.MockSourceConnector; import org.apache.kafka.connect.tools.VerifiableSinkConnector; import org.apache.kafka.connect.tools.VerifiableSourceConnector; import org.easymock.EasyMock; @@ -165,6 +168,9 @@ public void testListConnectorPlugins() { assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(SinkConnector.class.getCanonicalName()))); assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(VerifiableSourceConnector.class.getCanonicalName()))); assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(VerifiableSinkConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(MockSourceConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(MockSinkConnector.class.getCanonicalName()))); + assertFalse(connectorPlugins.contains(new ConnectorPluginInfo(MockConnector.class.getCanonicalName()))); assertTrue(connectorPlugins.contains(new ConnectorPluginInfo(ConnectorPluginsResourceTestConnector.class.getCanonicalName()))); } diff --git a/tests/kafkatest/services/connect.py 
b/tests/kafkatest/services/connect.py index 5371a72f6cdb..7f36854f9a7c 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -135,6 +135,12 @@ def delete_connector(self, name, node=None, retries=0, retry_backoff=.01): def get_connector_status(self, name, node=None): return self._rest('/connectors/' + name + '/status', node=node) + def restart_connector(self, name, node=None): + return self._rest('/connectors/' + name + '/restart', method="POST") + + def restart_task(self, connector_name, task_id, node=None): + return self._rest('/connectors/' + connector_name + '/tasks/' + str(task_id) + '/restart', method="POST") + def pause_connector(self, name, node=None): return self._rest('/connectors/' + name + '/pause', method="PUT") @@ -331,3 +337,46 @@ def start(self): 'tasks.max': self.tasks, 'topics': ",".join(self.topics) }) + +class MockSink(object): + + def __init__(self, cc, topics, mode=None, delay_sec=10, name="mock-sink"): + self.cc = cc + self.logger = self.cc.logger + self.name = name + self.mode = mode + self.delay_sec = delay_sec + self.topics = topics + + def start(self): + self.logger.info("Creating connector MockSinkConnector %s", self.name) + self.cc.create_connector({ + 'name': self.name, + 'connector.class': 'org.apache.kafka.connect.tools.MockSinkConnector', + 'tasks.max': 1, + 'topics': ",".join(self.topics), + 'mock_mode': self.mode, + 'delay_ms': self.delay_sec * 1000 + }) + +class MockSource(object): + + def __init__(self, cc, topics, mode=None, delay_sec=10, name="mock-source"): + self.cc = cc + self.logger = self.cc.logger + self.name = name + self.mode = mode + self.delay_sec = delay_sec + self.topics = topics + + def start(self): + self.logger.info("Creating connector MockSourceConnector %s", self.name) + self.cc.create_connector({ + 'name': self.name, + 'connector.class': 'org.apache.kafka.connect.tools.MockSourceConnector', + 'tasks.max': 1, + 'topics': ",".join(self.topics), + 'mock_mode': self.mode, + 'delay_ms': self.delay_sec * 1000 + }) + diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py index a4d68f39858c..d4c4225a0bf8 100644 --- a/tests/kafkatest/tests/connect/connect_distributed_test.py +++ b/tests/kafkatest/tests/connect/connect_distributed_test.py @@ -17,7 +17,7 @@ from kafkatest.services.zookeeper import ZookeeperService from kafkatest.services.kafka import KafkaService -from kafkatest.services.connect import ConnectDistributedService, VerifiableSource, VerifiableSink, ConnectRestError +from kafkatest.services.connect import ConnectDistributedService, VerifiableSource, VerifiableSink, ConnectRestError, MockSink, MockSource from kafkatest.services.console_consumer import ConsoleConsumer from kafkatest.services.security.security_config import SecurityConfig from ducktape.utils.util import wait_until @@ -88,9 +88,23 @@ def _connector_status(self, connector, node=None): except ConnectRestError: return None - def _has_state(self, status, state): + def _connector_has_state(self, status, state): return status is not None and status['connector']['state'] == state + def _task_has_state(self, task_id, status, state): + if not status: + return False + + tasks = status['tasks'] + if not tasks: + return False + + for task in tasks: + if task['id'] == task_id: + return task['state'] == state + + return False + def _all_tasks_have_state(self, status, task_count, state): if status is None: return False @@ -103,11 +117,68 @@ def 
_all_tasks_have_state(self, status, task_count, state): def is_running(self, connector, node=None): status = self._connector_status(connector.name, node) - return self._has_state(status, 'RUNNING') and self._all_tasks_have_state(status, connector.tasks, 'RUNNING') + return self._connector_has_state(status, 'RUNNING') and self._all_tasks_have_state(status, connector.tasks, 'RUNNING') def is_paused(self, connector, node=None): status = self._connector_status(connector.name, node) - return self._has_state(status, 'PAUSED') and self._all_tasks_have_state(status, connector.tasks, 'PAUSED') + return self._connector_has_state(status, 'PAUSED') and self._all_tasks_have_state(status, connector.tasks, 'PAUSED') + + def connector_is_running(self, connector, node=None): + status = self._connector_status(connector.name, node) + return self._connector_has_state(status, 'RUNNING') + + def connector_is_failed(self, connector, node=None): + status = self._connector_status(connector.name, node) + return self._connector_has_state(status, 'FAILED') + + def task_is_failed(self, connector, task_id, node=None): + status = self._connector_status(connector.name, node) + return self._task_has_state(task_id, status, 'FAILED') + + def task_is_running(self, connector, task_id, node=None): + status = self._connector_status(connector.name, node) + return self._task_has_state(task_id, status, 'RUNNING') + + def test_restart_failed_connector(self): + self.setup_services() + self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) + self.cc.start() + + self.sink = MockSink(self.cc, self.topics.keys(), mode='connector-failure', delay_sec=5) + self.sink.start() + + wait_until(lambda: self.connector_is_failed(self.sink), timeout_sec=15, + err_msg="Failed to see connector transition to the FAILED state") + + self.cc.restart_connector(self.sink.name) + + wait_until(lambda: self.connector_is_running(self.sink), timeout_sec=10, + err_msg="Failed to see connector transition to the RUNNING state") + + + @matrix(connector_type=["source", "sink"]) + def test_restart_failed_task(self, connector_type): + self.setup_services() + self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) + self.cc.start() + + connector = None + if connector_type == "sink": + connector = MockSink(self.cc, self.topics.keys(), mode='task-failure', delay_sec=5) + else: + connector = MockSource(self.cc, self.topics.keys(), mode='task-failure', delay_sec=5) + + connector.start() + + task_id = 0 + wait_until(lambda: self.task_is_failed(connector, task_id), timeout_sec=15, + err_msg="Failed to see task transition to the FAILED state") + + self.cc.restart_task(connector.name, task_id) + + wait_until(lambda: self.task_is_running(connector, task_id), timeout_sec=10, + err_msg="Failed to see task transition to the RUNNING state") + def test_pause_and_resume_source(self): """ From b669b2786c69d2dfa719033eb7c02ac444f115f4 Mon Sep 17 00:00:00 2001 From: "Matthias J. 
Sax" Date: Thu, 23 Jun 2016 14:16:42 -0700 Subject: [PATCH 187/267] cherry-pick dummy --- .../kafka/streams/kstream/JoinWindows.java | 20 ++-- .../streams/kstream/JoinWindowsTest.java | 99 +++++++++++++++++++ .../streams/kstream/TimeWindowsTest.java | 30 +++--- .../kstream/internals/KStreamImplTest.java | 5 +- .../internals/KStreamKStreamJoinTest.java | 9 +- .../internals/KStreamKStreamLeftJoinTest.java | 6 +- 6 files changed, 140 insertions(+), 29 deletions(-) create mode 100644 streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java index f45c0640dff5..53ddf3ec4628 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java @@ -41,6 +41,8 @@ * * A join is symmetric in the sense, that a join specification on the first stream returns the same result record as * a join specification on the second stream with flipped before and after values. + *

            + * Both values (before and after) must not be negative and not zero at the same time. */ public class JoinWindows extends Windows { @@ -52,21 +54,27 @@ public class JoinWindows extends Windows { private JoinWindows(String name, long before, long after) { super(name); + if (before < 0) { + throw new IllegalArgumentException("window size must be > 0 (you provided before as " + before + ")"); + } + if (after < 0) { + throw new IllegalArgumentException("window size must be > 0 (you provided after as " + after + ")"); + } + if (before == 0 && after == 0) { + throw new IllegalArgumentException("window size must be > 0 (you provided 0)"); + } + this.after = after; this.before = before; } - public static JoinWindows of(String name) { - return new JoinWindows(name, 0L, 0L); - } - /** * Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference}. * * @param timeDifference join window interval */ - public JoinWindows within(long timeDifference) { - return new JoinWindows(this.name, timeDifference, timeDifference); + public static JoinWindows of(String name, long timeDifference) { + return new JoinWindows(name, timeDifference, timeDifference); } /** diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java new file mode 100644 index 000000000000..d8fa7b49c7c1 --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

            + * http://www.apache.org/licenses/LICENSE-2.0 + *

            + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.kafka.streams.kstream; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + + +public class JoinWindowsTest { + + private static String anyName = "window"; + private static long anySize = 123L; + private static long anyOtherSize = 456L; + + @Test + public void shouldHaveSaneEqualsAndHashCode() { + JoinWindows w1 = JoinWindows.of("w1", anySize); + JoinWindows w2 = JoinWindows.of("w2", anySize); + + // Reflexive + assertEquals(w1, w1); + assertEquals(w1.hashCode(), w1.hashCode()); + + // Symmetric + assertEquals(w1, w2); + assertEquals(w2, w1); + assertEquals(w1.hashCode(), w2.hashCode()); + + JoinWindows w3 = JoinWindows.of("w3", w2.after).before(anyOtherSize); + JoinWindows w4 = JoinWindows.of("w4", anyOtherSize).after(w2.after); + assertEquals(w3, w4); + assertEquals(w4, w3); + assertEquals(w3.hashCode(), w4.hashCode()); + + // Inequality scenarios + assertNotEquals("must be false for null", null, w1); + assertNotEquals("must be false for different window types", UnlimitedWindows.of("irrelevant"), w1); + assertNotEquals("must be false for different types", new Object(), w1); + + JoinWindows differentWindowSize = JoinWindows.of("differentWindowSize", w1.after + 1); + assertNotEquals("must be false when window sizes are different", differentWindowSize, w1); + + JoinWindows differentWindowSize2 = JoinWindows.of("differentWindowSize", w1.after).after(w1.after + 1); + assertNotEquals("must be false when window sizes are different", differentWindowSize2, w1); + + JoinWindows differentWindowSize3 = JoinWindows.of("differentWindowSize", w1.after).before(w1.before + 1); + assertNotEquals("must be false when window sizes are different", differentWindowSize3, w1); + } + + @Test(expected = IllegalArgumentException.class) + public void nameMustNotBeEmpty() { + JoinWindows.of("", anySize); + } + + @Test(expected = IllegalArgumentException.class) + public void nameMustNotBeNull() { + JoinWindows.of(null, anySize); + } + + @Test(expected = IllegalArgumentException.class) + public void windowSizeMustNotBeNegative() { + JoinWindows.of(anyName, -1); + } + + @Test(expected = IllegalArgumentException.class) + public void beforeMustNotBeNegative() { + JoinWindows.of(anyName, anySize).before(-1); + } + + @Test(expected = IllegalArgumentException.class) + public void afterSizeMustNotBeNegative() { + JoinWindows.of(anyName, anySize).after(-1); + } + + @Test(expected = IllegalArgumentException.class) + public void windowSizeMustNotBeZero() { + JoinWindows.of(anyName, 0); + } + +} \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java index 62b12a9ff409..5acd6e22c691 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/TimeWindowsTest.java @@ -25,8 +25,7 @@ import java.util.Map; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static 
org.junit.Assert.assertNotEquals; public class TimeWindowsTest { @@ -39,31 +38,30 @@ public void shouldHaveSaneEqualsAndHashCode() { TimeWindows w2 = TimeWindows.of("w2", w1.size); // Reflexive - assertTrue(w1.equals(w1)); - assertTrue(w1.hashCode() == w1.hashCode()); + assertEquals(w1, w1); + assertEquals(w1.hashCode(), w1.hashCode()); // Symmetric - assertTrue(w1.equals(w2)); - assertTrue(w1.hashCode() == w2.hashCode()); - assertTrue(w2.hashCode() == w1.hashCode()); + assertEquals(w1, w2); + assertEquals(w2, w1); + assertEquals(w1.hashCode(), w2.hashCode()); // Transitive TimeWindows w3 = TimeWindows.of("w3", w2.size); - assertTrue(w2.equals(w3)); - assertTrue(w2.hashCode() == w3.hashCode()); - assertTrue(w1.equals(w3)); - assertTrue(w1.hashCode() == w3.hashCode()); + assertEquals(w2, w3); + assertEquals(w1, w3); + assertEquals(w1.hashCode(), w3.hashCode()); // Inequality scenarios - assertFalse("must be false for null", w1.equals(null)); - assertFalse("must be false for different window types", w1.equals(UnlimitedWindows.of("irrelevant"))); - assertFalse("must be false for different types", w1.equals(new Object())); + assertNotEquals("must be false for null", null, w1); + assertNotEquals("must be false for different window types", UnlimitedWindows.of("irrelevant"), w1); + assertNotEquals("must be false for different types", new Object(), w1); TimeWindows differentWindowSize = TimeWindows.of("differentWindowSize", w1.size + 1); - assertFalse("must be false when window sizes are different", w1.equals(differentWindowSize)); + assertNotEquals("must be false when window sizes are different", differentWindowSize, w1); TimeWindows differentAdvanceInterval = w1.advanceBy(w1.advance - 1); - assertFalse("must be false when advance intervals are different", w1.equals(differentAdvanceInterval)); + assertNotEquals("must be false when advance intervals are different", differentAdvanceInterval, w1); } @Test(expected = IllegalArgumentException.class) diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java index 3d45d1dcc8a2..6242702d7cb8 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java @@ -103,19 +103,20 @@ public boolean test(String key, Integer value) { } ); + final int anyWindowSize = 1; KStream stream4 = streams2[0].join(streams3[0], new ValueJoiner() { @Override public Integer apply(Integer value1, Integer value2) { return value1 + value2; } - }, JoinWindows.of("join-0"), stringSerde, intSerde, intSerde); + }, JoinWindows.of("join-0", anyWindowSize), stringSerde, intSerde, intSerde); KStream stream5 = streams2[1].join(streams3[1], new ValueJoiner() { @Override public Integer apply(Integer value1, Integer value2) { return value1 + value2; } - }, JoinWindows.of("join-1"), stringSerde, intSerde, intSerde); + }, JoinWindows.of("join-1", anyWindowSize), stringSerde, intSerde, intSerde); stream4.to("topic-5"); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java index 6b0828a62aa7..aa7d117685a6 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java +++ 
b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java @@ -77,7 +77,8 @@ public void testJoin() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").within(100), intSerde, stringSerde, stringSerde); + joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + intSerde, stringSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); @@ -175,7 +176,8 @@ public void testOuterJoin() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.outerJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").within(100), intSerde, stringSerde, stringSerde); + joined = stream1.outerJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + intSerde, stringSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); @@ -275,7 +277,8 @@ public void testWindowing() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").within(100), intSerde, stringSerde, stringSerde); + joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + intSerde, stringSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java index 65a4b542e583..5b12a301bd84 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java @@ -78,7 +78,8 @@ public void testLeftJoin() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").within(100), intSerde, stringSerde); + + joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), intSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); @@ -156,7 +157,8 @@ public void testWindowing() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").within(100), intSerde, stringSerde); + + joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), intSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); From e467394a85471fbcf439cf90c7e0591161815789 Mon Sep 17 00:00:00 2001 From: Jeff Klukas Date: Thu, 23 Jun 2016 14:36:38 -0700 Subject: 
[PATCH 188/267] MINOR: Pass absolute directory path to RocksDB.open The method `RocksDB.open` assumes an absolute file path. If a relative path is configured, it leads to an exception like the following: ``` org.apache.kafka.streams.errors.ProcessorStateException: Error opening store CustomerIdToUserIdLookup at location ./tmp/rocksdb/CustomerIdToUserIdLookup at org.rocksdb.RocksDB.open(Native Method) at org.rocksdb.RocksDB.open(RocksDB.java:183) at org.apache.kafka.streams.state.internals.RocksDBStore.openDB(RocksDBStore.java:214) at org.apache.kafka.streams.state.internals.RocksDBStore.openDB(RocksDBStore.java:165) at org.apache.kafka.streams.state.internals.RocksDBStore.init(RocksDBStore.java:170) at org.apache.kafka.streams.state.internals.MeteredKeyValueStore.init(MeteredKeyValueStore.java:85) at org.apache.kafka.test.KStreamTestDriver.(KStreamTestDriver.java:64) at org.apache.kafka.test.KStreamTestDriver.(KStreamTestDriver.java:50) at com.simple.estuary.transform.streaming.CartesianTransactionEnrichmentJobTest.testBuilder(CartesianTransactionEnrichmentJobTest.java:41) ``` Is there any risk to always fetching the absolute path as proposed here? Let me know if you think this requires a JIRA issue or a unit test. I started working on a unit test, but don't know of a great solution for writing out a file to a relative directory. This contribution is my original work and I license the work to the project under the project's open source license. Author: Jeff Klukas Reviewers: Guozhang Wang Closes #1481 from jklukas/rocksdb-abspath (cherry picked from commit 41a54358bececc42ba680267281ba01b3f33f77f) Signed-off-by: Guozhang Wang --- .../org/apache/kafka/streams/state/internals/RocksDBStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index 2a7f6b1dc0b2..8f3bab0f2578 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -212,7 +212,7 @@ private RocksDB openDB(File dir, Options options, int ttl) { try { if (ttl == TTL_NOT_USED) { dir.getParentFile().mkdirs(); - return RocksDB.open(options, dir.toString()); + return RocksDB.open(options, dir.getAbsolutePath()); } else { throw new UnsupportedOperationException("Change log is not supported for store " + this.name + " since it is TTL based."); // TODO: support TTL with change log? From a06448b9b1131c3193c7ba8394035c81580772c2 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Thu, 23 Jun 2016 14:42:20 -0700 Subject: [PATCH 189/267] cherry-pick doc change to remove Java 1.6 reference --- docs/quickstart.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart.html b/docs/quickstart.html index 4d4f7eae6836..73e5d6fd09ab 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -169,7 +169,7 @@

Step 6: Settin
Now let's test out fault-tolerance. Broker 1 was acting as the leader so let's kill it:
             > ps | grep server-1.properties
            -7564 ttys002    0:15.91 /System/Library/Frameworks/JavaVM.framework/Versions/1.6/Home/bin/java...
            +7564 ttys002    0:15.91 /System/Library/Frameworks/JavaVM.framework/Versions/1.8/Home/bin/java...
             > kill -9 7564
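
To make the RocksDBStore change in PATCH 188 above concrete, here is a minimal standalone sketch of the path handling that fix relies on. This is not code from the patch: it assumes a recent rocksdbjni artifact on the classpath, and the class name and store directory are invented for illustration.

    import java.io.File;

    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;
    import org.rocksdb.RocksDBException;

    public class RocksDbPathCheck {
        public static void main(String[] args) throws RocksDBException {
            RocksDB.loadLibrary();

            // A relative state directory, similar to what a Streams application might configure.
            File dir = new File("./tmp/rocksdb/example-store");
            dir.getParentFile().mkdirs();

            Options options = new Options().setCreateIfMissing(true);

            // Passing dir.toString() would hand RocksDB a relative path, which is what triggered
            // the ProcessorStateException quoted in the PATCH 188 description; the absolute path is safe.
            RocksDB db = RocksDB.open(options, dir.getAbsolutePath());

            db.close();
            options.close();
        }
    }

Resolving the directory to an absolute path once, at open time, keeps the store independent of the process working directory, which is the design choice the one-line fix makes.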
             
From f630cc79ac5d57b3ee4e948c36bb08ad96333923 Mon Sep 17 00:00:00 2001
From: Guozhang Wang
Date: Fri, 24 Jun 2016 14:46:39 -0700
Subject: [PATCH 190/267] MINOR: Improve doc string in PartitionGrouper

Author: Guozhang Wang

Reviewers: Matthias J. Sax

Closes #1550 from guozhangwang/Kminor-grouppartitioner-javadoc

(cherry picked from commit 0e0b632b926f52451495ab254e74ab87848cb353)
Signed-off-by: Guozhang Wang
---
 .../kafka/streams/processor/PartitionGrouper.java | 10 ++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java b/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
index 0c9408475251..71bfe99fc5a2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/PartitionGrouper.java
@@ -35,8 +35,14 @@ public interface PartitionGrouper {
     /**
      * Returns a map of task ids to groups of partitions. A partition group forms a task, thus, partitions that are
-     * expected to be processed together must be in the same group. DefaultPartitionGrouper implements this
-     * interface. See {@link DefaultPartitionGrouper} for more information.
+     * expected to be processed together must be in the same group.
+     *
+     * Note that the grouping of partitions need to be sticky such that for a given partition, its assigned
+     * task should always be the same regardless of the input parameters to this function. This is to ensure task's
+     * local state stores remain valid through workload rebalances among Kafka Streams instances.
+     *
+     * The default partition grouper implements this interface by assigning all partitions across different topics with the same
+     * partition id into the same task. See {@link DefaultPartitionGrouper} for more information.
      *
      * @param topicGroups The map from the {@link TopologyBuilder#topicGroups(String)} topic group} id to topics
      * @param metadata    Metadata of the consuming cluster

From 73e2f090c58873d19d0fd8b568d733ec84effb92 Mon Sep 17 00:00:00 2001
From: Henry Cai
Date: Wed, 29 Jun 2016 15:19:47 -0700
Subject: [PATCH 191/267] KAFKA-3890: Streams use same task assignment on cluster rolling restart

Current task assignment in TaskAssignor is not deterministic. During a cluster restart or rolling restart we have the same set of participating worker nodes, but the current TaskAssignor is not able to maintain a deterministic mapping, so about 20% of partitions will be reassigned, which causes state repopulation. When the topology of worker nodes (# of worker nodes, the TaskIds they carry) is not changed, we really just want to keep the old task assignment.

Add code to check whether the node topology is unchanged:
- the prevAssignedTasks from the old clientStates are the same as the new task list
- there is no new node joining (a new node's prevAssignedTasks would be either empty or in conflict with some other node's)
- there is no node dropping out (otherwise the total of prevAssignedTasks from the remaining nodes would not equal the new task list)

When the topology is unchanged, we just reuse the old mapping.

I also added code to check whether the previous assignment is balanced (whether each node's task list is within [1/2 average -- 2 * average]); if it is not balanced, we will still start a new task assignment.
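
To make the two checks described above concrete, here is a simplified, self-contained sketch. It is not the implementation from this patch (that follows in the TaskAssignor diff below); the class and method names are invented, and corner cases such as an empty client map are ignored.

    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    final class StickyAssignmentCheck {

        // True when the previous assignment can simply be reused: it must be roughly
        // balanced and the set of clients and tasks must be unchanged.
        static <C, T> boolean canReusePreviousAssignment(Map<C, Set<T>> prevTasksByClient, Set<T> allTasks) {
            int avgTasksPerClient = allTasks.size() / prevTasksByClient.size();
            Set<T> seen = new HashSet<>();

            for (Set<T> prevTasks : prevTasksByClient.values()) {
                // Balance check: each client's old task count must stay within (avg / 2, 2 * avg).
                if (prevTasks.size() >= 2 * avgTasksPerClient || prevTasks.size() <= avgTasksPerClient / 2)
                    return false;
                for (T task : prevTasks) {
                    // The same task owned by two clients means the client set changed.
                    if (!seen.add(task))
                        return false;
                }
            }
            // A missing or extra task means a client dropped out or the task set itself changed.
            return seen.equals(allTasks);
        }
    }

When this combined condition holds, the assignor can hand each task straight back to the client that previously owned it, which is what the change to findClientByAdditionCost in the diff below does.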
Author: Henry Cai Reviewers: Guozhang Wang Closes #1543 from HenryCaiHaiying/upstream (cherry picked from commit a34f78dcad1d7844a21d0afbf0f6eef183847d0d) Signed-off-by: Guozhang Wang --- .../internals/assignment/ClientState.java | 10 +++++++ .../internals/assignment/TaskAssignor.java | 26 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java index a0f61791570d..b59af8656bb2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java @@ -69,4 +69,14 @@ public void assign(T taskId, boolean active) { this.cost += cost; } + @Override + public String toString() { + return "[activeTasks: (" + activeTasks + + ") assignedTasks: (" + assignedTasks + + ") prevActiveTasks: (" + prevActiveTasks + + ") prevAssignedTasks: (" + prevAssignedTasks + + ") capacity: " + capacity + + " cost: " + cost + + "]"; + } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskAssignor.java index 2501677ebbdb..e246c4bc2f6c 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskAssignor.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskAssignor.java @@ -40,10 +40,16 @@ public static > Map> assign(Map assignor = new TaskAssignor<>(states, tasks, seed); + log.info("Assigning tasks to clients: {}, prevAssignmentBalanced: {}, " + + "prevClientsUnchangeed: {}, tasks: {}, replicas: {}", + states, assignor.prevAssignmentBalanced, assignor.prevClientsUnchanged, + tasks, numStandbyReplicas); + assignor.assignTasks(); if (numStandbyReplicas > 0) assignor.assignStandbyTasks(numStandbyReplicas); + log.info("Assigned with: " + assignor.states); return assignor.states; } @@ -52,13 +58,29 @@ public static > Map> assign(Map> taskPairs; private final int maxNumTaskPairs; private final ArrayList tasks; + private boolean prevAssignmentBalanced = true; + private boolean prevClientsUnchanged = true; private TaskAssignor(Map> states, Set tasks, long randomSeed) { this.rand = new Random(randomSeed); this.states = new HashMap<>(); + int avgNumTasks = tasks.size() / states.size(); + Set existingTasks = new HashSet<>(); for (Map.Entry> entry : states.entrySet()) { this.states.put(entry.getKey(), entry.getValue().copy()); + Set oldTasks = entry.getValue().prevAssignedTasks; + // make sure the previous assignment is balanced + prevAssignmentBalanced = prevAssignmentBalanced && + oldTasks.size() < 2 * avgNumTasks && oldTasks.size() > avgNumTasks / 2; + for (T task : oldTasks) { + // Make sure there is no duplicates + prevClientsUnchanged = prevClientsUnchanged && !existingTasks.contains(task); + } + existingTasks.addAll(oldTasks); } + // Make sure the existing assignment didn't miss out any task + prevClientsUnchanged = prevClientsUnchanged && existingTasks.equals(tasks); + this.tasks = new ArrayList<>(tasks); int numTasks = tasks.size(); @@ -112,6 +134,10 @@ private ClientState findClientByAdditionCost(T task, boolean checkTaskPairs) double candidateAdditionCost = 0d; for (ClientState state : states.values()) { + if (prevAssignmentBalanced && prevClientsUnchanged && + 
state.prevAssignedTasks.contains(task)) { + return state; + } if (!state.assignedTasks.contains(task)) { // if checkTaskPairs flag is on, skip this client if this task doesn't introduce a new task combination if (checkTaskPairs && !state.assignedTasks.isEmpty() && !hasNewTaskPair(task, state)) From 9dc5dc461ea4fcaf4f429cfb6c83b4fd6284921f Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Thu, 30 Jun 2016 01:53:06 +0200 Subject: [PATCH 192/267] hotfix: check join window boundaries --- .../kafka/streams/kstream/JoinWindows.java | 21 +++++++----- .../streams/kstream/JoinWindowsTest.java | 32 ++++++++++++------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java index 53ddf3ec4628..936bcd28cee7 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java @@ -42,7 +42,8 @@ * A join is symmetric in the sense, that a join specification on the first stream returns the same result record as * a join specification on the second stream with flipped before and after values. *

            - * Both values (before and after) must not be negative and not zero at the same time. + * Both values (before and after) must not result in an "inverse" window, + * i.e., lower-interval-bound must not be larger than upper-interval.bound. */ public class JoinWindows extends Windows { @@ -54,14 +55,17 @@ public class JoinWindows extends Windows { private JoinWindows(String name, long before, long after) { super(name); - if (before < 0) { - throw new IllegalArgumentException("window size must be > 0 (you provided before as " + before + ")"); + if (before < 0) { // shift lower bound to right + if (after < -before) { + throw new IllegalArgumentException("Upper interval bound smaller than lower interval bound." + + " must be at least " + (-before)); + } } - if (after < 0) { - throw new IllegalArgumentException("window size must be > 0 (you provided after as " + after + ")"); - } - if (before == 0 && after == 0) { - throw new IllegalArgumentException("window size must be > 0 (you provided 0)"); + if (after < 0) { // shift upper bound to left + if (before < -after) { + throw new IllegalArgumentException("Lower interval bound greater than upper interval bound." + + " must be at least " + (-after)); + } } this.after = after; @@ -70,6 +74,7 @@ private JoinWindows(String name, long before, long after) { /** * Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference}. + * ({@code timeDifference} must not be negative) * * @param timeDifference join window interval */ diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java index d8fa7b49c7c1..d80342a7e94c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java @@ -29,7 +29,7 @@ public class JoinWindowsTest { private static String anyName = "window"; private static long anySize = 123L; - private static long anyOtherSize = 456L; + private static long anyOtherSize = 456L; // should be larger than anySize @Test public void shouldHaveSaneEqualsAndHashCode() { @@ -66,6 +66,21 @@ public void shouldHaveSaneEqualsAndHashCode() { assertNotEquals("must be false when window sizes are different", differentWindowSize3, w1); } + @Test + public void validWindows() { + JoinWindows.of(anyName, anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] + .before(anySize) // [ -anySize ; anyOtherSize ] + .before(0) // [ 0 ; anyOtherSize ] + .before(-anySize) // [ anySize ; anyOtherSize ] + .before(-anyOtherSize); // [ anyOtherSize ; anyOtherSize ] + + JoinWindows.of(anyName, anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] + .after(anySize) // [ -anyOtherSize ; anySize ] + .after(0) // [ -anyOtherSize ; 0 ] + .after(-anySize) // [ -anyOtherSize ; -anySize ] + .after(-anyOtherSize); // [ -anyOtherSize ; -anyOtherSize ] + } + @Test(expected = IllegalArgumentException.class) public void nameMustNotBeEmpty() { JoinWindows.of("", anySize); @@ -77,23 +92,18 @@ public void nameMustNotBeNull() { } @Test(expected = IllegalArgumentException.class) - public void windowSizeMustNotBeNegative() { + public void timeDifferenceMustNotBeNegative() { JoinWindows.of(anyName, -1); } @Test(expected = IllegalArgumentException.class) - public void beforeMustNotBeNegative() { - JoinWindows.of(anyName, anySize).before(-1); - } - - @Test(expected = IllegalArgumentException.class) - public void afterSizeMustNotBeNegative() { - 
JoinWindows.of(anyName, anySize).after(-1); + public void afterBelowLower() { + JoinWindows.of(anyName, anySize).after(-anySize-1); } @Test(expected = IllegalArgumentException.class) - public void windowSizeMustNotBeZero() { - JoinWindows.of(anyName, 0); + public void beforeOverUpper() { + JoinWindows.of(anyName, anySize).before(-anySize-1); } } \ No newline at end of file From 6ba0aaaedfe39069ecd8826b73c5a530a529faeb Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Thu, 30 Jun 2016 14:59:11 +0200 Subject: [PATCH 193/267] fixed checkstyle simplified boundary check --- .../apache/kafka/streams/kstream/JoinWindows.java | 13 ++----------- .../kafka/streams/kstream/JoinWindowsTest.java | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java index 936bcd28cee7..309a9e6a8a93 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java @@ -55,17 +55,8 @@ public class JoinWindows extends Windows { private JoinWindows(String name, long before, long after) { super(name); - if (before < 0) { // shift lower bound to right - if (after < -before) { - throw new IllegalArgumentException("Upper interval bound smaller than lower interval bound." - + " must be at least " + (-before)); - } - } - if (after < 0) { // shift upper bound to left - if (before < -after) { - throw new IllegalArgumentException("Lower interval bound greater than upper interval bound." - + " must be at least " + (-after)); - } + if (before + after < 0) { + throw new IllegalArgumentException("Window interval (ie, before+after) must not be negative"); } this.after = after; diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java index d80342a7e94c..20efd4580c19 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java @@ -98,12 +98,12 @@ public void timeDifferenceMustNotBeNegative() { @Test(expected = IllegalArgumentException.class) public void afterBelowLower() { - JoinWindows.of(anyName, anySize).after(-anySize-1); + JoinWindows.of(anyName, anySize).after(-anySize - 1); } @Test(expected = IllegalArgumentException.class) public void beforeOverUpper() { - JoinWindows.of(anyName, anySize).before(-anySize-1); + JoinWindows.of(anyName, anySize).before(-anySize - 1); } } \ No newline at end of file From 4091a13c5a3ef4805590357ab872333798a1bbe8 Mon Sep 17 00:00:00 2001 From: "Matthias J. 
Sax" Date: Thu, 30 Jun 2016 15:07:00 +0200 Subject: [PATCH 194/267] revertd API breaking changes --- .../kafka/streams/kstream/JoinWindows.java | 16 +++++-- .../streams/kstream/JoinWindowsTest.java | 48 ++++++++++--------- .../kstream/internals/KStreamImplTest.java | 4 +- .../internals/KStreamKStreamJoinTest.java | 6 +-- .../internals/KStreamKStreamLeftJoinTest.java | 6 ++- 5 files changed, 46 insertions(+), 34 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java index 309a9e6a8a93..50c04534596a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/JoinWindows.java @@ -63,14 +63,22 @@ private JoinWindows(String name, long before, long after) { this.before = before; } + /** + * Specifies that records of the same key are joinable if their timestamps are equal. + * + * @param name The name of the window. Must not be null or empty. + */ + public static JoinWindows of(String name) { + return new JoinWindows(name, 0L, 0L); + } + /** * Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference}. - * ({@code timeDifference} must not be negative) * - * @param timeDifference join window interval + * @param timeDifference join window interval (must not be negative) */ - public static JoinWindows of(String name, long timeDifference) { - return new JoinWindows(name, timeDifference, timeDifference); + public JoinWindows with(long timeDifference) { + return new JoinWindows(this.name, timeDifference, timeDifference); } /** diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java index 20efd4580c19..4e14777b8461 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/JoinWindowsTest.java @@ -33,8 +33,8 @@ public class JoinWindowsTest { @Test public void shouldHaveSaneEqualsAndHashCode() { - JoinWindows w1 = JoinWindows.of("w1", anySize); - JoinWindows w2 = JoinWindows.of("w2", anySize); + JoinWindows w1 = JoinWindows.of("w1").with(anySize); + JoinWindows w2 = JoinWindows.of("w2").with(anySize); // Reflexive assertEquals(w1, w1); @@ -45,8 +45,8 @@ public void shouldHaveSaneEqualsAndHashCode() { assertEquals(w2, w1); assertEquals(w1.hashCode(), w2.hashCode()); - JoinWindows w3 = JoinWindows.of("w3", w2.after).before(anyOtherSize); - JoinWindows w4 = JoinWindows.of("w4", anyOtherSize).after(w2.after); + JoinWindows w3 = JoinWindows.of("w3").with(w2.after).before(anyOtherSize); + JoinWindows w4 = JoinWindows.of("w4").with(anyOtherSize).after(w2.after); assertEquals(w3, w4); assertEquals(w4, w3); assertEquals(w3.hashCode(), w4.hashCode()); @@ -56,54 +56,56 @@ public void shouldHaveSaneEqualsAndHashCode() { assertNotEquals("must be false for different window types", UnlimitedWindows.of("irrelevant"), w1); assertNotEquals("must be false for different types", new Object(), w1); - JoinWindows differentWindowSize = JoinWindows.of("differentWindowSize", w1.after + 1); + JoinWindows differentWindowSize = JoinWindows.of("differentWindowSize").with(w1.after + 1); assertNotEquals("must be false when window sizes are different", differentWindowSize, w1); - JoinWindows differentWindowSize2 = JoinWindows.of("differentWindowSize", w1.after).after(w1.after + 1); + 
JoinWindows differentWindowSize2 = JoinWindows.of("differentWindowSize").with(w1.after).after(w1.after + 1); assertNotEquals("must be false when window sizes are different", differentWindowSize2, w1); - JoinWindows differentWindowSize3 = JoinWindows.of("differentWindowSize", w1.after).before(w1.before + 1); + JoinWindows differentWindowSize3 = JoinWindows.of("differentWindowSize").with(w1.after).before(w1.before + 1); assertNotEquals("must be false when window sizes are different", differentWindowSize3, w1); } @Test public void validWindows() { - JoinWindows.of(anyName, anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] - .before(anySize) // [ -anySize ; anyOtherSize ] - .before(0) // [ 0 ; anyOtherSize ] - .before(-anySize) // [ anySize ; anyOtherSize ] - .before(-anyOtherSize); // [ anyOtherSize ; anyOtherSize ] - - JoinWindows.of(anyName, anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] - .after(anySize) // [ -anyOtherSize ; anySize ] - .after(0) // [ -anyOtherSize ; 0 ] - .after(-anySize) // [ -anyOtherSize ; -anySize ] - .after(-anyOtherSize); // [ -anyOtherSize ; -anyOtherSize ] + JoinWindows.of(anyName) + .with(anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] + .before(anySize) // [ -anySize ; anyOtherSize ] + .before(0) // [ 0 ; anyOtherSize ] + .before(-anySize) // [ anySize ; anyOtherSize ] + .before(-anyOtherSize); // [ anyOtherSize ; anyOtherSize ] + + JoinWindows.of(anyName) + .with(anyOtherSize) // [ -anyOtherSize ; anyOtherSize ] + .after(anySize) // [ -anyOtherSize ; anySize ] + .after(0) // [ -anyOtherSize ; 0 ] + .after(-anySize) // [ -anyOtherSize ; -anySize ] + .after(-anyOtherSize); // [ -anyOtherSize ; -anyOtherSize ] } @Test(expected = IllegalArgumentException.class) public void nameMustNotBeEmpty() { - JoinWindows.of("", anySize); + JoinWindows.of("").with(anySize); } @Test(expected = IllegalArgumentException.class) public void nameMustNotBeNull() { - JoinWindows.of(null, anySize); + JoinWindows.of(null).with(anySize); } @Test(expected = IllegalArgumentException.class) public void timeDifferenceMustNotBeNegative() { - JoinWindows.of(anyName, -1); + JoinWindows.of(anyName).with(-1); } @Test(expected = IllegalArgumentException.class) public void afterBelowLower() { - JoinWindows.of(anyName, anySize).after(-anySize - 1); + JoinWindows.of(anyName).with(anySize).after(-anySize - 1); } @Test(expected = IllegalArgumentException.class) public void beforeOverUpper() { - JoinWindows.of(anyName, anySize).before(-anySize - 1); + JoinWindows.of(anyName).with(anySize).before(-anySize - 1); } } \ No newline at end of file diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java index 6242702d7cb8..a40c8fb0e794 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java @@ -109,14 +109,14 @@ public boolean test(String key, Integer value) { public Integer apply(Integer value1, Integer value2) { return value1 + value2; } - }, JoinWindows.of("join-0", anyWindowSize), stringSerde, intSerde, intSerde); + }, JoinWindows.of("join-0").with(anyWindowSize), stringSerde, intSerde, intSerde); KStream stream5 = streams2[1].join(streams3[1], new ValueJoiner() { @Override public Integer apply(Integer value1, Integer value2) { return value1 + value2; } - }, JoinWindows.of("join-1", anyWindowSize), stringSerde, intSerde, intSerde); + }, 
JoinWindows.of("join-1").with(anyWindowSize), stringSerde, intSerde, intSerde); stream4.to("topic-5"); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java index aa7d117685a6..14629998a507 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java @@ -77,7 +77,7 @@ public void testJoin() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").with(100), intSerde, stringSerde, stringSerde); joined.process(processor); @@ -176,7 +176,7 @@ public void testOuterJoin() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.outerJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + joined = stream1.outerJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").with(100), intSerde, stringSerde, stringSerde); joined.process(processor); @@ -277,7 +277,7 @@ public void testWindowing() throws Exception { processor = new MockProcessorSupplier<>(); stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), + joined = stream1.join(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").with(100), intSerde, stringSerde, stringSerde); joined.process(processor); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java index 5b12a301bd84..a91dcb2f734e 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java @@ -79,7 +79,8 @@ public void testLeftJoin() throws Exception { stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), intSerde, stringSerde); + joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").with(100), + intSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); @@ -158,7 +159,8 @@ public void testWindowing() throws Exception { stream1 = builder.stream(intSerde, stringSerde, topic1); stream2 = builder.stream(intSerde, stringSerde, topic2); - joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test", 100), intSerde, stringSerde); + joined = stream1.leftJoin(stream2, MockValueJoiner.STRING_JOINER, JoinWindows.of("test").with(100), + intSerde, stringSerde); joined.process(processor); Collection> copartitionGroups = builder.copartitionGroups(); From 3ff68ef6c9da82bf4e69bb37ca2f1cd7403e59dd Mon Sep 17 00:00:00 2001 From: Yuto 
Kawamura Date: Thu, 30 Jun 2016 11:12:34 -0700 Subject: [PATCH 195/267] resolve conflicts --- .../processor/internals/RecordCollector.java | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java index eb731be59a18..fea616f3f048 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java @@ -49,18 +49,6 @@ public interface Supplier { private final Producer producer; private final Map offsets; - private final Callback callback = new Callback() { - @Override - public void onCompletion(RecordMetadata metadata, Exception exception) { - if (exception == null) { - TopicPartition tp = new TopicPartition(metadata.topic(), metadata.partition()); - offsets.put(tp, metadata.offset()); - } else { - log.error("Error sending record: " + metadata, exception); - } - } - }; - public RecordCollector(Producer producer) { this.producer = producer; @@ -81,7 +69,22 @@ public void send(ProducerRecord record, Serializer keySerializer if (partitions != null) partition = partitioner.partition(record.key(), record.value(), partitions.size()); } - this.producer.send(new ProducerRecord<>(record.topic(), partition, keyBytes, valBytes), callback); + + ProducerRecord serializedRecord = + new ProducerRecord<>(record.topic(), partition, record.timestamp(), keyBytes, valBytes); + final String topic = serializedRecord.topic(); + + this.producer.send(serializedRecord, new Callback() { + @Override + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception == null) { + TopicPartition tp = new TopicPartition(metadata.topic(), metadata.partition()); + offsets.put(tp, metadata.offset()); + } else { + log.error("Error sending record to topic {}", topic, exception); + } + } + }); } public void flush() { From 29c3c4ba05a7cd2085811ff7cd438d527f82c58d Mon Sep 17 00:00:00 2001 From: Alex Loddengaard Date: Thu, 30 Jun 2016 21:16:13 -0700 Subject: [PATCH 196/267] MINOR: bug fixes to ducktape services Here's a (mostly successful) run with these changes: http://testing.confluent.io/confluent-kafka-branch-builder-system-test-results/?prefix=2016-06-27--001.1467080884--alexlod--ducktape-fixes--ad85493/ At least one of the failed tests is failing in trunk, too: http://testing.confluent.io/confluent-kafka-branch-builder-system-test-results/?prefix=2016-06-28--001.1467090978--alexlod--ducktape-fixes--ad85493/ The contribution is my original work and I license the work to the project under the project's open source license. 
Author: Alex Loddengaard Reviewers: Geoff Anderson , Ismael Juma , Ewen Cheslack-Postava Closes #1566 from alexlod/ducktape-fixes (cherry picked from commit 7edaa3dd89d1a7300a12e101e23f66459a9cc33d) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/services/kafka/kafka.py | 13 +++--- .../services/kafka/templates/kafka.properties | 4 +- tests/kafkatest/services/security/minikdc.py | 2 +- .../services/security/security_config.py | 40 +++++++++++-------- tests/kafkatest/services/zookeeper.py | 3 +- .../core/zookeeper_security_upgrade_test.py | 2 +- 6 files changed, 36 insertions(+), 28 deletions(-) diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py index a843a127b74d..734eb5ca2350 100644 --- a/tests/kafkatest/services/kafka/kafka.py +++ b/tests/kafkatest/services/kafka/kafka.py @@ -38,8 +38,7 @@ class KafkaService(KafkaPathResolverMixin, JmxMixin, Service): PERSISTENT_ROOT = "/mnt" - STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "kafka.log") - STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "kafka.log") + STDOUT_STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "server-start-stdout-stderr.log") LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "kafka-log4j.properties") # Logs such as controller.log, server.log, etc all go here OPERATIONAL_LOG_DIR = os.path.join(PERSISTENT_ROOT, "kafka-operational-logs") @@ -52,6 +51,9 @@ class KafkaService(KafkaPathResolverMixin, JmxMixin, Service): SIMPLE_AUTHORIZER = "kafka.security.auth.SimpleAclAuthorizer" logs = { + "kafka_server_start_stdout_stderr": { + "path": STDOUT_STDERR_CAPTURE, + "collect_default": True}, "kafka_operational_logs_info": { "path": OPERATIONAL_LOG_INFO_DIR, "collect_default": True}, @@ -85,6 +87,7 @@ def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAI self.topics = topics self.minikdc = None self.authorizer_class_name = authorizer_class_name + self.zk_set_acl = False # # In a heavily loaded and not very fast machine, it is @@ -184,8 +187,8 @@ def start_cmd(self, node): cmd += "%s %s 1>> %s 2>> %s &" % \ (self.path.script("kafka-server-start.sh", node), KafkaService.CONFIG_FILE, - KafkaService.STDOUT_CAPTURE, - KafkaService.STDERR_CAPTURE) + KafkaService.STDOUT_STDERR_CAPTURE, + KafkaService.STDOUT_STDERR_CAPTURE) return cmd def start_node(self, node): @@ -199,7 +202,7 @@ def start_node(self, node): cmd = self.start_cmd(node) self.logger.debug("Attempting to start KafkaService on %s with command: %s" % (str(node.account), cmd)) - with node.account.monitor_log(KafkaService.STDOUT_CAPTURE) as monitor: + with node.account.monitor_log(KafkaService.STDOUT_STDERR_CAPTURE) as monitor: node.account.ssh(cmd) monitor.wait_until("Kafka Server.*started", timeout_sec=30, err_msg="Kafka server didn't finish startup") diff --git a/tests/kafkatest/services/kafka/templates/kafka.properties b/tests/kafkatest/services/kafka/templates/kafka.properties index 1f2371302c8a..4408144516a9 100644 --- a/tests/kafkatest/services/kafka/templates/kafka.properties +++ b/tests/kafkatest/services/kafka/templates/kafka.properties @@ -67,9 +67,7 @@ ssl.client.auth=required authorizer.class.name={{ authorizer_class_name }} {% endif %} -{% if zk_set_acl is defined %} -zookeeper.set.acl={{zk_set_acl}} -{% endif %} +zookeeper.set.acl={{"true" if zk_set_acl else "false"}} zookeeper.connection.timeout.ms={{ zk_connect_timeout }} zookeeper.session.timeout.ms={{ zk_session_timeout }} diff --git a/tests/kafkatest/services/security/minikdc.py b/tests/kafkatest/services/security/minikdc.py index 
0e7bb1bcb808..3189ddc0aa2f 100644 --- a/tests/kafkatest/services/security/minikdc.py +++ b/tests/kafkatest/services/security/minikdc.py @@ -65,7 +65,7 @@ def start_node(self, node): self.logger.info(props_file) kafka_principals = ' '.join(['kafka/' + kafka_node.account.hostname for kafka_node in self.kafka_nodes]) - principals = 'client ' + kafka_principals + self.extra_principals + principals = 'client ' + kafka_principals + ' ' + self.extra_principals self.logger.info("Starting MiniKdc with principals " + principals) core_libs_jar = self.path.jar(CORE_LIBS_JAR_NAME, TRUNK) diff --git a/tests/kafkatest/services/security/security_config.py b/tests/kafkatest/services/security/security_config.py index d7cc3c0a0174..59a0ed4f5289 100644 --- a/tests/kafkatest/services/security/security_config.py +++ b/tests/kafkatest/services/security/security_config.py @@ -115,27 +115,33 @@ def __init__(self, security_protocol=None, interbroker_security_protocol=None, def client_config(self, template_props=""): return SecurityConfig(self.security_protocol, client_sasl_mechanism=self.client_sasl_mechanism, template_props=template_props) + def setup_ssl(self, node): + node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False) + node.account.scp_to(SecurityConfig.ssl_stores['ssl.keystore.location'], SecurityConfig.KEYSTORE_PATH) + node.account.scp_to(SecurityConfig.ssl_stores['ssl.truststore.location'], SecurityConfig.TRUSTSTORE_PATH) + + def setup_sasl(self, node): + node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False) + jaas_conf_file = "jaas.conf" + java_version = node.account.ssh_capture("java -version") + if any('IBM' in line for line in java_version): + is_ibm_jdk = True + else: + is_ibm_jdk = False + jaas_conf = self.render(jaas_conf_file, node=node, is_ibm_jdk=is_ibm_jdk, + client_sasl_mechanism=self.client_sasl_mechanism, + enabled_sasl_mechanisms=self.enabled_sasl_mechanisms) + node.account.create_file(SecurityConfig.JAAS_CONF_PATH, jaas_conf) + if self.has_sasl_kerberos: + node.account.scp_to(MiniKdc.LOCAL_KEYTAB_FILE, SecurityConfig.KEYTAB_PATH) + node.account.scp_to(MiniKdc.LOCAL_KRB5CONF_FILE, SecurityConfig.KRB5CONF_PATH) + def setup_node(self, node): if self.has_ssl: - node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False) - node.account.scp_to(SecurityConfig.ssl_stores['ssl.keystore.location'], SecurityConfig.KEYSTORE_PATH) - node.account.scp_to(SecurityConfig.ssl_stores['ssl.truststore.location'], SecurityConfig.TRUSTSTORE_PATH) + self.setup_ssl(node) if self.has_sasl: - node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False) - jaas_conf_file = "jaas.conf" - java_version = node.account.ssh_capture("java -version") - if any('IBM' in line for line in java_version): - is_ibm_jdk = True - else: - is_ibm_jdk = False - jaas_conf = self.render(jaas_conf_file, node=node, is_ibm_jdk=is_ibm_jdk, - client_sasl_mechanism=self.client_sasl_mechanism, - enabled_sasl_mechanisms=self.enabled_sasl_mechanisms) - node.account.create_file(SecurityConfig.JAAS_CONF_PATH, jaas_conf) - if self.has_sasl_kerberos: - node.account.scp_to(MiniKdc.LOCAL_KEYTAB_FILE, SecurityConfig.KEYTAB_PATH) - node.account.scp_to(MiniKdc.LOCAL_KRB5CONF_FILE, SecurityConfig.KRB5CONF_PATH) + self.setup_sasl(node) def clean_node(self, node): if self.security_protocol != SecurityConfig.PLAINTEXT: diff --git a/tests/kafkatest/services/zookeeper.py b/tests/kafkatest/services/zookeeper.py index 07e2c0ca54bd..201988937e1f 100644 --- 
a/tests/kafkatest/services/zookeeper.py +++ b/tests/kafkatest/services/zookeeper.py @@ -72,7 +72,8 @@ def start_node(self, node): self.logger.info(config_file) node.account.create_file("/mnt/zookeeper.properties", config_file) - start_cmd = "export KAFKA_OPTS=\"%s\";" % self.kafka_opts + start_cmd = "export KAFKA_OPTS=\"%s\";" % (self.kafka_opts + ' ' + self.security_system_properties) \ + if self.security_config.zk_sasl else self.kafka_opts start_cmd += "%s " % self.path.script("zookeeper-server-start.sh", node) start_cmd += "/mnt/zookeeper.properties 1>> %(path)s 2>> %(path)s &" % self.logs["zk_log"] node.account.ssh(start_cmd) diff --git a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py index 7e722f7d31da..582eb68fe33c 100644 --- a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py +++ b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py @@ -87,7 +87,7 @@ def run_zk_migration(self): self.zk.zookeeper_migration(node, "secure") # restart broker with zookeeper.set.acl=true and acls - self.kafka.zk_set_acl = "true" + self.kafka.zk_set_acl = True for node in self.kafka.nodes: self.kafka.stop_node(node) self.kafka.start_node(node) From 7b01f848a03d7f93a89012bf58fe40fb96c42247 Mon Sep 17 00:00:00 2001 From: Philippe Derome Date: Fri, 1 Jul 2016 16:47:10 -0700 Subject: [PATCH 197/267] KAFKA-3902: Optimize KTable.filter in Streams DSL to avoid forwarding if both old and new values are null The contribution is my original work and that I license the work to the project under the project's open source license. Contributors: Guozhang Wang, Phil Derome guozhangwang Added checkEmpty to validate processor does nothing and added a inhibit check for filter to fix issue. Author: Philippe Derome Author: Phil Derome Author: Damian Guy Reviewers: Guozhang Wang Closes #1556 from phderome/DEROME-3902 (cherry picked from commit 2098529b44cad78731e478aa8af2b49e9c94db7d) Signed-off-by: Guozhang Wang --- .../kstream/internals/KTableFilter.java | 3 ++ .../kstream/internals/KTableFilterTest.java | 39 ++++++++++++++++++- .../kafka/test/MockProcessorSupplier.java | 6 +++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java index 080fd9d52a11..ff0c67f37b86 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableFilter.java @@ -77,6 +77,9 @@ public void process(K key, Change change) { V newValue = computeValue(key, change.newValue); V oldValue = sendOldValues ? computeValue(key, change.oldValue) : null; + if (sendOldValues && oldValue == null && newValue == null) + return; // unnecessary to forward here. 
+ context().forward(key, new Change<>(newValue, oldValue)); } diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java index a3af133d3b0f..e328baeb5342 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableFilterTest.java @@ -24,6 +24,8 @@ import org.apache.kafka.streams.kstream.Predicate; import org.apache.kafka.test.KStreamTestDriver; import org.apache.kafka.test.MockProcessorSupplier; +import org.apache.kafka.test.MockReducer; +import org.apache.kafka.test.MockKeyValueMapper; import org.apache.kafka.test.TestUtils; import org.junit.After; import org.junit.Before; @@ -254,7 +256,7 @@ public boolean test(String key, Integer value) { driver.process(topic1, "C", 1); proc1.checkAndClearProcessResult("A:(1<-null)", "B:(1<-null)", "C:(1<-null)"); - proc2.checkAndClearProcessResult("A:(null<-null)", "B:(null<-null)", "C:(null<-null)"); + proc2.checkEmptyAndClearProcessResult(); driver.process(topic1, "A", 2); driver.process(topic1, "B", 2); @@ -271,7 +273,40 @@ public boolean test(String key, Integer value) { driver.process(topic1, "B", null); proc1.checkAndClearProcessResult("A:(null<-3)", "B:(null<-2)"); - proc2.checkAndClearProcessResult("A:(null<-null)", "B:(null<-2)"); + proc2.checkAndClearProcessResult("B:(null<-2)"); } + @Test + public void testSkipNullOnMaterialization() throws IOException { + // Do not explicitly set enableSendingOldValues. Let a further downstream stateful operator trigger it instead. + KStreamBuilder builder = new KStreamBuilder(); + + String topic1 = "topic1"; + + KTableImpl table1 = + (KTableImpl) builder.table(stringSerde, stringSerde, topic1); + KTableImpl table2 = (KTableImpl) table1.filter( + new Predicate() { + @Override + public boolean test(String key, String value) { + return value.equalsIgnoreCase("accept"); + } + }).groupBy(MockKeyValueMapper.NoOpKeyValueMapper()) + .reduce(MockReducer.STRING_ADDER, MockReducer.STRING_REMOVER, "mock-result"); + + MockProcessorSupplier proc1 = new MockProcessorSupplier<>(); + MockProcessorSupplier proc2 = new MockProcessorSupplier<>(); + + builder.addProcessor("proc1", proc1, table1.name); + builder.addProcessor("proc2", proc2, table2.name); + + driver = new KStreamTestDriver(builder, stateDir, stringSerde, stringSerde); + + driver.process(topic1, "A", "reject"); + driver.process(topic1, "B", "reject"); + driver.process(topic1, "C", "reject"); + + proc1.checkAndClearProcessResult("A:(reject<-null)", "B:(reject<-null)", "C:(reject<-null)"); + proc2.checkEmptyAndClearProcessResult(); + } } diff --git a/streams/src/test/java/org/apache/kafka/test/MockProcessorSupplier.java b/streams/src/test/java/org/apache/kafka/test/MockProcessorSupplier.java index 9cf0eb264d83..67d25f5e563e 100644 --- a/streams/src/test/java/org/apache/kafka/test/MockProcessorSupplier.java +++ b/streams/src/test/java/org/apache/kafka/test/MockProcessorSupplier.java @@ -82,6 +82,12 @@ public void checkAndClearProcessResult(String... expected) { processed.clear(); } + public void checkEmptyAndClearProcessResult() { + + assertEquals("the number of outputs:", 0, processed.size()); + processed.clear(); + } + public void checkAndClearPunctuateResult(long... 
expected) { assertEquals("the number of outputs:", expected.length, punctuated.size()); From cdf019a8249f95bb0080202b6f806a292a9dc8ef Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Mon, 4 Jul 2016 14:09:30 -0700 Subject: [PATCH 198/267] KAFKA-3854: Fix issues with new consumer's subsequent regex (pattern) subscriptions This patch fixes two issues: 1. Subsequent regex subscriptions fail with the new consumer. 2. Subsequent regex subscriptions would not immediately refresh metadata to change the subscription of the new consumer and trigger a rebalance. The final note on the JIRA stating that a later created topic that matches a consumer's subscription pattern would not be assigned to the consumer upon creation seems to be as designed. A repeat `subscribe()` to the same pattern or some wait time until the next automatic metadata refresh would handle that case. An integration test was also added to verify these issues are fixed with this PR. Author: Vahid Hashemian Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1572 from vahidhashemian/KAFKA-3854 --- .../kafka/clients/consumer/KafkaConsumer.java | 1 + .../consumer/internals/SubscriptionState.java | 41 ++++++--- .../kafka/api/PlaintextConsumerTest.scala | 83 ++++++++++++++++++- 3 files changed, 113 insertions(+), 12 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index 2784644edcf6..9ee6c95f4abc 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -853,6 +853,7 @@ public void subscribe(Pattern pattern, ConsumerRebalanceListener listener) { log.debug("Subscribed to pattern: {}", pattern); this.subscriptions.subscribe(pattern, listener); this.metadata.needMetadataForAllTopics(true); + this.metadata.requestUpdate(); } finally { release(); } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java index ec351153f054..2412d365a15b 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java @@ -3,9 +3,9 @@ * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the * License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. 
@@ -47,6 +47,13 @@ */ public class SubscriptionState { + private enum SubscriptionType { + NONE, AUTO_TOPICS, AUTO_PATTERN, USER_ASSIGNED + }; + + /* the type of subscription */ + private SubscriptionType subscriptionType; + /* the pattern user has requested */ private Pattern subscribedPattern; @@ -77,6 +84,19 @@ public class SubscriptionState { private static final String SUBSCRIPTION_EXCEPTION_MESSAGE = "Subscription to topics, partitions and pattern are mutually exclusive"; + /** + * This method sets the subscription type if it is not already set (i.e. when it is NONE), + * or verifies that the subscription type is equal to the give type when it is set (i.e. + * when it is not NONE) + * @param type The given subscription type + */ + private void setSubscriptionType(SubscriptionType type) { + if (this.subscriptionType == SubscriptionType.NONE) + this.subscriptionType = type; + else if (this.subscriptionType != type) + throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); + } + public SubscriptionState(OffsetResetStrategy defaultResetStrategy) { this.defaultResetStrategy = defaultResetStrategy; this.subscription = new HashSet<>(); @@ -86,14 +106,14 @@ public SubscriptionState(OffsetResetStrategy defaultResetStrategy) { this.needsPartitionAssignment = false; this.needsFetchCommittedOffsets = true; // initialize to true for the consumers to fetch offset upon starting up this.subscribedPattern = null; + this.subscriptionType = SubscriptionType.NONE; } public void subscribe(Collection topics, ConsumerRebalanceListener listener) { if (listener == null) throw new IllegalArgumentException("RebalanceListener cannot be null"); - if (!this.userAssignment.isEmpty() || this.subscribedPattern != null) - throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); + setSubscriptionType(SubscriptionType.AUTO_TOPICS); this.listener = listener; @@ -122,7 +142,7 @@ public void changeSubscription(Collection topicsToSubscribe) { * @param topics The topics to add to the group subscription */ public void groupSubscribe(Collection topics) { - if (!this.userAssignment.isEmpty()) + if (this.subscriptionType == SubscriptionType.USER_ASSIGNED) throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); this.groupSubscription.addAll(topics); } @@ -138,8 +158,7 @@ public void needReassignment() { * whose input partitions are provided from the subscribed topics. 
*/ public void assignFromUser(Collection partitions) { - if (!this.subscription.isEmpty() || this.subscribedPattern != null) - throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); + setSubscriptionType(SubscriptionType.USER_ASSIGNED); this.userAssignment.clear(); this.userAssignment.addAll(partitions); @@ -171,15 +190,14 @@ public void subscribe(Pattern pattern, ConsumerRebalanceListener listener) { if (listener == null) throw new IllegalArgumentException("RebalanceListener cannot be null"); - if (!this.subscription.isEmpty() || !this.userAssignment.isEmpty()) - throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); + setSubscriptionType(SubscriptionType.AUTO_PATTERN); this.listener = listener; this.subscribedPattern = pattern; } public boolean hasPatternSubscription() { - return subscribedPattern != null; + return this.subscriptionType == SubscriptionType.AUTO_PATTERN; } public void unsubscribe() { @@ -188,6 +206,7 @@ public void unsubscribe() { this.assignment.clear(); this.needsPartitionAssignment = true; this.subscribedPattern = null; + this.subscriptionType = SubscriptionType.NONE; } @@ -270,7 +289,7 @@ public Set fetchablePartitions() { } public boolean partitionsAutoAssigned() { - return !this.subscription.isEmpty(); + return this.subscriptionType == SubscriptionType.AUTO_TOPICS || this.subscriptionType == SubscriptionType.AUTO_PATTERN; } public void position(TopicPartition tp, long offset) { diff --git a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala index b22ccde9ed31..7db125ab4e3e 100644 --- a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala +++ b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala @@ -127,6 +127,15 @@ class PlaintextConsumerTest extends BaseConsumerTest { consumeAndVerifyRecords(consumer = this.consumers(0), numRecords = 1, startingOffset = 0) } + /** + * Verifies that pattern subscription performs as expected. + * The pattern matches the topics 'topic' and 'tblablac', but not 'tblablak' or 'tblab1'. + * It is expected that the consumer is subscribed to all partitions of 'topic' and + * 'tblablac' after the subscription when metadata is refreshed. + * When a new topic 'tsomec' is added afterwards, it is expected that upon the next + * metadata refresh the consumer becomes subscribed to this new topic and all partitions + * of that topic are assigned to it. + */ @Test def testPatternSubscription() { val numRecords = 10000 @@ -183,12 +192,84 @@ class PlaintextConsumerTest extends BaseConsumerTest { assertEquals(0, this.consumers(0).assignment().size) } + /** + * Verifies that a second call to pattern subscription succeeds and performs as expected. + * The initial subscription is to a pattern that matches two topics 'topic' and 'foo'. + * The second subscription is to a pattern that matches 'foo' and a new topic 'bar'. + * It is expected that the consumer is subscribed to all partitions of 'topic' and 'foo' after + * the first subscription, and to all partitions of 'foo' and 'bar' after the second. + * The metadata refresh interval is intentionally increased to a large enough value to guarantee + * that it is the subscription call that triggers a metadata refresh, and not the timeout. 
+ */ + @Test + def testSubsequentPatternSubscription() { + this.consumerConfig.setProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG, "30000") + val consumer0 = new KafkaConsumer(this.consumerConfig, new ByteArrayDeserializer(), new ByteArrayDeserializer()) + consumers += consumer0 + + val numRecords = 10000 + sendRecords(numRecords) + + // the first topic ('topic') matches first subscription pattern only + + val fooTopic = "foo" // matches both subscription patterns + TestUtils.createTopic(this.zkUtils, fooTopic, 1, serverCount, this.servers) + sendRecords(1000, new TopicPartition(fooTopic, 0)) + + assertEquals(0, consumer0.assignment().size) + + val pattern1 = Pattern.compile(".*o.*") // only 'topic' and 'foo' match this + consumer0.subscribe(pattern1, new TestConsumerReassignmentListener) + consumer0.poll(50) + + var subscriptions = Set( + new TopicPartition(topic, 0), + new TopicPartition(topic, 1), + new TopicPartition(fooTopic, 0)) + + TestUtils.waitUntilTrue(() => { + consumer0.poll(50) + consumer0.assignment() == subscriptions.asJava + }, s"Expected partitions ${subscriptions.asJava} but actually got ${consumer0.assignment()}") + + val barTopic = "bar" // matches the next subscription pattern + TestUtils.createTopic(this.zkUtils, barTopic, 1, serverCount, this.servers) + sendRecords(1000, new TopicPartition(barTopic, 0)) + + val pattern2 = Pattern.compile("...") // only 'foo' and 'bar' match this + consumer0.subscribe(pattern2, new TestConsumerReassignmentListener) + consumer0.poll(50) + + subscriptions --= Set( + new TopicPartition(topic, 0), + new TopicPartition(topic, 1)) + + subscriptions ++= Set( + new TopicPartition(barTopic, 0)) + + TestUtils.waitUntilTrue(() => { + consumer0.poll(50) + consumer0.assignment() == subscriptions.asJava + }, s"Expected partitions ${subscriptions.asJava} but actually got ${consumer0.assignment()}") + + consumer0.unsubscribe() + assertEquals(0, consumer0.assignment().size) + } + + /** + * Verifies that pattern unsubscription performs as expected. + * The pattern matches the topics 'topic' and 'tblablac'. + * It is expected that the consumer is subscribed to all partitions of 'topic' and + * 'tblablac' after the subscription when metadata is refreshed. + * When consumer unsubscribes from all its subscriptions, it is expected that its + * assignments are cleared right away. + */ @Test def testPatternUnsubscription() { val numRecords = 10000 sendRecords(numRecords) - val topic1 = "tblablac" // matches subscribed pattern + val topic1 = "tblablac" // matches the subscription pattern TestUtils.createTopic(this.zkUtils, topic1, 2, serverCount, this.servers) sendRecords(1000, new TopicPartition(topic1, 0)) sendRecords(1000, new TopicPartition(topic1, 1)) From 00d5becbadd397a595072f91fc687182fe41543a Mon Sep 17 00:00:00 2001 From: Alex Glikson Date: Mon, 4 Jul 2016 19:56:09 -0700 Subject: [PATCH 199/267] resolve conflicts --- docs/quickstart.html | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/docs/quickstart.html b/docs/quickstart.html index 73e5d6fd09ab..6c090d0d619d 100644 --- a/docs/quickstart.html +++ b/docs/quickstart.html @@ -304,7 +304,16 @@

            Step 8: Use

            -> cat /tmp/file-input.txt | ./bin/kafka-console-producer --broker-list localhost:9092 --topic streams-file-input
            +> bin/kafka-topics.sh --create \
            +            --zookeeper localhost:2181 \
            +            --replication-factor 1 \
            +            --partitions 1 \
            +            --topic streams-file-input
            +
            + + +
            +> cat file-input.txt | bin/kafka-console-producer.sh --broker-list localhost:9092 --topic streams-file-input
             

            @@ -312,7 +321,7 @@

            Step 8: Use

            -> ./bin/kafka-run-class org.apache.kafka.streams.examples.wordcount.WordCountDemo
            +> bin/kafka-run-class.sh org.apache.kafka.streams.examples.wordcount.WordCountDemo
             

            @@ -324,18 +333,18 @@

            Step 8: Use

            -> ./bin/kafka-console-consumer --zookeeper localhost:2181 \
            +> bin/kafka-console-consumer.sh --zookeeper localhost:2181 \
                         --topic streams-wordcount-output \
                         --from-beginning \
                         --formatter kafka.tools.DefaultMessageFormatter \
                         --property print.key=true \
            -            --property print.key=true \
            +            --property print.value=true \
                         --property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer \
                         --property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer
             

-with the following output data being printed to the console (You can stop the console consumer via Ctrl-C):
+with the following output data being printed to the console:

            @@ -350,11 +359,17 @@ 

Step 8: Use
join 1
kafka 3
summit 1
-^C

Here, the first column is the Kafka message key, and the second column is the message value, both in java.lang.String format. Note that the output is actually a continuous stream of updates, where each data record (i.e. each line in the original output above) is an updated count of a single word (the record key), such as "kafka". For multiple records with the same key, each later record is an update of the previous one. -
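As an aside on those update semantics: the sketch below is a hypothetical Java consumer (not part of this patch or of the quickstart itself) that reads streams-wordcount-output with the same String/Long deserializers used by the console-consumer command above and keeps only the latest count per word, so later records replace earlier ones. The bootstrap address, group id, and bounded poll loop are illustrative assumptions.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class LatestWordCounts {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker address
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-reader");        // hypothetical group id
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        KafkaConsumer<String, Long> consumer =
            new KafkaConsumer<>(props, new StringDeserializer(), new LongDeserializer());
        consumer.subscribe(Collections.singletonList("streams-wordcount-output"));

        // Each record is an updated count for its word key, so keeping only the
        // last value seen per key yields the current counts.
        Map<String, Long> latestCounts = new HashMap<>();
        for (int i = 0; i < 20; i++) {                    // bounded loop just for illustration
            ConsumerRecords<String, Long> records = consumer.poll(500);
            for (ConsumerRecord<String, Long> record : records)
                latestCounts.put(record.key(), record.value());
        }
        System.out.println(latestCounts);
        consumer.close();
    }
}

This only illustrates the changelog interpretation; the console consumer output above shows the same records without the per-key de-duplication.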

            \ No newline at end of file + +

+Now you can write more input messages to the streams-file-input topic and observe additional messages added
+to the streams-wordcount-output topic, reflecting updated word counts (e.g., using the console producer and the
+console consumer, as described above).
+

            + +

            You can stop the console consumer via Ctrl-C.
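For orientation, the word-count pipeline exercised by the commands above has roughly the shape below in the Streams DSL of this release. This is a hedged sketch rather than the shipped WordCountDemo source: the application id and serde configuration are assumptions, and the DSL calls follow the 0.10.0-era API (e.g. countByKey), which later releases renamed.

import java.util.Arrays;
import java.util.Properties;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KStreamBuilder;
import org.apache.kafka.streams.kstream.KTable;

public class WordCountSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-sketch");  // hypothetical app id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker address
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        KStreamBuilder builder = new KStreamBuilder();
        KStream<String, String> lines = builder.stream("streams-file-input");

        // split each line into words, re-key by word, and count occurrences per word
        KTable<String, Long> counts = lines
            .flatMapValues(value -> Arrays.asList(value.toLowerCase().split(" ")))
            .map((key, word) -> new KeyValue<>(word, word))
            .countByKey("Counts");

        // write the continuously updated counts to the output topic read above
        counts.to(Serdes.String(), Serdes.Long(), "streams-wordcount-output");

        new KafkaStreams(builder, props).start();
    }
}

Because the count result is a KTable, the output topic carries a changelog of counts per word rather than a single final value, which is why the consumer output above shows repeated keys with increasing counts.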

            \ No newline at end of file From 9d1c2daa5c416f614f3c9fcdc1641cef265f8efa Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 5 Jul 2016 11:38:15 -0700 Subject: [PATCH 200/267] MINOR: fix generics in Windows.segments and Windows.until `Windows.segments(...)` and `Windows.until(...)` currently aren't returning the `Window` with its type param `W`. This causes the generic type to be lost and therefore methods using this can't infer the correct return types. Author: Damian Guy Reviewers: Matthias J. Sax, Guozhang Wang Closes #1587 from dguy/windows-generics (cherry picked from commit dca263b4e00b00983a4b9f74517b25b01492a4c6) Signed-off-by: Guozhang Wang --- .../main/java/org/apache/kafka/streams/kstream/Windows.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java index c64a80f2a7e5..164e584bb68f 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Windows.java @@ -54,7 +54,7 @@ public String name() { * * @return itself */ - public Windows until(long durationMs) { + public Windows until(long durationMs) { this.maintainDurationMs = durationMs; return this; @@ -66,7 +66,7 @@ public Windows until(long durationMs) { * * @return itself */ - protected Windows segments(int segments) { + protected Windows segments(int segments) { this.segments = segments; return this; From a462ebf2f8317f22a371b0c681a3600dab16458c Mon Sep 17 00:00:00 2001 From: Moritz Siuts Date: Wed, 6 Jul 2016 14:33:07 +0100 Subject: [PATCH 201/267] KAFKA-3802; log mtimes reset on broker restart / shutdown There seems to be a bug in the JDK that on some versions the mtime of the file is modified on FileChannel.truncate() even if the javadoc states `If the given size is greater than or equal to the file's current size then the file is not modified.`. This causes problems with log retention, as all the files then look like they contain recent data to Kafka. Therefore this is only done if the channel size is different to the target size. Author: Moritz Siuts Reviewers: Jun Rao , Ismael Juma Closes #1497 from msiuts/KAFKA-3802-log_mtimes_reset_on_broker_shutdown (cherry picked from commit 15e008783cf73dcaed851fe6cc587767031886e5) Signed-off-by: Ismael Juma --- .../main/scala/kafka/log/FileMessageSet.scala | 20 ++++-- .../unit/kafka/log/FileMessageSetTest.scala | 67 ++++++++++++++++++- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/kafka/log/FileMessageSet.scala b/core/src/main/scala/kafka/log/FileMessageSet.scala index a164b4b96730..d5aa5c5d043a 100755 --- a/core/src/main/scala/kafka/log/FileMessageSet.scala +++ b/core/src/main/scala/kafka/log/FileMessageSet.scala @@ -54,12 +54,12 @@ class FileMessageSet private[kafka](@volatile var file: File, if(isSlice) new AtomicInteger(end - start) // don't check the file size if this is just a slice view else - new AtomicInteger(math.min(channel.size().toInt, end) - start) + new AtomicInteger(math.min(channel.size.toInt, end) - start) /* if this is not a slice, update the file pointer to the end of the file */ if (!isSlice) /* set the file position to the last byte in the file */ - channel.position(math.min(channel.size().toInt, end)) + channel.position(math.min(channel.size.toInt, end)) /** * Create a file message set with no slicing. 
@@ -157,7 +157,7 @@ class FileMessageSet private[kafka](@volatile var file: File, */ def writeTo(destChannel: GatheringByteChannel, writePosition: Long, size: Int): Int = { // Ensure that the underlying size has not changed. - val newSize = math.min(channel.size().toInt, end) - start + val newSize = math.min(channel.size.toInt, end) - start if (newSize < _size.get()) { throw new KafkaException("Size of FileMessageSet %s has been truncated during write: old size %d, new size %d" .format(file.getAbsolutePath, _size.get(), newSize)) @@ -333,7 +333,11 @@ class FileMessageSet private[kafka](@volatile var file: File, /** * Truncate this file message set to the given size in bytes. Note that this API does no checking that the * given size falls on a valid message boundary. - * @param targetSize The size to truncate to. + * In some versions of the JDK truncating to the same size as the file message set will cause an + * update of the files mtime, so truncate is only performed if the targetSize is smaller than the + * size of the underlying FileChannel. + * It is expected that no other threads will do writes to the log when this function is called. + * @param targetSize The size to truncate to. Must be between 0 and sizeInBytes. * @return The number of bytes truncated off */ def truncateTo(targetSize: Int): Int = { @@ -341,9 +345,11 @@ class FileMessageSet private[kafka](@volatile var file: File, if(targetSize > originalSize || targetSize < 0) throw new KafkaException("Attempt to truncate log segment to " + targetSize + " bytes failed, " + " size of this log segment is " + originalSize + " bytes.") - channel.truncate(targetSize) - channel.position(targetSize) - _size.set(targetSize) + if (targetSize < channel.size.toInt) { + channel.truncate(targetSize) + channel.position(targetSize) + _size.set(targetSize) + } originalSize - targetSize } diff --git a/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala b/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala index 534443ce3203..8d0a9871ac40 100644 --- a/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala +++ b/core/src/test/scala/unit/kafka/log/FileMessageSetTest.scala @@ -19,12 +19,14 @@ package kafka.log import java.io._ import java.nio._ -import java.util.concurrent.atomic._ +import java.nio.channels._ import kafka.common.LongRef import org.junit.Assert._ import kafka.utils.TestUtils._ import kafka.message._ +import kafka.common.KafkaException +import org.easymock.EasyMock import org.junit.Test class FileMessageSetTest extends BaseMessageSetTestCases { @@ -152,6 +154,69 @@ class FileMessageSetTest extends BaseMessageSetTestCases { assertEquals(MessageSet.entrySize(message.message), messageSet.sizeInBytes) } + /** + * Test that truncateTo only calls truncate on the FileChannel if the size of the + * FileChannel is bigger than the target size. This is important because some JVMs + * change the mtime of the file, even if truncate should do nothing. + */ + @Test + def testTruncateNotCalledIfSizeIsSameAsTargetSize() { + val channelMock = EasyMock.createMock(classOf[FileChannel]) + + EasyMock.expect(channelMock.size).andReturn(42L).atLeastOnce() + EasyMock.expect(channelMock.position(42L)).andReturn(null) + EasyMock.replay(channelMock) + + val msgSet = new FileMessageSet(tempFile(), channelMock) + msgSet.truncateTo(42) + + EasyMock.verify(channelMock) + } + + /** + * Expect a KafkaException if targetSize is bigger than the size of + * the FileMessageSet. 
+ */ + @Test + def testTruncateNotCalledIfSizeIsBiggerThanTargetSize() { + val channelMock = EasyMock.createMock(classOf[FileChannel]) + + EasyMock.expect(channelMock.size).andReturn(42L).atLeastOnce() + EasyMock.expect(channelMock.position(42L)).andReturn(null) + EasyMock.replay(channelMock) + + val msgSet = new FileMessageSet(tempFile(), channelMock) + + try { + msgSet.truncateTo(43) + fail("Should throw KafkaException") + } catch { + case e: KafkaException => // expected + } + + EasyMock.verify(channelMock) + } + + /** + * see #testTruncateNotCalledIfSizeIsSameAsTargetSize + */ + @Test + def testTruncateIfSizeIsDifferentToTargetSize() { + val channelMock = EasyMock.createMock(classOf[FileChannel]) + + EasyMock.expect(channelMock.size).andReturn(42L).atLeastOnce() + EasyMock.expect(channelMock.position(42L)).andReturn(null).once() + EasyMock.expect(channelMock.truncate(23L)).andReturn(null).once() + EasyMock.expect(channelMock.position(23L)).andReturn(null).once() + EasyMock.replay(channelMock) + + val msgSet = new FileMessageSet(tempFile(), channelMock) + msgSet.truncateTo(23) + + EasyMock.verify(channelMock) + } + + /** * Test the new FileMessageSet with pre allocate as true */ From ded91fbce5c58e5bcc5a178e65a8652f88e1493d Mon Sep 17 00:00:00 2001 From: Jeyhun Karimov Date: Wed, 6 Jul 2016 12:27:10 -0700 Subject: [PATCH 202/267] KAFKA-3836: KStreamReduce and KTableReduce should not pass nulls to Deserializers Minor changes to check null changes. Author: Jeyhun Karimov Reviewers: Guozhang Wang Closes #1591 from jeyhunkarimov/KAFKA-3836 (cherry picked from commit 7218648ae7ac1ce93f3ff25702b99c3af236fd0f) Signed-off-by: Guozhang Wang --- .../InMemoryKeyValueLoggedStore.java | 8 +++++-- .../streams/state/internals/RocksDBStore.java | 21 +++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java index efcdac72947d..084a85e20c60 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java @@ -65,8 +65,12 @@ public void init(ProcessorContext context, StateStore root) { @Override public void restore(byte[] key, byte[] value) { - // directly call inner functions so that the operation is not logged - inner.put(serdes.keyFrom(key), serdes.valueFrom(value)); + // directly call inner functions so that the operation is not logged. Check value for null, to avoid deserialization error. 
+ if (value == null) { + inner.put(serdes.keyFrom(key), null); + } else { + inner.put(serdes.keyFrom(key), serdes.valueFrom(value)); + } } }); diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index 8f3bab0f2578..d9b41cd99fc1 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -237,17 +237,26 @@ public boolean persistent() { public V get(K key) { if (cache != null) { RocksDBCacheEntry entry = cache.get(key); - if (entry == null) { - V value = serdes.valueFrom(getInternal(serdes.rawKey(key))); - cache.put(key, new RocksDBCacheEntry(value)); - - return value; + byte[] byteValue = getInternal(serdes.rawKey(key)); + //Check value for null, to avoid deserialization error + if (byteValue == null) { + return null; + } else { + V value = serdes.valueFrom(byteValue); + cache.put(key, new RocksDBCacheEntry(value)); + return value; + } } else { return entry.value; } } else { - return serdes.valueFrom(getInternal(serdes.rawKey(key))); + byte[] byteValue = getInternal(serdes.rawKey(key)); + if (byteValue == null) { + return null; + } else { + return serdes.valueFrom(byteValue); + } } } From 4c502ed83dc4ec3455875e6b13486719d689211e Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Fri, 8 Jul 2016 08:36:20 -0700 Subject: [PATCH 203/267] KAFKA-3887: KAFKA-3817 follow-up to avoid forwarding value if it is null in KTableRepartition Also handle Null value in SmokeTestUtil. Author: Guozhang Wang Reviewers: Damian Guy Closes #1597 from guozhangwang/KHotfix-check-null (cherry picked from commit 730bf9a37a08b2ca41dcda52d2c70e92e85980f7) Signed-off-by: Guozhang Wang --- .../streams/kstream/internals/KTableRepartitionMap.java | 5 +++-- .../org/apache/kafka/streams/smoketest/SmokeTestUtil.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java index bba185719aaa..ac7c00e1a73a 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java @@ -76,8 +76,9 @@ public void process(K key, Change change) { if (key == null) throw new StreamsException("Record key for the grouping KTable should not be null."); - KeyValue newPair = mapper.apply(key, change.newValue); - KeyValue oldPair = mapper.apply(key, change.oldValue); + // if the value is null, we do not need to forward its selected key-value further + KeyValue newPair = change.newValue == null ? null : mapper.apply(key, change.newValue); + KeyValue oldPair = change.oldValue == null ? 
null : mapper.apply(key, change.oldValue); // if the selected repartition key or value is null, skip if (newPair != null && newPair.key != null && newPair.value != null) { diff --git a/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestUtil.java b/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestUtil.java index b0d7a0b8013c..f1c237eeef0b 100644 --- a/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestUtil.java +++ b/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestUtil.java @@ -87,7 +87,7 @@ public KeyValueMapper> selector() { return new KeyValueMapper>() { @Override public KeyValue apply(String key, Long value) { - return new KeyValue<>(Long.toString(value), 1L); + return new KeyValue<>(value == null ? null : Long.toString(value), 1L); } }; } From bc805bf2a63e52132f2273bfb601a071515ccea9 Mon Sep 17 00:00:00 2001 From: Todd Palino Date: Mon, 11 Jul 2016 08:51:55 +0100 Subject: [PATCH 204/267] KAFKA-3725; Update documentation with regards to XFS I've updated the ops documentation with information on using the XFS filesystem, based on LinkedIn's testing (and subsequent switch from EXT4). I've also added some information to clarify the potential risk to the suggested EXT4 options (again, based on my experience with a multiple broker failure situation). Author: Todd Palino Reviewers: Sriharsha Chintalapani , Dana Powers , Ismael Juma Closes #1605 from toddpalino/trunk (cherry picked from commit e0eaa7f12e5e90b61b6793301053acad50cbd9ab) Signed-off-by: Ismael Juma --- docs/ops.html | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/ops.html b/docs/ops.html index 741312934554..5c7630576ad1 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -516,10 +516,22 @@

             Understanding Linux OS Flush Behavior
           1. It automatically uses all the free memory on the machine
-            Ext4 Notes
-            Ext4 may or may not be the best filesystem for Kafka. Filesystems like XFS supposedly handle locking during fsync better. We have only tried Ext4, though.
-            It is not necessary to tune these settings, however those wanting to optimize performance have a few knobs that will help:
+            Filesystem Selection
+            Kafka uses regular files on disk, and as such it has no hard dependency on a specific filesystem. The two filesystems which have the most usage, however, are EXT4 and XFS. Historically, EXT4 has had more usage, but recent improvements to the XFS filesystem have shown it to have better performance characteristics for Kafka's workload with no compromise in stability.
+            Comparison testing was performed on a cluster with significant message loads, using a variety of filesystem creation and mount options. The primary metric in Kafka that was monitored was the "Request Local Time", indicating the amount of time append operations were taking. XFS resulted in much better local times (160ms vs. 250ms+ for the best EXT4 configuration), as well as lower average wait times. The XFS performance also showed less variability in disk performance.
+            General Filesystem Notes
+            For any filesystem used for data directories, on Linux systems, the following options are recommended to be used at mount time:
+            • noatime: This option disables updating of a file's atime (last access time) attribute when the file is read. This can eliminate a significant number of filesystem writes, especially in the case of bootstrapping consumers. Kafka does not rely on the atime attributes at all, so it is safe to disable this.
+            XFS Notes
+            The XFS filesystem has a significant amount of auto-tuning in place, so it does not require any change in the default settings, either at filesystem creation time or at mount. The only tuning parameters worth considering are:
+            • largeio: This affects the preferred I/O size reported by the stat call. While this can allow for higher performance on larger disk writes, in practice it had minimal or no effect on performance.
+            • nobarrier: For underlying devices that have battery-backed cache, this option can provide a little more performance by disabling periodic write flushes. However, if the underlying device is well-behaved, it will report to the filesystem that it does not require flushes, and this option will have no effect.
+            EXT4 Notes
+            EXT4 is a serviceable choice of filesystem for the Kafka data directories, however getting the most performance out of it will require adjusting several mount options. In addition, these options are generally unsafe in a failure scenario, and will result in much more data loss and corruption. For a single broker failure, this is not much of a concern as the disk can be wiped and the replicas rebuilt from the cluster. In a multiple-failure scenario, such as a power outage, this can mean underlying filesystem (and therefore data) corruption that is not easily recoverable. The following options can be adjusted:
             • data=writeback: Ext4 defaults to data=ordered which puts a strong order on some writes. Kafka does not require this ordering as it does very paranoid data recovery on all unflushed log. This setting removes the ordering constraint and seems to significantly reduce latency.
            • Disabling journaling: Journaling is a tradeoff: it makes reboots faster after server crashes but it introduces a great deal of additional locking which adds variance to write performance. Those who don't care about reboot time and want to reduce a major source of write latency spikes can turn off journaling entirely. From 28c55d040b04249e09a22d4a1f9cd42f72eaa5c1 Mon Sep 17 00:00:00 2001 From: Vahid Hashemian Date: Tue, 12 Jul 2016 11:31:39 -0700 Subject: [PATCH 205/267] KAFKA-3931: Fix transient failures in pattern subscription tests Full credit for figuring out the cause of these failures goes to hachikuji. Author: Vahid Hashemian Reviewers: Guozhang Wang, Ismael Juma, Jason Gustafson Closes #1594 from vahidhashemian/KAFKA-3931 (cherry picked from commit 98dfc4b307c2e41a7fb0fff330048aa9ff78addd) Signed-off-by: Guozhang Wang --- .../consumer/internals/ConsumerCoordinator.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index a642512a2c59..2880efce5342 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -341,8 +341,18 @@ public Map fetchCommittedOffsets(Set Date: Tue, 12 Jul 2016 12:15:16 -0700 Subject: [PATCH 206/267] MINOR: Check null in SmokeTestDriver to avoid NPE Author: Guozhang Wang Reviewers: Damian Guy Closes #1611 from guozhangwang/Kminor-check-null-smokedriver (cherry picked from commit 3537063a52b97b6d46f6bd17e3f03e1b03630a3e) Signed-off-by: Guozhang Wang --- .../org/apache/kafka/streams/smoketest/SmokeTestDriver.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestDriver.java b/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestDriver.java index 205ba4b85f6b..b22d8a776701 100644 --- a/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestDriver.java +++ b/streams/src/test/java/org/apache/kafka/streams/smoketest/SmokeTestDriver.java @@ -379,7 +379,7 @@ private static boolean verifyDif(Map map, Map(storeName, + StateSerdes serdes = new StateSerdes<>(storeName, keySerde == null ? (Serde) context.keySerde() : keySerde, valueSerde == null ? (Serde) context.valueSerde() : valueSerde); this.changeLogger = new StoreChangeLogger<>(storeName, context, serdes); - context.register(root, true, new StateRestoreCallback() { - @Override - public void restore(byte[] key, byte[] value) { - - // directly call inner functions so that the operation is not logged. Check value for null, to avoid deserialization error. 
- if (value == null) { - inner.put(serdes.keyFrom(key), null); - } else { - inner.put(serdes.keyFrom(key), serdes.valueFrom(value)); - } - } - }); - - inner.init(context, root); - this.getter = new StoreChangeLogger.ValueGetter() { @Override public V get(K key) { return inner.get(key); } }; + + // if the inner store is an LRU cache, add the eviction listener to log removed record + if (inner instanceof MemoryLRUCache) { + ((MemoryLRUCache) inner).whenEldestRemoved(new MemoryNavigableLRUCache.EldestEntryRemovalListener() { + @Override + public void apply(K key, V value) { + removed(key); + } + }); + } } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java index a25153c97322..8b498a0e092e 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java @@ -21,10 +21,12 @@ import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.StateRestoreCallback; import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.processor.StateStoreSupplier; import org.apache.kafka.streams.state.KeyValueIterator; import org.apache.kafka.streams.state.KeyValueStore; +import org.apache.kafka.streams.state.StateSerdes; import java.util.Iterator; import java.util.List; @@ -67,7 +69,9 @@ public String name() { } public StateStore get() { - return new MeteredKeyValueStore<>(new MemoryStore<>(name, keySerde, valueSerde).enableLogging(), "in-memory-state", time); + MemoryStore store = new MemoryStore<>(name, keySerde, valueSerde); + + return new MeteredKeyValueStore<>(store.enableLogging(), "in-memory-state", time); } private static class MemoryStore implements KeyValueStore { @@ -76,6 +80,8 @@ private static class MemoryStore implements KeyValueStore { private final Serde valueSerde; private final NavigableMap map; + private StateSerdes serdes; + public MemoryStore(String name, Serde keySerde, Serde valueSerde) { this.name = name; this.keySerde = keySerde; @@ -98,7 +104,23 @@ public String name() { @Override @SuppressWarnings("unchecked") public void init(ProcessorContext context, StateStore root) { - // do nothing + // construct the serde + this.serdes = new StateSerdes<>(name, + keySerde == null ? (Serde) context.keySerde() : keySerde, + valueSerde == null ? (Serde) context.valueSerde() : valueSerde); + + // register the store + context.register(root, true, new StateRestoreCallback() { + @Override + public void restore(byte[] key, byte[] value) { + // check value for null, to avoid deserialization error. 
+ if (value == null) { + put(serdes.keyFrom(key), null); + } else { + put(serdes.keyFrom(key), serdes.valueFrom(value)); + } + } + }); } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java index 4a4fa5f96af1..20a73332c651 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java @@ -52,17 +52,10 @@ public String name() { return name; } - @SuppressWarnings("unchecked") public StateStore get() { - final MemoryNavigableLRUCache cache = new MemoryNavigableLRUCache(name, capacity); - final InMemoryKeyValueLoggedStore loggedCache = (InMemoryKeyValueLoggedStore) cache.enableLogging(keySerde, valueSerde); - final MeteredKeyValueStore store = new MeteredKeyValueStore<>(loggedCache, "in-memory-lru-state", time); - cache.whenEldestRemoved(new MemoryNavigableLRUCache.EldestEntryRemovalListener() { - @Override - public void apply(K key, V value) { - loggedCache.removed(key); - } - }); - return store; + MemoryNavigableLRUCache cache = new MemoryNavigableLRUCache<>(name, capacity, keySerde, valueSerde); + InMemoryKeyValueLoggedStore loggedCache = (InMemoryKeyValueLoggedStore) cache.enableLogging(); + + return new MeteredKeyValueStore<>(loggedCache, "in-memory-lru-state", time); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java index d410e02bad08..18993891c81d 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java @@ -19,9 +19,11 @@ import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.processor.ProcessorContext; +import org.apache.kafka.streams.processor.StateRestoreCallback; import org.apache.kafka.streams.processor.StateStore; import org.apache.kafka.streams.state.KeyValueIterator; import org.apache.kafka.streams.state.KeyValueStore; +import org.apache.kafka.streams.state.StateSerdes; import java.util.HashSet; import java.util.LinkedHashMap; @@ -48,16 +50,25 @@ public interface EldestEntryRemovalListener { void apply(K key, V value); } + private final Serde keySerde; + private final Serde valueSerde; + protected String name; protected Map map; protected Set keys; + private StateSerdes serdes; protected EldestEntryRemovalListener listener; // this is used for extended MemoryNavigableLRUCache only - public MemoryLRUCache() {} + public MemoryLRUCache(Serde keySerde, Serde valueSerde) { + this.keySerde = keySerde; + this.valueSerde = valueSerde; + } + + public MemoryLRUCache(String name, final int maxCacheSize, Serde keySerde, Serde valueSerde) { + this(keySerde, valueSerde); - public MemoryLRUCache(String name, final int maxCacheSize) { this.name = name; this.keys = new HashSet<>(); @@ -78,7 +89,7 @@ protected boolean removeEldestEntry(Map.Entry eldest) { }; } - public KeyValueStore enableLogging(Serde keySerde, Serde valueSerde) { + public KeyValueStore enableLogging() { return new InMemoryKeyValueLoggedStore<>(this.name, this, keySerde, valueSerde); } @@ -96,7 +107,23 @@ public String name() { @Override @SuppressWarnings("unchecked") public void 
init(ProcessorContext context, StateStore root) { - // do nothing + // construct the serde + this.serdes = new StateSerdes<>(name, + keySerde == null ? (Serde) context.keySerde() : keySerde, + valueSerde == null ? (Serde) context.valueSerde() : valueSerde); + + // register the store + context.register(root, true, new StateRestoreCallback() { + @Override + public void restore(byte[] key, byte[] value) { + // check value for null, to avoid deserialization error. + if (value == null) { + put(serdes.keyFrom(key), null); + } else { + put(serdes.keyFrom(key), serdes.valueFrom(value)); + } + } + }); } @Override diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java index 99bac93a8762..5eb4f495fae2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java @@ -16,6 +16,7 @@ */ package org.apache.kafka.streams.state.internals; +import org.apache.kafka.common.serialization.Serde; import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.state.KeyValueIterator; @@ -27,8 +28,8 @@ public class MemoryNavigableLRUCache extends MemoryLRUCache { - public MemoryNavigableLRUCache(String name, final int maxCacheSize) { - super(); + public MemoryNavigableLRUCache(String name, final int maxCacheSize, Serde keySerde, Serde valueSerde) { + super(keySerde, valueSerde); this.name = name; this.keys = new TreeSet<>(); @@ -57,15 +58,14 @@ public MemoryNavigableLRUCache whenEldestRemoved(EldestEntryRemovalListene return this; } - @SuppressWarnings("unchecked") @Override public KeyValueIterator range(K from, K to) { - return new MemoryNavigableLRUCache.CacheIterator(((NavigableSet) this.keys).subSet(from, true, to, false).iterator(), this.map); + return new MemoryNavigableLRUCache.CacheIterator<>(((NavigableSet) this.keys).subSet(from, true, to, false).iterator(), this.map); } @Override public KeyValueIterator all() { - return new MemoryNavigableLRUCache.CacheIterator(this.keys.iterator(), this.map); + return new MemoryNavigableLRUCache.CacheIterator<>(this.keys.iterator(), this.map); } private static class CacheIterator implements KeyValueIterator { diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java index af9873366b18..16111ad497d9 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java @@ -53,6 +53,8 @@ public String name() { } public StateStore get() { - return new MeteredKeyValueStore<>(new RocksDBStore<>(name, keySerde, valueSerde).enableLogging(), "rocksdb-state", time); + RocksDBStore store = new RocksDBStore<>(name, keySerde, valueSerde); + + return new MeteredKeyValueStore<>(store.enableLogging(), "rocksdb-state", time); } } diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java index d9b41cd99fc1..207f9faab4e0 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java +++ 
b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java @@ -173,7 +173,7 @@ public void init(ProcessorContext context, StateStore root) { this.changeLogger = this.loggingEnabled ? new StoreChangeLogger<>(name, context, WindowStoreUtils.INNER_SERDES) : null; if (this.cacheSize > 0) { - this.cache = new MemoryLRUCache(name, cacheSize) + this.cache = new MemoryLRUCache(name, cacheSize, null, null) .whenEldestRemoved(new MemoryLRUCache.EldestEntryRemovalListener() { @Override public void apply(K key, RocksDBCacheEntry entry) { diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java index 0407299562a8..3a1bd5983c55 100644 --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java @@ -59,7 +59,9 @@ public String name() { } public StateStore get() { - return new MeteredWindowStore<>(new RocksDBWindowStore<>(name, retentionPeriod, numSegments, retainDuplicates, keySerde, valueSerde).enableLogging(), "rocksdb-window", time); + RocksDBWindowStore store = new RocksDBWindowStore<>(name, retentionPeriod, numSegments, retainDuplicates, keySerde, valueSerde); + + return new MeteredWindowStore<>(store.enableLogging(), "rocksdb-window", time); } } diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java index e7fb9a4e41f7..9e15e1cdee9b 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java @@ -115,15 +115,11 @@ public void setup() { new PartitionInfo(storeChangelogTopicName1, 2, Node.noNode(), new Node[0], new Node[0]) )); - System.out.println("added " + storeChangelogTopicName1); - restoreStateConsumer.updatePartitions(storeChangelogTopicName2, Utils.mkList( new PartitionInfo(storeChangelogTopicName2, 0, Node.noNode(), new Node[0], new Node[0]), new PartitionInfo(storeChangelogTopicName2, 1, Node.noNode(), new Node[0], new Node[0]), new PartitionInfo(storeChangelogTopicName2, 2, Node.noNode(), new Node[0], new Node[0]) )); - - System.out.println("added " + storeChangelogTopicName2); } @Test From dfd5946d885d7c9ee796a013672dab16e3ccb7d3 Mon Sep 17 00:00:00 2001 From: Wan Wenli Date: Fri, 15 Jul 2016 10:08:48 -0700 Subject: [PATCH 208/267] KAFKA-3952: Consumer rebalance verifier never succeed due to type mismatch Author: Wan Wenli Reviewers: Guozhang Wang Closes #1612 from swwl1992/ticket-KAFKA-3952-fix-consumer-rebalance-verifier (cherry picked from commit 7a70c1a1087ca78d5e6db3908f6d6b6b19e8aaab) Signed-off-by: Guozhang Wang --- core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala b/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala index 307789648364..b9590f868bb7 100644 --- a/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala +++ b/core/src/main/scala/kafka/tools/VerifyConsumerRebalance.scala @@ -18,7 +18,6 @@ package kafka.tools import joptsimple.OptionParser -import org.I0Itec.zkclient.ZkClient import org.apache.kafka.common.security._ import 
kafka.utils.{Logging, ZKGroupTopicDirs, ZkUtils, CommandLineUtils} @@ -117,7 +116,7 @@ object VerifyConsumerRebalance extends Logging { // check if the owner is a valid consumer id consumerIdsForTopic match { case Some(consumerIds) => - if(!consumerIds.contains(partitionOwner)) { + if(!consumerIds.map(c => c.toString).contains(partitionOwner)) { error(("Owner %s for partition [%s,%d] is not a valid member of consumer " + "group %s").format(partitionOwner, topic, partition, group)) rebalanceSucceeded = false From b670790bc67de08bcd23806cd63c2454c3675664 Mon Sep 17 00:00:00 2001 From: Yuto Kawamura Date: Tue, 19 Jul 2016 10:10:25 +0100 Subject: [PATCH 209/267] MINOR: Doc of 'retries' config should mention max.in.flight.requests.per.connection to avoid confusion Author: Yuto Kawamura Reviewers: Ismael Juma Closes #1607 from kawamuray/MINOR-retries-doc (cherry picked from commit 7354cc1f487cea5c0422c8e80c3aed244321dbfd) Signed-off-by: Ismael Juma --- .../kafka/clients/producer/ProducerConfig.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java index 4d121b90948d..e07519c095b0 100644 --- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java +++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java @@ -159,14 +159,6 @@ public class ProducerConfig extends AbstractConfig { + "not all memory the producer uses is used for buffering. Some additional memory will be used for compression (if " + "compression is enabled) as well as for maintaining in-flight requests."; - /** retries */ - public static final String RETRIES_CONFIG = "retries"; - private static final String RETRIES_DOC = "Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error." - + " Note that this retry is no different than if the client resent the record upon receiving the " - + "error. Allowing retries will potentially change the ordering of records because if two records are " - + "sent to a single partition, and the first fails and is retried but the second succeeds, then the second record " - + "may appear first."; - /** retry.backoff.ms */ public static final String RETRY_BACKOFF_MS_CONFIG = CommonClientConfigs.RETRY_BACKOFF_MS_CONFIG; @@ -191,6 +183,14 @@ public class ProducerConfig extends AbstractConfig { + " Note that if this setting is set to be greater than 1 and there are failed sends, there is a risk of" + " message re-ordering due to retries (i.e., if retries are enabled)."; + /** retries */ + public static final String RETRIES_CONFIG = "retries"; + private static final String RETRIES_DOC = "Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error." + + " Note that this retry is no different than if the client resent the record upon receiving the error." 
+ + " Allowing retries without setting " + MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION + " to 1 will potentially change the" + + " ordering of records because if two batches are sent to a single partition, and the first fails and is retried but the second" + + " succeeds, then the records in the second batch may appear first."; + /** key.serializer */ public static final String KEY_SERIALIZER_CLASS_CONFIG = "key.serializer"; public static final String KEY_SERIALIZER_CLASS_DOC = "Serializer class for key that implements the Serializer interface."; From e0fa9c2e50d3b37701dac9d0c1a82d430fb47822 Mon Sep 17 00:00:00 2001 From: Samuel Taylor Date: Tue, 19 Jul 2016 10:20:13 +0100 Subject: [PATCH 210/267] MINOR: Fix typo in Operations section This contribution is my original work, and I license the work to the project under the project's open source license. Author: Samuel Taylor Reviewers: Ismael Juma Closes #1630 from ssaamm/trunk (cherry picked from commit 0744449eaa0f80827b60f2be3a5a2cc306360ce7) Signed-off-by: Ismael Juma --- docs/ops.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ops.html b/docs/ops.html index 5c7630576ad1..d7b87e116ea1 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -388,7 +388,7 @@

              Important Client Configurations ... configuration section.

              A Production Server Config

-Here is our server production server configuration:
+Here is our production server configuration:
               # Replication configurations
               num.replica.fetchers=4
              
              From 6ec48c1a87d898168a5ec605bd77e76ce1369f88 Mon Sep 17 00:00:00 2001
              From: Damian Guy 
              Date: Tue, 19 Jul 2016 08:44:48 -0700
              Subject: [PATCH 211/267] resolve conflicts
              
              ---
               .../streams/processor/TopologyBuilder.java    | 38 +++++++++----------
               1 file changed, 19 insertions(+), 19 deletions(-)
              
              diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
              index 542514952839..7161a80c7450 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java
              @@ -203,11 +203,11 @@ public TopologyBuilder() {}
                    * @param topics the name of one or more Kafka topics that this source is to consume
                    * @return this builder instance so methods can be chained together; never null
                    */
              -    public final TopologyBuilder addSource(String name, String... topics) {
              +    public synchronized final TopologyBuilder addSource(String name, String... topics) {
                       return addSource(name, (Deserializer) null, (Deserializer) null, topics);
                   }
               
              -    /**
              +   /**
                    * Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
                    * The source will use the specified key and value deserializers.
                    *
              @@ -223,7 +223,7 @@ public final TopologyBuilder addSource(String name, String... topics) {
                    * @return this builder instance so methods can be chained together; never null
                    * @throws TopologyBuilderException if processor is already added or if topics have already been registered by another source
                    */
              -    public final TopologyBuilder addSource(String name, Deserializer keyDeserializer, Deserializer valDeserializer, String... topics) {
              +    public synchronized final TopologyBuilder addSource(String name, Deserializer keyDeserializer, Deserializer valDeserializer, String... topics) {
                       if (nodeFactories.containsKey(name))
                           throw new TopologyBuilderException("Processor " + name + " is already added.");
               
              @@ -256,7 +256,7 @@ public final TopologyBuilder addSource(String name, Deserializer keyDeserializer
                    * @see #addSink(String, String, Serializer, Serializer, String...)
                    * @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
                    */
              -    public final TopologyBuilder addSink(String name, String topic, String... parentNames) {
              +    public synchronized final TopologyBuilder addSink(String name, String topic, String... parentNames) {
                       return addSink(name, topic, (Serializer) null, (Serializer) null, parentNames);
                   }
               
              @@ -283,7 +283,7 @@ public final TopologyBuilder addSink(String name, String topic, String... parent
                    * @see #addSink(String, String, Serializer, Serializer, String...)
                    * @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
                    */
              -    public final TopologyBuilder addSink(String name, String topic, StreamPartitioner partitioner, String... parentNames) {
              +    public synchronized final TopologyBuilder addSink(String name, String topic, StreamPartitioner partitioner, String... parentNames) {
                       return addSink(name, topic, (Serializer) null, (Serializer) null, partitioner, parentNames);
                   }
               
              @@ -306,7 +306,7 @@ public final TopologyBuilder addSink(String name, String topic, StreamPartitione
                    * @see #addSink(String, String, StreamPartitioner, String...)
                    * @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
                    */
              -    public final TopologyBuilder addSink(String name, String topic, Serializer keySerializer, Serializer valSerializer, String... parentNames) {
              +    public synchronized final TopologyBuilder addSink(String name, String topic, Serializer keySerializer, Serializer valSerializer, String... parentNames) {
                       return addSink(name, topic, keySerializer, valSerializer, (StreamPartitioner) null, parentNames);
                   }
               
              @@ -331,7 +331,7 @@ public final TopologyBuilder addSink(String name, String topic, Serializer keySe
                    * @see #addSink(String, String, Serializer, Serializer, String...)
                    * @throws TopologyBuilderException if parent processor is not added yet, or if this processor's name is equal to the parent's name
                    */
              -    public final  TopologyBuilder addSink(String name, String topic, Serializer keySerializer, Serializer valSerializer, StreamPartitioner partitioner, String... parentNames) {
              +    public synchronized final  TopologyBuilder addSink(String name, String topic, Serializer keySerializer, Serializer valSerializer, StreamPartitioner partitioner, String... parentNames) {
                       if (nodeFactories.containsKey(name))
                           throw new TopologyBuilderException("Processor " + name + " is already added.");
               
              @@ -363,7 +363,7 @@ public final  TopologyBuilder addSink(String name, String topic, Serialize
                    * @return this builder instance so methods can be chained together; never null
                    * @throws TopologyBuilderException if parent processor is not added yet, or if this processor's name is equal to the parent's name
                    */
              -    public final TopologyBuilder addProcessor(String name, ProcessorSupplier supplier, String... parentNames) {
              +    public synchronized final TopologyBuilder addProcessor(String name, ProcessorSupplier supplier, String... parentNames) {
                       if (nodeFactories.containsKey(name))
                           throw new TopologyBuilderException("Processor " + name + " is already added.");
               
              @@ -391,7 +391,7 @@ public final TopologyBuilder addProcessor(String name, ProcessorSupplier supplie
                    * @return this builder instance so methods can be chained together; never null
                    * @throws TopologyBuilderException if state store supplier is already added
                    */
              -    public final TopologyBuilder addStateStore(StateStoreSupplier supplier, boolean isInternal, String... processorNames) {
              +    public synchronized final TopologyBuilder addStateStore(StateStoreSupplier supplier, boolean isInternal, String... processorNames) {
                       if (stateFactories.containsKey(supplier.name())) {
                           throw new TopologyBuilderException("StateStore " + supplier.name() + " is already added.");
                       }
              @@ -413,7 +413,7 @@ public final TopologyBuilder addStateStore(StateStoreSupplier supplier, boolean
                    * @param supplier the supplier used to obtain this state store {@link StateStore} instance
                    * @return this builder instance so methods can be chained together; never null
                    */
              -    public final TopologyBuilder addStateStore(StateStoreSupplier supplier, String... processorNames) {
              +    public synchronized final TopologyBuilder addStateStore(StateStoreSupplier supplier, String... processorNames) {
                       return this.addStateStore(supplier, true, processorNames);
                   }
               
              @@ -424,7 +424,7 @@ public final TopologyBuilder addStateStore(StateStoreSupplier supplier, String..
                    * @param stateStoreNames the names of state stores that the processor uses
                    * @return this builder instance so methods can be chained together; never null
                    */
              -    public final TopologyBuilder connectProcessorAndStateStores(String processorName, String... stateStoreNames) {
              +    public synchronized final TopologyBuilder connectProcessorAndStateStores(String processorName, String... stateStoreNames) {
                       if (stateStoreNames != null) {
                           for (String stateStoreName : stateStoreNames) {
                               connectProcessorAndStateStore(processorName, stateStoreName);
              @@ -444,7 +444,7 @@ public final TopologyBuilder connectProcessorAndStateStores(String processorName
                    * @return this builder instance so methods can be chained together; never null
                    * @throws TopologyBuilderException if less than two processors are specified, or if one of the processors is not added yet
                    */
              -    public final TopologyBuilder connectProcessors(String... processorNames) {
              +    public synchronized final TopologyBuilder connectProcessors(String... processorNames) {
                       if (processorNames.length < 2)
                           throw new TopologyBuilderException("At least two processors need to participate in the connection.");
               
              @@ -467,7 +467,7 @@ public final TopologyBuilder connectProcessors(String... processorNames) {
                    * @param topicName the name of the topic
                    * @return this builder instance so methods can be chained together; never null
                    */
              -    public final TopologyBuilder addInternalTopic(String topicName) {
              +    public synchronized final TopologyBuilder addInternalTopic(String topicName) {
                       this.internalTopicNames.add(topicName);
               
                       return this;
              @@ -501,7 +501,7 @@ private void connectProcessorAndStateStore(String processorName, String stateSto
                    *
                    * @return groups of topic names
                    */
              -    public Map topicGroups(String applicationId) {
              +    public synchronized Map topicGroups(String applicationId) {
                       Map topicGroups = new HashMap<>();
               
                       if (nodeGroups == null)
              @@ -563,7 +563,7 @@ public Map topicGroups(String applicationId) {
                    *
                    * @return groups of node names
                    */
              -    public Map> nodeGroups() {
              +    public synchronized Map> nodeGroups() {
                       if (nodeGroups == null)
                           nodeGroups = makeNodeGroups();
               
              @@ -611,7 +611,7 @@ private Map> makeNodeGroups() {
                    * @param sourceNodes a set of source node names
                    * @return this builder instance so methods can be chained together; never null
                    */
              -    public final TopologyBuilder copartitionSources(Collection sourceNodes) {
              +    public synchronized final TopologyBuilder copartitionSources(Collection sourceNodes) {
                       copartitionSourceGroups.add(Collections.unmodifiableSet(new HashSet<>(sourceNodes)));
                       return this;
                   }
              @@ -622,7 +622,7 @@ public final TopologyBuilder copartitionSources(Collection sourceNodes)
                    *
                    * @return groups of topic names
                    */
              -    public Collection> copartitionGroups() {
              +    public synchronized Collection> copartitionGroups() {
                       List> list = new ArrayList<>(copartitionSourceGroups.size());
                       for (Set nodeNames : copartitionSourceGroups) {
                           Set copartitionGroup = new HashSet<>();
              @@ -642,7 +642,7 @@ public Collection> copartitionGroups() {
                    *
                    * @see org.apache.kafka.streams.KafkaStreams#KafkaStreams(TopologyBuilder, org.apache.kafka.streams.StreamsConfig)
                    */
              -    public ProcessorTopology build(String applicationId, Integer topicGroupId) {
              +    public synchronized ProcessorTopology build(String applicationId, Integer topicGroupId) {
                       Set nodeGroup;
                       if (topicGroupId != null) {
                           nodeGroup = nodeGroups().get(topicGroupId);
              @@ -702,7 +702,7 @@ private ProcessorTopology build(String applicationId, Set nodeGroup) {
                    * Get the names of topics that are to be consumed by the source nodes created by this builder.
                    * @return the unmodifiable set of topic names used by source nodes, which changes as new sources are added; never null
                    */
              -    public Set sourceTopics(String applicationId) {
              +    public synchronized Set sourceTopics(String applicationId) {
                       Set topics = new HashSet<>();
                       for (String topic : sourceTopicNames) {
                           if (internalTopicNames.contains(topic)) {
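
The methods made synchronized above are exactly the ones an application chains when wiring a topology; their javadocs promise the builder itself is returned so calls can be chained, and the added synchronization presumably guards that shared builder when it is later read from another thread. A minimal usage sketch of the chained style follows (the node and topic names are made up for illustration; this is not code from the patch):

    import org.apache.kafka.streams.processor.TopologyBuilder;

    public class TopologyWiringExample {

        // Wires a trivial pass-through topology: one source node feeding one sink node.
        public static TopologyBuilder wire() {
            TopologyBuilder builder = new TopologyBuilder();

            builder.addSource("source-node", "input-topic")              // consume the input topic
                   .addSink("sink-node", "output-topic", "source-node"); // forward records to the output topic

            return builder;
        }
    }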
              
              From e9a1d69edd8302b584e9b6c813807a1a5447f2be Mon Sep 17 00:00:00 2001
              From: Florian Hussonnois 
              Date: Tue, 19 Jul 2016 10:56:12 -0700
              Subject: [PATCH 212/267] KAFKA-3922: add constructor to AbstractStream class
              
              https://issues.apache.org/jira/browse/KAFKA-3922
              
              KAFKA-3922 add copy-constructor to AbstractStream class
This copy-constructor allows subclasses to access the protected variables.

It should be used to extend the KStreamImpl and KTableImpl classes when implementing a decorator pattern.
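
A minimal, hypothetical sketch of such a decorator (DecoratedStream is not part of this patch, and the sketch assumes AbstractStream declares no abstract members a subclass must implement); it only shows how the new copy-constructor hands the wrapped stream's topology, name and source nodes to a subclass:

    import org.apache.kafka.streams.kstream.internals.AbstractStream;

    // Hypothetical decorator: reuses the wrapped stream's internals instead of rebuilding them.
    public class DecoratedStream<K> extends AbstractStream<K> {

        private final AbstractStream<K> delegate;   // the stream being decorated

        public DecoratedStream(AbstractStream<K> delegate) {
            super(delegate);                        // copies topology, name and sourceNodes
            this.delegate = delegate;
        }
    }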
              
              Author: Florian Hussonnois 
              
              Reviewers: Guozhang Wang 
              
              Closes #1581 from fhussonnois/KAFKA-3922
              
              (cherry picked from commit b418922a3b48bef9d890abb471908ab2a8aeba18)
              Signed-off-by: Guozhang Wang 
              ---
               .../kafka/streams/kstream/internals/AbstractStream.java     | 6 ++++++
               1 file changed, 6 insertions(+)
              
              diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractStream.java
              index ebada9283962..3b957abcdb04 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractStream.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractStream.java
              @@ -30,6 +30,12 @@ public abstract class AbstractStream {
                   protected final String name;
                   protected final Set sourceNodes;
               
              +    public AbstractStream(AbstractStream stream) {
              +        this.topology = stream.topology;
              +        this.name = stream.name;
              +        this.sourceNodes = stream.sourceNodes;
              +    }
              +
                   public AbstractStream(KStreamBuilder topology, String name, Set sourceNodes) {
                       this.topology = topology;
                       this.name = name;
              
              From 0bb1d3ae53fc3f80fb096369951187630519eb04 Mon Sep 17 00:00:00 2001
              From: Mathieu Fenniak 
              Date: Tue, 19 Jul 2016 15:45:12 -0700
              Subject: [PATCH 213/267] MINOR: Remove slf4j-log4j from kafka-streams compile
               dependencies
              
              As kafka-streams is intended to be used by applications that may or may not wish to use log4j, kafka-streams itself should not have a dependency on a concrete log framework.  This change adapts the dependencies to be API-only for compile, and framework-specific for the test runtime only.
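
As an illustration of the intended split (the class name and log message below are made up), library code written against the slf4j API alone needs only slf4j-api at compile time; whichever binding the application, or here the test runtime, puts on the classpath does the actual output:

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class StreamsLoggingExample {

        // Only the slf4j facade is referenced; no log4j classes appear, so no concrete
        // logging framework leaks into the library's compile-time dependencies.
        private static final Logger log = LoggerFactory.getLogger(StreamsLoggingExample.class);

        public static void main(String[] args) {
            log.info("streams component started");  // routed to whatever binding is on the runtime classpath
        }
    }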
              
              I read through the [Contributing Code Guidelines](https://cwiki.apache.org/confluence/display/KAFKA/Contributing+Code+Changes) and interpreted this as a trivial change that doesn't require a Jira ticket.  Please let me know if I've interpreted that wrongly.
              
              This contribution is my original work and I license the work to the project under the project's open source license.
              
              Author: Mathieu Fenniak 
              
              Reviewers: Guozhang Wang 
              
              Closes #1639 from mfenniak/fix-slf4j-dependency-for-streams
              
              (cherry picked from commit cfebfdfa5e537946e4c6e1cc986f20131899be31)
              Signed-off-by: Guozhang Wang 
              ---
               build.gradle | 4 +++-
               1 file changed, 3 insertions(+), 1 deletion(-)
              
              diff --git a/build.gradle b/build.gradle
              index d076c294145b..36647b3468dc 100644
              --- a/build.gradle
              +++ b/build.gradle
              @@ -670,7 +670,7 @@ project(':streams') {
                 dependencies {
                   compile project(':clients')
                   compile project(':connect:json')  // this dependency should be removed after we unify data API
              -    compile libs.slf4jlog4j
              +    compile libs.slf4jApi
                   compile libs.rocksDBJni
                   // this dependency should be removed after KIP-4
                   compile (libs.zkclient) {
              @@ -683,6 +683,8 @@ project(':streams') {
                   testCompile project(':core')
                   testCompile project(':core').sourceSets.test.output
                   testCompile libs.junit
              +
              +    testRuntime libs.slf4jlog4j
                 }
               
                 javadoc {
              
              From 7bd06e9255544c3b9c536b6b9417e4bb52c830b3 Mon Sep 17 00:00:00 2001
              From: Guozhang Wang 
              Date: Wed, 13 Jul 2016 18:11:25 -0700
              Subject: [PATCH 214/267] Cherry-pick KAFKA-3941: Delay eviction listener in
               InMemoryKeyValueLoggedStore after restoration
              
              ---
               .../InMemoryKeyValueLoggedStore.java          | 27 +++++++-------
               .../InMemoryKeyValueStoreSupplier.java        | 26 ++++++++++++--
               .../InMemoryLRUCacheStoreSupplier.java        | 15 +++-----
               .../state/internals/MemoryLRUCache.java       | 35 ++++++++++++++++---
               .../internals/MemoryNavigableLRUCache.java    | 10 +++---
               .../RocksDBKeyValueStoreSupplier.java         |  4 ++-
               .../streams/state/internals/RocksDBStore.java |  2 +-
               .../internals/RocksDBWindowStoreSupplier.java |  4 ++-
               .../processor/internals/StandbyTaskTest.java  |  4 ---
               9 files changed, 84 insertions(+), 43 deletions(-)
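
Before the diff itself, a self-contained sketch of the ordering this cherry-pick establishes (the Lru class below is a simplified stand-in, not the Streams store): if the eviction listener that forwards removals to the changelog were attached before restoration, replaying a changelog larger than the cache capacity would log spurious removals; attaching it only after the inner store has been initialized and restored avoids that.

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.function.BiConsumer;

    public class LruRestoreOrderingSketch {

        // A tiny LRU cache whose eviction listener can be attached late.
        static class Lru<K, V> extends LinkedHashMap<K, V> {
            private final int maxSize;
            private BiConsumer<K, V> evictionListener;   // null until explicitly attached

            Lru(int maxSize) {
                super(16, 0.75f, true);                  // access-order, i.e. LRU eviction
                this.maxSize = maxSize;
            }

            void whenEldestRemoved(BiConsumer<K, V> listener) {
                this.evictionListener = listener;
            }

            @Override
            protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                boolean evict = size() > maxSize;
                if (evict && evictionListener != null)
                    evictionListener.accept(eldest.getKey(), eldest.getValue());
                return evict;
            }
        }

        public static void main(String[] args) {
            Lru<String, String> store = new Lru<>(2);
            List<String> changelog = new ArrayList<>();

            // 1) "Restore" first: evictions during replay are not forwarded because
            //    no listener is attached yet (mirrors calling inner.init() first).
            store.put("a", "1");
            store.put("b", "2");
            store.put("c", "3");                         // evicts "a" silently

            // 2) Only now attach the listener that would log removals to the changelog.
            store.whenEldestRemoved((k, v) -> changelog.add("removed " + k));

            store.put("d", "4");                         // evicts "b" and forwards it
            System.out.println(changelog);               // prints [removed b]
        }
    }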
              
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java
              index efcdac72947d..2c45d1a3f6f6 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueLoggedStore.java
              @@ -20,7 +20,6 @@
               import org.apache.kafka.common.serialization.Serde;
               import org.apache.kafka.streams.KeyValue;
               import org.apache.kafka.streams.processor.ProcessorContext;
              -import org.apache.kafka.streams.processor.StateRestoreCallback;
               import org.apache.kafka.streams.processor.StateStore;
               import org.apache.kafka.streams.state.KeyValueIterator;
               import org.apache.kafka.streams.state.KeyValueStore;
              @@ -35,7 +34,6 @@ public class InMemoryKeyValueLoggedStore implements KeyValueStore {
                   private final Serde valueSerde;
                   private final String storeName;
               
              -    private StateSerdes serdes;
                   private StoreChangeLogger changeLogger;
                   private StoreChangeLogger.ValueGetter getter;
               
              @@ -54,30 +52,31 @@ public String name() {
                   @Override
                   @SuppressWarnings("unchecked")
                   public void init(ProcessorContext context, StateStore root) {
              +        inner.init(context, root);
              +
                       // construct the serde
              -        this.serdes = new StateSerdes<>(storeName,
              +        StateSerdes  serdes = new StateSerdes<>(storeName,
                               keySerde == null ? (Serde) context.keySerde() : keySerde,
                               valueSerde == null ? (Serde) context.valueSerde() : valueSerde);
               
                       this.changeLogger = new StoreChangeLogger<>(storeName, context, serdes);
               
              -        context.register(root, true, new StateRestoreCallback() {
              -            @Override
              -            public void restore(byte[] key, byte[] value) {
              -
              -                // directly call inner functions so that the operation is not logged
              -                inner.put(serdes.keyFrom(key), serdes.valueFrom(value));
              -            }
              -        });
              -
              -        inner.init(context, root);
              -
                       this.getter = new StoreChangeLogger.ValueGetter() {
                           @Override
                           public V get(K key) {
                               return inner.get(key);
                           }
                       };
              +
              +        // if the inner store is an LRU cache, add the eviction listener to log removed record
              +        if (inner instanceof MemoryLRUCache) {
              +            ((MemoryLRUCache) inner).whenEldestRemoved(new MemoryNavigableLRUCache.EldestEntryRemovalListener() {
              +                @Override
              +                public void apply(K key, V value) {
              +                    removed(key);
              +                }
              +            });
              +        }
                   }
               
                   @Override
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java
              index a25153c97322..8b498a0e092e 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryKeyValueStoreSupplier.java
              @@ -21,10 +21,12 @@
               import org.apache.kafka.common.utils.Time;
               import org.apache.kafka.streams.KeyValue;
               import org.apache.kafka.streams.processor.ProcessorContext;
              +import org.apache.kafka.streams.processor.StateRestoreCallback;
               import org.apache.kafka.streams.processor.StateStore;
               import org.apache.kafka.streams.processor.StateStoreSupplier;
               import org.apache.kafka.streams.state.KeyValueIterator;
               import org.apache.kafka.streams.state.KeyValueStore;
              +import org.apache.kafka.streams.state.StateSerdes;
               
               import java.util.Iterator;
               import java.util.List;
              @@ -67,7 +69,9 @@ public String name() {
                   }
               
                   public StateStore get() {
              -        return new MeteredKeyValueStore<>(new MemoryStore<>(name, keySerde, valueSerde).enableLogging(), "in-memory-state", time);
              +        MemoryStore store = new MemoryStore<>(name, keySerde, valueSerde);
              +
              +        return new MeteredKeyValueStore<>(store.enableLogging(), "in-memory-state", time);
                   }
               
                   private static class MemoryStore implements KeyValueStore {
              @@ -76,6 +80,8 @@ private static class MemoryStore implements KeyValueStore {
                       private final Serde valueSerde;
                       private final NavigableMap map;
               
              +        private StateSerdes serdes;
              +
                       public MemoryStore(String name, Serde keySerde, Serde valueSerde) {
                           this.name = name;
                           this.keySerde = keySerde;
              @@ -98,7 +104,23 @@ public String name() {
                       @Override
                       @SuppressWarnings("unchecked")
                       public void init(ProcessorContext context, StateStore root) {
              -            // do nothing
              +            // construct the serde
              +            this.serdes = new StateSerdes<>(name,
              +                    keySerde == null ? (Serde) context.keySerde() : keySerde,
              +                    valueSerde == null ? (Serde) context.valueSerde() : valueSerde);
              +
              +            // register the store
              +            context.register(root, true, new StateRestoreCallback() {
              +                @Override
              +                public void restore(byte[] key, byte[] value) {
              +                    // check value for null, to avoid  deserialization error.
              +                    if (value == null) {
              +                        put(serdes.keyFrom(key), null);
              +                    } else {
              +                        put(serdes.keyFrom(key), serdes.valueFrom(value));
              +                    }
              +                }
              +            });
                       }
               
                       @Override
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java
              index 4a4fa5f96af1..20a73332c651 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryLRUCacheStoreSupplier.java
              @@ -52,17 +52,10 @@ public String name() {
                       return name;
                   }
               
              -    @SuppressWarnings("unchecked")
                   public StateStore get() {
              -        final MemoryNavigableLRUCache cache = new MemoryNavigableLRUCache(name, capacity);
              -        final InMemoryKeyValueLoggedStore loggedCache = (InMemoryKeyValueLoggedStore) cache.enableLogging(keySerde, valueSerde);
              -        final MeteredKeyValueStore store = new MeteredKeyValueStore<>(loggedCache, "in-memory-lru-state", time);
              -        cache.whenEldestRemoved(new MemoryNavigableLRUCache.EldestEntryRemovalListener() {
              -            @Override
              -            public void apply(K key, V value) {
              -                loggedCache.removed(key);
              -            }
              -        });
              -        return store;
              +        MemoryNavigableLRUCache cache = new MemoryNavigableLRUCache<>(name, capacity, keySerde, valueSerde);
              +        InMemoryKeyValueLoggedStore loggedCache = (InMemoryKeyValueLoggedStore) cache.enableLogging();
              +
              +        return new MeteredKeyValueStore<>(loggedCache, "in-memory-lru-state", time);
                   }
               }
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java
              index d410e02bad08..18993891c81d 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryLRUCache.java
              @@ -19,9 +19,11 @@
               import org.apache.kafka.common.serialization.Serde;
               import org.apache.kafka.streams.KeyValue;
               import org.apache.kafka.streams.processor.ProcessorContext;
              +import org.apache.kafka.streams.processor.StateRestoreCallback;
               import org.apache.kafka.streams.processor.StateStore;
               import org.apache.kafka.streams.state.KeyValueIterator;
               import org.apache.kafka.streams.state.KeyValueStore;
              +import org.apache.kafka.streams.state.StateSerdes;
               
               import java.util.HashSet;
               import java.util.LinkedHashMap;
              @@ -48,16 +50,25 @@ public interface EldestEntryRemovalListener {
                       void apply(K key, V value);
                   }
               
              +    private final Serde keySerde;
              +    private final Serde valueSerde;
              +
                   protected String name;
                   protected Map map;
                   protected Set keys;
              +    private StateSerdes serdes;
               
                   protected EldestEntryRemovalListener listener;
               
                   // this is used for extended MemoryNavigableLRUCache only
              -    public MemoryLRUCache() {}
              +    public MemoryLRUCache(Serde keySerde, Serde valueSerde) {
              +        this.keySerde = keySerde;
              +        this.valueSerde = valueSerde;
              +    }
              +
              +    public MemoryLRUCache(String name, final int maxCacheSize, Serde keySerde, Serde valueSerde) {
              +        this(keySerde, valueSerde);
               
              -    public MemoryLRUCache(String name, final int maxCacheSize) {
                       this.name = name;
                       this.keys = new HashSet<>();
               
              @@ -78,7 +89,7 @@ protected boolean removeEldestEntry(Map.Entry eldest) {
                       };
                   }
               
              -    public KeyValueStore enableLogging(Serde keySerde, Serde valueSerde) {
              +    public KeyValueStore enableLogging() {
                       return new InMemoryKeyValueLoggedStore<>(this.name, this, keySerde, valueSerde);
                   }
               
              @@ -96,7 +107,23 @@ public String name() {
                   @Override
                   @SuppressWarnings("unchecked")
                   public void init(ProcessorContext context, StateStore root) {
              -        // do nothing
              +        // construct the serde
              +        this.serdes = new StateSerdes<>(name,
              +                keySerde == null ? (Serde) context.keySerde() : keySerde,
              +                valueSerde == null ? (Serde) context.valueSerde() : valueSerde);
              +
              +        // register the store
              +        context.register(root, true, new StateRestoreCallback() {
              +            @Override
              +            public void restore(byte[] key, byte[] value) {
+                // check value for null to avoid a deserialization error.
              +                if (value == null) {
              +                    put(serdes.keyFrom(key), null);
              +                } else {
              +                    put(serdes.keyFrom(key), serdes.valueFrom(value));
              +                }
              +            }
              +        });
                   }
               
                   @Override
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java
              index 99bac93a8762..5eb4f495fae2 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MemoryNavigableLRUCache.java
              @@ -16,6 +16,7 @@
                */
               package org.apache.kafka.streams.state.internals;
               
              +import org.apache.kafka.common.serialization.Serde;
               import org.apache.kafka.streams.KeyValue;
               import org.apache.kafka.streams.state.KeyValueIterator;
               
              @@ -27,8 +28,8 @@
               
               public class MemoryNavigableLRUCache extends MemoryLRUCache {
               
              -    public MemoryNavigableLRUCache(String name, final int maxCacheSize) {
              -        super();
              +    public MemoryNavigableLRUCache(String name, final int maxCacheSize, Serde keySerde, Serde valueSerde) {
              +        super(keySerde, valueSerde);
               
                       this.name = name;
                       this.keys = new TreeSet<>();
              @@ -57,15 +58,14 @@ public MemoryNavigableLRUCache whenEldestRemoved(EldestEntryRemovalListene
                       return this;
                   }
               
              -    @SuppressWarnings("unchecked")
                   @Override
                   public KeyValueIterator range(K from, K to) {
              -        return new MemoryNavigableLRUCache.CacheIterator(((NavigableSet) this.keys).subSet(from, true, to, false).iterator(), this.map);
              +        return new MemoryNavigableLRUCache.CacheIterator<>(((NavigableSet) this.keys).subSet(from, true, to, false).iterator(), this.map);
                   }
               
                   @Override
                   public KeyValueIterator all() {
              -        return new MemoryNavigableLRUCache.CacheIterator(this.keys.iterator(), this.map);
              +        return new MemoryNavigableLRUCache.CacheIterator<>(this.keys.iterator(), this.map);
                   }
               
                   private static class CacheIterator implements KeyValueIterator {
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java
              index af9873366b18..16111ad497d9 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBKeyValueStoreSupplier.java
              @@ -53,6 +53,8 @@ public String name() {
                   }
               
                   public StateStore get() {
              -        return new MeteredKeyValueStore<>(new RocksDBStore<>(name, keySerde, valueSerde).enableLogging(), "rocksdb-state", time);
              +        RocksDBStore store = new RocksDBStore<>(name, keySerde, valueSerde);
              +
              +        return new MeteredKeyValueStore<>(store.enableLogging(), "rocksdb-state", time);
                   }
               }
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
              index 8f3bab0f2578..73110f658c7e 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
              @@ -173,7 +173,7 @@ public void init(ProcessorContext context, StateStore root) {
                       this.changeLogger = this.loggingEnabled ? new StoreChangeLogger<>(name, context, WindowStoreUtils.INNER_SERDES) : null;
               
                       if (this.cacheSize > 0) {
              -            this.cache = new MemoryLRUCache(name, cacheSize)
              +            this.cache = new MemoryLRUCache(name, cacheSize, null, null)
                                   .whenEldestRemoved(new MemoryLRUCache.EldestEntryRemovalListener() {
                                       @Override
                                       public void apply(K key, RocksDBCacheEntry entry) {
              diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java
              index 0407299562a8..3a1bd5983c55 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreSupplier.java
              @@ -59,7 +59,9 @@ public String name() {
                   }
               
                   public StateStore get() {
              -        return new MeteredWindowStore<>(new RocksDBWindowStore<>(name, retentionPeriod, numSegments, retainDuplicates, keySerde, valueSerde).enableLogging(), "rocksdb-window", time);
              +        RocksDBWindowStore store = new RocksDBWindowStore<>(name, retentionPeriod, numSegments, retainDuplicates, keySerde, valueSerde);
              +
              +        return new MeteredWindowStore<>(store.enableLogging(), "rocksdb-window", time);
                   }
               
               }
              diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
              index e7fb9a4e41f7..9e15e1cdee9b 100644
              --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
              +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
              @@ -115,15 +115,11 @@ public void setup() {
                               new PartitionInfo(storeChangelogTopicName1, 2, Node.noNode(), new Node[0], new Node[0])
                       ));
               
              -        System.out.println("added " + storeChangelogTopicName1);
              -
                       restoreStateConsumer.updatePartitions(storeChangelogTopicName2, Utils.mkList(
                               new PartitionInfo(storeChangelogTopicName2, 0, Node.noNode(), new Node[0], new Node[0]),
                               new PartitionInfo(storeChangelogTopicName2, 1, Node.noNode(), new Node[0], new Node[0]),
                               new PartitionInfo(storeChangelogTopicName2, 2, Node.noNode(), new Node[0], new Node[0])
                       ));
              -
              -        System.out.println("added " + storeChangelogTopicName2);
                   }
               
                   @Test
              
              From 039e89a6ebcf914f06d83b7277bb7fc209547cc1 Mon Sep 17 00:00:00 2001
              From: Ismael Juma 
              Date: Wed, 20 Jul 2016 12:49:29 -0700
Subject: [PATCH 215/267] KAFKA-3915; Don't convert messages from v0 to v1
 during log compaction

The conversion is unsafe as the converted message size may be greater than the
message size limit. Updated `LogCleanerIntegrationTest` to test the max message
size case for both V0 and the current version.
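
As an illustrative sketch of the problem (not part of the patch), assuming the
v1 format adds an 8-byte timestamp field to every message: a v0 message that
already sits at the configured size limit no longer fits once up-converted, so
the cleaner has to copy v0 messages unchanged. The limit below is a
hypothetical value, not a Kafka constant.

```
// Rough illustration only; TimestampFieldBytes and maxMessageBytes are assumptions.
object MessageSizeGrowthSketch {
  val TimestampFieldBytes = 8                 // extra field introduced by message format v1

  def convertedSize(v0SizeInBytes: Int): Int = v0SizeInBytes + TimestampFieldBytes

  def main(args: Array[String]): Unit = {
    val maxMessageBytes = 1000000             // hypothetical max.message.bytes
    val v0Size = maxMessageBytes              // a v0 message right at the limit
    val v1Size = convertedSize(v0Size)
    // The up-converted message would exceed the limit, which is why compaction
    // must not change the message format.
    println(s"v0=$v0Size v1=$v1Size exceedsLimit=${v1Size > maxMessageBytes}")
  }
}
```

With the patch, the cleaner retains messages in their original format and
`compressMessages` is only invoked for compressed message sets.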
              
              Also include a few minor clean-ups:
              * Remove unused code branch in `LogCleaner.compressMessages`
* Avoid unintentional usage of `scala.collection.immutable.Stream` (`toSeq` on an `Iterator`); see the sketch after this list
              * Add explicit result type in `FileMessageSet.iterator`
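
The `Stream` clean-up refers to a Scala subtlety: in the Scala 2.10/2.11
standard library, `Iterator.toSeq` is backed by `toStream`, so the result is a
lazily evaluated `scala.collection.immutable.Stream` rather than a materialized
collection. A minimal, Kafka-independent sketch of the difference:

```
// Minimal sketch; the values are arbitrary and unrelated to Kafka's log segments.
object IteratorToSeqSketch {
  def main(args: Array[String]): Unit = {
    val lazySeq  = Iterator(1, 2, 3).toSeq    // lazily evaluated Stream
    val eagerSeq = Iterator(1, 2, 3).toBuffer // eagerly evaluated ArrayBuffer

    println(lazySeq.isInstanceOf[scala.collection.immutable.Stream[_]])    // true
    println(eagerSeq.isInstanceOf[scala.collection.mutable.ArrayBuffer[_]]) // true
  }
}
```

This is why `buildOffsetMap` switches from `toSeq` to `toBuffer` when
collecting the dirty segments.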
              
              Author: Ismael Juma 
              
              Reviewers: Ewen Cheslack-Postava, Guozhang Wang
              
              Closes #1643 from ijuma/kafka-3915-log-cleaner-io-buffers-message-conversion
              
              (cherry picked from commit 0d19f58850282d16c14fd4abd04663eae817d012)
              ---
               .../main/scala/kafka/log/FileMessageSet.scala |   2 +-
               .../src/main/scala/kafka/log/LogCleaner.scala |  31 +--
               .../kafka/log/LogCleanerIntegrationTest.scala | 184 +++++++++++++-----
               3 files changed, 142 insertions(+), 75 deletions(-)
              
              diff --git a/core/src/main/scala/kafka/log/FileMessageSet.scala b/core/src/main/scala/kafka/log/FileMessageSet.scala
              index d5aa5c5d043a..2ee2cc20aa4b 100755
              --- a/core/src/main/scala/kafka/log/FileMessageSet.scala
              +++ b/core/src/main/scala/kafka/log/FileMessageSet.scala
              @@ -239,7 +239,7 @@ class FileMessageSet private[kafka](@volatile var file: File,
                 /**
                  * Get a shallow iterator over the messages in the set.
                  */
              -  override def iterator = iterator(Int.MaxValue)
              +  override def iterator: Iterator[MessageAndOffset] = iterator(Int.MaxValue)
               
                 /**
                  * Get an iterator over the messages in the set. We only do shallow iteration here.
              diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala
              index c6636be09428..c5e4ee8a54cd 100644
              --- a/core/src/main/scala/kafka/log/LogCleaner.scala
              +++ b/core/src/main/scala/kafka/log/LogCleaner.scala
              @@ -431,8 +431,7 @@ private[log] class Cleaner(val id: Int,
                       stats.readMessage(size)
                       if (entry.message.compressionCodec == NoCompressionCodec) {
                         if (shouldRetainMessage(source, map, retainDeletes, entry)) {
              -            val convertedMessage = entry.message.toFormatVersion(messageFormatVersion)
              -            ByteBufferMessageSet.writeMessage(writeBuffer, convertedMessage, entry.offset)
              +            ByteBufferMessageSet.writeMessage(writeBuffer, entry.message, entry.offset)
                           stats.recopyMessage(size)
                         }
                         messagesRead += 1
              @@ -444,22 +443,15 @@ private[log] class Cleaner(val id: Int,
                         val retainedMessages = new mutable.ArrayBuffer[MessageAndOffset]
                         messages.foreach { messageAndOffset =>
                           messagesRead += 1
              -            if (shouldRetainMessage(source, map, retainDeletes, messageAndOffset)) {
              -              retainedMessages += {
              -                if (messageAndOffset.message.magic != messageFormatVersion) {
              -                  writeOriginalMessageSet = false
              -                  new MessageAndOffset(messageAndOffset.message.toFormatVersion(messageFormatVersion), messageAndOffset.offset)
              -                }
              -                else messageAndOffset
              -              }
              -            }
              +            if (shouldRetainMessage(source, map, retainDeletes, messageAndOffset))
              +              retainedMessages += messageAndOffset
                           else writeOriginalMessageSet = false
                         }
               
              -          // There are no messages compacted out and no message format conversion, write the original message set back
              +          // There are no messages compacted out, write the original message set back
                         if (writeOriginalMessageSet)
                           ByteBufferMessageSet.writeMessage(writeBuffer, entry.message, entry.offset)
              -          else if (retainedMessages.nonEmpty)
              +          else
                           compressMessages(writeBuffer, entry.message.compressionCodec, messageFormatVersion, retainedMessages)
                       }
                     }
              @@ -484,14 +476,9 @@ private[log] class Cleaner(val id: Int,
                                              compressionCodec: CompressionCodec,
                                              messageFormatVersion: Byte,
                                              messageAndOffsets: Seq[MessageAndOffset]) {
              -    val messages = messageAndOffsets.map(_.message)
              -    if (messageAndOffsets.isEmpty) {
              -      MessageSet.Empty.sizeInBytes
              -    } else if (compressionCodec == NoCompressionCodec) {
              -      for (messageOffset <- messageAndOffsets)
              -        ByteBufferMessageSet.writeMessage(buffer, messageOffset.message, messageOffset.offset)
              -      MessageSet.messageSetSize(messages)
              -    } else {
              +    require(compressionCodec != NoCompressionCodec, s"compressionCodec must not be $NoCompressionCodec")
              +    if (messageAndOffsets.nonEmpty) {
              +      val messages = messageAndOffsets.map(_.message)
                     val magicAndTimestamp = MessageSet.magicAndLargestTimestamp(messages)
                     val firstMessageOffset = messageAndOffsets.head
                     val firstAbsoluteOffset = firstMessageOffset.offset
              @@ -608,7 +595,7 @@ private[log] class Cleaner(val id: Int,
                  */
                 private[log] def buildOffsetMap(log: Log, start: Long, end: Long, map: OffsetMap): Long = {
                   map.clear()
              -    val dirty = log.logSegments(start, end).toSeq
              +    val dirty = log.logSegments(start, end).toBuffer
                   info("Building offset map for log %s for %d segments in offset range [%d, %d).".format(log.name, dirty.size, start, end))
                   
                   // Add all the dirty segments. We must take at least map.slots * load_factor,
              diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
              index cc9873c862a6..825a55b5a8c5 100755
              --- a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
              +++ b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
              @@ -20,6 +20,7 @@ package kafka.log
               import java.io.File
               import java.util.Properties
               
              +import kafka.api.{KAFKA_0_10_0_IV1, KAFKA_0_9_0}
               import kafka.common.TopicAndPartition
               import kafka.message._
               import kafka.server.OffsetCheckpoint
              @@ -33,6 +34,7 @@ import org.junit.runners.Parameterized
               import org.junit.runners.Parameterized.Parameters
               
               import scala.collection._
              +import scala.util.Random
               
               /**
                * This is an integration test that tests the fully integrated log cleaner
              @@ -40,117 +42,195 @@ import scala.collection._
               @RunWith(value = classOf[Parameterized])
               class LogCleanerIntegrationTest(compressionCodec: String) {
               
              +  val codec = CompressionCodec.getCompressionCodec(compressionCodec)
                 val time = new MockTime()
              -  val segmentSize = 100
              +  val segmentSize = 256
                 val deleteDelay = 1000
                 val logName = "log"
                 val logDir = TestUtils.tempDir()
                 var counter = 0
              +  var cleaner: LogCleaner = _
                 val topics = Array(TopicAndPartition("log", 0), TopicAndPartition("log", 1), TopicAndPartition("log", 2))
               
                 @Test
                 def cleanerTest() {
              -    val cleaner = makeCleaner(parts = 3)
              +    val largeMessageKey = 20
              +    val (largeMessageValue, largeMessageSet) = createLargeSingleMessageSet(largeMessageKey, Message.MagicValue_V1)
              +    val maxMessageSize = largeMessageSet.sizeInBytes
              +
              +    cleaner = makeCleaner(parts = 3, maxMessageSize = maxMessageSize)
                   val log = cleaner.logs.get(topics(0))
               
              -    val appends = writeDups(numKeys = 100, numDups = 3, log, CompressionCodec.getCompressionCodec(compressionCodec))
              +    val appends = writeDups(numKeys = 100, numDups = 3, log = log, codec = codec)
                   val startSize = log.size
                   cleaner.startup()
               
                   val firstDirty = log.activeSegment.baseOffset
              -    // wait until cleaning up to base_offset, note that cleaning happens only when "log dirty ratio" is higher than LogConfig.MinCleanableDirtyRatioProp
              -    cleaner.awaitCleaned("log", 0, firstDirty)
              +    checkLastCleaned("log", 0, firstDirty)
                   val compactedSize = log.logSegments.map(_.size).sum
              -    val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints.get(TopicAndPartition("log", 0)).get
              -    assertTrue(s"log cleaner should have processed up to offset $firstDirty, but lastCleaned=$lastCleaned", lastCleaned >= firstDirty)
              -    assertTrue(s"log should have been compacted:  startSize=$startSize compactedSize=$compactedSize", startSize > compactedSize)
              -    
              -    val read = readFromLog(log)
              -    assertEquals("Contents of the map shouldn't change.", appends.toMap, read.toMap)
              -    assertTrue(startSize > log.size)
              +    assertTrue(s"log should have been compacted: startSize=$startSize compactedSize=$compactedSize", startSize > compactedSize)
               
              -    // write some more stuff and validate again
              -    val appends2 = appends ++ writeDups(numKeys = 100, numDups = 3, log, CompressionCodec.getCompressionCodec(compressionCodec))
              -    val firstDirty2 = log.activeSegment.baseOffset
              -    cleaner.awaitCleaned("log", 0, firstDirty2)
              +    checkLogAfterAppendingDups(log, startSize, appends)
               
              -    val lastCleaned2 = cleaner.cleanerManager.allCleanerCheckpoints.get(TopicAndPartition("log", 0)).get
              -    assertTrue(s"log cleaner should have processed up to offset $firstDirty2", lastCleaned2 >= firstDirty2);
              +    log.append(largeMessageSet, assignOffsets = true)
              +    val dups = writeDups(startKey = largeMessageKey + 1, numKeys = 100, numDups = 3, log = log, codec = codec)
              +    val appends2 = appends ++ Seq(largeMessageKey -> largeMessageValue) ++ dups
              +    val firstDirty2 = log.activeSegment.baseOffset
              +    checkLastCleaned("log", 0, firstDirty2)
               
              -    val read2 = readFromLog(log)
              -    assertEquals("Contents of the map shouldn't change.", appends2.toMap, read2.toMap)
              +    checkLogAfterAppendingDups(log, startSize, appends2)
               
                   // simulate deleting a partition, by removing it from logs
                   // force a checkpoint
                   // and make sure its gone from checkpoint file
              -
                   cleaner.logs.remove(topics(0))
              -
                   cleaner.updateCheckpoints(logDir)
                   val checkpoints = new OffsetCheckpoint(new File(logDir,cleaner.cleanerManager.offsetCheckpointFile)).read()
              -
                   // we expect partition 0 to be gone
              -    assert(!checkpoints.contains(topics(0)))
              -    cleaner.shutdown()
              +    assertFalse(checkpoints.contains(topics(0)))
              +  }
              +
              +  // returns (value, ByteBufferMessageSet)
              +  private def createLargeSingleMessageSet(key: Int, messageFormatVersion: Byte): (String, ByteBufferMessageSet) = {
              +    def messageValue(length: Int): String = {
              +      val random = new Random(0)
              +      new String(random.alphanumeric.take(length).toArray)
              +    }
              +    val value = messageValue(128)
              +    val messageSet = TestUtils.singleMessageSet(payload = value.getBytes, codec = codec, key = key.toString.getBytes,
              +      magicValue = messageFormatVersion)
              +    (value, messageSet)
                 }
               
              -  def readFromLog(log: Log): Iterable[(Int, Int)] = {
              -    for (segment <- log.logSegments; entry <- segment.log; messageAndOffset <- {
              +  @Test
              +  def testCleanerWithMessageFormatV0() {
              +    val largeMessageKey = 20
              +    val (largeMessageValue, largeMessageSet) = createLargeSingleMessageSet(largeMessageKey, Message.MagicValue_V0)
              +    val maxMessageSize = codec match {
              +      case NoCompressionCodec => largeMessageSet.sizeInBytes
              +      case _ =>
              +        // the broker assigns absolute offsets for message format 0 which potentially causes the compressed size to
              +        // increase because the broker offsets are larger than the ones assigned by the client
              +        // adding `5` to the message set size is good enough for this test: it covers the increased message size while
              +        // still being less than the overhead introduced by the conversion from message format version 0 to 1
              +        largeMessageSet.sizeInBytes + 5
              +    }
              +
              +    cleaner = makeCleaner(parts = 3, maxMessageSize = maxMessageSize)
              +
              +    val log = cleaner.logs.get(topics(0))
              +    val props = logConfigProperties(maxMessageSize)
              +    props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_9_0.version)
              +    log.config = new LogConfig(props)
              +
              +    val appends = writeDups(numKeys = 100, numDups = 3, log = log, codec = codec, magicValue = Message.MagicValue_V0)
              +    val startSize = log.size
              +    cleaner.startup()
              +
              +    val firstDirty = log.activeSegment.baseOffset
              +    checkLastCleaned("log", 0, firstDirty)
              +    val compactedSize = log.logSegments.map(_.size).sum
              +    assertTrue(s"log should have been compacted: startSize=$startSize compactedSize=$compactedSize", startSize > compactedSize)
              +
              +    checkLogAfterAppendingDups(log, startSize, appends)
              +
              +    val appends2: Seq[(Int, String)] = {
              +      val dupsV0 = writeDups(numKeys = 40, numDups = 3, log = log, codec = codec, magicValue = Message.MagicValue_V0)
              +      log.append(largeMessageSet, assignOffsets = true)
              +
              +      // also add some messages with version 1 to check that we handle mixed format versions correctly
              +      props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_10_0_IV1.version)
              +      log.config = new LogConfig(props)
              +      val dupsV1 = writeDups(startKey = 30, numKeys = 40, numDups = 3, log = log, codec = codec, magicValue = Message.MagicValue_V1)
              +      appends ++ dupsV0 ++ Seq(largeMessageKey -> largeMessageValue) ++ dupsV1
              +    }
              +    val firstDirty2 = log.activeSegment.baseOffset
              +    checkLastCleaned("log", 0, firstDirty2)
              +
              +    checkLogAfterAppendingDups(log, startSize, appends2)
              +  }
              +
              +  private def checkLastCleaned(topic: String, partitionId: Int, firstDirty: Long) {
              +    // wait until cleaning up to base_offset, note that cleaning happens only when "log dirty ratio" is higher than
              +    // LogConfig.MinCleanableDirtyRatioProp
              +    cleaner.awaitCleaned(topic, partitionId, firstDirty)
              +    val lastCleaned = cleaner.cleanerManager.allCleanerCheckpoints.get(TopicAndPartition(topic, partitionId)).get
              +    assertTrue(s"log cleaner should have processed up to offset $firstDirty, but lastCleaned=$lastCleaned",
              +      lastCleaned >= firstDirty)
              +  }
              +
              +  private def checkLogAfterAppendingDups(log: Log, startSize: Long, appends: Seq[(Int, String)]) {
              +    val read = readFromLog(log)
              +    assertEquals("Contents of the map shouldn't change", appends.toMap, read.toMap)
              +    assertTrue(startSize > log.size)
              +  }
              +
              +  private def readFromLog(log: Log): Iterable[(Int, String)] = {
              +
              +    def messageIterator(entry: MessageAndOffset): Iterator[MessageAndOffset] =
                     // create single message iterator or deep iterator depending on compression codec
              -      if (entry.message.compressionCodec == NoCompressionCodec)
              -        Stream.cons(entry, Stream.empty).iterator
              -      else
              -        ByteBufferMessageSet.deepIterator(entry)
              -    }) yield {
              +      if (entry.message.compressionCodec == NoCompressionCodec) Iterator(entry)
              +      else ByteBufferMessageSet.deepIterator(entry)
              +
              +    for (segment <- log.logSegments; entry <- segment.log; messageAndOffset <- messageIterator(entry)) yield {
                     val key = TestUtils.readString(messageAndOffset.message.key).toInt
              -      val value = TestUtils.readString(messageAndOffset.message.payload).toInt
              +      val value = TestUtils.readString(messageAndOffset.message.payload)
                     key -> value
                   }
                 }
               
              -  def writeDups(numKeys: Int, numDups: Int, log: Log, codec: CompressionCodec): Seq[(Int, Int)] = {
              -    for(dup <- 0 until numDups; key <- 0 until numKeys) yield {
              -      val count = counter
              -      log.append(TestUtils.singleMessageSet(payload = counter.toString.getBytes, codec = codec, key = key.toString.getBytes), assignOffsets = true)
              +  private def writeDups(numKeys: Int, numDups: Int, log: Log, codec: CompressionCodec,
              +                        startKey: Int = 0, magicValue: Byte = Message.CurrentMagicValue): Seq[(Int, String)] = {
              +    for(_ <- 0 until numDups; key <- startKey until (startKey + numKeys)) yield {
              +      val payload = counter.toString
              +      log.append(TestUtils.singleMessageSet(payload = payload.toString.getBytes, codec = codec,
              +        key = key.toString.getBytes, magicValue = magicValue), assignOffsets = true)
                     counter += 1
              -      (key, count)
              +      (key, payload)
                   }
                 }
                   
                 @After
              -  def teardown() {
              +  def tearDown() {
              +    cleaner.shutdown()
                   time.scheduler.shutdown()
                   Utils.delete(logDir)
                 }
              +
              +  private def logConfigProperties(maxMessageSize: Int, minCleanableDirtyRatio: Float = 0.0F): Properties = {
              +    val props = new Properties()
              +    props.put(LogConfig.MaxMessageBytesProp, maxMessageSize: java.lang.Integer)
              +    props.put(LogConfig.SegmentBytesProp, segmentSize: java.lang.Integer)
              +    props.put(LogConfig.SegmentIndexBytesProp, 100*1024: java.lang.Integer)
              +    props.put(LogConfig.FileDeleteDelayMsProp, deleteDelay: java.lang.Integer)
              +    props.put(LogConfig.CleanupPolicyProp, LogConfig.Compact)
              +    props.put(LogConfig.MinCleanableDirtyRatioProp, minCleanableDirtyRatio: java.lang.Float)
              +    props
              +  }
                 
                 /* create a cleaner instance and logs with the given parameters */
              -  def makeCleaner(parts: Int, 
              -                  minCleanableDirtyRatio: Float = 0.0F,
              -                  numThreads: Int = 1,
              -                  defaultPolicy: String = "compact",
              -                  policyOverrides: Map[String, String] = Map()): LogCleaner = {
              +  private def makeCleaner(parts: Int,
              +                          minCleanableDirtyRatio: Float = 0.0F,
              +                          numThreads: Int = 1,
              +                          maxMessageSize: Int = 128,
              +                          defaultPolicy: String = "compact",
              +                          policyOverrides: Map[String, String] = Map()): LogCleaner = {
                   
                   // create partitions and add them to the pool
                   val logs = new Pool[TopicAndPartition, Log]()
                   for(i <- 0 until parts) {
                     val dir = new File(logDir, "log-" + i)
                     dir.mkdirs()
              -      val logProps = new Properties()
              -      logProps.put(LogConfig.SegmentBytesProp, segmentSize: java.lang.Integer)
              -      logProps.put(LogConfig.SegmentIndexBytesProp, 100*1024: java.lang.Integer)
              -      logProps.put(LogConfig.FileDeleteDelayMsProp, deleteDelay: java.lang.Integer)
              -      logProps.put(LogConfig.CleanupPolicyProp, LogConfig.Compact)
              -      logProps.put(LogConfig.MinCleanableDirtyRatioProp, minCleanableDirtyRatio: java.lang.Float)
               
                     val log = new Log(dir = dir,
              -                        LogConfig(logProps),
              +                        LogConfig(logConfigProperties(maxMessageSize, minCleanableDirtyRatio)),
                                       recoveryPoint = 0L,
                                       scheduler = time.scheduler,
                                       time = time)
                     logs.put(TopicAndPartition("log", i), log)      
                   }
                 
              -    new LogCleaner(CleanerConfig(numThreads = numThreads),
              +    new LogCleaner(CleanerConfig(numThreads = numThreads, ioBufferSize = maxMessageSize / 2, maxMessageSize = maxMessageSize),
                                  logDirs = Array(logDir),
                                  logs = logs,
                                  time = time)
              
              From e293ed1458a1bc2118fe37ed6a6002ffe05e3ecf Mon Sep 17 00:00:00 2001
              From: Ashish Singh 
              Date: Thu, 21 Jul 2016 01:00:33 +0100
              Subject: [PATCH 216/267] MINOR: Improve PartitionState logging and remove
               duplication of code
              
              Currently, logs involving PartitionState are not very helpful.
              
              ```
	Broker 449 cached leader info org.apache.kafka.common.requests.UpdateMetadataRequest$PartitionState@3285d64a for partition - in response to UpdateMetadata request sent by controller 356 epoch 138 with correlation id 0
              
	TRACE state.change.logger: Broker 449 received LeaderAndIsr request org.apache.kafka.common.requests.LeaderAndIsrRequest$PartitionState@66d6a8eb correlation id 3 from controller 356 epoch 138 for partition [,]
              ```
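
With `PartitionState` promoted to a shared top-level class that overrides
`toString`, the same log lines now render the actual field values. A small
sketch of the rendered output, using made-up values and mirroring how
`ControllerBrokerRequestBatch` constructs the state:

```
// Sketch with hypothetical values; uses the new org.apache.kafka.common.requests.PartitionState.
import org.apache.kafka.common.requests.PartitionState
import scala.collection.JavaConverters._

object PartitionStateLoggingSketch {
  def main(args: Array[String]): Unit = {
    val partitionState = new PartitionState(
      138,                                            // controllerEpoch
      449,                                            // leader
      5,                                              // leaderEpoch
      List(449, 450).map(Integer.valueOf).asJava,     // isr
      7,                                              // zkVersion
      Set(449, 450, 451).map(Integer.valueOf).asJava) // replicas

    // Prints something like:
    // PartitionState(controllerEpoch=138, leader=449, leaderEpoch=5, isr=[449, 450], zkVersion=7, replicas=[449, 450, 451])
    println(partitionState)
  }
}
```

The previous inner classes in `LeaderAndIsrRequest` and `UpdateMetadataRequest`
relied on the default `Object.toString`, which is what produced the opaque
`...PartitionState@<hashCode>` output shown above.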
              
              Author: Ashish Singh 
              
              Reviewers: Ismael Juma 
              
              Closes #1609 from SinghAsDev/partitionState
              
              (cherry picked from commit 0e5700fb68671f3fb75bfdeceda40e84330aca69)
              Signed-off-by: Ismael Juma 
              ---
               .../common/requests/LeaderAndIsrRequest.java  | 19 --------
               .../kafka/common/requests/PartitionState.java | 46 +++++++++++++++++++
               .../requests/UpdateMetadataRequest.java       | 18 --------
               .../common/requests/RequestResponseTest.java  | 16 +++----
               .../main/scala/kafka/cluster/Partition.scala  | 12 ++---
               .../controller/ControllerChannelManager.scala |  5 +-
               .../scala/kafka/server/MetadataCache.scala    |  3 +-
               .../scala/kafka/server/ReplicaManager.scala   | 18 ++++----
               .../kafka/api/AuthorizerIntegrationTest.scala |  4 +-
               .../kafka/server/LeaderElectionTest.scala     |  3 +-
               .../unit/kafka/server/MetadataCacheTest.scala |  4 +-
               .../kafka/server/ReplicaManagerTest.scala     |  7 ++-
               12 files changed, 81 insertions(+), 74 deletions(-)
               create mode 100644 clients/src/main/java/org/apache/kafka/common/requests/PartitionState.java
              
              diff --git a/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java
              index fee3c2111410..52b9674af8c1 100644
              --- a/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java
              +++ b/clients/src/main/java/org/apache/kafka/common/requests/LeaderAndIsrRequest.java
              @@ -35,25 +35,6 @@
               
               public class LeaderAndIsrRequest extends AbstractRequest {
               
              -    public static class PartitionState {
              -        public final int controllerEpoch;
              -        public final int leader;
              -        public final int leaderEpoch;
              -        public final List isr;
              -        public final int zkVersion;
              -        public final Set replicas;
              -
              -        public PartitionState(int controllerEpoch, int leader, int leaderEpoch, List isr, int zkVersion, Set replicas) {
              -            this.controllerEpoch = controllerEpoch;
              -            this.leader = leader;
              -            this.leaderEpoch = leaderEpoch;
              -            this.isr = isr;
              -            this.zkVersion = zkVersion;
              -            this.replicas = replicas;
              -        }
              -
              -    }
              -
                   private static final Schema CURRENT_SCHEMA = ProtoUtils.currentRequestSchema(ApiKeys.LEADER_AND_ISR.id);
               
                   private static final String CONTROLLER_ID_KEY_NAME = "controller_id";
              diff --git a/clients/src/main/java/org/apache/kafka/common/requests/PartitionState.java b/clients/src/main/java/org/apache/kafka/common/requests/PartitionState.java
              new file mode 100644
              index 000000000000..e76663216283
              --- /dev/null
              +++ b/clients/src/main/java/org/apache/kafka/common/requests/PartitionState.java
              @@ -0,0 +1,46 @@
              +/**
              + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE
              + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file
              + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
              + * License. You may obtain a copy of the License at
              + *
              + * http://www.apache.org/licenses/LICENSE-2.0
              + *
              + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
              + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
              + * specific language governing permissions and limitations under the License.
              + */
              +
              +package org.apache.kafka.common.requests;
              +
              +import java.util.Arrays;
              +import java.util.List;
              +import java.util.Set;
              +
              +public class PartitionState {
              +    public final int controllerEpoch;
              +    public final int leader;
              +    public final int leaderEpoch;
              +    public final List isr;
              +    public final int zkVersion;
              +    public final Set replicas;
              +
              +    public PartitionState(int controllerEpoch, int leader, int leaderEpoch, List isr, int zkVersion, Set replicas) {
              +        this.controllerEpoch = controllerEpoch;
              +        this.leader = leader;
              +        this.leaderEpoch = leaderEpoch;
              +        this.isr = isr;
              +        this.zkVersion = zkVersion;
              +        this.replicas = replicas;
              +    }
              +
              +    @Override
              +    public String toString() {
              +        return "PartitionState(controllerEpoch=" + controllerEpoch +
              +                ", leader=" + leader +
              +                ", leaderEpoch=" + leaderEpoch +
              +                ", isr=" + Arrays.toString(isr.toArray()) +
              +                ", zkVersion=" + zkVersion +
              +                ", replicas=" + Arrays.toString(replicas.toArray()) + ")";
              +    }
              +}
              diff --git a/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java
              index 27f89fa5796f..1c2178963835 100644
              --- a/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java
              +++ b/clients/src/main/java/org/apache/kafka/common/requests/UpdateMetadataRequest.java
              @@ -33,24 +33,6 @@
               
               public class UpdateMetadataRequest extends AbstractRequest {
               
              -    public static final class PartitionState {
              -        public final int controllerEpoch;
              -        public final int leader;
              -        public final int leaderEpoch;
              -        public final List isr;
              -        public final int zkVersion;
              -        public final Set replicas;
              -
              -        public PartitionState(int controllerEpoch, int leader, int leaderEpoch, List isr, int zkVersion, Set replicas) {
              -            this.controllerEpoch = controllerEpoch;
              -            this.leader = leader;
              -            this.leaderEpoch = leaderEpoch;
              -            this.isr = isr;
              -            this.zkVersion = zkVersion;
              -            this.replicas = replicas;
              -        }
              -    }
              -
                   public static final class Broker {
                       public final int id;
                       public final Map endPoints;
              diff --git a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
              index 345de3f957ae..ecf9e5306adb 100644
              --- a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
              +++ b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
              @@ -372,15 +372,15 @@ private ControlledShutdownResponse createControlledShutdownResponse() {
                   }
               
                   private AbstractRequest createLeaderAndIsrRequest() {
              -        Map partitionStates = new HashMap<>();
              +        Map partitionStates = new HashMap<>();
                       List isr = Arrays.asList(1, 2);
                       List replicas = Arrays.asList(1, 2, 3, 4);
                       partitionStates.put(new TopicPartition("topic5", 105),
              -                new LeaderAndIsrRequest.PartitionState(0, 2, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(0, 2, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
                       partitionStates.put(new TopicPartition("topic5", 1),
              -                new LeaderAndIsrRequest.PartitionState(1, 1, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(1, 1, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
                       partitionStates.put(new TopicPartition("topic20", 1),
              -                new LeaderAndIsrRequest.PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
               
                       Set leaders = new HashSet<>(Arrays.asList(
                               new Node(0, "test0", 1223),
              @@ -398,15 +398,15 @@ private AbstractRequestResponse createLeaderAndIsrResponse() {
               
                   @SuppressWarnings("deprecation")
                   private AbstractRequest createUpdateMetadataRequest(int version, String rack) {
              -        Map partitionStates = new HashMap<>();
              +        Map partitionStates = new HashMap<>();
                       List isr = Arrays.asList(1, 2);
                       List replicas = Arrays.asList(1, 2, 3, 4);
                       partitionStates.put(new TopicPartition("topic5", 105),
              -                new UpdateMetadataRequest.PartitionState(0, 2, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(0, 2, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
                       partitionStates.put(new TopicPartition("topic5", 1),
              -                new UpdateMetadataRequest.PartitionState(1, 1, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(1, 1, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
                       partitionStates.put(new TopicPartition("topic20", 1),
              -                new UpdateMetadataRequest.PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
              +                new PartitionState(1, 0, 1, new ArrayList<>(isr), 2, new HashSet<>(replicas)));
               
                       if (version == 0) {
                           Set liveBrokers = new HashSet<>(Arrays.asList(
              diff --git a/core/src/main/scala/kafka/cluster/Partition.scala b/core/src/main/scala/kafka/cluster/Partition.scala
              index 4e79bdcf388c..edf66193c0e6 100755
              --- a/core/src/main/scala/kafka/cluster/Partition.scala
              +++ b/core/src/main/scala/kafka/cluster/Partition.scala
              @@ -18,7 +18,7 @@ package kafka.cluster
               
               import kafka.common._
               import kafka.utils._
              -import kafka.utils.CoreUtils.{inReadLock,inWriteLock}
              +import kafka.utils.CoreUtils.{inReadLock, inWriteLock}
               import kafka.admin.AdminUtils
               import kafka.api.LeaderAndIsr
               import kafka.log.LogConfig
              @@ -26,17 +26,15 @@ import kafka.server._
               import kafka.metrics.KafkaMetricsGroup
               import kafka.controller.KafkaController
               import kafka.message.ByteBufferMessageSet
              -
               import java.io.IOException
               import java.util.concurrent.locks.ReentrantReadWriteLock
              +
               import org.apache.kafka.common.errors.{NotEnoughReplicasException, NotLeaderForPartitionException}
               import org.apache.kafka.common.protocol.Errors
              -import org.apache.kafka.common.requests.LeaderAndIsrRequest
              -
               
               import scala.collection.JavaConverters._
              -
               import com.yammer.metrics.core.Gauge
              +import org.apache.kafka.common.requests.PartitionState
               
               /**
                * Data structure that represents a topic partition. The leader maintains the AR, ISR, CUR, RAR
              @@ -166,7 +164,7 @@ class Partition(val topic: String,
                  * from the time when this broker was the leader last time) and setting the new leader and ISR.
                  * If the leader replica id does not change, return false to indicate the replica manager.
                  */
              -  def makeLeader(controllerId: Int, partitionStateInfo: LeaderAndIsrRequest.PartitionState, correlationId: Int): Boolean = {
              +  def makeLeader(controllerId: Int, partitionStateInfo: PartitionState, correlationId: Int): Boolean = {
                   val (leaderHWIncremented, isNewLeader) = inWriteLock(leaderIsrUpdateLock) {
                     val allReplicas = partitionStateInfo.replicas.asScala.map(_.toInt)
                     // record the epoch of the controller that made the leadership decision. This is useful while updating the isr
              @@ -207,7 +205,7 @@ class Partition(val topic: String,
                  *  Make the local replica the follower by setting the new leader and ISR to empty
                  *  If the leader replica id does not change, return false to indicate the replica manager
                  */
              -  def makeFollower(controllerId: Int, partitionStateInfo: LeaderAndIsrRequest.PartitionState, correlationId: Int): Boolean = {
              +  def makeFollower(controllerId: Int, partitionStateInfo: PartitionState, correlationId: Int): Boolean = {
                   inWriteLock(leaderIsrUpdateLock) {
                     val allReplicas = partitionStateInfo.replicas.asScala.map(_.toInt)
                     val newLeaderBrokerId: Int = partitionStateInfo.leader
              diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
              index b4059a496471..c19d35a9387e 100755
              --- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
              +++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
              @@ -28,6 +28,7 @@ import org.apache.kafka.clients.{ClientRequest, ClientResponse, ManualMetadataUp
               import org.apache.kafka.common.metrics.Metrics
               import org.apache.kafka.common.network.{ChannelBuilders, LoginType, Mode, NetworkReceive, Selectable, Selector}
               import org.apache.kafka.common.protocol.{ApiKeys, SecurityProtocol}
              +import org.apache.kafka.common.requests
               import org.apache.kafka.common.requests.{UpdateMetadataRequest, _}
               import org.apache.kafka.common.utils.Time
               import org.apache.kafka.common.{Node, TopicPartition}
              @@ -362,7 +363,7 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends  Logging
                       }
                       val partitionStates = partitionStateInfos.map { case (topicPartition, partitionStateInfo) =>
                         val LeaderIsrAndControllerEpoch(leaderIsr, controllerEpoch) = partitionStateInfo.leaderIsrAndControllerEpoch
              -          val partitionState = new LeaderAndIsrRequest.PartitionState(controllerEpoch, leaderIsr.leader,
              +          val partitionState = new requests.PartitionState(controllerEpoch, leaderIsr.leader,
                           leaderIsr.leaderEpoch, leaderIsr.isr.map(Integer.valueOf).asJava, leaderIsr.zkVersion,
                           partitionStateInfo.allReplicas.map(Integer.valueOf).asJava
                         )
              @@ -379,7 +380,7 @@ class ControllerBrokerRequestBatch(controller: KafkaController) extends  Logging
                         broker, p._1)))
                       val partitionStates = partitionStateInfos.map { case (topicPartition, partitionStateInfo) =>
                         val LeaderIsrAndControllerEpoch(leaderIsr, controllerEpoch) = partitionStateInfo.leaderIsrAndControllerEpoch
              -          val partitionState = new UpdateMetadataRequest.PartitionState(controllerEpoch, leaderIsr.leader,
              +          val partitionState = new requests.PartitionState(controllerEpoch, leaderIsr.leader,
                           leaderIsr.leaderEpoch, leaderIsr.isr.map(Integer.valueOf).asJava, leaderIsr.zkVersion,
                           partitionStateInfo.allReplicas.map(Integer.valueOf).asJava
                         )
              diff --git a/core/src/main/scala/kafka/server/MetadataCache.scala b/core/src/main/scala/kafka/server/MetadataCache.scala
              index b387f2efab3d..f493e7d96d02 100755
              --- a/core/src/main/scala/kafka/server/MetadataCache.scala
              +++ b/core/src/main/scala/kafka/server/MetadataCache.scala
              @@ -30,8 +30,7 @@ import kafka.utils.CoreUtils._
               import kafka.utils.Logging
               import org.apache.kafka.common.Node
               import org.apache.kafka.common.protocol.{Errors, SecurityProtocol}
              -import org.apache.kafka.common.requests.UpdateMetadataRequest.PartitionState
              -import org.apache.kafka.common.requests.{MetadataResponse, UpdateMetadataRequest}
              +import org.apache.kafka.common.requests.{MetadataResponse, PartitionState, UpdateMetadataRequest}
               
               /**
                *  A cache for the state (e.g., current leader) of each partition. This cache is updated through
              diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala
              index 68f23859ee77..8260643a8aeb 100644
              --- a/core/src/main/scala/kafka/server/ReplicaManager.scala
              +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala
              @@ -19,6 +19,7 @@ package kafka.server
               import java.io.{File, IOException}
               import java.util.concurrent.TimeUnit
               import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong}
              +
               import com.yammer.metrics.core.Gauge
               import kafka.api._
               import kafka.cluster.{Partition, Replica}
              @@ -28,16 +29,17 @@ import kafka.log.{LogAppendInfo, LogManager}
               import kafka.message.{ByteBufferMessageSet, InvalidMessageException, Message, MessageSet}
               import kafka.metrics.KafkaMetricsGroup
               import kafka.utils._
              -import org.I0Itec.zkclient.IZkChildListener
              -import org.apache.kafka.common.errors.{OffsetOutOfRangeException, RecordBatchTooLargeException, ReplicaNotAvailableException, RecordTooLargeException,
              -InvalidTopicException, ControllerMovedException, NotLeaderForPartitionException, CorruptRecordException, UnknownTopicOrPartitionException,
              -InvalidTimestampException}
              +import org.apache.kafka.common.errors.{ControllerMovedException, CorruptRecordException, InvalidTimestampException,
              +                                        InvalidTopicException, NotLeaderForPartitionException, OffsetOutOfRangeException,
              +                                        RecordBatchTooLargeException, RecordTooLargeException, ReplicaNotAvailableException,
              +                                        UnknownTopicOrPartitionException}
               import org.apache.kafka.common.TopicPartition
               import org.apache.kafka.common.metrics.Metrics
               import org.apache.kafka.common.protocol.Errors
              -import org.apache.kafka.common.requests.{LeaderAndIsrRequest, StopReplicaRequest, UpdateMetadataRequest}
              +import org.apache.kafka.common.requests.{LeaderAndIsrRequest, PartitionState, StopReplicaRequest, UpdateMetadataRequest}
               import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
               import org.apache.kafka.common.utils.{Time => JTime}
              +
               import scala.collection._
               import scala.collection.JavaConverters._
               
              @@ -610,7 +612,7 @@ class ReplicaManager(val config: KafkaConfig,
                       controllerEpoch = leaderAndISRRequest.controllerEpoch
               
                       // First check partition's leader epoch
              -        val partitionState = new mutable.HashMap[Partition, LeaderAndIsrRequest.PartitionState]()
              +        val partitionState = new mutable.HashMap[Partition, PartitionState]()
                       leaderAndISRRequest.partitionStates.asScala.foreach { case (topicPartition, stateInfo) =>
                         val partition = getOrCreatePartition(topicPartition.topic, topicPartition.partition)
                         val partitionLeaderEpoch = partition.getLeaderEpoch()
              @@ -679,7 +681,7 @@ class ReplicaManager(val config: KafkaConfig,
                  */
                 private def makeLeaders(controllerId: Int,
                                         epoch: Int,
              -                          partitionState: Map[Partition, LeaderAndIsrRequest.PartitionState],
              +                          partitionState: Map[Partition, PartitionState],
                                         correlationId: Int,
                                         responseMap: mutable.Map[TopicPartition, Short]): Set[Partition] = {
                   partitionState.foreach(state =>
              @@ -750,7 +752,7 @@ class ReplicaManager(val config: KafkaConfig,
                  */
                 private def makeFollowers(controllerId: Int,
                                           epoch: Int,
              -                            partitionState: Map[Partition, LeaderAndIsrRequest.PartitionState],
              +                            partitionState: Map[Partition, PartitionState],
                                           correlationId: Int,
                                           responseMap: mutable.Map[TopicPartition, Short],
                                           metadataCache: MetadataCache) : Set[Partition] = {
              diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
              index 2d5900f8364a..60eb74cf2dd0 100644
              --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
              +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
              @@ -186,7 +186,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness {
                 }
               
                 private def createUpdateMetadataRequest = {
              -    val partitionState = Map(tp -> new requests.UpdateMetadataRequest.PartitionState(Int.MaxValue, brokerId, Int.MaxValue, List(brokerId).asJava, 2, Set(brokerId).asJava)).asJava
              +    val partitionState = Map(tp -> new PartitionState(Int.MaxValue, brokerId, Int.MaxValue, List(brokerId).asJava, 2, Set(brokerId).asJava)).asJava
                   val brokers = Set(new requests.UpdateMetadataRequest.Broker(brokerId,
                     Map(SecurityProtocol.PLAINTEXT -> new requests.UpdateMetadataRequest.EndPoint("localhost", 0)).asJava, null)).asJava
                   new requests.UpdateMetadataRequest(brokerId, Int.MaxValue, partitionState, brokers)
              @@ -215,7 +215,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness {
               
                 private def createLeaderAndIsrRequest = {
                   new requests.LeaderAndIsrRequest(brokerId, Int.MaxValue,
              -      Map(tp -> new requests.LeaderAndIsrRequest.PartitionState(Int.MaxValue, brokerId, Int.MaxValue, List(brokerId).asJava, 2, Set(brokerId).asJava)).asJava,
              +      Map(tp -> new PartitionState(Int.MaxValue, brokerId, Int.MaxValue, List(brokerId).asJava, 2, Set(brokerId).asJava)).asJava,
                     Set(new Node(brokerId, "localhost", 0)).asJava)
                 }
               
              diff --git a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
              index 72589800e111..343a3e1f9d1d 100755
              --- a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
              +++ b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
              @@ -18,11 +18,10 @@
               package kafka.server
               
               import org.apache.kafka.common.TopicPartition
              -import org.apache.kafka.common.requests.LeaderAndIsrRequest.PartitionState
               
               import scala.collection.JavaConverters._
               import kafka.api.LeaderAndIsr
              -import org.apache.kafka.common.requests.{AbstractRequestResponse, LeaderAndIsrRequest, LeaderAndIsrResponse}
              +import org.apache.kafka.common.requests.{AbstractRequestResponse, LeaderAndIsrRequest, LeaderAndIsrResponse, PartitionState}
               import org.junit.Assert._
               import kafka.utils.{CoreUtils, TestUtils}
               import kafka.cluster.Broker
              diff --git a/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala b/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
              index 770513c5cfe6..b34c93df787a 100644
              --- a/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
              +++ b/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
              @@ -22,8 +22,8 @@ import util.Arrays.asList
               import kafka.common.BrokerEndPointNotAvailableException
               import org.apache.kafka.common.TopicPartition
               import org.apache.kafka.common.protocol.{Errors, SecurityProtocol}
              -import org.apache.kafka.common.requests.UpdateMetadataRequest
              -import org.apache.kafka.common.requests.UpdateMetadataRequest.{Broker, EndPoint, PartitionState}
              +import org.apache.kafka.common.requests.{PartitionState, UpdateMetadataRequest}
              +import org.apache.kafka.common.requests.UpdateMetadataRequest.{Broker, EndPoint}
               import org.junit.Test
               import org.junit.Assert._
               
              diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
              index 57398562385b..bfb66b99201e 100644
              --- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
              +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
              @@ -24,19 +24,18 @@ import java.util.concurrent.atomic.AtomicBoolean
               import kafka.api.{FetchResponsePartitionData, PartitionFetchInfo}
               import kafka.cluster.Broker
               import kafka.common.TopicAndPartition
              -import kafka.message.{MessageSet, ByteBufferMessageSet, Message}
              +import kafka.message.{ByteBufferMessageSet, Message, MessageSet}
               import kafka.utils.{MockScheduler, MockTime, TestUtils, ZkUtils}
               import org.I0Itec.zkclient.ZkClient
               import org.apache.kafka.common.metrics.Metrics
               import org.apache.kafka.common.protocol.Errors
              -import org.apache.kafka.common.requests.LeaderAndIsrRequest
              -import org.apache.kafka.common.requests.LeaderAndIsrRequest.PartitionState
              +import org.apache.kafka.common.requests.{LeaderAndIsrRequest, PartitionState}
               import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
               import org.apache.kafka.common.utils.{MockTime => JMockTime}
               import org.apache.kafka.common.{Node, TopicPartition}
               import org.easymock.EasyMock
               import org.junit.Assert.{assertEquals, assertTrue}
              -import org.junit.{Test, Before, After}
              +import org.junit.{After, Before, Test}
               
               import scala.collection.JavaConverters._
               import scala.collection.Map
              
              From 8a6ddf4c4eda7919e24a8ae2a7c6a199e4cd3791 Mon Sep 17 00:00:00 2001
              From: Ismael Juma 
              Date: Thu, 21 Jul 2016 16:11:20 -0700
              Subject: [PATCH 217/267] MINOR: Upgrade RocksDB to 4.8.0
              
              Author: Ismael Juma 
              
              Reviewers: Guozhang Wang 
              
              Closes #1647 from ijuma/kafka-rocksdb-4.8
              
              (cherry picked from commit 44723caec74067aee9b06836b048465f4cffc431)
              Signed-off-by: Guozhang Wang 
              ---
               gradle/dependencies.gradle | 2 +-
               1 file changed, 1 insertion(+), 1 deletion(-)
              
              diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle
              index c97af87aebfa..f1b4ecda57ed 100644
              --- a/gradle/dependencies.gradle
              +++ b/gradle/dependencies.gradle
              @@ -37,7 +37,7 @@ versions += [
                 metrics: "2.2.0",
                 powermock: "1.6.4",
                 reflections: "0.9.10",
              -  rocksDB: "4.4.1",
              +  rocksDB: "4.8.0",
                 scalaTest: "2.2.6",
                 scalaParserCombinators: "1.0.4",
                 scoverage: "1.1.1",
              
              From 71a598a179ea9f1ee897d755b6daf9fb99860b50 Mon Sep 17 00:00:00 2001
              From: Jason Gustafson 
              Date: Thu, 21 Jul 2016 20:09:03 -0700
              Subject: [PATCH 218/267] KAFKA-3782: Fix transient failure in connect
               distributed bounce test
              
              Author: Jason Gustafson 
              
              Reviewers: Ewen Cheslack-Postava 
              
              Closes #1650 from hachikuji/KAFKA-3782
              
              (cherry picked from commit f5df13627aaa6052a19e4cd7896e94730dac7f64)
              Signed-off-by: Ewen Cheslack-Postava 
              ---
               .../tests/connect/connect_distributed_test.py     | 15 +++++++++------
               1 file changed, 9 insertions(+), 6 deletions(-)
              
              diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py
              index d4c4225a0bf8..b9757bad6d0a 100644
              --- a/tests/kafkatest/tests/connect/connect_distributed_test.py
              +++ b/tests/kafkatest/tests/connect/connect_distributed_test.py
              @@ -329,7 +329,7 @@ def test_bounce(self, clean):
                       self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
                       self.cc.start()
               
              -        self.source = VerifiableSource(self.cc, tasks=num_tasks)
              +        self.source = VerifiableSource(self.cc, tasks=num_tasks, throughput=100)
                       self.source.start()
                       self.sink = VerifiableSink(self.cc, tasks=num_tasks)
                       self.sink.start()
              @@ -344,11 +344,14 @@ def test_bounce(self, clean):
                                   monitor.wait_until("Starting connectors and tasks using config offset", timeout_sec=90,
                                                      err_msg="Kafka Connect worker didn't successfully join group and start work")
                               self.logger.info("Bounced Kafka Connect on %s and rejoined in %f seconds", node.account, time.time() - started)
              -                # If this is a hard bounce, give additional time for the consumer groups to recover. If we don't give
              -                # some time here, the next bounce may cause consumers to be shut down before they have any time to process
              -                # data and we can end up with zero data making it through the test.
              -                if not clean:
              -                    time.sleep(15)
              +
              +                # Give additional time for the consumer groups to recover. Even if it is not a hard bounce, there are
              +                # some cases where a restart can cause a rebalance to take the full length of the session timeout
              +                # (e.g. if the client shuts down before it has received the memberId from its initial JoinGroup).
              +                # If we don't give enough time for the group to stabilize, the next bounce may cause consumers to 
              +                # be shut down before they have any time to process data and we can end up with zero data making it 
              +                # through the test.
              +                time.sleep(15)
               
               
                       self.source.stop()
              
              From 24daae7d2703adffcbc90ab903916fa39601ce32 Mon Sep 17 00:00:00 2001
              From: Ryan Pridgeon 
              Date: Fri, 22 Jul 2016 18:09:38 +0100
              Subject: [PATCH 219/267] KAFKA-3983; Add additional information to Acceptor
               debug message
              
              Add additional information to Acceptor debug message upon connection acceptance
              
              Author: rnpridgeon 
              
              Reviewers: Ismael Juma 
              
              Closes #1648 from rnpridgeon/trunk
              
              (cherry picked from commit fa32545442ef6724aa9fb5f4e0e269b0c873288f)
              Signed-off-by: Ismael Juma 
              ---
               core/src/main/scala/kafka/network/SocketServer.scala | 4 ++--
               1 file changed, 2 insertions(+), 2 deletions(-)
              
              diff --git a/core/src/main/scala/kafka/network/SocketServer.scala b/core/src/main/scala/kafka/network/SocketServer.scala
              index b757abd6ec7a..ff2231c0dbcc 100644
              --- a/core/src/main/scala/kafka/network/SocketServer.scala
              +++ b/core/src/main/scala/kafka/network/SocketServer.scala
              @@ -328,8 +328,8 @@ private[kafka] class Acceptor(val endPoint: EndPoint,
                     socketChannel.socket().setKeepAlive(true)
                     socketChannel.socket().setSendBufferSize(sendBufferSize)
               
              -      debug("Accepted connection from %s on %s. sendBufferSize [actual|requested]: [%d|%d] recvBufferSize [actual|requested]: [%d|%d]"
              -            .format(socketChannel.socket.getInetAddress, socketChannel.socket.getLocalSocketAddress,
              +      debug("Accepted connection from %s on %s and assigned it to processor %d, sendBufferSize [actual|requested]: [%d|%d] recvBufferSize [actual|requested]: [%d|%d]"
              +            .format(socketChannel.socket.getRemoteSocketAddress, socketChannel.socket.getLocalSocketAddress, processor.id,
                                 socketChannel.socket.getSendBufferSize, sendBufferSize,
                                 socketChannel.socket.getReceiveBufferSize, recvBufferSize))
               
              
              From 17914453e35cc29396685b7fd7665918861b9805 Mon Sep 17 00:00:00 2001
              From: Alexey Romanchuk 
              Date: Sat, 23 Jul 2016 09:31:16 +0100
              Subject: [PATCH 220/267] KAFKA-3960; Committed offset not set after first
               assign
              
              Author: Alexey Romanchuk 
              
              Reviewers: Jason Gustafson , Ismael Juma 
              
              Closes #1629 from 13h3r/kafka-3960
              
              (cherry picked from commit 932bb84c837807eeca3600007ae3030561fdcb37)
              Signed-off-by: Ismael Juma 
              ---
               .../consumer/internals/SubscriptionState.java |  1 +
               .../clients/consumer/KafkaConsumerTest.java   | 60 +++++++++++++++++++
               .../internals/SubscriptionStateTest.java      |  2 +
               3 files changed, 63 insertions(+)
              
              diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
              index 2412d365a15b..38660e1b9d3b 100644
              --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
              +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
              @@ -170,6 +170,7 @@ public void assignFromUser(Collection partitions) {
                       this.assignment.keySet().retainAll(this.userAssignment);
               
                       this.needsPartitionAssignment = false;
              +        this.needsFetchCommittedOffsets = true;
                   }
               
                   /**
              diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
              index 694faf2f5f60..d846a69c933e 100644
              --- a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
              +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
              @@ -43,6 +43,7 @@
               import org.apache.kafka.common.requests.JoinGroupResponse;
               import org.apache.kafka.common.requests.OffsetCommitRequest;
               import org.apache.kafka.common.requests.OffsetCommitResponse;
              +import org.apache.kafka.common.requests.OffsetFetchResponse;
               import org.apache.kafka.common.requests.SyncGroupResponse;
               import org.apache.kafka.common.serialization.ByteArrayDeserializer;
               import org.apache.kafka.common.serialization.Deserializer;
              @@ -59,7 +60,9 @@
               import java.util.Arrays;
               import java.util.Collection;
               import java.util.Collections;
              +import java.util.HashMap;
               import java.util.List;
              +import java.util.Map;
               import java.util.Properties;
               import java.util.concurrent.atomic.AtomicBoolean;
               
              @@ -321,6 +324,55 @@ public boolean matches(ClientRequest request) {
                       assertTrue(heartbeatReceived.get());
                   }
               
              +    @Test
              +    public void testCommitsFetchedDuringAssign() {
              +        String topic = "topic";
              +        final TopicPartition partition1 = new TopicPartition(topic, 0);
              +        final TopicPartition partition2 = new TopicPartition(topic, 1);
              +
              +        long offset1 = 10000;
              +        long offset2 = 20000;
              +
              +        int sessionTimeoutMs = 3000;
              +        int heartbeatIntervalMs = 2000;
              +        int autoCommitIntervalMs = 1000;
              +
              +        Time time = new MockTime();
              +        MockClient client = new MockClient(time);
              +        Cluster cluster = TestUtils.singletonCluster(topic, 1);
              +        Node node = cluster.nodes().get(0);
              +        client.setNode(node);
              +        Metadata metadata = new Metadata(0, Long.MAX_VALUE);
              +        metadata.update(cluster, time.milliseconds());
              +        PartitionAssignor assignor = new RoundRobinAssignor();
              +
              +        final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor,
              +                sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs);
              +        consumer.assign(Arrays.asList(partition1));
              +
              +        // lookup coordinator
              +        client.prepareResponseFrom(new GroupCoordinatorResponse(Errors.NONE.code(), node).toStruct(), node);
              +        Node coordinator = new Node(Integer.MAX_VALUE - node.id(), node.host(), node.port());
              +
              +        // fetch offset for one topic
              +        client.prepareResponseFrom(
              +                offsetResponse(Collections.singletonMap(partition1, offset1), Errors.NONE.code()),
              +                coordinator);
              +
              +        assertEquals(offset1, consumer.committed(partition1).offset());
              +
              +        consumer.assign(Arrays.asList(partition1, partition2));
              +
              +        // fetch offset for two topics
              +        Map offsets = new HashMap<>();
              +        offsets.put(partition1, offset1);
              +        offsets.put(partition2, offset2);
              +        client.prepareResponseFrom(offsetResponse(offsets, Errors.NONE.code()), coordinator);
              +
              +        assertEquals(offset1, consumer.committed(partition1).offset());
              +        assertEquals(offset2, consumer.committed(partition2).offset());
              +    }
              +
                   @Test
                   public void testAutoCommitSentBeforePositionUpdate() {
                       String topic = "topic";
              @@ -479,6 +531,14 @@ private Struct syncGroupResponse(List partitions, short error) {
                       return new SyncGroupResponse(error, buf).toStruct();
                   }
               
              +    private Struct offsetResponse(Map offsets, short error) {
              +        Map partitionData = new HashMap<>();
              +        for (Map.Entry entry : offsets.entrySet()) {
              +            partitionData.put(entry.getKey(), new OffsetFetchResponse.PartitionData(entry.getValue(), "", error));
              +        }
              +        return new OffsetFetchResponse(partitionData).toStruct();
              +    }
              +
                   private Struct fetchResponse(TopicPartition tp, long fetchOffset, int count) {
                       MemoryRecords records = MemoryRecords.emptyRecords(ByteBuffer.allocate(1024), CompressionType.NONE);
                       for (int i = 0; i < count; i++)
              diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/SubscriptionStateTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/SubscriptionStateTest.java
              index 439ded7f79e9..3b4b10e7b0c1 100644
              --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/SubscriptionStateTest.java
              +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/SubscriptionStateTest.java
              @@ -46,6 +46,8 @@ public void partitionAssignment() {
                       state.assignFromUser(Arrays.asList(tp0));
                       assertEquals(Collections.singleton(tp0), state.assignedPartitions());
                       assertFalse(state.partitionAssignmentNeeded());
              +        assertFalse(state.hasAllFetchPositions());
              +        assertTrue(state.refreshCommitsNeeded());
                       state.committed(tp0, new OffsetAndMetadata(1));
                       state.seek(tp0, 1);
                       assertTrue(state.isFetchable(tp0));
              
              From 6b1a6d9551fc44d80c7cffc0b4c21427da9d8fda Mon Sep 17 00:00:00 2001
              From: Avi Flax 
              Date: Mon, 25 Jul 2016 11:03:21 -0700
              Subject: [PATCH 221/267] MINOR: Small enhancement to Deserializer Javadoc
              MIME-Version: 1.0
              Content-Type: text/plain; charset=UTF-8
              Content-Transfer-Encoding: 8bit
              
              I’ve implemented my own custom Deserializer and been using it with `KStream.reduceByKey`; I observed that `reduceByKey` was passing null to my implementation, but it wasn’t clear to me what my implementation was expected to do in this case. So this attempts to clarify it.
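
For illustration only (not part of this patch), a null-tolerant implementation could look roughly like the sketch below; the class name is hypothetical, and it simply returns null when handed null, as the clarified Javadoc recommends.

    import java.nio.charset.StandardCharsets;
    import java.util.Map;
    import org.apache.kafka.common.serialization.Deserializer;

    // Hypothetical example of a Deserializer that tolerates null input by
    // returning null rather than throwing, per the clarified contract.
    public class NullTolerantStringDeserializer implements Deserializer<String> {
        @Override
        public void configure(Map<String, ?> configs, boolean isKey) { }

        @Override
        public String deserialize(String topic, byte[] data) {
            if (data == null)
                return null;                            // null in, null out
            return new String(data, StandardCharsets.UTF_8);
        }

        @Override
        public void close() { }
    }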
              
              This is my original work and I license this work to the Kafka project under Kafka’s open source license (the Apache License 2.0).
              
              Author: Avi Flax 
              
              Reviewers: Eno Thereska , Ismael Juma , Guozhang Wang , Ewen Cheslack-Postava 
              
              Closes #1503 from aviflax/patch-1
              ---
               .../apache/kafka/common/serialization/Deserializer.java   | 8 ++++----
               1 file changed, 4 insertions(+), 4 deletions(-)
              
              diff --git a/clients/src/main/java/org/apache/kafka/common/serialization/Deserializer.java b/clients/src/main/java/org/apache/kafka/common/serialization/Deserializer.java
              index 254b5562da15..d6f4498f343d 100644
              --- a/clients/src/main/java/org/apache/kafka/common/serialization/Deserializer.java
              +++ b/clients/src/main/java/org/apache/kafka/common/serialization/Deserializer.java
              @@ -32,13 +32,13 @@ public interface Deserializer extends Closeable {
                   public void configure(Map configs, boolean isKey);
                   
                   /**
              -     *
              +     * Deserialize a record value from a bytearray into a value or object.
                    * @param topic topic associated with the data
              -     * @param data serialized bytes
              -     * @return deserialized typed data
              +     * @param data serialized bytes; may be null; implementations are recommended to handle null by returning a value or null rather than throwing an exception.
              +     * @return deserialized typed data; may be null
                    */
                   public T deserialize(String topic, byte[] data);
               
                   @Override
                   public void close();
              -}
              \ No newline at end of file
              +}
              
              From c47c3b0b583a849fdf3ed0a06835427a2801950a Mon Sep 17 00:00:00 2001
              From: Tom Crayford 
              Date: Tue, 26 Jul 2016 02:31:37 +0100
              Subject: [PATCH 222/267] KAFKA-3933; Always fully read deepIterator
              
              Avoids leaking native memory and hence crashing brokers on bootup due to
              running out of memory.
              
Seeing as `messageFormat > 0` always reads the full compressed message
set and is the default going forwards, we can use that behaviour to
always close the compressor when calling `deepIterator`.
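
The shape of the fix, sketched here in Java purely for illustration (the
actual change is to the Scala `deepIterator` in `ByteBufferMessageSet`):
drain the decompression stream eagerly and close it in a finally block so
any native buffers held by the codec are released.

    import java.io.DataInputStream;
    import java.io.EOFException;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.ArrayDeque;
    import java.util.Deque;

    // Illustrative only: eagerly drain a (possibly natively backed) decompression
    // stream and always close it, mirroring the pattern applied to deepIterator.
    final class EagerDecompression {
        static Deque<byte[]> readAll(InputStream compressedPayload) throws IOException {
            Deque<byte[]> records = new ArrayDeque<>();
            DataInputStream in = new DataInputStream(compressedPayload);
            try {
                while (true) {
                    int size = in.readInt();            // throws EOFException at end of stream
                    byte[] record = new byte[size];
                    in.readFully(record);
                    records.add(record);
                }
            } catch (EOFException eof) {
                // expected: end of the compressed message set
            } finally {
                in.close();                             // releases any memory held by the codec
            }
            return records;
        }
    }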
              
              Author: Tom Crayford 
              
              Reviewers: Ismael Juma 
              
              Closes #1660 from tcrayford/dont_leak_native_memory_round_2
              
              (cherry picked from commit 8a417c89d2f0b7861b2dec26f02e4e302b64b604)
              Signed-off-by: Ismael Juma 
              ---
               .../kafka/message/ByteBufferMessageSet.scala  | 69 +++++++++----------
               1 file changed, 34 insertions(+), 35 deletions(-)
              
              diff --git a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              index a116d4bc6767..98f6131385ca 100644
              --- a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              +++ b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              @@ -17,7 +17,7 @@
               
               package kafka.message
               
              -import kafka.utils.{IteratorTemplate, Logging}
              +import kafka.utils.{CoreUtils, IteratorTemplate, Logging}
               import kafka.common.{KafkaException, LongRef}
               import java.nio.ByteBuffer
               import java.nio.channels._
              @@ -85,36 +85,45 @@ object ByteBufferMessageSet {
                   new IteratorTemplate[MessageAndOffset] {
               
                     val MessageAndOffset(wrapperMessage, wrapperMessageOffset) = wrapperMessageAndOffset
              +
              +      if (wrapperMessage.payload == null)
              +        throw new KafkaException(s"Message payload is null: $wrapperMessage")
              +
                     val wrapperMessageTimestampOpt: Option[Long] =
                       if (wrapperMessage.magic > MagicValue_V0) Some(wrapperMessage.timestamp) else None
                     val wrapperMessageTimestampTypeOpt: Option[TimestampType] =
                       if (wrapperMessage.magic > MagicValue_V0) Some(wrapperMessage.timestampType) else None
              -      if (wrapperMessage.payload == null)
              -        throw new KafkaException(s"Message payload is null: $wrapperMessage")
              -      val inputStream = new ByteBufferBackedInputStream(wrapperMessage.payload)
              -      val compressed = try {
              -        new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, wrapperMessage.magic, inputStream))
              -      } catch {
              -        case ioe: IOException =>
              -          throw new InvalidMessageException(s"Failed to instantiate input stream compressed with ${wrapperMessage.compressionCodec}", ioe)
              -      }
              +
                     var lastInnerOffset = -1L
               
              -      val messageAndOffsets = if (wrapperMessageAndOffset.message.magic > MagicValue_V0) {
              +      val messageAndOffsets = {
              +        val inputStream = new ByteBufferBackedInputStream(wrapperMessage.payload)
              +        val compressed = try {
              +          new DataInputStream(CompressionFactory(wrapperMessage.compressionCodec, wrapperMessage.magic, inputStream))
              +        } catch {
              +          case ioe: IOException =>
              +            throw new InvalidMessageException(s"Failed to instantiate input stream compressed with ${wrapperMessage.compressionCodec}", ioe)
              +        }
              +
                       val innerMessageAndOffsets = new ArrayDeque[MessageAndOffset]()
                       try {
                         while (true)
              -            innerMessageAndOffsets.add(readMessageFromStream())
              +            innerMessageAndOffsets.add(readMessageFromStream(compressed))
                       } catch {
                         case eofe: EOFException =>
              -            compressed.close()
              +            // we don't do anything at all here, because the finally
              +            // will close the compressed input stream, and we simply
              +            // want to return the innerMessageAndOffsets
                         case ioe: IOException =>
                           throw new InvalidMessageException(s"Error while reading message from stream compressed with ${wrapperMessage.compressionCodec}", ioe)
              +        } finally {
              +          CoreUtils.swallow(compressed.close())
                       }
              -        Some(innerMessageAndOffsets)
              -      } else None
               
              -      private def readMessageFromStream(): MessageAndOffset = {
              +        innerMessageAndOffsets
              +      }
              +
              +      private def readMessageFromStream(compressed: DataInputStream): MessageAndOffset = {
                       val innerOffset = compressed.readLong()
                       val recordSize = compressed.readInt()
               
              @@ -138,25 +147,15 @@ object ByteBufferMessageSet {
                     }
               
                     override def makeNext(): MessageAndOffset = {
              -        messageAndOffsets match {
              -          // Using inner offset and timestamps
              -          case Some(innerMessageAndOffsets) =>
              -            innerMessageAndOffsets.pollFirst() match {
              -              case null => allDone()
              -              case MessageAndOffset(message, offset) =>
              -                val relativeOffset = offset - lastInnerOffset
              -                val absoluteOffset = wrapperMessageOffset + relativeOffset
              -                new MessageAndOffset(message, absoluteOffset)
              -            }
              -          // Not using inner offset and timestamps
              -          case None =>
              -            try readMessageFromStream()
              -            catch {
              -              case eofe: EOFException =>
              -                compressed.close()
              -                allDone()
              -              case ioe: IOException =>
              -                throw new KafkaException(ioe)
              +        messageAndOffsets.pollFirst() match {
              +          case null => allDone()
              +          case nextMessage@ MessageAndOffset(message, offset) =>
              +            if (wrapperMessage.magic > MagicValue_V0) {
              +              val relativeOffset = offset - lastInnerOffset
              +              val absoluteOffset = wrapperMessageOffset + relativeOffset
              +              new MessageAndOffset(message, absoluteOffset)
              +            } else {
              +              nextMessage
                           }
                       }
                     }
              
              From dc93f3bf49564db4c453c7e0b65b462a5415a788 Mon Sep 17 00:00:00 2001
              From: Ewen Cheslack-Postava 
              Date: Tue, 26 Jul 2016 03:02:02 +0100
              Subject: [PATCH 223/267] KAFKA-3935; Fix test_restart_failed_task system test
               for SinkTasks
              
              Fix the test by using a more liberal timeout and forcing more frequent SinkTask.put() calls. Also add some logging to aid future debugging.
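
Sketched below (an illustrative Java fragment, not the full MockSinkTask) is the wakeup-forcing idea: `SinkTaskContext.timeout()` is re-armed on every `put()` so the framework calls `put()` again within a bounded interval even when no records arrive; the 250 ms and 15000 ms values mirror the ones used in the patch.

    import java.util.Collection;
    import org.apache.kafka.connect.sink.SinkRecord;
    import org.apache.kafka.connect.sink.SinkTask;

    // Illustrative sketch: keep put() firing frequently by requesting a wakeup
    // every 250 ms (or sooner, if the scheduled failure is closer than that).
    public abstract class FrequentWakeupSinkTask extends SinkTask {
        private static final long WAKEUP_INTERVAL_MS = 250;
        private final long failureDelayMs = 15000;      // matches MockConnector.DEFAULT_FAILURE_DELAY_MS

        @Override
        public void put(Collection<SinkRecord> records) {
            // ... task work would go here ...
            context.timeout(Math.min(failureDelayMs, WAKEUP_INTERVAL_MS));
        }
    }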
              
              Author: Ewen Cheslack-Postava 
              
              Reviewers: Jason Gustafson , Ismael Juma 
              
              Closes #1663 from ewencp/kafka-3935-fix-restart-system-test
              
              (cherry picked from commit d1546960de0aa43989680a59c8f6b1ae7cb333e9)
              Signed-off-by: Ismael Juma 
              ---
               .../kafka/connect/tools/MockConnector.java    |  7 +++++++
               .../kafka/connect/tools/MockSinkTask.java     | 19 ++++++++++++++++++-
               .../kafka/connect/tools/MockSourceTask.java   |  9 ++++++++-
               .../tests/connect/connect_distributed_test.py |  2 +-
               4 files changed, 34 insertions(+), 3 deletions(-)
              
              diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java
              index 919e89613217..51bb51987dd8 100644
              --- a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java
              +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockConnector.java
              @@ -20,6 +20,8 @@
               import org.apache.kafka.common.utils.AppInfoParser;
               import org.apache.kafka.connect.connector.Connector;
               import org.apache.kafka.connect.connector.Task;
              +import org.slf4j.Logger;
              +import org.slf4j.LoggerFactory;
               
               import java.util.Collections;
               import java.util.List;
              @@ -49,6 +51,8 @@ public class MockConnector extends Connector {
               
                   public static final long DEFAULT_FAILURE_DELAY_MS = 15000;
               
              +    private static final Logger log = LoggerFactory.getLogger(MockConnector.class);
              +
                   private Map config;
                   private ScheduledExecutorService executor;
               
              @@ -69,10 +73,12 @@ public void start(Map config) {
                           if (delayMsString != null)
                               delayMs = Long.parseLong(delayMsString);
               
              +            log.debug("Started MockConnector with failure delay of {} ms", delayMs);
                           executor = Executors.newSingleThreadScheduledExecutor();
                           executor.schedule(new Runnable() {
                               @Override
                               public void run() {
              +                    log.debug("Triggering connector failure");
                                   context.raiseError(new RuntimeException());
                               }
                           }, delayMs, TimeUnit.MILLISECONDS);
              @@ -86,6 +92,7 @@ public Class taskClass() {
               
                   @Override
                   public List> taskConfigs(int maxTasks) {
              +        log.debug("Creating single task for MockConnector");
                       return Collections.singletonList(config);
                   }
               
              diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java
              index 2e4b35eeb8b6..b0de58d938d1 100644
              --- a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java
              +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSinkTask.java
              @@ -21,11 +21,14 @@
               import org.apache.kafka.common.utils.AppInfoParser;
               import org.apache.kafka.connect.sink.SinkRecord;
               import org.apache.kafka.connect.sink.SinkTask;
              +import org.slf4j.Logger;
              +import org.slf4j.LoggerFactory;
               
               import java.util.Collection;
               import java.util.Map;
               
               public class MockSinkTask extends SinkTask {
              +    private static final Logger log = LoggerFactory.getLogger(MockSinkTask.class);
               
                   private String mockMode;
                   private long startTimeMs;
              @@ -47,6 +50,9 @@ public void start(Map config) {
                           this.failureDelayMs = MockConnector.DEFAULT_FAILURE_DELAY_MS;
                           if (delayMsString != null)
                               failureDelayMs = Long.parseLong(delayMsString);
              +
              +            log.debug("Started MockSinkTask at {} with failure scheduled in {} ms", startTimeMs, failureDelayMs);
              +            setTimeout();
                       }
                   }
               
              @@ -54,8 +60,11 @@ public void start(Map config) {
                   public void put(Collection records) {
                       if (MockConnector.TASK_FAILURE.equals(mockMode)) {
                           long now = System.currentTimeMillis();
              -            if (now > startTimeMs + failureDelayMs)
              +            if (now > startTimeMs + failureDelayMs) {
              +                log.debug("Triggering sink task failure");
                               throw new RuntimeException();
              +            }
              +            setTimeout();
                       }
                   }
               
              @@ -68,4 +77,12 @@ public void flush(Map offsets) {
                   public void stop() {
               
                   }
              +
              +    private void setTimeout() {
              +        // Set a reasonable minimum delay. Since this mock task may not actually consume any data from Kafka, it may only
              +        // see put() calls triggered by wakeups for offset commits. To make sure we aren't tied to the offset commit
              +        // interval, we force a wakeup every 250ms or after the failure delay, whichever is smaller. This is not overly
              +        // aggressive but ensures any scheduled tasks this connector performs are reasonably close to the target time.
              +        context.timeout(Math.min(failureDelayMs, 250));
              +    }
               }
              diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java
              index eb896af08b8a..d7288f85d2f0 100644
              --- a/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java
              +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/tools/MockSourceTask.java
              @@ -19,12 +19,15 @@
               import org.apache.kafka.common.utils.AppInfoParser;
               import org.apache.kafka.connect.source.SourceRecord;
               import org.apache.kafka.connect.source.SourceTask;
              +import org.slf4j.Logger;
              +import org.slf4j.LoggerFactory;
               
               import java.util.Collections;
               import java.util.List;
               import java.util.Map;
               
               public class MockSourceTask extends SourceTask {
              +    private static final Logger log = LoggerFactory.getLogger(MockSourceTask.class);
               
                   private String mockMode;
                   private long startTimeMs;
              @@ -46,6 +49,8 @@ public void start(Map config) {
                           this.failureDelayMs = MockConnector.DEFAULT_FAILURE_DELAY_MS;
                           if (delayMsString != null)
                               failureDelayMs = Long.parseLong(delayMsString);
              +
              +            log.debug("Started MockSourceTask at {} with failure scheduled in {} ms", startTimeMs, failureDelayMs);
                       }
                   }
               
              @@ -53,8 +58,10 @@ public void start(Map config) {
                   public List poll() throws InterruptedException {
                       if (MockConnector.TASK_FAILURE.equals(mockMode)) {
                           long now = System.currentTimeMillis();
              -            if (now > startTimeMs + failureDelayMs)
              +            if (now > startTimeMs + failureDelayMs) {
              +                log.debug("Triggering source task failure");
                               throw new RuntimeException();
              +            }
                       }
                       return Collections.emptyList();
                   }
              diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py
              index b9757bad6d0a..1902c595e867 100644
              --- a/tests/kafkatest/tests/connect/connect_distributed_test.py
              +++ b/tests/kafkatest/tests/connect/connect_distributed_test.py
              @@ -171,7 +171,7 @@ def test_restart_failed_task(self, connector_type):
                       connector.start()
               
                       task_id = 0
              -        wait_until(lambda: self.task_is_failed(connector, task_id), timeout_sec=15,
              +        wait_until(lambda: self.task_is_failed(connector, task_id), timeout_sec=20,
                                  err_msg="Failed to see task transition to the FAILED state")
               
                       self.cc.restart_task(connector.name, task_id)
              
              From 817f237683750c815036c09a0c0fa64d4f45bff7 Mon Sep 17 00:00:00 2001
              From: Ewen Cheslack-Postava 
              Date: Tue, 26 Jul 2016 19:43:32 -0700
              Subject: [PATCH 224/267] KAFKA-3500: Handle null keys and values in
               KafkaOffsetBackingStore.
              
              Author: Ewen Cheslack-Postava 
              
              Reviewers: Ismael Juma, Jason Gustafson, Gwen Shapira
              
              Closes #1662 from ewencp/kafka-3500-kafka-offset-backing-store-null
              
              (cherry picked from commit 4059f07216a07db0cdd88b46db40914069171838)
              Signed-off-by: Gwen Shapira 
              ---
               .../storage/KafkaOffsetBackingStore.java      |  7 +-
               .../storage/KafkaOffsetBackingStoreTest.java  | 71 +++++++++++++++++++
               2 files changed, 76 insertions(+), 2 deletions(-)
              
              diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
              index e8984fb89434..921998697938 100644
              --- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
              +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
              @@ -118,8 +118,11 @@ public Map convert(Void result) {
                   public Future set(final Map values, final Callback callback) {
                       SetCallbackFuture producerCallback = new SetCallbackFuture(values.size(), callback);
               
              -        for (Map.Entry entry : values.entrySet())
              -            offsetLog.send(entry.getKey().array(), entry.getValue().array(), producerCallback);
              +        for (Map.Entry entry : values.entrySet()) {
              +            ByteBuffer key = entry.getKey();
              +            ByteBuffer value = entry.getValue();
              +            offsetLog.send(key == null ? null : key.array(), value == null ? null : value.array(), producerCallback);
              +        }
               
                       return producerCallback;
                   }
              diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
              index 38e0f7b1b9bc..4a244f0a6b66 100644
              --- a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
              +++ b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
              @@ -53,6 +53,7 @@
               import static org.junit.Assert.assertEquals;
               import static org.junit.Assert.assertFalse;
               import static org.junit.Assert.assertNotNull;
              +import static org.junit.Assert.assertNull;
               import static org.junit.Assert.assertTrue;
               import static org.junit.Assert.fail;
               
              @@ -270,6 +271,76 @@ public void onCompletion(Throwable error, Map result) {
                       PowerMock.verifyAll();
                   }
               
              +    @Test
              +    public void testGetSetNull() throws Exception {
              +        expectConfigure();
              +        expectStart(Collections.EMPTY_LIST);
              +
              +        // Set offsets
              +        Capture callback0 = EasyMock.newCapture();
              +        storeLog.send(EasyMock.isNull(byte[].class), EasyMock.aryEq(TP0_VALUE.array()), EasyMock.capture(callback0));
              +        PowerMock.expectLastCall();
              +        Capture callback1 = EasyMock.newCapture();
              +        storeLog.send(EasyMock.aryEq(TP1_KEY.array()), EasyMock.isNull(byte[].class), EasyMock.capture(callback1));
              +        PowerMock.expectLastCall();
              +
              +        // Second get() should get the produced data and return the new values
              +        final Capture> secondGetReadToEndCallback = EasyMock.newCapture();
              +        storeLog.readToEnd(EasyMock.capture(secondGetReadToEndCallback));
              +        PowerMock.expectLastCall().andAnswer(new IAnswer() {
              +            @Override
              +            public Object answer() throws Throwable {
              +                capturedConsumedCallback.getValue().onCompletion(null, new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, (byte[]) null, TP0_VALUE.array()));
              +                capturedConsumedCallback.getValue().onCompletion(null, new ConsumerRecord<>(TOPIC, 1, 0, 0L, TimestampType.CREATE_TIME, 0L, 0, 0, TP1_KEY.array(), (byte[]) null));
              +                secondGetReadToEndCallback.getValue().onCompletion(null, null);
              +                return null;
              +            }
              +        });
              +
              +        expectStop();
              +
              +        PowerMock.replayAll();
              +
              +        store.configure(DEFAULT_DISTRIBUTED_CONFIG);
              +        store.start();
              +
              +        // Set offsets using null keys and values
              +        Map toSet = new HashMap<>();
              +        toSet.put(null, TP0_VALUE);
              +        toSet.put(TP1_KEY, null);
              +        final AtomicBoolean invoked = new AtomicBoolean(false);
              +        Future setFuture = store.set(toSet, new Callback() {
              +            @Override
              +            public void onCompletion(Throwable error, Void result) {
              +                invoked.set(true);
              +            }
              +        });
              +        assertFalse(setFuture.isDone());
              +        // Out of order callbacks shouldn't matter, should still require all to be invoked before invoking the callback
              +        // for the store's set callback
              +        callback1.getValue().onCompletion(null, null);
              +        assertFalse(invoked.get());
              +        callback0.getValue().onCompletion(null, null);
              +        setFuture.get(10000, TimeUnit.MILLISECONDS);
              +        assertTrue(invoked.get());
              +
              +        // Getting data should read to end of our published data and return it
              +        final AtomicBoolean secondGetInvokedAndPassed = new AtomicBoolean(false);
              +        store.get(Arrays.asList(null, TP1_KEY), new Callback>() {
              +            @Override
              +            public void onCompletion(Throwable error, Map result) {
              +                assertEquals(TP0_VALUE, result.get(null));
              +                assertNull(result.get(TP1_KEY));
              +                secondGetInvokedAndPassed.set(true);
              +            }
              +        }).get(10000, TimeUnit.MILLISECONDS);
              +        assertTrue(secondGetInvokedAndPassed.get());
              +
              +        store.stop();
              +
              +        PowerMock.verifyAll();
              +    }
              +
                   @Test
                   public void testSetFailure() throws Exception {
                       expectConfigure();
              
              From 0a37bedd95aa883a00fbd19c4a86ab53aeb2fcb2 Mon Sep 17 00:00:00 2001
              From: Ismael Juma 
              Date: Wed, 27 Jul 2016 07:51:40 -0700
              Subject: [PATCH 225/267] KAFKA-3996; ByteBufferMessageSet.writeTo() should be
               non-blocking
              
              Also:
* Introduce a blocking variant, `writeFullyTo`, to be used by `FileMessageSet.append` (see the sketch after this list)
              * Add tests
              * Minor clean-ups
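
The non-blocking semantics can be sketched in Java for illustration (the real
change is in the Scala `ByteBufferMessageSet.writeTo`): write once from a
bounded duplicate of the buffer and report how many bytes the channel
accepted, leaving the caller to retry if needed.

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.channels.GatheringByteChannel;

    // Illustrative Java sketch of the new non-blocking writeTo semantics:
    // a single channel.write() call on a bounded duplicate, so the caller's
    // buffer position is untouched and a partial write is allowed.
    final class NonBlockingWrite {
        static int writeTo(ByteBuffer buffer, GatheringByteChannel channel,
                           long offset, int maxSize) throws IOException {
            if (offset > Integer.MAX_VALUE)
                throw new IllegalArgumentException("offset should not be larger than Int.MaxValue: " + offset);
            ByteBuffer dup = buffer.duplicate();        // independent position/limit, shared contents
            int position = (int) offset;
            dup.position(position);
            dup.limit(Math.min(buffer.limit(), position + maxSize));
            return channel.write(dup);                  // may write fewer bytes than remaining
        }
    }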
              
              Author: Ismael Juma 
              
              Reviewers: Jun Rao 
              
              Closes #1669 from ijuma/kafka-3996-byte-buffer-message-set-write-to-non-blocking
              
              (cherry picked from commit 64842f47f8ad21c942be2e2dd1edb56e3bc76cb8)
              Signed-off-by: Jun Rao 
              ---
               .../kafka/common/network/ByteBufferSend.java  |  2 +-
               .../main/scala/kafka/log/FileMessageSet.scala |  2 +-
               .../kafka/message/ByteBufferMessageSet.scala  | 18 ++++-
               .../message/BaseMessageSetTestCases.scala     | 80 ++++++++++++++++---
               .../message/ByteBufferMessageSetTest.scala    | 10 +++
               5 files changed, 96 insertions(+), 16 deletions(-)
              
              diff --git a/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java b/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java
              index d7357b2ce61f..9e213ecd3027 100644
              --- a/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java
              +++ b/clients/src/main/java/org/apache/kafka/common/network/ByteBufferSend.java
              @@ -23,9 +23,9 @@
               public class ByteBufferSend implements Send {
               
                   private final String destination;
              +    private final int size;
                   protected final ByteBuffer[] buffers;
                   private int remaining;
              -    private int size;
                   private boolean pending = false;
               
                   public ByteBufferSend(String destination, ByteBuffer... buffers) {
              diff --git a/core/src/main/scala/kafka/log/FileMessageSet.scala b/core/src/main/scala/kafka/log/FileMessageSet.scala
              index 2ee2cc20aa4b..ac010101b58b 100755
              --- a/core/src/main/scala/kafka/log/FileMessageSet.scala
              +++ b/core/src/main/scala/kafka/log/FileMessageSet.scala
              @@ -294,7 +294,7 @@ class FileMessageSet private[kafka](@volatile var file: File,
                  * Append these messages to the message set
                  */
                 def append(messages: ByteBufferMessageSet) {
              -    val written = messages.writeTo(channel, 0, messages.sizeInBytes)
              +    val written = messages.writeFullyTo(channel)
                   _size.getAndAdd(written)
                 }
               
              diff --git a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              index 98f6131385ca..15d4eea455e3 100644
              --- a/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              +++ b/core/src/main/scala/kafka/message/ByteBufferMessageSet.scala
              @@ -294,16 +294,28 @@ class ByteBufferMessageSet(val buffer: ByteBuffer) extends MessageSet with Loggi
                 }
               
                 /** Write the messages in this set to the given channel */
              -  def writeTo(channel: GatheringByteChannel, offset: Long, size: Int): Int = {
              -    // Ignore offset and size from input. We just want to write the whole buffer to the channel.
              +  def writeFullyTo(channel: GatheringByteChannel): Int = {
                   buffer.mark()
                   var written = 0
              -    while(written < sizeInBytes)
              +    while (written < sizeInBytes)
                     written += channel.write(buffer)
                   buffer.reset()
                   written
                 }
               
              +  /** Write the messages in this set to the given channel starting at the given offset byte.
              +    * Less than the complete amount may be written, but no more than maxSize can be. The number
              +    * of bytes written is returned */
              +  def writeTo(channel: GatheringByteChannel, offset: Long, maxSize: Int): Int = {
              +    if (offset > Int.MaxValue)
              +      throw new IllegalArgumentException(s"offset should not be larger than Int.MaxValue: $offset")
              +    val dup = buffer.duplicate()
              +    val position = offset.toInt
              +    dup.position(position)
              +    dup.limit(math.min(buffer.limit, position + maxSize))
              +    channel.write(dup)
              +  }
              +
                 override def isMagicValueInAllWrapperMessages(expectedMagicValue: Byte): Boolean = {
                   for (messageAndOffset <- shallowIterator) {
                     if (messageAndOffset.message.magic != expectedMagicValue)
              diff --git a/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala b/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala
              index 10687d17944c..0d86128d4020 100644
              --- a/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala
              +++ b/core/src/test/scala/unit/kafka/message/BaseMessageSetTestCases.scala
              @@ -17,15 +17,45 @@
               
               package kafka.message
               
              -import java.io.RandomAccessFile
              +import java.nio.ByteBuffer
              +import java.nio.channels.{FileChannel, GatheringByteChannel}
              +import java.nio.file.StandardOpenOption
              +
               import org.junit.Assert._
               import kafka.utils.TestUtils._
               import kafka.log.FileMessageSet
              +import kafka.utils.TestUtils
               import org.scalatest.junit.JUnitSuite
               import org.junit.Test
               
              +import scala.collection.mutable.ArrayBuffer
              +
               trait BaseMessageSetTestCases extends JUnitSuite {
              -  
              +
              +  private class StubByteChannel(bytesToConsumePerBuffer: Int) extends GatheringByteChannel {
              +
              +    val data = new ArrayBuffer[Byte]
              +
              +    def write(srcs: Array[ByteBuffer], offset: Int, length: Int): Long = {
              +      srcs.map { src =>
              +        val array = new Array[Byte](math.min(bytesToConsumePerBuffer, src.remaining))
              +        src.get(array)
              +        data ++= array
              +        array.length
              +      }.sum
              +    }
              +
              +    def write(srcs: Array[ByteBuffer]): Long = write(srcs, 0, srcs.map(_.remaining).sum)
              +
              +    def write(src: ByteBuffer): Int = write(Array(src)).toInt
              +
              +    def isOpen: Boolean = true
              +
              +    def close() {}
              +
              +  }
              +
              +
                 val messages = Array(new Message("abcd".getBytes), new Message("efgh".getBytes), new Message("ijkl".getBytes))
                 
                 def createMessageSet(messages: Seq[Message]): MessageSet
              @@ -56,20 +86,48 @@ trait BaseMessageSetTestCases extends JUnitSuite {
                 @Test
                 def testWriteTo() {
                   // test empty message set
              -    testWriteToWithMessageSet(createMessageSet(Array[Message]()))
              -    testWriteToWithMessageSet(createMessageSet(messages))
              +    checkWriteToWithMessageSet(createMessageSet(Array[Message]()))
              +    checkWriteToWithMessageSet(createMessageSet(messages))
                 }
               
              -  def testWriteToWithMessageSet(set: MessageSet) {
              +  /* Tests that writing to a channel that doesn't consume all the bytes in the buffer works correctly */
              +  @Test
              +  def testWriteToChannelThatConsumesPartially() {
              +    val bytesToConsumePerBuffer = 50
              +    val messages = (0 until 10).map(_ => new Message(TestUtils.randomString(100).getBytes))
              +    val messageSet = createMessageSet(messages)
              +    val messageSetSize = messageSet.sizeInBytes
              +
              +    val channel = new StubByteChannel(bytesToConsumePerBuffer)
              +
              +    var remaining = messageSetSize
              +    var iterations = 0
              +    while (remaining > 0) {
              +      remaining -= messageSet.writeTo(channel, messageSetSize - remaining, remaining)
              +      iterations += 1
              +    }
              +
              +    assertEquals((messageSetSize / bytesToConsumePerBuffer) + 1, iterations)
              +    checkEquals(new ByteBufferMessageSet(ByteBuffer.wrap(channel.data.toArray)).iterator, messageSet.iterator)
              +  }
              +
              +  def checkWriteToWithMessageSet(messageSet: MessageSet) {
              +    checkWriteWithMessageSet(messageSet, messageSet.writeTo(_, 0, messageSet.sizeInBytes))
              +  }
              +
              +  def checkWriteWithMessageSet(set: MessageSet, write: GatheringByteChannel => Long) {
                   // do the write twice to ensure the message set is restored to its original state
              -    for(i <- List(0,1)) {
              +    for (_ <- 0 to 1) {
                     val file = tempFile()
              -      val channel = new RandomAccessFile(file, "rw").getChannel()
              -      val written = set.writeTo(channel, 0, 1024)
              -      assertEquals("Expect to write the number of bytes in the set.", set.sizeInBytes, written)
              -      val newSet = new FileMessageSet(file, channel)
              -      checkEquals(set.iterator, newSet.iterator)
              +      val channel = FileChannel.open(file.toPath, StandardOpenOption.READ, StandardOpenOption.WRITE)
              +      try {
              +        val written = write(channel)
              +        assertEquals("Expect to write the number of bytes in the set.", set.sizeInBytes, written)
              +        val newSet = new FileMessageSet(file, channel)
              +        checkEquals(set.iterator, newSet.iterator)
              +      } finally channel.close()
                   }
                 }
                 
               }
              +
              diff --git a/core/src/test/scala/unit/kafka/message/ByteBufferMessageSetTest.scala b/core/src/test/scala/unit/kafka/message/ByteBufferMessageSetTest.scala
              index 758dad22eab9..e399e081a4fb 100644
              --- a/core/src/test/scala/unit/kafka/message/ByteBufferMessageSetTest.scala
              +++ b/core/src/test/scala/unit/kafka/message/ByteBufferMessageSetTest.scala
              @@ -380,6 +380,16 @@ class ByteBufferMessageSetTest extends BaseMessageSetTestCases {
                                                                                      messageTimestampType = TimestampType.CREATE_TIME,
                                                                                      messageTimestampDiffMaxMs = 5000L)._1, offset)
                 }
              +
              +  @Test
              +  def testWriteFullyTo() {
              +    checkWriteFullyToWithMessageSet(createMessageSet(Array[Message]()))
              +    checkWriteFullyToWithMessageSet(createMessageSet(messages))
              +  }
              +
              +  def checkWriteFullyToWithMessageSet(messageSet: ByteBufferMessageSet) {
              +    checkWriteWithMessageSet(messageSet, messageSet.writeFullyTo)
              +  }
                 
                 /* check that offsets are assigned based on byte offset from the given base offset */
                 def checkOffsets(messages: ByteBufferMessageSet, baseOffset: Long) {
              
              From 279f89a67c8e884a345ff88a4bde6399f48b2e64 Mon Sep 17 00:00:00 2001
              From: Maysam Yabandeh 
              Date: Wed, 27 Jul 2016 08:22:23 -0700
Subject: [PATCH 226/267] KAFKA-3924; Replacing halt with exit upon LEO mismatch
 to trigger gra…
              MIME-Version: 1.0
              Content-Type: text/plain; charset=UTF-8
              Content-Transfer-Encoding: 8bit
              
              …ceful shutdown
              
              The patch is pretty simple and the justification is explained in https://issues.apache.org/jira/browse/KAFKA-3924
              
I could not find Andrew Olson, who seems to be the contributor of this part of the code, on GitHub, so I am not sure whom I should ask to review the patch.
              
The contribution is my original work and I license the work to the project under the project's open source license.
              
              Author: Maysam Yabandeh 
              
              Reviewers: Ismael Juma , Andrew Olson , Jun Rao 
              
              Closes #1634 from maysamyabandeh/KAFKA-3924
              
              (cherry picked from commit d58c2d9ac6047b14ab22e19c9f7c857bb85a970d)
              Signed-off-by: Jun Rao 
              ---
               core/src/main/scala/kafka/server/ReplicaFetcherThread.scala | 4 ++--
               1 file changed, 2 insertions(+), 2 deletions(-)
              
              diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
              index d58f12073840..c6a28c620d8c 100644
              --- a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
              +++ b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
              @@ -175,10 +175,10 @@ class ReplicaFetcherThread(name: String,
                     if (!LogConfig.fromProps(brokerConfig.originals, AdminUtils.fetchEntityConfig(replicaMgr.zkUtils,
                       ConfigType.Topic, topicAndPartition.topic)).uncleanLeaderElectionEnable) {
                       // Log a fatal error and shutdown the broker to ensure that data loss does not unexpectedly occur.
              -        fatal("Halting because log truncation is not allowed for topic %s,".format(topicAndPartition.topic) +
              +        fatal("Exiting because log truncation is not allowed for topic %s,".format(topicAndPartition.topic) +
                         " Current leader %d's latest offset %d is less than replica %d's latest offset %d"
                         .format(sourceBroker.id, leaderEndOffset, brokerConfig.brokerId, replica.logEndOffset.messageOffset))
              -        Runtime.getRuntime.halt(1)
              +        System.exit(1)
                     }
               
                     warn("Replica %d for partition %s reset its fetch offset from %d to current leader %d's latest offset %d"
              
              From b297cead3ef7ee3516590e696975071cadb06fba Mon Sep 17 00:00:00 2001
              From: "Matthias J. Sax" 
              Date: Wed, 27 Jul 2016 15:55:06 -0700
              Subject: [PATCH 227/267] KAFKA-3185: [Streams] Added Kafka Streams Application
               Reset Tool
              
              Author: Matthias J. Sax 
              
              Reviewers: Guozhang Wang, Michael G. Noll
              
              Closes #1671 from mjsax/resetTool-0.10.0.1
              ---
               bin/kafka-streams-application-reset.sh        |  21 ++
               build.gradle                                  |   2 +
               checkstyle/import-control.xml                 |   5 +-
               .../apache/kafka/streams/KafkaStreams.java    |  96 ++++---
               .../kafka/streams/KafkaStreamsTest.java       | 129 +++++++--
               .../integration/ResetIntegrationTest.java     | 253 +++++++++++++++++
               .../utils/EmbeddedSingleNodeKafkaCluster.java |   9 +-
               .../utils/IntegrationTestUtils.java           | 133 +++++----
               .../integration/utils/KafkaEmbedded.java      | 105 ++++---
               .../apache/kafka/tools/StreamsResetter.java   | 260 ++++++++++++++++++
               10 files changed, 849 insertions(+), 164 deletions(-)
               create mode 100755 bin/kafka-streams-application-reset.sh
               create mode 100644 streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java
               create mode 100644 tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java
              
              diff --git a/bin/kafka-streams-application-reset.sh b/bin/kafka-streams-application-reset.sh
              new file mode 100755
              index 000000000000..26ab7667137f
              --- /dev/null
              +++ b/bin/kafka-streams-application-reset.sh
              @@ -0,0 +1,21 @@
              +#!/bin/bash
              +# Licensed to the Apache Software Foundation (ASF) under one or more
              +# contributor license agreements.  See the NOTICE file distributed with
              +# this work for additional information regarding copyright ownership.
              +# The ASF licenses this file to You under the Apache License, Version 2.0
              +# (the "License"); you may not use this file except in compliance with
              +# the License.  You may obtain a copy of the License at
              +#
              +#    http://www.apache.org/licenses/LICENSE-2.0
              +#
              +# Unless required by applicable law or agreed to in writing, software
              +# distributed under the License is distributed on an "AS IS" BASIS,
              +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
              +# See the License for the specific language governing permissions and
              +# limitations under the License.
              +
              +if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then
              +    export KAFKA_HEAP_OPTS="-Xmx512M"
              +fi
              +
              +exec $(dirname $0)/kafka-run-class.sh org.apache.kafka.tools.StreamsResetter "$@"
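Besides the shell wrapper above, the reset tool can be driven programmatically; the ResetIntegrationTest added later in this patch does exactly that through StreamsResetter.run(String[], Properties). A hedged sketch of such an invocation (the application id, server addresses, and topic names below are placeholders, not values taken from the patch):

    import java.util.Properties;

    import org.apache.kafka.tools.StreamsResetter;

    // Illustrative invocation only; mirrors the call made in ResetIntegrationTest#cleanGlobal().
    public final class ResetToolExample {
        public static void main(final String[] args) {
            final Properties consumerOverrides = new Properties();  // optional consumer config overrides

            final int exitCode = new StreamsResetter().run(
                new String[]{
                    "--application-id", "my-streams-app",    // placeholder application id
                    "--bootstrap-server", "localhost:9092",  // placeholder broker address
                    "--zookeeper", "localhost:2181",         // placeholder ZooKeeper address
                    "--input-topics", "inputTopic",
                    "--intermediate-topics", "userTopic"
                },
                consumerOverrides);

            System.out.println("StreamsResetter exited with code " + exitCode);
        }
    }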
              diff --git a/build.gradle b/build.gradle
              index 36647b3468dc..e2f4da6ee56f 100644
              --- a/build.gradle
              +++ b/build.gradle
              @@ -634,6 +634,7 @@ project(':tools') {
                 archivesBaseName = "kafka-tools"
               
                 dependencies {
              +    compile project(':core')
                   compile project(':clients')
                   compile project(':log4j-appender')
                   compile libs.argparse4j
              @@ -682,6 +683,7 @@ project(':streams') {
                   testCompile project(':clients').sourceSets.test.output
                   testCompile project(':core')
                   testCompile project(':core').sourceSets.test.output
              +    testCompile project(':tools')
                   testCompile libs.junit
               
                   testRuntime libs.slf4jlog4j
              diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml
              index 5f52cced89d8..1052d8e43ed7 100644
              --- a/checkstyle/import-control.xml
              +++ b/checkstyle/import-control.xml
              @@ -123,6 +123,8 @@
                   
                   
                   
              +    
              +    
                 
               
                 
              @@ -148,7 +150,8 @@
                     
                     
                     
              -      
              +      
              +      
                   
               
                   
              diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
              index af6d973e3dc2..17c760e33c7e 100644
              --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
              +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
              @@ -24,12 +24,14 @@
               import org.apache.kafka.common.metrics.MetricsReporter;
               import org.apache.kafka.common.utils.SystemTime;
               import org.apache.kafka.common.utils.Time;
              +import org.apache.kafka.common.utils.Utils;
               import org.apache.kafka.streams.processor.TopologyBuilder;
               import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier;
               import org.apache.kafka.streams.processor.internals.StreamThread;
               import org.slf4j.Logger;
               import org.slf4j.LoggerFactory;
               
              +import java.io.File;
               import java.util.List;
               import java.util.Properties;
               import java.util.UUID;
              @@ -43,7 +45,7 @@
                * The computational logic can be specified either by using the {@link TopologyBuilder} class to define the a DAG topology of
                * {@link org.apache.kafka.streams.processor.Processor}s or by using the {@link org.apache.kafka.streams.kstream.KStreamBuilder}
                * class which provides the high-level {@link org.apache.kafka.streams.kstream.KStream} DSL to define the transformation.
              - *
+ * <p>
  * The {@link KafkaStreams} class manages the lifecycle of a Kafka Streams instance. One stream instance can contain one or
  * more threads specified in the configs for the processing work.
  *
@@ -56,7 +58,7 @@
  * Internally the {@link KafkaStreams} instance contains a normal {@link org.apache.kafka.clients.producer.KafkaProducer KafkaProducer}
  * and {@link org.apache.kafka.clients.consumer.KafkaConsumer KafkaConsumer} instance that is used for reading input and writing output.
  *
- *
+ * <p>
  * A simple example might look like this:
  * <pre>
                *    Map<String, Object> props = new HashMap<>();
              @@ -74,7 +76,6 @@
                *    KafkaStreams streams = new KafkaStreams(builder, config);
                *    streams.start();
                * 
              - * */ @InterfaceStability.Unstable @@ -99,52 +100,56 @@ public class KafkaStreams { // usage only and should not be exposed to users at all. private final UUID processId; + private final StreamsConfig config; + /** * Construct the stream instance. * - * @param builder the processor topology builder specifying the computational logic - * @param props properties for the {@link StreamsConfig} + * @param builder the processor topology builder specifying the computational logic + * @param props properties for the {@link StreamsConfig} */ - public KafkaStreams(TopologyBuilder builder, Properties props) { + public KafkaStreams(final TopologyBuilder builder, final Properties props) { this(builder, new StreamsConfig(props), new DefaultKafkaClientSupplier()); } /** * Construct the stream instance. * - * @param builder the processor topology builder specifying the computational logic - * @param config the stream configs + * @param builder the processor topology builder specifying the computational logic + * @param config the stream configs */ - public KafkaStreams(TopologyBuilder builder, StreamsConfig config) { + public KafkaStreams(final TopologyBuilder builder, final StreamsConfig config) { this(builder, config, new DefaultKafkaClientSupplier()); } /** * Construct the stream instance. * - * @param builder the processor topology builder specifying the computational logic - * @param config the stream configs - * @param clientSupplier the kafka clients supplier which provides underlying producer and consumer clients - * for this {@link KafkaStreams} instance + * @param builder the processor topology builder specifying the computational logic + * @param config the stream configs + * @param clientSupplier the kafka clients supplier which provides underlying producer and consumer clients + * for this {@link KafkaStreams} instance */ - public KafkaStreams(TopologyBuilder builder, StreamsConfig config, KafkaClientSupplier clientSupplier) { + public KafkaStreams(final TopologyBuilder builder, final StreamsConfig config, final KafkaClientSupplier clientSupplier) { // create the metrics - Time time = new SystemTime(); + final Time time = new SystemTime(); this.processId = UUID.randomUUID(); + this.config = config; + // The application ID is a required config and hence should always have value - String applicationId = config.getString(StreamsConfig.APPLICATION_ID_CONFIG); + final String applicationId = config.getString(StreamsConfig.APPLICATION_ID_CONFIG); String clientId = config.getString(StreamsConfig.CLIENT_ID_CONFIG); if (clientId.length() <= 0) clientId = applicationId + "-" + STREAM_CLIENT_ID_SEQUENCE.getAndIncrement(); - List reporters = config.getConfiguredInstances(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, - MetricsReporter.class); + final List reporters = config.getConfiguredInstances(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, + MetricsReporter.class); reporters.add(new JmxReporter(JMX_PREFIX)); - MetricConfig metricConfig = new MetricConfig().samples(config.getInt(StreamsConfig.METRICS_NUM_SAMPLES_CONFIG)) + final MetricConfig metricConfig = new MetricConfig().samples(config.getInt(StreamsConfig.METRICS_NUM_SAMPLES_CONFIG)) .timeWindow(config.getLong(StreamsConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS); @@ -152,25 +157,26 @@ public KafkaStreams(TopologyBuilder builder, StreamsConfig config, KafkaClientSu this.threads = new StreamThread[config.getInt(StreamsConfig.NUM_STREAM_THREADS_CONFIG)]; for (int i = 0; i < this.threads.length; i++) { - this.threads[i] = 
new StreamThread(builder, config, clientSupplier, applicationId, clientId, processId, metrics, time); + this.threads[i] = new StreamThread(builder, config, clientSupplier, applicationId, clientId, this.processId, this.metrics, time); } } /** * Start the stream instance by starting all its threads. + * * @throws IllegalStateException if process was already started */ public synchronized void start() { log.debug("Starting Kafka Stream process"); - if (state == CREATED) { - for (StreamThread thread : threads) + if (this.state == CREATED) { + for (final StreamThread thread : this.threads) thread.start(); - state = RUNNING; + this.state = RUNNING; log.info("Started Kafka Stream process"); - } else if (state == RUNNING) { + } else if (this.state == RUNNING) { throw new IllegalStateException("This process was already started."); } else { throw new IllegalStateException("Cannot restart after closing."); @@ -180,40 +186,64 @@ public synchronized void start() { /** * Shutdown this stream instance by signaling all the threads to stop, * and then wait for them to join. + * * @throws IllegalStateException if process has not started yet */ public synchronized void close() { log.debug("Stopping Kafka Stream process"); - if (state == RUNNING) { + if (this.state == RUNNING) { // signal the threads to stop and wait - for (StreamThread thread : threads) + for (final StreamThread thread : this.threads) thread.close(); - for (StreamThread thread : threads) { + for (final StreamThread thread : this.threads) { try { thread.join(); - } catch (InterruptedException ex) { + } catch (final InterruptedException ex) { Thread.interrupted(); } } } - if (state != STOPPED) { - metrics.close(); - state = STOPPED; + if (this.state != STOPPED) { + this.metrics.close(); + this.state = STOPPED; log.info("Stopped Kafka Stream process"); } } + /** + * Cleans up local state store directory ({@code state.dir}), by deleting all data with regard to the application-id. + *

              + * May only be called either before instance is started or after instance is closed. + * + * @throws IllegalStateException if instance is currently running + */ + public void cleanUp() { + if (this.state == RUNNING) { + throw new IllegalStateException("Cannot clean up while running."); + } + + final String localApplicationDir = this.config.getString(StreamsConfig.STATE_DIR_CONFIG) + + File.separator + + this.config.getString(StreamsConfig.APPLICATION_ID_CONFIG); + + log.debug("Clean up local Kafka Streams data in {}", localApplicationDir); + log.debug("Removing local Kafka Streams application data in {} for application {}", + localApplicationDir, + this.config.getString(StreamsConfig.APPLICATION_ID_CONFIG)); + Utils.delete(new File(localApplicationDir)); + } + /** * Sets the handler invoked when a stream thread abruptly terminates due to an uncaught exception. * * @param eh the object to use as this thread's uncaught exception handler. If null then this thread has no explicit handler. */ - public void setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler eh) { - for (StreamThread thread : threads) + public void setUncaughtExceptionHandler(final Thread.UncaughtExceptionHandler eh) { + for (final StreamThread thread : this.threads) thread.setUncaughtExceptionHandler(eh); } diff --git a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java index 22d8bf2dd168..af7e681b0ddc 100644 --- a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

              + * http://www.apache.org/licenses/LICENSE-2.0 + *

              * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,16 +19,21 @@ import org.apache.kafka.streams.kstream.KStreamBuilder; import org.apache.kafka.test.MockMetricsReporter; +import org.apache.kafka.test.TestUtils; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.util.Properties; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + public class KafkaStreamsTest { @Test public void testStartAndClose() throws Exception { - Properties props = new Properties(); + final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testStartAndClose"); props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); @@ -36,71 +41,143 @@ public void testStartAndClose() throws Exception { final int oldInitCount = MockMetricsReporter.INIT_COUNT.get(); final int oldCloseCount = MockMetricsReporter.CLOSE_COUNT.get(); - KStreamBuilder builder = new KStreamBuilder(); - KafkaStreams streams = new KafkaStreams(builder, props); + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); streams.start(); final int newInitCount = MockMetricsReporter.INIT_COUNT.get(); final int initCountDifference = newInitCount - oldInitCount; - Assert.assertTrue("some reporters should be initialized by calling start()", initCountDifference > 0); + assertTrue("some reporters should be initialized by calling start()", initCountDifference > 0); streams.close(); Assert.assertEquals("each reporter initialized should also be closed", - oldCloseCount + initCountDifference, MockMetricsReporter.CLOSE_COUNT.get()); + oldCloseCount + initCountDifference, MockMetricsReporter.CLOSE_COUNT.get()); } @Test public void testCloseIsIdempotent() throws Exception { - Properties props = new Properties(); + final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCloseIsIdempotent"); props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); - KStreamBuilder builder = new KStreamBuilder(); - KafkaStreams streams = new KafkaStreams(builder, props); + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); streams.close(); final int closeCount = MockMetricsReporter.CLOSE_COUNT.get(); streams.close(); Assert.assertEquals("subsequent close() calls should do nothing", - closeCount, MockMetricsReporter.CLOSE_COUNT.get()); + closeCount, MockMetricsReporter.CLOSE_COUNT.get()); } - @Test + @Test(expected = IllegalStateException.class) public void testCannotStartOnceClosed() throws Exception { - Properties props = new Properties(); + final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartOnceClosed"); props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); - KStreamBuilder builder = new KStreamBuilder(); - KafkaStreams streams = new KafkaStreams(builder, props); + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); streams.close(); try { 
streams.start(); - } catch (IllegalStateException e) { + } catch (final IllegalStateException e) { Assert.assertEquals("Cannot restart after closing.", e.getMessage()); - return; + throw e; + } finally { + streams.close(); } - Assert.fail("should have caught an exception and returned"); } - @Test + @Test(expected = IllegalStateException.class) public void testCannotStartTwice() throws Exception { - Properties props = new Properties(); + final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartTwice"); props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); - KStreamBuilder builder = new KStreamBuilder(); - KafkaStreams streams = new KafkaStreams(builder, props); + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); streams.start(); try { streams.start(); - } catch (IllegalStateException e) { + } catch (final IllegalStateException e) { Assert.assertEquals("This process was already started.", e.getMessage()); - return; + throw e; + } finally { + streams.close(); } - Assert.fail("should have caught an exception and returned"); } + + @Test + public void testCleanup() throws Exception { + final Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testLocalCleanup"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); + + streams.cleanUp(); + streams.start(); + streams.close(); + streams.cleanUp(); + } + + @Test + public void testCleanupIsolation() throws Exception { + final KStreamBuilder builder = new KStreamBuilder(); + + final String appId1 = "testIsolation-1"; + final String appId2 = "testIsolation-2"; + final String stateDir = TestUtils.tempDirectory("kafka-test").getPath(); + final File stateDirApp1 = new File(stateDir + File.separator + appId1); + final File stateDirApp2 = new File(stateDir + File.separator + appId2); + + final Properties props = new Properties(); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir); + + assertFalse(stateDirApp1.exists()); + assertFalse(stateDirApp2.exists()); + + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, appId1); + final KafkaStreams streams1 = new KafkaStreams(builder, props); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, appId2); + final KafkaStreams streams2 = new KafkaStreams(builder, props); + + assertTrue(stateDirApp1.exists()); + assertTrue(stateDirApp2.exists()); + + streams1.cleanUp(); + assertFalse(stateDirApp1.exists()); + assertTrue(stateDirApp2.exists()); + + streams2.cleanUp(); + assertFalse(stateDirApp1.exists()); + assertFalse(stateDirApp2.exists()); + } + + @Test(expected = IllegalStateException.class) + public void testCannotCleanupWhileRunning() throws Exception { + final Properties props = new Properties(); + props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotCleanupWhileRunning"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + + final KStreamBuilder builder = new KStreamBuilder(); + final KafkaStreams streams = new KafkaStreams(builder, props); + + streams.start(); + try { + streams.cleanUp(); + } catch (final IllegalStateException e) { + Assert.assertEquals("Cannot clean up while running.", e.getMessage()); + throw e; + } finally { + streams.close(); + 
} + } + } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java new file mode 100644 index 000000000000..28be86861f2e --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java @@ -0,0 +1,253 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + *

              + * http://www.apache.org/licenses/LICENSE-2.0 + *

              + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration; + +import kafka.utils.ZkUtils; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.security.JaasUtils; +import org.apache.kafka.common.serialization.LongDeserializer; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.KStreamBuilder; +import org.apache.kafka.streams.kstream.KTable; +import org.apache.kafka.streams.kstream.KeyValueMapper; +import org.apache.kafka.streams.kstream.TimeWindows; +import org.apache.kafka.streams.kstream.Windowed; +import org.apache.kafka.test.TestUtils; +import org.apache.kafka.tools.StreamsResetter; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; + + +/** + * Tests local state store and global application cleanup. 
+ */ +public class ResetIntegrationTest { + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + + private static final String APP_ID = "cleanup-integration-test"; + private static final String INPUT_TOPIC = "inputTopic"; + private static final String OUTPUT_TOPIC = "outputTopic"; + private static final String OUTPUT_TOPIC_2 = "outputTopic2"; + private static final String INTERMEDIATE_USER_TOPIC = "userTopic"; + + private static final long STREAMS_CONSUMER_TIMEOUT = 2000L; + private static final long CLEANUP_CONSUMER_TIMEOUT = 2000L; + + @BeforeClass + public static void startKafkaCluster() throws Exception { + CLUSTER.createTopic(INPUT_TOPIC); + CLUSTER.createTopic(OUTPUT_TOPIC); + CLUSTER.createTopic(OUTPUT_TOPIC_2); + CLUSTER.createTopic(INTERMEDIATE_USER_TOPIC); + } + + @Test + public void testReprocessingFromScratchAfterCleanUp() throws Exception { + final Properties streamsConfiguration = prepareTest(); + final Properties resultTopicConsumerConfig = prepareResultConsumer(); + + prepareInputData(); + final KStreamBuilder builder = setupTopology(); + + // RUN + KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); + streams.start(); + final List> result = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC, 10); + // receive only first values to make sure intermediate user topic is not consumed completely + // => required to test "seekToEnd" for intermediate topics + final KeyValue result2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC_2, 1).get(0); + + streams.close(); + + // RESET + Utils.sleep(STREAMS_CONSUMER_TIMEOUT); + streams.cleanUp(); + cleanGlobal(); + assertInternalTopicsGotDeleted(); + Utils.sleep(CLEANUP_CONSUMER_TIMEOUT); + + // RE-RUN + streams = new KafkaStreams(setupTopology(), streamsConfiguration); + streams.start(); + final List> resultRerun = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC, 10); + final KeyValue resultRerun2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC_2, 1).get(0); + streams.close(); + + assertThat(resultRerun, equalTo(result)); + assertThat(resultRerun2, equalTo(result2)); + } + + private Properties prepareTest() throws Exception { + final Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, APP_ID); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory("kafka-test").getPath()); + streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.Long().getClass()); + streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass()); + streamsConfiguration.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 8); + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1); + streamsConfiguration.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 100); + streamsConfiguration.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "" + STREAMS_CONSUMER_TIMEOUT); + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + + IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration); + + return streamsConfiguration; + } + + private Properties 
prepareResultConsumer() { + final Properties resultTopicConsumerConfig = new Properties(); + resultTopicConsumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + resultTopicConsumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, APP_ID + "-standard-consumer-" + OUTPUT_TOPIC); + resultTopicConsumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + resultTopicConsumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); + resultTopicConsumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class); + + return resultTopicConsumerConfig; + } + + private void prepareInputData() throws Exception { + final Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); + producerConfig.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, LongSerializer.class); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); + + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(0L, "aaa")), producerConfig, 10L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "bbb")), producerConfig, 20L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(0L, "ccc")), producerConfig, 30L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "ddd")), producerConfig, 40L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(0L, "eee")), producerConfig, 50L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "fff")), producerConfig, 60L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(0L, "ggg")), producerConfig, 61L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "hhh")), producerConfig, 62L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(0L, "iii")), producerConfig, 63L); + IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "jjj")), producerConfig, 64L); + } + + private KStreamBuilder setupTopology() { + final KStreamBuilder builder = new KStreamBuilder(); + + final KStream input = builder.stream(INPUT_TOPIC); + + // use map to trigger internal re-partitioning before groupByKey + final KTable globalCounts = input + .map(new KeyValueMapper>() { + @Override + public KeyValue apply(final Long key, final String value) { + return new KeyValue<>(key, value); + } + }) + .countByKey("global-count"); + globalCounts.to(Serdes.Long(), Serdes.Long(), OUTPUT_TOPIC); + + final KStream windowedCounts = input + .through(INTERMEDIATE_USER_TOPIC) + .map(new KeyValueMapper>() { + @Override + public KeyValue apply(final Long key, final String value) { + // must sleep long enough to avoid processing the whole intermediate topic before application gets stopped + // => want to test "skip over" unprocessed records + // increasing the sleep time only has disadvantage 
that test run time is increased + Utils.sleep(1000); + return new KeyValue<>(key, value); + } + }) + .countByKey(TimeWindows.of("count", 35).advanceBy(10)) + .toStream() + .map(new KeyValueMapper, Long, KeyValue>() { + @Override + public KeyValue apply(final Windowed key, final Long value) { + return new KeyValue<>(key.window().start() + key.window().end(), value); + } + }); + windowedCounts.to(Serdes.Long(), Serdes.Long(), OUTPUT_TOPIC_2); + + return builder; + } + + private void cleanGlobal() { + final Properties cleanUpConfig = new Properties(); + cleanUpConfig.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 100); + cleanUpConfig.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "" + CLEANUP_CONSUMER_TIMEOUT); + + final int exitCode = new StreamsResetter().run( + new String[]{ + "--application-id", APP_ID, + "--bootstrap-server", CLUSTER.bootstrapServers(), + "--zookeeper", CLUSTER.zKConnectString(), + "--input-topics", INPUT_TOPIC, + "--intermediate-topics", INTERMEDIATE_USER_TOPIC + }, + cleanUpConfig); + Assert.assertEquals(0, exitCode); + } + + private void assertInternalTopicsGotDeleted() { + final Set expectedRemainingTopicsAfterCleanup = new HashSet<>(); + expectedRemainingTopicsAfterCleanup.add(INPUT_TOPIC); + expectedRemainingTopicsAfterCleanup.add(INTERMEDIATE_USER_TOPIC); + expectedRemainingTopicsAfterCleanup.add(OUTPUT_TOPIC); + expectedRemainingTopicsAfterCleanup.add(OUTPUT_TOPIC_2); + expectedRemainingTopicsAfterCleanup.add("__consumer_offsets"); + + Set allTopics; + ZkUtils zkUtils = null; + try { + zkUtils = ZkUtils.apply(CLUSTER.zKConnectString(), + 30000, + 30000, + JaasUtils.isZkSecurityEnabled()); + + do { + Utils.sleep(100); + allTopics = new HashSet<>(); + allTopics.addAll(scala.collection.JavaConversions.seqAsJavaList(zkUtils.getAllTopics())); + } while (allTopics.size() != expectedRemainingTopicsAfterCleanup.size()); + } finally { + if (zkUtils != null) { + zkUtils.close(); + } + } + assertThat(allTopics, equalTo(expectedRemainingTopicsAfterCleanup)); + } + +} diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/utils/EmbeddedSingleNodeKafkaCluster.java b/streams/src/test/java/org/apache/kafka/streams/integration/utils/EmbeddedSingleNodeKafkaCluster.java index 34753ae2e672..b293a0273069 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/utils/EmbeddedSingleNodeKafkaCluster.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/utils/EmbeddedSingleNodeKafkaCluster.java @@ -48,6 +48,8 @@ public void start() throws IOException, InterruptedException { log.debug("ZooKeeper instance is running at {}", zKConnectString()); brokerConfig.put(KafkaConfig$.MODULE$.ZkConnectProp(), zKConnectString()); brokerConfig.put(KafkaConfig$.MODULE$.PortProp(), DEFAULT_BROKER_PORT); + brokerConfig.put(KafkaConfig$.MODULE$.DeleteTopicEnableProp(), true); + brokerConfig.put(KafkaConfig$.MODULE$.GroupMinSessionTimeoutMsProp(), 0); log.debug("Starting a Kafka instance on port {} ...", brokerConfig.getProperty(KafkaConfig$.MODULE$.PortProp())); broker = new KafkaEmbedded(brokerConfig); @@ -125,4 +127,9 @@ public void createTopic(String topic, Properties topicConfig) { broker.createTopic(topic, partitions, replication, topicConfig); } -} \ No newline at end of file + + public void deleteTopic(String topic) { + broker.deleteTopic(topic); + } + +} diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java 
index c3f90897fcc2..14b24aac4b32 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

              + * http://www.apache.org/licenses/LICENSE-2.0 + *

              * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -55,10 +55,10 @@ public class IntegrationTestUtils { * @param maxMessages Maximum number of messages to read via the consumer. * @return The values retrieved via the consumer. */ - public static List readValues(String topic, Properties consumerConfig, int maxMessages) { - List returnList = new ArrayList<>(); - List> kvs = readKeyValues(topic, consumerConfig, maxMessages); - for (KeyValue kv : kvs) { + public static List readValues(final String topic, final Properties consumerConfig, final int maxMessages) { + final List returnList = new ArrayList<>(); + final List> kvs = readKeyValues(topic, consumerConfig, maxMessages); + for (final KeyValue kv : kvs) { returnList.add(kv.value); } return returnList; @@ -72,7 +72,7 @@ public static List readValues(String topic, Properties consumerConfig, in * @param consumerConfig Kafka consumer configuration * @return The KeyValue elements retrieved via the consumer. */ - public static List> readKeyValues(String topic, Properties consumerConfig) { + public static List> readKeyValues(final String topic, final Properties consumerConfig) { return readKeyValues(topic, consumerConfig, UNLIMITED_MESSAGES); } @@ -85,17 +85,17 @@ public static List> readKeyValues(String topic, Properties * @param maxMessages Maximum number of messages to read via the consumer * @return The KeyValue elements retrieved via the consumer */ - public static List> readKeyValues(String topic, Properties consumerConfig, int maxMessages) { - KafkaConsumer consumer = new KafkaConsumer<>(consumerConfig); + public static List> readKeyValues(final String topic, final Properties consumerConfig, final int maxMessages) { + final KafkaConsumer consumer = new KafkaConsumer<>(consumerConfig); consumer.subscribe(Collections.singletonList(topic)); - int pollIntervalMs = 100; - int maxTotalPollTimeMs = 2000; + final int pollIntervalMs = 100; + final int maxTotalPollTimeMs = 2000; int totalPollTimeMs = 0; - List> consumedValues = new ArrayList<>(); + final List> consumedValues = new ArrayList<>(); while (totalPollTimeMs < maxTotalPollTimeMs && continueConsuming(consumedValues.size(), maxMessages)) { totalPollTimeMs += pollIntervalMs; - ConsumerRecords records = consumer.poll(pollIntervalMs); - for (ConsumerRecord record : records) { + final ConsumerRecords records = consumer.poll(pollIntervalMs); + for (final ConsumerRecord record : records) { consumedValues.add(new KeyValue<>(record.key(), record.value())); } } @@ -103,7 +103,7 @@ public static List> readKeyValues(String topic, Properties return consumedValues; } - private static boolean continueConsuming(int messagesConsumed, int maxMessages) { + private static boolean continueConsuming(final int messagesConsumed, final int maxMessages) { return maxMessages <= 0 || messagesConsumed < maxMessages; } @@ -112,10 +112,10 @@ private static boolean continueConsuming(int messagesConsumed, int maxMessages) * * @param streamsConfiguration Streams configuration settings */ - public static void purgeLocalStreamsState(Properties streamsConfiguration) throws IOException { - String path = streamsConfiguration.getProperty(StreamsConfig.STATE_DIR_CONFIG); + public static void purgeLocalStreamsState(final Properties streamsConfiguration) throws IOException { + final String path = 
streamsConfiguration.getProperty(StreamsConfig.STATE_DIR_CONFIG); if (path != null) { - File node = Paths.get(path).normalize().toFile(); + final File node = Paths.get(path).normalize().toFile(); // Only purge state when it's under /tmp. This is a safety net to prevent accidentally // deleting important local directory trees. if (node.getAbsolutePath().startsWith("/tmp")) { @@ -132,11 +132,11 @@ public static void purgeLocalStreamsState(Properties streamsConfiguration) throw * @param Value type of the data records */ public static void produceKeyValuesSynchronously( - String topic, Collection> records, Properties producerConfig) + final String topic, final Collection> records, final Properties producerConfig) throws ExecutionException, InterruptedException { - Producer producer = new KafkaProducer<>(producerConfig); - for (KeyValue record : records) { - Future f = producer.send( + final Producer producer = new KafkaProducer<>(producerConfig); + for (final KeyValue record : records) { + final Future f = producer.send( new ProducerRecord<>(topic, record.key, record.value)); f.get(); } @@ -144,86 +144,103 @@ public static void produceKeyValuesSynchronously( producer.close(); } + public static void produceKeyValuesSynchronouslyWithTimestamp(final String topic, + final Collection> records, + final Properties producerConfig, + final Long timestamp) + throws ExecutionException, InterruptedException { + final Producer producer = new KafkaProducer<>(producerConfig); + for (final KeyValue record : records) { + final Future f = producer.send( + new ProducerRecord<>(topic, null, timestamp, record.key, record.value)); + f.get(); + } + producer.flush(); + producer.close(); + } + public static void produceValuesSynchronously( - String topic, Collection records, Properties producerConfig) + final String topic, final Collection records, final Properties producerConfig) throws ExecutionException, InterruptedException { - Collection> keyedRecords = new ArrayList<>(); - for (V value : records) { - KeyValue kv = new KeyValue<>(null, value); + final Collection> keyedRecords = new ArrayList<>(); + for (final V value : records) { + final KeyValue kv = new KeyValue<>(null, value); keyedRecords.add(kv); } produceKeyValuesSynchronously(topic, keyedRecords, producerConfig); } - public static List> waitUntilMinKeyValueRecordsReceived(Properties consumerConfig, - String topic, - int expectedNumRecords) throws InterruptedException { + public static List> waitUntilMinKeyValueRecordsReceived(final Properties consumerConfig, + final String topic, + final int expectedNumRecords) throws InterruptedException { return waitUntilMinKeyValueRecordsReceived(consumerConfig, topic, expectedNumRecords, DEFAULT_TIMEOUT); } /** * Wait until enough data (key-value records) has been consumed. 
- * @param consumerConfig Kafka Consumer configuration - * @param topic Topic to consume from + * + * @param consumerConfig Kafka Consumer configuration + * @param topic Topic to consume from * @param expectedNumRecords Minimum number of expected records - * @param waitTime Upper bound in waiting time in milliseconds + * @param waitTime Upper bound in waiting time in milliseconds * @return All the records consumed, or null if no records are consumed * @throws InterruptedException - * @throws AssertionError if the given wait time elapses + * @throws AssertionError if the given wait time elapses */ - public static List> waitUntilMinKeyValueRecordsReceived(Properties consumerConfig, - String topic, - int expectedNumRecords, - long waitTime) throws InterruptedException { - List> accumData = new ArrayList<>(); - long startTime = System.currentTimeMillis(); + public static List> waitUntilMinKeyValueRecordsReceived(final Properties consumerConfig, + final String topic, + final int expectedNumRecords, + final long waitTime) throws InterruptedException { + final List> accumData = new ArrayList<>(); + final long startTime = System.currentTimeMillis(); while (true) { - List> readData = readKeyValues(topic, consumerConfig); + final List> readData = readKeyValues(topic, consumerConfig); accumData.addAll(readData); if (accumData.size() >= expectedNumRecords) return accumData; if (System.currentTimeMillis() > startTime + waitTime) - throw new AssertionError("Expected " + expectedNumRecords + + throw new AssertionError("Expected " + expectedNumRecords + " but received only " + accumData.size() + " records before timeout " + waitTime + " ms"); Thread.sleep(Math.min(waitTime, 100L)); } } - public static List waitUntilMinValuesRecordsReceived(Properties consumerConfig, - String topic, - int expectedNumRecords) throws InterruptedException { + public static List waitUntilMinValuesRecordsReceived(final Properties consumerConfig, + final String topic, + final int expectedNumRecords) throws InterruptedException { return waitUntilMinValuesRecordsReceived(consumerConfig, topic, expectedNumRecords, DEFAULT_TIMEOUT); } /** * Wait until enough data (value records) has been consumed. 
- * @param consumerConfig Kafka Consumer configuration - * @param topic Topic to consume from + * + * @param consumerConfig Kafka Consumer configuration + * @param topic Topic to consume from * @param expectedNumRecords Minimum number of expected records - * @param waitTime Upper bound in waiting time in milliseconds + * @param waitTime Upper bound in waiting time in milliseconds * @return All the records consumed, or null if no records are consumed * @throws InterruptedException - * @throws AssertionError if the given wait time elapses + * @throws AssertionError if the given wait time elapses */ - public static List waitUntilMinValuesRecordsReceived(Properties consumerConfig, - String topic, - int expectedNumRecords, - long waitTime) throws InterruptedException { - List accumData = new ArrayList<>(); - long startTime = System.currentTimeMillis(); + public static List waitUntilMinValuesRecordsReceived(final Properties consumerConfig, + final String topic, + final int expectedNumRecords, + final long waitTime) throws InterruptedException { + final List accumData = new ArrayList<>(); + final long startTime = System.currentTimeMillis(); while (true) { - List readData = readValues(topic, consumerConfig, expectedNumRecords); + final List readData = readValues(topic, consumerConfig, expectedNumRecords); accumData.addAll(readData); if (accumData.size() >= expectedNumRecords) return accumData; if (System.currentTimeMillis() > startTime + waitTime) - throw new AssertionError("Expected " + expectedNumRecords + + throw new AssertionError("Expected " + expectedNumRecords + " but received only " + accumData.size() + " records before timeout " + waitTime + " ms"); Thread.sleep(Math.min(waitTime, 100L)); } } -} \ No newline at end of file +} diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/utils/KafkaEmbedded.java b/streams/src/test/java/org/apache/kafka/streams/integration/utils/KafkaEmbedded.java index 348b46b5c711..8e0d11c07c88 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/utils/KafkaEmbedded.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/utils/KafkaEmbedded.java @@ -18,20 +18,6 @@ package org.apache.kafka.streams.integration.utils; -import org.apache.kafka.common.protocol.SecurityProtocol; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Properties; - -import org.I0Itec.zkclient.ZkClient; -import org.I0Itec.zkclient.ZkConnection; - -import java.io.File; -import java.util.Collections; -import java.util.List; - import kafka.admin.AdminUtils; import kafka.admin.RackAwareMode; import kafka.server.KafkaConfig; @@ -42,11 +28,23 @@ import kafka.utils.TestUtils; import kafka.utils.ZKStringSerializer$; import kafka.utils.ZkUtils; +import org.I0Itec.zkclient.ZkClient; +import org.I0Itec.zkclient.ZkConnection; +import org.apache.kafka.common.protocol.SecurityProtocol; import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Properties; + /** * Runs an in-memory, "embedded" instance of a Kafka broker, which listens at `127.0.0.1:9092` by * default. - * + *

              * Requires a running ZooKeeper instance to connect to. */ public class KafkaEmbedded { @@ -63,20 +61,21 @@ public class KafkaEmbedded { /** * Creates and starts an embedded Kafka broker. + * * @param config Broker configuration settings. Used to modify, for example, on which port the * broker should listen to. Note that you cannot change the `log.dirs` setting * currently. */ - public KafkaEmbedded(Properties config) throws IOException { - tmpFolder = new TemporaryFolder(); - tmpFolder.create(); - logDir = tmpFolder.newFolder(); - effectiveConfig = effectiveConfigFrom(config); - boolean loggingEnabled = true; - KafkaConfig kafkaConfig = new KafkaConfig(effectiveConfig, loggingEnabled); + public KafkaEmbedded(final Properties config) throws IOException { + this.tmpFolder = new TemporaryFolder(); + this.tmpFolder.create(); + this.logDir = this.tmpFolder.newFolder(); + this.effectiveConfig = effectiveConfigFrom(config); + final boolean loggingEnabled = true; + final KafkaConfig kafkaConfig = new KafkaConfig(this.effectiveConfig, loggingEnabled); log.debug("Starting embedded Kafka broker (with log.dirs={} and ZK ensemble at {}) ...", - logDir, zookeeperConnect()); - kafka = TestUtils.createServer(kafkaConfig, SystemTime$.MODULE$); + this.logDir, zookeeperConnect()); + this.kafka = TestUtils.createServer(kafkaConfig, SystemTime$.MODULE$); log.debug("Startup of embedded Kafka broker at {} completed (with ZK ensemble at {}) ...", brokerList(), zookeeperConnect()); } @@ -85,12 +84,13 @@ public KafkaEmbedded(Properties config) throws IOException { /** * Creates the configuration for starting the Kafka broker by merging default values with * overwrites. + * * @param initialConfig Broker configuration settings that override the default config. * @return * @throws IOException */ - private Properties effectiveConfigFrom(Properties initialConfig) throws IOException { - Properties effectiveConfig = new Properties(); + private Properties effectiveConfigFrom(final Properties initialConfig) throws IOException { + final Properties effectiveConfig = new Properties(); effectiveConfig.put(KafkaConfig$.MODULE$.BrokerIdProp(), 0); effectiveConfig.put(KafkaConfig$.MODULE$.HostNameProp(), "127.0.0.1"); effectiveConfig.put(KafkaConfig$.MODULE$.PortProp(), "9092"); @@ -100,17 +100,17 @@ private Properties effectiveConfigFrom(Properties initialConfig) throws IOExcept effectiveConfig.put(KafkaConfig$.MODULE$.ControlledShutdownEnableProp(), true); effectiveConfig.putAll(initialConfig); - effectiveConfig.setProperty(KafkaConfig$.MODULE$.LogDirProp(), logDir.getAbsolutePath()); + effectiveConfig.setProperty(KafkaConfig$.MODULE$.LogDirProp(), this.logDir.getAbsolutePath()); return effectiveConfig; } /** * This broker's `metadata.broker.list` value. Example: `127.0.0.1:9092`. - * + *

              * You can use this to tell Kafka producers and consumers how to connect to this instance. */ public String brokerList() { - return kafka.config().hostName() + ":" + kafka.boundPort(SecurityProtocol.PLAINTEXT); + return this.kafka.config().hostName() + ":" + this.kafka.boundPort(SecurityProtocol.PLAINTEXT); } @@ -118,7 +118,7 @@ public String brokerList() { * The ZooKeeper connection string aka `zookeeper.connect`. */ public String zookeeperConnect() { - return effectiveConfig.getProperty("zookeeper.connect", DEFAULT_ZK_CONNECT); + return this.effectiveConfig.getProperty("zookeeper.connect", DEFAULT_ZK_CONNECT); } /** @@ -127,12 +127,12 @@ public String zookeeperConnect() { public void stop() { log.debug("Shutting down embedded Kafka broker at {} (with ZK ensemble at {}) ...", brokerList(), zookeeperConnect()); - kafka.shutdown(); - kafka.awaitShutdown(); - log.debug("Removing logs.dir at {} ...", logDir); - List logDirs = Collections.singletonList(logDir.getAbsolutePath()); + this.kafka.shutdown(); + this.kafka.awaitShutdown(); + log.debug("Removing logs.dir at {} ...", this.logDir); + final List logDirs = Collections.singletonList(this.logDir.getAbsolutePath()); CoreUtils.delete(scala.collection.JavaConversions.asScalaBuffer(logDirs).seq()); - tmpFolder.delete(); + this.tmpFolder.delete(); log.debug("Shutdown of embedded Kafka broker at {} completed (with ZK ensemble at {}) ...", brokerList(), zookeeperConnect()); } @@ -142,7 +142,7 @@ public void stop() { * * @param topic The name of the topic. */ - public void createTopic(String topic) { + public void createTopic(final String topic) { createTopic(topic, 1, 1, new Properties()); } @@ -153,7 +153,7 @@ public void createTopic(String topic) { * @param partitions The number of partitions for this topic. * @param replication The replication factor for (the partitions of) this topic. */ - public void createTopic(String topic, int partitions, int replication) { + public void createTopic(final String topic, final int partitions, final int replication) { createTopic(topic, partitions, replication, new Properties()); } @@ -165,10 +165,10 @@ public void createTopic(String topic, int partitions, int replication) { * @param replication The replication factor for (partitions of) this topic. * @param topicConfig Additional topic-level configuration settings. */ - public void createTopic(String topic, - int partitions, - int replication, - Properties topicConfig) { + public void createTopic(final String topic, + final int partitions, + final int replication, + final Properties topicConfig) { log.debug("Creating topic { name: {}, partitions: {}, replication: {}, config: {} }", topic, partitions, replication, topicConfig); @@ -176,14 +176,29 @@ public void createTopic(String topic, // createTopic() will only seem to work (it will return without error). The topic will exist in // only ZooKeeper and will be returned when listing topics, but Kafka itself does not create the // topic. 
- ZkClient zkClient = new ZkClient( + final ZkClient zkClient = new ZkClient( zookeeperConnect(), DEFAULT_ZK_SESSION_TIMEOUT_MS, DEFAULT_ZK_CONNECTION_TIMEOUT_MS, ZKStringSerializer$.MODULE$); - boolean isSecure = false; - ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperConnect()), isSecure); + final boolean isSecure = false; + final ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperConnect()), isSecure); AdminUtils.createTopic(zkUtils, topic, partitions, replication, topicConfig, RackAwareMode.Enforced$.MODULE$); zkClient.close(); } -} \ No newline at end of file + + public void deleteTopic(final String topic) { + log.debug("Deleting topic { name: {} }", topic); + + final ZkClient zkClient = new ZkClient( + zookeeperConnect(), + DEFAULT_ZK_SESSION_TIMEOUT_MS, + DEFAULT_ZK_CONNECTION_TIMEOUT_MS, + ZKStringSerializer$.MODULE$); + final boolean isSecure = false; + final ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(zookeeperConnect()), isSecure); + AdminUtils.deleteTopic(zkUtils, topic); + zkClient.close(); + } + +} diff --git a/tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java b/tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java new file mode 100644 index 000000000000..734c15b0f278 --- /dev/null +++ b/tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + *

              + * http://www.apache.org/licenses/LICENSE-2.0 + *

              + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.kafka.tools; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; +import joptsimple.OptionSpec; +import kafka.admin.TopicCommand; +import kafka.utils.ZkUtils; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.security.JaasUtils; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +/** + * {@link StreamsResetter} resets the processing state of a Kafka Streams application so that, for example, you can reprocess its input from scratch. + *

+ * Resetting the processing state of an application includes the following actions:
+ * <ol>
+ * <li>setting the application's consumer offsets for input and internal topics to zero</li>
+ * <li>skip over all intermediate user topics (i.e., "seekToEnd" for consumers of intermediate topics)</li>
+ * <li>deleting any topics created internally by Kafka Streams for this application</li>
+ * </ol>
+ *

+ * Only use this tool if no application instance is running. Otherwise, the application will be left in an invalid state and may crash or produce incorrect results. + *

+ * If you run multiple application instances, running this tool once is sufficient. + * However, before restarting any instance you need to call {@code KafkaStreams#cleanUp()} to wipe the local state store directory, as sketched below. + * Otherwise, your application will be left in an invalid state. + *
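
For illustration, the reset-and-cleanup workflow described above might look roughly as follows. This is a sketch only: the application id, topic name, and the {@code KafkaStreams} construction (builder and config) are assumed placeholders rather than part of the tool:

    // 1. With every instance of the application stopped, reset its offsets and internal topics.
    final int exitCode = new StreamsResetter().run(new String[]{
        "--application-id", "my-streams-app",
        "--bootstrap-servers", "localhost:9092",
        "--zookeeper", "localhost:2181",
        "--input-topics", "my-input-topic"
    });

    // 2. Before restarting each instance, wipe its local state stores, then start it again.
    final KafkaStreams streams = new KafkaStreams(builder, streamsConfig); // builder and config assumed
    streams.cleanUp();
    streams.start();
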

              + * User output topics will not be deleted or modified by this tool. + * If downstream applications consume intermediate or output topics, it is the user's responsibility to adjust those applications manually if required. + */ +public class StreamsResetter { + private static final int EXIT_CODE_SUCCESS = 0; + private static final int EXIT_CODE_ERROR = 1; + + private static OptionSpec bootstrapServerOption; + private static OptionSpec zookeeperOption; + private static OptionSpec applicationIdOption; + private static OptionSpec inputTopicsOption; + private static OptionSpec intermediateTopicsOption; + + private OptionSet options = null; + private final Properties consumerConfig = new Properties(); + private final List allTopics = new LinkedList<>(); + + public int run(final String[] args) { + return run(args, new Properties()); + } + + public int run(final String[] args, final Properties config) { + this.consumerConfig.clear(); + this.consumerConfig.putAll(config); + + int exitCode = EXIT_CODE_SUCCESS; + + ZkUtils zkUtils = null; + try { + parseArguments(args); + + zkUtils = ZkUtils.apply(this.options.valueOf(zookeeperOption), + 30000, + 30000, + JaasUtils.isZkSecurityEnabled()); + + this.allTopics.clear(); + this.allTopics.addAll(scala.collection.JavaConversions.seqAsJavaList(zkUtils.getAllTopics())); + + resetInputAndInternalTopicOffsets(); + seekToEndIntermediateTopics(); + deleteInternalTopics(zkUtils); + } catch (final Exception e) { + exitCode = EXIT_CODE_ERROR; + System.err.println("ERROR: " + e.getMessage()); + } finally { + if (zkUtils != null) { + zkUtils.close(); + } + } + + return exitCode; + } + + private void parseArguments(final String[] args) throws IOException { + final OptionParser optionParser = new OptionParser(); + applicationIdOption = optionParser.accepts("application-id", "The Kafka Streams application ID (application.id)") + .withRequiredArg() + .ofType(String.class) + .describedAs("id") + .required(); + bootstrapServerOption = optionParser.accepts("bootstrap-servers", "Comma-separated list of broker urls with format: HOST1:PORT1,HOST2:PORT2") + .withRequiredArg() + .ofType(String.class) + .defaultsTo("localhost:9092") + .describedAs("urls"); + zookeeperOption = optionParser.accepts("zookeeper", "Format: HOST:POST") + .withRequiredArg() + .ofType(String.class) + .defaultsTo("localhost:2181") + .describedAs("url"); + inputTopicsOption = optionParser.accepts("input-topics", "Comma-separated list of user input topics") + .withRequiredArg() + .ofType(String.class) + .withValuesSeparatedBy(',') + .describedAs("list"); + intermediateTopicsOption = optionParser.accepts("intermediate-topics", "Comma-separated list of intermediate user topics") + .withRequiredArg() + .ofType(String.class) + .withValuesSeparatedBy(',') + .describedAs("list"); + + try { + this.options = optionParser.parse(args); + } catch (final OptionException e) { + optionParser.printHelpOn(System.err); + throw e; + } + } + + private void resetInputAndInternalTopicOffsets() { + final List inputTopics = this.options.valuesOf(inputTopicsOption); + + if (inputTopics.size() == 0) { + System.out.println("No input topics specified."); + } else { + System.out.println("Resetting offsets to zero for input topics " + inputTopics + " and all internal topics."); + } + + final Properties config = new Properties(); + config.putAll(this.consumerConfig); + config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.options.valueOf(bootstrapServerOption)); + 
config.setProperty(ConsumerConfig.GROUP_ID_CONFIG, this.options.valueOf(applicationIdOption)); + config.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); + + for (final String inTopic : inputTopics) { + if (!this.allTopics.contains(inTopic)) { + System.out.println("Input topic " + inTopic + " not found. Skipping."); + } + } + + for (final String topic : this.allTopics) { + if (isInputTopic(topic) || isInternalTopic(topic)) { + System.out.println("Topic: " + topic); + + try (final KafkaConsumer client = new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer())) { + client.subscribe(Collections.singleton(topic)); + client.poll(1); + + final Set partitions = client.assignment(); + client.seekToBeginning(partitions); + for (final TopicPartition p : partitions) { + client.position(p); + } + client.commitSync(); + } catch (final RuntimeException e) { + System.err.println("ERROR: Resetting offsets for topic " + topic + " failed."); + throw e; + } + } + } + + System.out.println("Done."); + } + + private boolean isInputTopic(final String topic) { + return this.options.valuesOf(inputTopicsOption).contains(topic); + } + + private void seekToEndIntermediateTopics() { + final List intermediateTopics = this.options.valuesOf(intermediateTopicsOption); + + if (intermediateTopics.size() == 0) { + System.out.println("No intermediate user topics specified, skipping seek-to-end for user topic offsets."); + return; + } + + System.out.println("Seek-to-end for intermediate user topics " + intermediateTopics); + + final Properties config = new Properties(); + config.putAll(this.consumerConfig); + config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.options.valueOf(bootstrapServerOption)); + config.setProperty(ConsumerConfig.GROUP_ID_CONFIG, this.options.valueOf(applicationIdOption)); + config.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); + + for (final String topic : intermediateTopics) { + if (this.allTopics.contains(topic)) { + System.out.println("Topic: " + topic); + + try (final KafkaConsumer client = new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer())) { + client.subscribe(Collections.singleton(topic)); + client.poll(1); + + final Set partitions = client.assignment(); + client.seekToEnd(partitions); + for (final TopicPartition p : partitions) { + client.position(p); + } + client.commitSync(); + } catch (final RuntimeException e) { + System.err.println("ERROR: Seek-to-end for topic " + topic + " failed."); + throw e; + } + } else { + System.out.println("Topic " + topic + " not found. 
Skipping."); + } + } + + System.out.println("Done."); + } + + private void deleteInternalTopics(final ZkUtils zkUtils) { + System.out.println("Deleting all internal/auto-created topics for application " + this.options.valueOf(applicationIdOption)); + + for (final String topic : this.allTopics) { + if (isInternalTopic(topic)) { + final TopicCommand.TopicCommandOptions commandOptions = new TopicCommand.TopicCommandOptions(new String[]{ + "--zookeeper", this.options.valueOf(zookeeperOption), + "--delete", "--topic", topic}); + try { + TopicCommand.deleteTopic(zkUtils, commandOptions); + } catch (final RuntimeException e) { + System.err.println("ERROR: Deleting topic " + topic + " failed."); + throw e; + } + } + } + + System.out.println("Done."); + } + + private boolean isInternalTopic(final String topicName) { + return topicName.startsWith(this.options.valueOf(applicationIdOption) + "-") + && (topicName.endsWith("-changelog") || topicName.endsWith("-repartition")); + } + + public static void main(final String[] args) { + System.exit(new StreamsResetter().run(args)); + } + +} From f1430d0e5c3f0c3fc8e4eee0dcc84789e6d61c63 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 27 Jul 2016 23:54:04 +0100 Subject: [PATCH 228/267] KAFKA-3851; Automate release notes and include links to upgrade notes for release and most recent docs to forward users of older releases to newest docs. Author: Ewen Cheslack-Postava Reviewers: Jun Rao , Ismael Juma Closes #1670 from ewencp/kafka-3851-automate-release-notes (cherry picked from commit d5c821c198dcfbc69bd393c7725f93b22b479bca) Signed-off-by: Ismael Juma --- release_notes.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 release_notes.py diff --git a/release_notes.py b/release_notes.py new file mode 100755 index 000000000000..5940fb21ae5d --- /dev/null +++ b/release_notes.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +"""Usage: release_notes.py > RELEASE_NOTES.html + +Generates release notes for a Kafka release by generating an HTML doc containing some introductory information about the + release with links to the Kafka docs followed by a list of issues resolved in the release. The script will fail if it finds + any unresolved issues still marked with the target release. You should run this script after either resolving all issues or + moving outstanding issues to a later release. + +""" + +from jira import JIRA +import itertools, sys + +if len(sys.argv) < 2: + print >>sys.stderr, "Usage: release_notes.py " + sys.exit(1) + +version = sys.argv[1] +minor_version_dotless = "".join(version.split(".")[:3]) # i.e., 0.10.0 if version == 0.10.0.1 + +JIRA_BASE_URL = 'https://issues.apache.org/jira' +MAX_RESULTS = 100 # This is constrained for cloud instances so we need to fix this value + +def get_issues(jira, query, **kwargs): + """ + Get all issues matching the JQL query from the JIRA instance. This handles expanding paginated results for you. Any additional keyword arguments are forwarded to the JIRA.search_issues call. 
+ """ + results = [] + startAt = 0 + new_results = None + while new_results == None or len(new_results) == MAX_RESULTS: + new_results = jira.search_issues(query, startAt=startAt, maxResults=MAX_RESULTS, **kwargs) + results += new_results + startAt += len(new_results) + return results + +def issue_link(issue): + return "%s/browse/%s" % (JIRA_BASE_URL, issue.key) + + +if __name__ == "__main__": + apache = JIRA(JIRA_BASE_URL) + issues = get_issues(apache, 'project=KAFKA and fixVersion=%s' % version) + if not issues: + print >>sys.stderr, "Didn't find any issues for the target fix version" + sys.exit(1) + + unresolved_issues = [issue for issue in issues if issue.fields.resolution is None] + if unresolved_issues: + print >>sys.stderr, "The release is not completed since unresolved issues were found still tagged with this release as the fix version:" + for issue in unresolved_issues: + print >>sys.stderr, "Unresolved issue: %s %s" % (issue.key, issue_link(issue)) + sys.exit(1) + + # Get list of (issue type, [issues]) sorted by the issue ID type, with each subset of issues sorted by their key so they + # are in increasing order of bug #. To get a nice ordering of the issue types we customize the key used to sort by issue + # type a bit to ensure features and improvements end up first. + def issue_type_key(issue): + if issue.fields.issuetype.name == 'New Feature': + return -2 + if issue.fields.issuetype.name == 'Improvement': + return -1 + return issue.fields.issuetype.id + by_group = [(k,sorted(g, key=lambda issue: issue.id)) for k,g in itertools.groupby(sorted(issues, key=issue_type_key), lambda issue: issue.fields.issuetype.name)] + + print "

<h1>Release Notes - Kafka - Version %s</h1>" % version
+    print """<p>Below is a summary of the JIRA issues addressed in the %(version)s release of Kafka. For full documentation of the
+    release, a guide to get started, and information about the project, see the <a href="http://kafka.apache.org/">Kafka
+    project site</a>.</p>
+
+    <p><b>Note about upgrades:</b> Please carefully review the
+    <a href="http://kafka.apache.org/%(minor)s/documentation.html#upgrade">upgrade documentation</a> for this release thoroughly
+    before upgrading your cluster. The upgrade notes discuss any critical information about incompatibilities and breaking
+    changes, performance changes, and any other changes that might impact your production deployment of Kafka.</p>
+
+    <p>The documentation for the most recent release can be found at
+    <a href="http://kafka.apache.org/documentation.html">http://kafka.apache.org/documentation.html</a>.</p>""" % { 'version': version, 'minor': minor_version_dotless }
+    for itype, issues in by_group:
+        print "<h2>%s</h2>" % itype
+        print "<ul>"
+        for issue in issues:
+            print '<li>[<a href="%(link)s">%(key)s</a>] - %(summary)s</li>' % {'key': issue.key, 'link': issue_link(issue), 'summary': issue.fields.summary}
+        print "</ul>
              " From 0ea953f59245492e4e0455a02c9bab041eeb58d1 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Thu, 28 Jul 2016 11:15:45 +0100 Subject: [PATCH 229/267] KAFKA-3977; Defer fetch parsing for space efficiency and to ensure exceptions are raised to the user Author: Jason Gustafson Reviewers: Ewen Cheslack-Postava , Ismael Juma Closes #1656 from hachikuji/KAFKA-3977 (cherry picked from commit a750c5672ce4a5e3138df009fd41a5aef1b55e3f) Signed-off-by: Ismael Juma --- .../clients/consumer/internals/Fetcher.java | 325 ++++++++++-------- .../kafka/common/record/Compressor.java | 4 +- .../common/record/InvalidRecordException.java | 4 +- .../consumer/internals/FetcherTest.java | 153 +++++++-- 4 files changed, 310 insertions(+), 176 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index ddfb5841e3f9..c811a0332b45 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -28,6 +28,7 @@ import org.apache.kafka.common.errors.InvalidTopicException; import org.apache.kafka.common.errors.RecordTooLargeException; import org.apache.kafka.common.errors.RetriableException; +import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.errors.TimeoutException; import org.apache.kafka.common.errors.TopicAuthorizationException; import org.apache.kafka.common.metrics.Metrics; @@ -38,8 +39,10 @@ import org.apache.kafka.common.metrics.stats.Rate; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; +import org.apache.kafka.common.record.InvalidRecordException; import org.apache.kafka.common.record.LogEntry; import org.apache.kafka.common.record.MemoryRecords; +import org.apache.kafka.common.record.Record; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.common.requests.FetchRequest; import org.apache.kafka.common.requests.FetchResponse; @@ -59,7 +62,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -83,13 +85,11 @@ public class Fetcher { private final Metadata metadata; private final FetchManagerMetrics sensors; private final SubscriptionState subscriptions; - private final List> records; + private final List completedFetches; private final Deserializer keyDeserializer; private final Deserializer valueDeserializer; - private final Map offsetOutOfRangePartitions; - private final Set unauthorizedTopics; - private final Map recordTooLargePartitions; + private PartitionRecords nextInLineRecords = null; public Fetcher(ConsumerNetworkClient client, int minBytes, @@ -105,7 +105,6 @@ public Fetcher(ConsumerNetworkClient client, String metricGrpPrefix, Time time, long retryBackoffMs) { - this.time = time; this.client = client; this.metadata = metadata; @@ -115,31 +114,37 @@ public Fetcher(ConsumerNetworkClient client, this.fetchSize = fetchSize; this.maxPollRecords = maxPollRecords; this.checkCrcs = checkCrcs; - this.keyDeserializer = keyDeserializer; this.valueDeserializer = valueDeserializer; - - this.records = new LinkedList<>(); - this.offsetOutOfRangePartitions = new HashMap<>(); - this.unauthorizedTopics = new HashSet<>(); - this.recordTooLargePartitions = new HashMap<>(); - + this.completedFetches = new 
ArrayList<>(); this.sensors = new FetchManagerMetrics(metrics, metricGrpPrefix); this.retryBackoffMs = retryBackoffMs; } /** - * Set-up a fetch request for any node that we have assigned partitions for which doesn't have one. - * + * Set-up a fetch request for any node that we have assigned partitions for which doesn't already have + * an in-flight fetch or pending fetch data. */ public void sendFetches() { for (Map.Entry fetchEntry: createFetchRequests().entrySet()) { - final FetchRequest fetch = fetchEntry.getValue(); - client.send(fetchEntry.getKey(), ApiKeys.FETCH, fetch) + final FetchRequest request = fetchEntry.getValue(); + client.send(fetchEntry.getKey(), ApiKeys.FETCH, request) .addListener(new RequestFutureListener() { @Override - public void onSuccess(ClientResponse response) { - handleFetchResponse(response, fetch); + public void onSuccess(ClientResponse resp) { + FetchResponse response = new FetchResponse(resp.responseBody()); + Set partitions = new HashSet<>(response.responseData().keySet()); + FetchResponseMetricAggregator metricAggregator = new FetchResponseMetricAggregator(sensors, partitions); + + for (Map.Entry entry : response.responseData().entrySet()) { + TopicPartition partition = entry.getKey(); + long fetchOffset = request.fetchData().get(partition).offset; + FetchResponse.PartitionData fetchData = entry.getValue(); + completedFetches.add(new CompletedFetch(partition, fetchOffset, fetchData, metricAggregator)); + } + + sensors.fetchLatency.record(resp.requestLatencyMs()); + sensors.fetchThrottleTimeSensor.record(response.getThrottleTime()); } @Override @@ -152,7 +157,7 @@ public void onFailure(RuntimeException e) { /** * Update the fetch positions for the provided partitions. - * @param partitions + * @param partitions the partitions to update positions for * @throws NoOffsetForPartitionException If no offset is stored for a given partition and no reset policy is available */ public void updateFetchPositions(Set partitions) { @@ -323,62 +328,6 @@ private long listOffset(TopicPartition partition, long timestamp) { } } - /** - * If any partition from previous fetchResponse contains OffsetOutOfRange error and - * the defaultResetPolicy is NONE, throw OffsetOutOfRangeException - * - * @throws OffsetOutOfRangeException If there is OffsetOutOfRange error in fetchResponse - */ - private void throwIfOffsetOutOfRange() throws OffsetOutOfRangeException { - Map currentOutOfRangePartitions = new HashMap<>(); - - // filter offsetOutOfRangePartitions to retain only the fetchable partitions - for (Map.Entry entry: this.offsetOutOfRangePartitions.entrySet()) { - if (!subscriptions.isFetchable(entry.getKey())) { - log.debug("Ignoring fetched records for {} since it is no longer fetchable", entry.getKey()); - continue; - } - Long position = subscriptions.position(entry.getKey()); - // ignore partition if the current position != the offset in fetchResponse, e.g. 
after seek() - if (position != null && entry.getValue().equals(position)) - currentOutOfRangePartitions.put(entry.getKey(), entry.getValue()); - } - this.offsetOutOfRangePartitions.clear(); - if (!currentOutOfRangePartitions.isEmpty()) - throw new OffsetOutOfRangeException(currentOutOfRangePartitions); - } - - /** - * If any topic from previous fetchResponse contains an Authorization error, raise an exception - * @throws TopicAuthorizationException - */ - private void throwIfUnauthorizedTopics() throws TopicAuthorizationException { - if (!unauthorizedTopics.isEmpty()) { - Set topics = new HashSet<>(unauthorizedTopics); - unauthorizedTopics.clear(); - throw new TopicAuthorizationException(topics); - } - } - - /** - * If any partition from previous fetchResponse gets a RecordTooLarge error, throw RecordTooLargeException - * - * @throws RecordTooLargeException If there is a message larger than fetch size and hence cannot be ever returned - */ - private void throwIfRecordTooLarge() throws RecordTooLargeException { - Map copiedRecordTooLargePartitions = new HashMap<>(this.recordTooLargePartitions); - this.recordTooLargePartitions.clear(); - - if (!copiedRecordTooLargePartitions.isEmpty()) - throw new RecordTooLargeException("There are some messages at [Partition=Offset]: " - + copiedRecordTooLargePartitions - + " whose size is larger than the fetch size " - + this.fetchSize - + " and hence cannot be ever returned." - + " Increase the fetch size, or decrease the maximum message size the broker will allow.", - copiedRecordTooLargePartitions); - } - /** * Return the fetched records, empty the record buffer and update the consumed position. * @@ -393,60 +342,68 @@ public Map>> fetchedRecords() { return Collections.emptyMap(); } else { Map>> drained = new HashMap<>(); - throwIfOffsetOutOfRange(); - throwIfUnauthorizedTopics(); - throwIfRecordTooLarge(); - - int maxRecords = maxPollRecords; - Iterator> iterator = records.iterator(); - while (iterator.hasNext() && maxRecords > 0) { - PartitionRecords part = iterator.next(); - maxRecords -= append(drained, part, maxRecords); - if (part.isConsumed()) - iterator.remove(); + int recordsRemaining = maxPollRecords; + Iterator completedFetchesIterator = completedFetches.iterator(); + + while (recordsRemaining > 0) { + if (nextInLineRecords == null || nextInLineRecords.isEmpty()) { + if (!completedFetchesIterator.hasNext()) + break; + + CompletedFetch completion = completedFetchesIterator.next(); + completedFetchesIterator.remove(); + nextInLineRecords = parseFetchedData(completion); + } else { + recordsRemaining -= append(drained, nextInLineRecords, recordsRemaining); + } } + return drained; } } private int append(Map>> drained, - PartitionRecords part, + PartitionRecords partitionRecords, int maxRecords) { - if (!subscriptions.isAssigned(part.partition)) { + if (partitionRecords.isEmpty()) + return 0; + + if (!subscriptions.isAssigned(partitionRecords.partition)) { // this can happen when a rebalance happened before fetched records are returned to the consumer's poll call - log.debug("Not returning fetched records for partition {} since it is no longer assigned", part.partition); + log.debug("Not returning fetched records for partition {} since it is no longer assigned", partitionRecords.partition); } else { // note that the consumed position should always be available as long as the partition is still assigned - long position = subscriptions.position(part.partition); - if (!subscriptions.isFetchable(part.partition)) { + long position = 
subscriptions.position(partitionRecords.partition); + if (!subscriptions.isFetchable(partitionRecords.partition)) { // this can happen when a partition is paused before fetched records are returned to the consumer's poll call - log.debug("Not returning fetched records for assigned partition {} since it is no longer fetchable", part.partition); - } else if (part.fetchOffset == position) { - List> partRecords = part.take(maxRecords); + log.debug("Not returning fetched records for assigned partition {} since it is no longer fetchable", partitionRecords.partition); + } else if (partitionRecords.fetchOffset == position) { + // we are ensured to have at least one record since we already checked for emptiness + List> partRecords = partitionRecords.take(maxRecords); long nextOffset = partRecords.get(partRecords.size() - 1).offset() + 1; log.trace("Returning fetched records at offset {} for assigned partition {} and update " + - "position to {}", position, part.partition, nextOffset); + "position to {}", position, partitionRecords.partition, nextOffset); - List> records = drained.get(part.partition); + List> records = drained.get(partitionRecords.partition); if (records == null) { records = partRecords; - drained.put(part.partition, records); + drained.put(partitionRecords.partition, records); } else { records.addAll(partRecords); } - subscriptions.position(part.partition, nextOffset); + subscriptions.position(partitionRecords.partition, nextOffset); return partRecords.size(); } else { // these records aren't next in line based on the last consumed position, ignore them // they must be from an obsolete request log.debug("Ignoring fetched records for {} at offset {} since the current position is {}", - part.partition, part.fetchOffset, position); + partitionRecords.partition, partitionRecords.fetchOffset, position); } } - part.discard(); + partitionRecords.discard(); return 0; } @@ -513,10 +470,10 @@ private void handleListOffsetResponse(TopicPartition topicPartition, private Set fetchablePartitions() { Set fetchable = subscriptions.fetchablePartitions(); - if (records.isEmpty()) - return fetchable; - for (PartitionRecords partitionRecords : records) - fetchable.remove(partitionRecords.partition); + if (nextInLineRecords != null && !nextInLineRecords.isEmpty()) + fetchable.remove(nextInLineRecords.partition); + for (CompletedFetch completedFetch : completedFetches) + fetchable.remove(completedFetch.partition); return fetchable; } @@ -559,30 +516,29 @@ private Map createFetchRequests() { /** * The callback for fetch completion */ - private void handleFetchResponse(ClientResponse resp, FetchRequest request) { - int totalBytes = 0; - int totalCount = 0; - FetchResponse response = new FetchResponse(resp.responseBody()); - for (Map.Entry entry : response.responseData().entrySet()) { - TopicPartition tp = entry.getKey(); - FetchResponse.PartitionData partition = entry.getValue(); + private PartitionRecords parseFetchedData(CompletedFetch completedFetch) { + TopicPartition tp = completedFetch.partition; + FetchResponse.PartitionData partition = completedFetch.partitionData; + long fetchOffset = completedFetch.fetchedOffset; + int bytes = 0; + int recordsCount = 0; + PartitionRecords parsedRecords = null; + + try { if (!subscriptions.isFetchable(tp)) { // this can happen when a rebalance happened or a partition consumption paused // while fetch is still in-flight log.debug("Ignoring fetched records for partition {} since it is no longer fetchable", tp); } else if (partition.errorCode == Errors.NONE.code()) 
{ - long fetchOffset = request.fetchData().get(tp).offset; - // we are interested in this fetch only if the beginning offset matches the // current consumed position Long position = subscriptions.position(tp); if (position == null || position != fetchOffset) { - log.debug("Discarding fetch response for partition {} since its offset {} does not match " + + log.debug("Discarding stale fetch response for partition {} since its offset {} does not match " + "the expected offset {}", tp, fetchOffset, position); - continue; + return null; } - int bytes = 0; ByteBuffer buffer = partition.recordSet; MemoryRecords records = MemoryRecords.readableRecords(buffer); List> parsed = new ArrayList<>(); @@ -597,79 +553,95 @@ private void handleFetchResponse(ClientResponse resp, FetchRequest request) { } } + recordsCount = parsed.size(); + this.sensors.recordTopicFetchMetrics(tp.topic(), bytes, recordsCount); + if (!parsed.isEmpty()) { log.trace("Adding fetched record for partition {} with offset {} to buffered record list", tp, position); + parsedRecords = new PartitionRecords<>(fetchOffset, tp, parsed); ConsumerRecord record = parsed.get(parsed.size() - 1); - this.records.add(new PartitionRecords<>(fetchOffset, tp, parsed)); this.sensors.recordsFetchLag.record(partition.highWatermark - record.offset()); } else if (buffer.limit() > 0 && !skippedRecords) { // we did not read a single message from a non-empty buffer // because that message's size is larger than fetch size, in this case // record this exception - this.recordTooLargePartitions.put(tp, fetchOffset); + Map recordTooLargePartitions = Collections.singletonMap(tp, fetchOffset); + throw new RecordTooLargeException("There are some messages at [Partition=Offset]: " + + recordTooLargePartitions + + " whose size is larger than the fetch size " + + this.fetchSize + + " and hence cannot be ever returned." 
+ + " Increase the fetch size on the client (using max.partition.fetch.bytes)," + + " or decrease the maximum message size the broker will allow (using message.max.bytes).", + recordTooLargePartitions); } - - this.sensors.recordTopicFetchMetrics(tp.topic(), bytes, parsed.size()); - totalBytes += bytes; - totalCount += parsed.size(); } else if (partition.errorCode == Errors.NOT_LEADER_FOR_PARTITION.code() - || partition.errorCode == Errors.UNKNOWN_TOPIC_OR_PARTITION.code()) { + || partition.errorCode == Errors.UNKNOWN_TOPIC_OR_PARTITION.code()) { this.metadata.requestUpdate(); } else if (partition.errorCode == Errors.OFFSET_OUT_OF_RANGE.code()) { - long fetchOffset = request.fetchData().get(tp).offset; - if (subscriptions.hasDefaultOffsetResetPolicy()) + if (fetchOffset != subscriptions.position(tp)) { + log.debug("Discarding stale fetch response for partition {} since the fetched offset {}" + + "does not match the current offset {}", tp, fetchOffset, subscriptions.position(tp)); + } else if (subscriptions.hasDefaultOffsetResetPolicy()) { + log.info("Fetch offset {} is out of range for partition {}, resetting offset", fetchOffset, tp); subscriptions.needOffsetReset(tp); - else - this.offsetOutOfRangePartitions.put(tp, fetchOffset); - log.info("Fetch offset {} is out of range, resetting offset", fetchOffset); + } else { + throw new OffsetOutOfRangeException(Collections.singletonMap(tp, fetchOffset)); + } } else if (partition.errorCode == Errors.TOPIC_AUTHORIZATION_FAILED.code()) { log.warn("Not authorized to read from topic {}.", tp.topic()); - unauthorizedTopics.add(tp.topic()); + throw new TopicAuthorizationException(Collections.singleton(tp.topic())); } else if (partition.errorCode == Errors.UNKNOWN.code()) { log.warn("Unknown error fetching data for topic-partition {}", tp); } else { throw new IllegalStateException("Unexpected error code " + partition.errorCode + " while fetching data"); } + } finally { + completedFetch.metricAggregator.record(tp, bytes, recordsCount); } - this.sensors.bytesFetched.record(totalBytes); - this.sensors.recordsFetched.record(totalCount); - this.sensors.fetchThrottleTimeSensor.record(response.getThrottleTime()); - this.sensors.fetchLatency.record(resp.requestLatencyMs()); + + return parsedRecords; } /** * Parse the record entry, deserializing the key / value fields if necessary */ private ConsumerRecord parseRecord(TopicPartition partition, LogEntry logEntry) { + Record record = logEntry.record(); + + if (this.checkCrcs && !record.isValid()) + throw new InvalidRecordException("Record for partition " + partition + " at offset " + + logEntry.offset() + " is corrupt (stored crc = " + record.checksum() + + ", computed crc = " + + record.computeChecksum() + + ")"); + try { - if (this.checkCrcs) - logEntry.record().ensureValid(); long offset = logEntry.offset(); - long timestamp = logEntry.record().timestamp(); - TimestampType timestampType = logEntry.record().timestampType(); - ByteBuffer keyBytes = logEntry.record().key(); + long timestamp = record.timestamp(); + TimestampType timestampType = record.timestampType(); + ByteBuffer keyBytes = record.key(); byte[] keyByteArray = keyBytes == null ? null : Utils.toArray(keyBytes); K key = keyBytes == null ? null : this.keyDeserializer.deserialize(partition.topic(), keyByteArray); - ByteBuffer valueBytes = logEntry.record().value(); + ByteBuffer valueBytes = record.value(); byte[] valueByteArray = valueBytes == null ? null : Utils.toArray(valueBytes); V value = valueBytes == null ? 
null : this.valueDeserializer.deserialize(partition.topic(), valueByteArray); return new ConsumerRecord<>(partition.topic(), partition.partition(), offset, - timestamp, timestampType, logEntry.record().checksum(), + timestamp, timestampType, record.checksum(), keyByteArray == null ? ConsumerRecord.NULL_SIZE : keyByteArray.length, valueByteArray == null ? ConsumerRecord.NULL_SIZE : valueByteArray.length, key, value); - } catch (KafkaException e) { - throw e; } catch (RuntimeException e) { - throw new KafkaException("Error deserializing key/value for partition " + partition + " at offset " + logEntry.offset(), e); + throw new SerializationException("Error deserializing key/value for partition " + partition + + " at offset " + logEntry.offset(), e); } } private static class PartitionRecords { - public long fetchOffset; - public TopicPartition partition; - public List> records; + private long fetchOffset; + private TopicPartition partition; + private List> records; public PartitionRecords(long fetchOffset, TopicPartition partition, List> records) { this.fetchOffset = fetchOffset; @@ -677,7 +649,7 @@ public PartitionRecords(long fetchOffset, TopicPartition partition, List> take(int n) { if (records == null) - return Collections.emptyList(); + return new ArrayList<>(); if (n >= records.size()) { List> res = this.records; @@ -709,7 +681,59 @@ private List> take(int n) { } } - private class FetchManagerMetrics { + private static class CompletedFetch { + private final TopicPartition partition; + private final long fetchedOffset; + private final FetchResponse.PartitionData partitionData; + private final FetchResponseMetricAggregator metricAggregator; + + public CompletedFetch(TopicPartition partition, + long fetchedOffset, + FetchResponse.PartitionData partitionData, + FetchResponseMetricAggregator metricAggregator) { + this.partition = partition; + this.fetchedOffset = fetchedOffset; + this.partitionData = partitionData; + this.metricAggregator = metricAggregator; + } + } + + /** + * Since we parse the message data for each partition from each fetch response lazily, fetch-level + * metrics need to be aggregated as the messages from each partition are parsed. This class is used + * to facilitate this incremental aggregation. + */ + private static class FetchResponseMetricAggregator { + private final FetchManagerMetrics sensors; + private final Set unrecordedPartitions; + + private int totalBytes; + private int totalRecords; + + public FetchResponseMetricAggregator(FetchManagerMetrics sensors, + Set partitions) { + this.sensors = sensors; + this.unrecordedPartitions = partitions; + } + + /** + * After each partition is parsed, we update the current metric totals with the total bytes + * and number of records parsed. After all partitions have reported, we write the metric. 
+ */ + public void record(TopicPartition partition, int bytes, int records) { + unrecordedPartitions.remove(partition); + totalBytes += bytes; + totalRecords += records; + + if (unrecordedPartitions.isEmpty()) { + // once all expected partitions from the fetch have reported in, record the metrics + sensors.bytesFetched.record(totalBytes); + sensors.recordsFetched.record(totalRecords); + } + } + } + + private static class FetchManagerMetrics { public final Metrics metrics; public final String metricGrpName; @@ -719,7 +743,6 @@ private class FetchManagerMetrics { public final Sensor recordsFetchLag; public final Sensor fetchThrottleTimeSensor; - public FetchManagerMetrics(Metrics metrics, String metricGrpPrefix) { this.metrics = metrics; this.metricGrpName = metricGrpPrefix + "-fetch-manager-metrics"; diff --git a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java index e23a52e710f0..a806975959ed 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/Compressor.java +++ b/clients/src/main/java/org/apache/kafka/common/record/Compressor.java @@ -242,7 +242,7 @@ public long estimatedBytesWritten() { // the following two functions also need to be public since they are used in MemoryRecords.iteration - static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, CompressionType type, int bufferSize) { + public static DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, CompressionType type, int bufferSize) { try { switch (type) { case NONE: @@ -271,7 +271,7 @@ static public DataOutputStream wrapForOutput(ByteBufferOutputStream buffer, Comp } } - static public DataInputStream wrapForInput(ByteBufferInputStream buffer, CompressionType type, byte messageVersion) { + public static DataInputStream wrapForInput(ByteBufferInputStream buffer, CompressionType type, byte messageVersion) { try { switch (type) { case NONE: diff --git a/clients/src/main/java/org/apache/kafka/common/record/InvalidRecordException.java b/clients/src/main/java/org/apache/kafka/common/record/InvalidRecordException.java index 5815b21591df..a1009ca2e383 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/InvalidRecordException.java +++ b/clients/src/main/java/org/apache/kafka/common/record/InvalidRecordException.java @@ -16,7 +16,9 @@ */ package org.apache.kafka.common.record; -public class InvalidRecordException extends RuntimeException { +import org.apache.kafka.common.KafkaException; + +public class InvalidRecordException extends KafkaException { private static final long serialVersionUID = 1; diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java index 8fad30f986b7..2fbd43ec7ae5 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java @@ -31,6 +31,7 @@ import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.InvalidTopicException; import org.apache.kafka.common.errors.RecordTooLargeException; +import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.errors.TimeoutException; import org.apache.kafka.common.errors.TopicAuthorizationException; import org.apache.kafka.common.metrics.KafkaMetric; @@ -38,6 +39,8 @@ import org.apache.kafka.common.protocol.Errors; import 
org.apache.kafka.common.protocol.types.Struct; import org.apache.kafka.common.record.CompressionType; +import org.apache.kafka.common.record.Compressor; +import org.apache.kafka.common.record.InvalidRecordException; import org.apache.kafka.common.record.MemoryRecords; import org.apache.kafka.common.record.Record; import org.apache.kafka.common.requests.FetchRequest; @@ -47,6 +50,7 @@ import org.apache.kafka.common.requests.MetadataRequest; import org.apache.kafka.common.requests.MetadataResponse; import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.utils.MockTime; import org.apache.kafka.test.TestUtils; import org.junit.After; @@ -128,7 +132,7 @@ public void testFetchNormal() { consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(3, records.size()); - assertEquals(4L, (long) subscriptions.position(tp)); // this is the next fetching position + assertEquals(4L, subscriptions.position(tp).longValue()); // this is the next fetching position long offset = 1; for (ConsumerRecord record : records) { assertEquals(offset, record.offset()); @@ -147,9 +151,83 @@ public boolean matches(ClientRequest request) { }; } + @Test + public void testFetchedRecordsRaisesOnSerializationErrors() { + // raise an exception from somewhere in the middle of the fetch response + // so that we can verify that our position does not advance after raising + ByteArrayDeserializer deserializer = new ByteArrayDeserializer() { + int i = 0; + @Override + public byte[] deserialize(String topic, byte[] data) { + if (i++ == 1) + throw new SerializationException(); + return data; + } + }; + + Fetcher fetcher = createFetcher(subscriptions, new Metrics(time), deserializer, deserializer); + + subscriptions.assignFromUser(Collections.singleton(tp)); + subscriptions.seek(tp, 1); + + client.prepareResponse(matchesOffset(tp, 1), fetchResponse(this.records.buffer(), Errors.NONE.code(), 100L, 0)); + + fetcher.sendFetches(); + consumerClient.poll(0); + try { + fetcher.fetchedRecords(); + fail("fetchedRecords should have raised"); + } catch (SerializationException e) { + // the position should not advance since no data has been returned + assertEquals(1, subscriptions.position(tp).longValue()); + } + } + + @Test + public void testParseInvalidRecord() { + ByteBuffer buffer = ByteBuffer.allocate(1024); + Compressor compressor = new Compressor(buffer, CompressionType.NONE); + + byte[] key = "foo".getBytes(); + byte[] value = "baz".getBytes(); + long offset = 0; + long timestamp = 500L; + + int size = Record.recordSize(key, value); + long crc = Record.computeChecksum(timestamp, key, value, CompressionType.NONE, 0, -1); + + // write one valid record + compressor.putLong(offset); + compressor.putInt(size); + Record.write(compressor, crc, Record.computeAttributes(CompressionType.NONE), timestamp, key, value, 0, -1); + + // and one invalid record (note the crc) + compressor.putLong(offset); + compressor.putInt(size); + Record.write(compressor, crc + 1, Record.computeAttributes(CompressionType.NONE), timestamp, key, value, 0, -1); + + compressor.close(); + buffer.flip(); + + subscriptions.assignFromUser(Arrays.asList(tp)); + subscriptions.seek(tp, 0); + + // normal fetch + fetcher.sendFetches(); + client.prepareResponse(fetchResponse(buffer, Errors.NONE.code(), 100L, 0)); + consumerClient.poll(0); + try { + fetcher.fetchedRecords(); + fail("fetchedRecords should have raised"); + } catch (InvalidRecordException e) { + // 
the position should not advance since no data has been returned + assertEquals(0, subscriptions.position(tp).longValue()); + } + } + @Test public void testFetchMaxPollRecords() { - Fetcher fetcher = createFetcher(2, subscriptions, new Metrics(time)); + Fetcher fetcher = createFetcher(subscriptions, new Metrics(time), 2); List> records; subscriptions.assignFromUser(Arrays.asList(tp)); @@ -162,7 +240,7 @@ public void testFetchMaxPollRecords() { consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(2, records.size()); - assertEquals(3L, (long) subscriptions.position(tp)); + assertEquals(3L, subscriptions.position(tp).longValue()); assertEquals(1, records.get(0).offset()); assertEquals(2, records.get(1).offset()); @@ -170,14 +248,14 @@ public void testFetchMaxPollRecords() { consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(1, records.size()); - assertEquals(4L, (long) subscriptions.position(tp)); + assertEquals(4L, subscriptions.position(tp).longValue()); assertEquals(3, records.get(0).offset()); fetcher.sendFetches(); consumerClient.poll(0); records = fetcher.fetchedRecords().get(tp); assertEquals(2, records.size()); - assertEquals(6L, (long) subscriptions.position(tp)); + assertEquals(6L, subscriptions.position(tp).longValue()); assertEquals(4, records.get(0).offset()); assertEquals(5, records.get(1).offset()); } @@ -203,7 +281,7 @@ public void testFetchNonContinuousRecords() { consumerClient.poll(0); consumerRecords = fetcher.fetchedRecords().get(tp); assertEquals(3, consumerRecords.size()); - assertEquals(31L, (long) subscriptions.position(tp)); // this is the next fetching position + assertEquals(31L, subscriptions.position(tp).longValue()); // this is the next fetching position assertEquals(15L, consumerRecords.get(0).offset()); assertEquals(20L, consumerRecords.get(1).offset()); @@ -318,11 +396,27 @@ public void testFetchOffsetOutOfRange() { fetcher.sendFetches(); client.prepareResponse(fetchResponse(this.records.buffer(), Errors.OFFSET_OUT_OF_RANGE.code(), 100L, 0)); consumerClient.poll(0); - assertTrue(subscriptions.isOffsetResetNeeded(tp)); assertEquals(0, fetcher.fetchedRecords().size()); + assertTrue(subscriptions.isOffsetResetNeeded(tp)); assertEquals(null, subscriptions.position(tp)); } + @Test + public void testStaleOutOfRangeError() { + // verify that an out of range error which arrives after a seek + // does not cause us to reset our position or throw an exception + subscriptions.assignFromUser(Arrays.asList(tp)); + subscriptions.seek(tp, 0); + + fetcher.sendFetches(); + client.prepareResponse(fetchResponse(this.records.buffer(), Errors.OFFSET_OUT_OF_RANGE.code(), 100L, 0)); + subscriptions.seek(tp, 1); + consumerClient.poll(0); + assertEquals(0, fetcher.fetchedRecords().size()); + assertFalse(subscriptions.isOffsetResetNeeded(tp)); + assertEquals(1, subscriptions.position(tp).longValue()); + } + @Test public void testFetchedRecordsAfterSeek() { subscriptionsNoAutoReset.assignFromUser(Arrays.asList(tp)); @@ -368,7 +462,7 @@ public void testFetchDisconnected() { // disconnects should have no affect on subscription state assertFalse(subscriptions.isOffsetResetNeeded(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(0, (long) subscriptions.position(tp)); + assertEquals(0, subscriptions.position(tp).longValue()); } @Test @@ -380,7 +474,7 @@ public void testUpdateFetchPositionToCommitted() { fetcher.updateFetchPositions(Collections.singleton(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(5, 
(long) subscriptions.position(tp)); + assertEquals(5, subscriptions.position(tp).longValue()); } @Test @@ -393,7 +487,7 @@ public void testUpdateFetchPositionResetToDefaultOffset() { fetcher.updateFetchPositions(Collections.singleton(tp)); assertFalse(subscriptions.isOffsetResetNeeded(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(5, (long) subscriptions.position(tp)); + assertEquals(5, subscriptions.position(tp).longValue()); } @Test @@ -406,7 +500,7 @@ public void testUpdateFetchPositionResetToLatestOffset() { fetcher.updateFetchPositions(Collections.singleton(tp)); assertFalse(subscriptions.isOffsetResetNeeded(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(5, (long) subscriptions.position(tp)); + assertEquals(5, subscriptions.position(tp).longValue()); } @Test @@ -419,7 +513,7 @@ public void testUpdateFetchPositionResetToEarliestOffset() { fetcher.updateFetchPositions(Collections.singleton(tp)); assertFalse(subscriptions.isOffsetResetNeeded(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(5, (long) subscriptions.position(tp)); + assertEquals(5, subscriptions.position(tp).longValue()); } @Test @@ -437,7 +531,7 @@ public void testUpdateFetchPositionDisconnect() { fetcher.updateFetchPositions(Collections.singleton(tp)); assertFalse(subscriptions.isOffsetResetNeeded(tp)); assertTrue(subscriptions.isFetchable(tp)); - assertEquals(5, (long) subscriptions.position(tp)); + assertEquals(5, subscriptions.position(tp).longValue()); } @Test @@ -575,17 +669,36 @@ private MetadataResponse newMetadataResponse(String topic, Errors error) { return new MetadataResponse(cluster.nodes(), MetadataResponse.NO_CONTROLLER_ID, Arrays.asList(topicMetadata)); } - private Fetcher createFetcher(int maxPollRecords, - SubscriptionState subscriptions, - Metrics metrics) { + private Fetcher createFetcher(SubscriptionState subscriptions, + Metrics metrics, + int maxPollRecords) { + return createFetcher(subscriptions, metrics, new ByteArrayDeserializer(), new ByteArrayDeserializer(), maxPollRecords); + } + + private Fetcher createFetcher(SubscriptionState subscriptions, Metrics metrics) { + return createFetcher(subscriptions, metrics, Integer.MAX_VALUE); + } + + private Fetcher createFetcher(SubscriptionState subscriptions, + Metrics metrics, + Deserializer keyDeserializer, + Deserializer valueDeserializer) { + return createFetcher(subscriptions, metrics, keyDeserializer, valueDeserializer, Integer.MAX_VALUE); + } + + private Fetcher createFetcher(SubscriptionState subscriptions, + Metrics metrics, + Deserializer keyDeserializer, + Deserializer valueDeserializer, + int maxPollRecords) { return new Fetcher<>(consumerClient, minBytes, maxWaitMs, fetchSize, maxPollRecords, true, // check crc - new ByteArrayDeserializer(), - new ByteArrayDeserializer(), + keyDeserializer, + valueDeserializer, metadata, subscriptions, metrics, @@ -594,8 +707,4 @@ private Fetcher createFetcher(int maxPollRecords, retryBackoffMs); } - - private Fetcher createFetcher(SubscriptionState subscriptions, Metrics metrics) { - return createFetcher(Integer.MAX_VALUE, subscriptions, metrics); - } } From 25f435098589d01f5cc514636f5be0dae8666611 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Thu, 28 Jul 2016 12:46:49 +0100 Subject: [PATCH 230/267] HOTFIX: Add license information to release_notes.py --- release_notes.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/release_notes.py b/release_notes.py index 5940fb21ae5d..b8b2ea15cd3e 100755 --- a/release_notes.py +++ b/release_notes.py 
@@ -1,5 +1,20 @@ #!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Usage: release_notes.py > RELEASE_NOTES.html Generates release notes for a Kafka release by generating an HTML doc containing some introductory information about the From 11c47dd953b2a59b37de5719454c523db3f34411 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Thu, 28 Jul 2016 21:36:16 +0100 Subject: [PATCH 231/267] MINOR: Consumer should throw KafkaException on invalid checksum InvalidRecordException is not part of the public API so go back to the behaviour before ff557f02 for now. Author: Jason Gustafson Reviewers: Ismael Juma Closes #1676 from hachikuji/raise-kafka-exception-on-invalid-crc (cherry picked from commit 131f96868aa24f38f69ee2ab769ea3f289e5c1fc) Signed-off-by: Ismael Juma --- .../org/apache/kafka/clients/consumer/internals/Fetcher.java | 3 +-- .../apache/kafka/clients/consumer/internals/FetcherTest.java | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index c811a0332b45..cf2ebc31f170 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -39,7 +39,6 @@ import org.apache.kafka.common.metrics.stats.Rate; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.Errors; -import org.apache.kafka.common.record.InvalidRecordException; import org.apache.kafka.common.record.LogEntry; import org.apache.kafka.common.record.MemoryRecords; import org.apache.kafka.common.record.Record; @@ -610,7 +609,7 @@ private ConsumerRecord parseRecord(TopicPartition partition, LogEntry logE Record record = logEntry.record(); if (this.checkCrcs && !record.isValid()) - throw new InvalidRecordException("Record for partition " + partition + " at offset " + throw new KafkaException("Record for partition " + partition + " at offset " + logEntry.offset() + " is corrupt (stored crc = " + record.checksum() + ", computed crc = " + record.computeChecksum() diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java index 2fbd43ec7ae5..ba04cb5be10a 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/FetcherTest.java @@ -25,6 +25,7 @@ import org.apache.kafka.clients.consumer.OffsetOutOfRangeException; import org.apache.kafka.clients.consumer.OffsetResetStrategy; import org.apache.kafka.common.Cluster; +import 
org.apache.kafka.common.KafkaException; import org.apache.kafka.common.MetricName; import org.apache.kafka.common.Node; import org.apache.kafka.common.PartitionInfo; @@ -40,7 +41,6 @@ import org.apache.kafka.common.protocol.types.Struct; import org.apache.kafka.common.record.CompressionType; import org.apache.kafka.common.record.Compressor; -import org.apache.kafka.common.record.InvalidRecordException; import org.apache.kafka.common.record.MemoryRecords; import org.apache.kafka.common.record.Record; import org.apache.kafka.common.requests.FetchRequest; @@ -219,7 +219,7 @@ public void testParseInvalidRecord() { try { fetcher.fetchedRecords(); fail("fetchedRecords should have raised"); - } catch (InvalidRecordException e) { + } catch (KafkaException e) { // the position should not advance since no data has been returned assertEquals(0, subscriptions.position(tp).longValue()); } From 0d96bc469e7ef9fca8eb67ba1fb756a0a515e1dd Mon Sep 17 00:00:00 2001 From: Gwen Shapira Date: Thu, 28 Jul 2016 13:48:04 -0700 Subject: [PATCH 232/267] KAFKA-3852: Clarify how to handle message format upgrade without killing performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ing performance Author: Gwen Shapira Reviewers: Ismael Juma , Ewen Cheslack-Postava Closes #1678 from gwenshap/kafka-3852 (cherry picked from commit 071b76cc50b4f79c839306f40eb84ee2b9724ab2) Signed-off-by: Ewen Cheslack-Postava --- docs/upgrade.html | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/upgrade.html b/docs/upgrade.html index a9a14433be63..34adce1fc6f3 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -18,9 +18,9 @@

              1.5 Upgrading From Previous Versions

              Upgrading from 0.8.x or 0.9.x to 0.10.0.0

              -0.10.0.0 has potential breaking changes (please review before upgrading) and -there may be a performance impact during the upgrade. Because new protocols -are introduced, it is important to upgrade your Kafka clusters before upgrading your clients. +0.10.0.0 has potential breaking changes (please review before upgrading) and possible performance impact following the upgrade. By following the recommended rolling upgrade plan below, you guarantee no downtime and no performance impact during and following the upgrade. +
              +Note: Because new protocols are introduced, it is important to upgrade your Kafka clusters before upgrading your clients.

              Notes to clients with version 0.9.0.0: Due to a bug introduced in 0.9.0.0, clients that depend on ZooKeeper (old Scala high-level Consumer and MirrorMaker if used with the old consumer) will not @@ -30,15 +30,16 @@

              Upgrading from 0.8.x or 0.9.x to 0.10.

              For a rolling upgrade:

                -
              1. Update server.properties file on all brokers and add the following property: inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. 0.8.2 or 0.9.0.0). - We recommend that users set log.message.format.version=CURRENT_KAFKA_VERSION as well to ensure that performance of 0.8 and 0.9 consumers is not affected - during the upgrade. See potential performance impact during upgrade for the details. +
              2. Update server.properties file on all brokers and add the following properties: +
              3. Upgrade the brokers. This can be done a broker at a time by simply bringing it down, updating the code, and restarting it.
              4. -
              5. Once the entire cluster is upgraded, bump the protocol version by editing inter.broker.protocol.version and setting it to 0.10.0.0.
              6. +
              7. Once the entire cluster is upgraded, bump the protocol version by editing inter.broker.protocol.version and setting it to 0.10.0.0. NOTE: You shouldn't touch log.message.format.version yet - this parameter should only change once all consumers have been upgraded to 0.10.0.0
              8. Restart the brokers one by one for the new protocol version to take effect.
              9. -
              10. Once most consumers have been upgraded to 0.10.0 and if you followed the recommendation to set log.message.format.version=CURRENT_KAFKA_VERSION, change - log.message.format.version to 0.10.0 on each broker and restart them one by one. +
              11. Once all consumers have been upgraded to 0.10.0, change log.message.format.version to 0.10.0 on each broker and restart them one by one.
              @@ -46,7 +47,7 @@

              Upgrading from 0.8.x or 0.9.x to 0.10.

              Note: Bumping the protocol version and restarting can be done any time after the brokers were upgraded. It does not have to be immediately after. -

              Potential performance impact during upgrade to 0.10.0.0
              +

              Potential performance impact following upgrade to 0.10.0.0

              The message format in 0.10.0 includes a new timestamp field and uses relative offsets for compressed messages. The on disk message format can be configured through log.message.format.version in the server.properties file. @@ -54,9 +55,11 @@

              message formats before 0.10.0. In this case, the broker is able to convert messages from the 0.10.0 format to an earlier format before sending the response to the consumer on an older version. However, the broker can't use zero-copy transfer in this case. - To avoid such message conversion before consumers are upgraded to 0.10.0.0, one can set the message format to - e.g. 0.9.0 when upgrading the broker to 0.10.0.0. This way, the broker can still use zero-copy transfer to send the - data to the old consumers. Once most consumers are upgraded, one can change the message format to 0.10.0 on the broker. + Reports from the Kafka community on the performance impact have shown CPU utilization going from 20% before to 100% after an upgrade, which forced an immediate upgrade of all clients to bring performance back to normal. + + To avoid such message conversion before consumers are upgraded to 0.10.0.0, one can set log.message.format.version to 0.8.2 or 0.9.0 when upgrading the broker to 0.10.0.0. This way, the broker can still use zero-copy transfer to send the data to the old consumers. Once consumers are upgraded, one can change the message format to 0.10.0 on the broker and enjoy the new message format that includes new timestamp and improved compression. + + The conversion is supported to ensure compatibility and can be useful to support a few apps that have not updated to newer clients yet, but is impractical to support all consumer traffic on even an overprovisioned cluster. Therefore it is critical to avoid the message conversion as much as possible when brokers have been upgraded but the majority of clients have not.
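For illustration only, the settings discussed in this section might look like the following in server.properties during a rolling upgrade from 0.9.0 (the version values here are examples; substitute your cluster's current version):

	# Step 1: keep both versions pinned to the current version while the broker code is upgraded to 0.10.0.0
	inter.broker.protocol.version=0.9.0
	log.message.format.version=0.9.0

	# Step 2: once every broker runs 0.10.0.0, bump only the protocol version and restart the brokers one by one
	# inter.broker.protocol.version=0.10.0.0

	# Step 3: only after all consumers are on 0.10.0.0, bump the message format and restart the brokers one by one
	# log.message.format.version=0.10.0.0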

              For clients that are upgraded to 0.10.0.0, there is no performance impact. From ad1dab9c3d3ae14746ee5d94434ef98ef4889023 Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Thu, 28 Jul 2016 22:10:06 +0100 Subject: [PATCH 233/267] HOTFIX: Fix unstable Streams application reset integration test Author: Matthias J. Sax Reviewers: Eno Thereska , Ismael Juma Closes #1673 from mjsax/hotfix --- .../streams/integration/ResetIntegrationTest.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java index 28be86861f2e..8f7588fb49da 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java @@ -64,6 +64,7 @@ public class ResetIntegrationTest { private static final String INPUT_TOPIC = "inputTopic"; private static final String OUTPUT_TOPIC = "outputTopic"; private static final String OUTPUT_TOPIC_2 = "outputTopic2"; + private static final String OUTPUT_TOPIC_2_RERUN = "outputTopic2_rerun"; private static final String INTERMEDIATE_USER_TOPIC = "userTopic"; private static final long STREAMS_CONSUMER_TIMEOUT = 2000L; @@ -74,16 +75,17 @@ public static void startKafkaCluster() throws Exception { CLUSTER.createTopic(INPUT_TOPIC); CLUSTER.createTopic(OUTPUT_TOPIC); CLUSTER.createTopic(OUTPUT_TOPIC_2); + CLUSTER.createTopic(OUTPUT_TOPIC_2_RERUN); CLUSTER.createTopic(INTERMEDIATE_USER_TOPIC); } @Test - public void testReprocessingFromScratchAfterCleanUp() throws Exception { + public void testReprocessingFromScratchAfterReset() throws Exception { final Properties streamsConfiguration = prepareTest(); final Properties resultTopicConsumerConfig = prepareResultConsumer(); prepareInputData(); - final KStreamBuilder builder = setupTopology(); + final KStreamBuilder builder = setupTopology(OUTPUT_TOPIC_2); // RUN KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration); @@ -103,10 +105,10 @@ public void testReprocessingFromScratchAfterCleanUp() throws Exception { Utils.sleep(CLEANUP_CONSUMER_TIMEOUT); // RE-RUN - streams = new KafkaStreams(setupTopology(), streamsConfiguration); + streams = new KafkaStreams(setupTopology(OUTPUT_TOPIC_2_RERUN), streamsConfiguration); streams.start(); final List> resultRerun = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC, 10); - final KeyValue resultRerun2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC_2, 1).get(0); + final KeyValue resultRerun2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC_2_RERUN, 1).get(0); streams.close(); assertThat(resultRerun, equalTo(result)); @@ -163,7 +165,7 @@ private void prepareInputData() throws Exception { IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(new KeyValue<>(1L, "jjj")), producerConfig, 64L); } - private KStreamBuilder setupTopology() { + private KStreamBuilder setupTopology(final String outputTopic2) { final KStreamBuilder builder = new KStreamBuilder(); final KStream input = builder.stream(INPUT_TOPIC); @@ -199,7 +201,7 @@ public KeyValue apply(final Windowed key, final Long value) { return new KeyValue<>(key.window().start() + key.window().end(), value); } }); - 
windowedCounts.to(Serdes.Long(), Serdes.Long(), OUTPUT_TOPIC_2); + windowedCounts.to(Serdes.Long(), Serdes.Long(), outputTopic2); return builder; } @@ -227,6 +229,7 @@ private void assertInternalTopicsGotDeleted() { expectedRemainingTopicsAfterCleanup.add(INTERMEDIATE_USER_TOPIC); expectedRemainingTopicsAfterCleanup.add(OUTPUT_TOPIC); expectedRemainingTopicsAfterCleanup.add(OUTPUT_TOPIC_2); + expectedRemainingTopicsAfterCleanup.add(OUTPUT_TOPIC_2_RERUN); expectedRemainingTopicsAfterCleanup.add("__consumer_offsets"); Set allTopics; From ab242337a8fa6fedf632edfc47cb9425a51d8bf3 Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Fri, 29 Jul 2016 17:55:57 +0100 Subject: [PATCH 234/267] MINOR: Replace reference to HoppingWindows in streams.html HoppingWindows was removed prior to the 0.10.0 release. I've updated the doc to refer to the correct TimeWindows Author: Damian Guy Reviewers: Eno Thereska , Ismael Juma Closes #1679 from dguy/0.10.0 --- docs/streams.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/streams.html b/docs/streams.html index 91fda368ae6d..82319753bd98 100644 --- a/docs/streams.html +++ b/docs/streams.html @@ -301,7 +301,7 @@

              Transform a stre KTable, Long> counts = source1.aggregateByKey( () -> 0L, // initial value (aggKey, value, aggregate) -> aggregate + 1L, // aggregating value - HoppingWindows.of("counts").with(5000L).every(1000L), // intervals in milliseconds + TimeWindows.of("counts",5000L).advanceBy(1000L), // intervals in milliseconds ); KStream joined = source1.leftJoin(source2, From 77ced78f1bed75649c90eb659a055d00c2011a17 Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Fri, 29 Jul 2016 21:03:22 +0100 Subject: [PATCH 235/267] HOTFIX: Non-unique state.dirs in integration tests causing build to hang Three Streams Integration tests were using the same directory for the state.dir config. This was causing the build to hang when run in parallel mode Author: Damian Guy Reviewers: Eno Thereska , Ismael Juma Closes #1682 from dguy/fix-state-dir --- .../kafka/streams/integration/InternalTopicIntegrationTest.java | 2 +- .../apache/kafka/streams/integration/JoinIntegrationTest.java | 2 +- .../kafka/streams/integration/WordCountIntegrationTest.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java index e431b57ad8d3..f5536b494f62 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java @@ -122,7 +122,7 @@ public void shouldCompactTopicsForStateChangelogs() throws Exception { streamsConfiguration.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, CLUSTER.zKConnectString()); streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); - streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/ks-internal-topic-test"); KStreamBuilder builder = new KStreamBuilder(); diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java index 4f318ec37392..44244b495522 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinIntegrationTest.java @@ -146,7 +146,7 @@ public void shouldCountClicksPerRegion() throws Exception { // StreamsConfig configuration (so we can retrieve whatever state directory Streams came up // with automatically) we don't need to set this anymore and can update `purgeLocalStreamsState` // accordingly. 
- streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/ks-join-test"); // Remove any state from previous test runs IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration); diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java index c86409a97060..fe72110b2cc4 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/WordCountIntegrationTest.java @@ -88,7 +88,7 @@ public void shouldCountWords() throws Exception { // StreamsConfig configuration (so we can retrieve whatever state directory Streams came up // with automatically) we don't need to set this anymore and can update `purgeLocalStreamsState` // accordingly. - streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/kafka-streams"); + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, "/tmp/ks-word-count-test"); KStreamBuilder builder = new KStreamBuilder(); From 44ad3ec77364fab869cb5e01a182341bd0ae2698 Mon Sep 17 00:00:00 2001 From: Manikumar Reddy O Date: Sun, 31 Jul 2016 10:01:45 +0100 Subject: [PATCH 236/267] KAFKA-3950; Only throw authorization exception if pattern subscription matches topic Author: Manikumar Reddy O Author: Ismael Juma Reviewers: Ashish Singh , Sriharsha Chintalapani , Jason Gustafson , Ismael Juma Closes #1687 from omkreddy/KAFKA-3950 --- .../internals/ConsumerCoordinator.java | 22 ++++- .../kafka/api/AuthorizerIntegrationTest.scala | 98 ++++++++++++++++++- 2 files changed, 110 insertions(+), 10 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index 2880efce5342..59f3250e0ccf 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -146,20 +146,27 @@ private void addMetadataListener() { this.metadata.addListener(new Metadata.Listener() { @Override public void onMetadataUpdate(Cluster cluster) { - // if we encounter any unauthorized topics, raise an exception to the user - if (!cluster.unauthorizedTopics().isEmpty()) - throw new TopicAuthorizationException(new HashSet<>(cluster.unauthorizedTopics())); if (subscriptions.hasPatternSubscription()) { + + Set unauthorizedTopics = new HashSet(); + for (String topic : cluster.unauthorizedTopics()) { + if (filterTopic(topic)) + unauthorizedTopics.add(topic); + } + if (!unauthorizedTopics.isEmpty()) + throw new TopicAuthorizationException(unauthorizedTopics); + final List topicsToSubscribe = new ArrayList<>(); for (String topic : cluster.topics()) - if (subscriptions.getSubscribedPattern().matcher(topic).matches() && - !(excludeInternalTopics && TopicConstants.INTERNAL_TOPICS.contains(topic))) + if (filterTopic(topic)) topicsToSubscribe.add(topic); subscriptions.changeSubscription(topicsToSubscribe); metadata.setTopics(subscriptions.groupSubscription()); + } else if (!cluster.unauthorizedTopics().isEmpty()) { + throw new TopicAuthorizationException(new HashSet<>(cluster.unauthorizedTopics())); } // check if there are any changes to the metadata which should trigger a rebalance @@ -175,6 +182,11 @@ public void 
onMetadataUpdate(Cluster cluster) { }); } + private boolean filterTopic(String topic) { + return subscriptions.getSubscribedPattern().matcher(topic).matches() && + !(excludeInternalTopics && TopicConstants.INTERNAL_TOPICS.contains(topic)); + } + private PartitionAssignor lookupAssignor(String name) { for (PartitionAssignor assignor : this.assignors) { if (assignor.name().equals(name)) diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala index 60eb74cf2dd0..59ead2fe2ec8 100644 --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala @@ -16,6 +16,7 @@ import java.io.{DataInputStream, DataOutputStream} import java.net.Socket import java.nio.ByteBuffer import java.util.concurrent.ExecutionException +import java.util.regex.Pattern import java.util.{ArrayList, Collections, Properties} import kafka.cluster.EndPoint @@ -25,7 +26,8 @@ import kafka.integration.KafkaServerTestHarness import kafka.security.auth._ import kafka.server.KafkaConfig import kafka.utils.TestUtils -import org.apache.kafka.clients.consumer.{Consumer, ConsumerRecord, KafkaConsumer, OffsetAndMetadata} +import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener +import org.apache.kafka.clients.consumer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import org.apache.kafka.common.errors._ import org.apache.kafka.common.protocol.{ApiKeys, Errors, SecurityProtocol} @@ -42,6 +44,7 @@ import org.apache.kafka.common.internals.TopicConstants class AuthorizerIntegrationTest extends KafkaServerTestHarness { val topic = "topic" + val topicPattern = "topic.*" val part = 0 val brokerId: Integer = 0 val correlationId = 0 @@ -353,7 +356,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { consumeRecords(this.consumers.head) Assert.fail("should have thrown exception") } catch { - case e: TopicAuthorizationException => assertEquals(Collections.singleton(topic), e.unauthorizedTopics()); + case e: TopicAuthorizationException => assertEquals(Collections.singleton(topic), e.unauthorizedTopics()) } } @@ -370,7 +373,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { consumeRecords(this.consumers.head) Assert.fail("should have thrown exception") } catch { - case e: TopicAuthorizationException => assertEquals(Collections.singleton(topic), e.unauthorizedTopics()); + case e: TopicAuthorizationException => assertEquals(Collections.singleton(topic), e.unauthorizedTopics()) } } @@ -388,7 +391,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { Assert.fail("should have thrown exception") } catch { case e: TopicAuthorizationException => - assertEquals(Collections.singleton(topic), e.unauthorizedTopics()); + assertEquals(Collections.singleton(topic), e.unauthorizedTopics()) } } @@ -404,6 +407,91 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { consumeRecords(this.consumers.head) } + @Test + def testPatternSubscriptionWithNoTopicAccess() { + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) + sendRecords(1, tp) + removeAllAcls() + + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), groupResource) + try { + this.consumers.head.subscribe(Pattern.compile(topicPattern), new NoOpConsumerRebalanceListener) + this.consumers.head.poll(50) + 
Assert.fail("should have thrown exception") + } catch { + case e: TopicAuthorizationException => assertEquals(Collections.singleton(topic), e.unauthorizedTopics()) + } + } + + @Test + def testPatternSubscriptionWithTopicAndGroupRead() { + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) + sendRecords(1, tp) + + //create a unmatched topic + val unmatchedTopic = "unmatched" + TestUtils.createTopic(zkUtils, unmatchedTopic, 1, 1, this.servers) + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), new Resource(Topic, unmatchedTopic)) + sendRecords(1, new TopicPartition(unmatchedTopic, part)) + removeAllAcls() + + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), topicResource) + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), groupResource) + val consumer = consumers.head + consumer.subscribe(Pattern.compile(topicPattern), new NoOpConsumerRebalanceListener) + consumeRecords(consumer) + + // set the subscription pattern to an internal topic that the consumer has no read permission for, but since + // `exclude.internal.topics` is true by default, the subscription should be empty and no authorization exception + // should be thrown + consumer.subscribe(Pattern.compile(TopicConstants.GROUP_METADATA_TOPIC_NAME), new NoOpConsumerRebalanceListener) + assertTrue(consumer.poll(50).isEmpty) + } + + + @Test + def testPatternSubscriptionMatchingInternalTopicWithNoPermission() { + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) + sendRecords(1, tp) + removeAllAcls() + + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), topicResource) + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), groupResource) + + val consumerConfig = new Properties + consumerConfig.put(ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_CONFIG, "false") + val consumer = TestUtils.createNewConsumer(TestUtils.getBrokerListStrFromServers(servers), groupId = group, + securityProtocol = SecurityProtocol.PLAINTEXT, props = Some(consumerConfig)) + try { + consumer.subscribe(Pattern.compile(".*"), new NoOpConsumerRebalanceListener) + consumeRecords(consumer) + fail("should have thrown exception") + } + catch { + case e: TopicAuthorizationException => + assertEquals(Collections.singleton(TopicConstants.GROUP_METADATA_TOPIC_NAME), e.unauthorizedTopics()) + } finally consumer.close() + } + + @Test + def testPatternSubscriptionNotMatchingInternalTopic() { + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) + sendRecords(1, tp) + removeAllAcls() + + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), topicResource) + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), groupResource) + + val consumerConfig = new Properties + consumerConfig.put(ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_CONFIG, "false") + val consumer = TestUtils.createNewConsumer(TestUtils.getBrokerListStrFromServers(servers), groupId = group, + securityProtocol = SecurityProtocol.PLAINTEXT, props = Some(consumerConfig)) + try { + consumer.subscribe(Pattern.compile(topicPattern), new NoOpConsumerRebalanceListener) + consumeRecords(consumer) + } finally consumer.close() + } + @Test def testCreatePermissionNeededToReadFromNonExistentTopic() { val newTopic = "newTopic" @@ -418,7 
+506,7 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { Assert.fail("should have thrown exception") } catch { case e: TopicAuthorizationException => - assertEquals(Collections.singleton(newTopic), e.unauthorizedTopics()); + assertEquals(Collections.singleton(newTopic), e.unauthorizedTopics()) } addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), newTopicResource) From f2405a73ea2dd4b636832b7f8729fb06a04de1d5 Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Mon, 1 Aug 2016 20:12:22 -0700 Subject: [PATCH 237/267] KAFKA-4008: Module "tools" should not be dependent on "core" moved streams application reset tool from tools to core Author: Matthias J. Sax Reviewers: Ismael Juma , Damian Guy , Guozhang Wang , Ewen Cheslack-Postava Closes #1685 from mjsax/moveResetTool --- bin/kafka-streams-application-reset.sh | 2 +- build.gradle | 2 -- checkstyle/import-control-core.xml | 2 ++ checkstyle/import-control.xml | 4 +--- .../scala}/kafka/tools/StreamsResetter.java | 24 ++++++++++++------- .../integration/ResetIntegrationTest.java | 2 +- 6 files changed, 21 insertions(+), 15 deletions(-) rename {tools/src/main/java/org/apache => core/src/main/scala}/kafka/tools/StreamsResetter.java (92%) diff --git a/bin/kafka-streams-application-reset.sh b/bin/kafka-streams-application-reset.sh index 26ab7667137f..336373254004 100755 --- a/bin/kafka-streams-application-reset.sh +++ b/bin/kafka-streams-application-reset.sh @@ -18,4 +18,4 @@ if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then export KAFKA_HEAP_OPTS="-Xmx512M" fi -exec $(dirname $0)/kafka-run-class.sh org.apache.kafka.tools.StreamsResetter "$@" +exec $(dirname $0)/kafka-run-class.sh kafka.tools.StreamsResetter "$@" diff --git a/build.gradle b/build.gradle index e2f4da6ee56f..36647b3468dc 100644 --- a/build.gradle +++ b/build.gradle @@ -634,7 +634,6 @@ project(':tools') { archivesBaseName = "kafka-tools" dependencies { - compile project(':core') compile project(':clients') compile project(':log4j-appender') compile libs.argparse4j @@ -683,7 +682,6 @@ project(':streams') { testCompile project(':clients').sourceSets.test.output testCompile project(':core') testCompile project(':core').sourceSets.test.output - testCompile project(':tools') testCompile libs.junit testRuntime libs.slf4jlog4j diff --git a/checkstyle/import-control-core.xml b/checkstyle/import-control-core.xml index d53e9e873444..5714bfd1b8f2 100644 --- a/checkstyle/import-control-core.xml +++ b/checkstyle/import-control-core.xml @@ -53,10 +53,12 @@ + + diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml index 1052d8e43ed7..632b516fbf28 100644 --- a/checkstyle/import-control.xml +++ b/checkstyle/import-control.xml @@ -123,8 +123,6 @@ - - @@ -144,6 +142,7 @@ + @@ -151,7 +150,6 @@ - diff --git a/tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java b/core/src/main/scala/kafka/tools/StreamsResetter.java similarity index 92% rename from tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java rename to core/src/main/scala/kafka/tools/StreamsResetter.java index 734c15b0f278..8e463d125498 100644 --- a/tools/src/main/java/org/apache/kafka/tools/StreamsResetter.java +++ b/core/src/main/scala/kafka/tools/StreamsResetter.java @@ -1,16 +1,20 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE - * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file - * to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at *

              * http://www.apache.org/licenses/LICENSE-2.0 *

              - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.kafka.tools; +package kafka.tools; import joptsimple.OptionException; import joptsimple.OptionParser; @@ -21,6 +25,7 @@ import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.annotation.InterfaceStability; import org.apache.kafka.common.security.JaasUtils; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -34,6 +39,8 @@ /** * {@link StreamsResetter} resets the processing state of a Kafka Streams application so that, for example, you can reprocess its input from scratch. *

              + * This class is not part of public API. For backward compatibility, use the provided script in "bin/" instead of calling this class directly from your code. + *

              * Resetting the processing state of an application includes the following actions: *

                *
              1. setting the application's consumer offsets for input and internal topics to zero
              2. @@ -50,6 +57,7 @@ * User output topics will not be deleted or modified by this tool. * If downstream applications consume intermediate or output topics, it is the user's responsibility to adjust those applications manually if required. */ +@InterfaceStability.Unstable public class StreamsResetter { private static final int EXIT_CODE_SUCCESS = 0; private static final int EXIT_CODE_ERROR = 1; diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java index 8f7588fb49da..8dd1f098dd7c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java @@ -16,6 +16,7 @@ */ package org.apache.kafka.streams.integration; +import kafka.tools.StreamsResetter; import kafka.utils.ZkUtils; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; @@ -37,7 +38,6 @@ import org.apache.kafka.streams.kstream.TimeWindows; import org.apache.kafka.streams.kstream.Windowed; import org.apache.kafka.test.TestUtils; -import org.apache.kafka.tools.StreamsResetter; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.ClassRule; From ce34614a43fb1f43ef6b5660fb37f7a0598d177a Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Tue, 2 Aug 2016 12:41:20 +0100 Subject: [PATCH 238/267] HOTFIX: Start embedded kafka in KafkaStreamsTest to avoid hanging The KafkaStreamsTest can occasionally hang if the test doesn't run fast enough. This is due to there being no brokers available on the broker.urls provided to the StreamsConfig. The KafkaConsumer does a poll and blocks causing the test to never complete. 
Author: Damian Guy Reviewers: Ismael Juma Closes #1693 from dguy/kafka-streams-test --- .../kafka/streams/KafkaStreamsTest.java | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java index af7e681b0ddc..f8293b8cf9e6 100644 --- a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java @@ -17,10 +17,12 @@ package org.apache.kafka.streams; +import org.apache.kafka.streams.integration.utils.EmbeddedSingleNodeKafkaCluster; import org.apache.kafka.streams.kstream.KStreamBuilder; import org.apache.kafka.test.MockMetricsReporter; import org.apache.kafka.test.TestUtils; import org.junit.Assert; +import org.junit.ClassRule; import org.junit.Test; import java.io.File; @@ -31,11 +33,16 @@ public class KafkaStreamsTest { + // We need this to avoid the KafkaConsumer hanging on poll (this may occur if the test doesn't complete + // quick enough + @ClassRule + public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); + @Test public void testStartAndClose() throws Exception { final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testStartAndClose"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); final int oldInitCount = MockMetricsReporter.INIT_COUNT.get(); @@ -58,7 +65,7 @@ public void testStartAndClose() throws Exception { public void testCloseIsIdempotent() throws Exception { final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCloseIsIdempotent"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); props.setProperty(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName()); final KStreamBuilder builder = new KStreamBuilder(); @@ -75,7 +82,7 @@ public void testCloseIsIdempotent() throws Exception { public void testCannotStartOnceClosed() throws Exception { final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartOnceClosed"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); final KStreamBuilder builder = new KStreamBuilder(); final KafkaStreams streams = new KafkaStreams(builder, props); @@ -95,7 +102,7 @@ public void testCannotStartOnceClosed() throws Exception { public void testCannotStartTwice() throws Exception { final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotStartTwice"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); final KStreamBuilder builder = new KStreamBuilder(); final KafkaStreams streams = new KafkaStreams(builder, props); @@ -115,7 +122,7 @@ public void testCannotStartTwice() throws Exception { public void testCleanup() throws Exception { final Properties props = new Properties(); 
props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testLocalCleanup"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); final KStreamBuilder builder = new KStreamBuilder(); final KafkaStreams streams = new KafkaStreams(builder, props); @@ -137,7 +144,7 @@ public void testCleanupIsolation() throws Exception { final File stateDirApp2 = new File(stateDir + File.separator + appId2); final Properties props = new Properties(); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir); assertFalse(stateDirApp1.exists()); @@ -164,7 +171,7 @@ public void testCleanupIsolation() throws Exception { public void testCannotCleanupWhileRunning() throws Exception { final Properties props = new Properties(); props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "testCannotCleanupWhileRunning"); - props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999"); + props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()); final KStreamBuilder builder = new KStreamBuilder(); final KafkaStreams streams = new KafkaStreams(builder, props); From 1b333c23f4159bfbd3b5cd986bc40c5cdf6ddae1 Mon Sep 17 00:00:00 2001 From: Ryan P Date: Wed, 3 Aug 2016 09:52:38 +0100 Subject: [PATCH 239/267] KAFKA-3667; Improve Section 7.2 Encryption and Authentication using SSL to include proper hostname verification configuration By default Kafka is configured to allow ssl communication without hostname verification. This docs has been amended to include instructions on how to set that up in the event clients would like to take a more conservative approach. Author: Ryan P Reviewers: Ewen Cheslack-Postava , Ismael Juma Closes #1384 from rnpridgeon/KAFKA-3667 (cherry picked from commit c89707f) --- docs/security.html | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/security.html b/docs/security.html index 2459f5493bb2..0bd5f87bffa8 100644 --- a/docs/security.html +++ b/docs/security.html @@ -45,8 +45,26 @@

                7.2 Encryption and Authentication
              3. keystore: the keystore file that stores the certificate. The keystore file contains the private key of the certificate; therefore, it needs to be kept safely.
              4. validity: the valid time of the certificate in days.
              - Ensure that common name (CN) matches exactly with the fully qualified domain name (FQDN) of the server. The client compares the CN with the DNS domain name to ensure that it is indeed connecting to the desired server, not the malicious one. - +
              + Note: By default the property ssl.endpoint.identification.algorithm is not defined, so hostname verification is not performed. In order to enable hostname verification, set the following property: + +
              	ssl.endpoint.identification.algorithm=HTTPS 
              + + Once enabled, clients will verify the server's fully qualified domain name (FQDN) against one of the following two fields: +
                +
              1. Common Name (CN) +
              2. Subject Alternative Name (SAN) +
              +
+ Both fields are valid; however, RFC-2818 recommends the use of SAN. SAN is also more flexible, allowing for multiple DNS entries to be declared. Another advantage is that the CN can be set to a more meaningful value for authorization purposes. To add a SAN field, append the following argument -ext SAN=DNS:{FQDN} to the keytool command:
              +	keytool -keystore server.keystore.jks -alias localhost -validity {validity} -genkey -ext SAN=DNS:{FQDN}
              +	
              + The following command can be run afterwards to verify the contents of the generated certificate: +
              +	keytool -list -v -keystore server.keystore.jks
              +	
              +
            • Creating your own CA
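As an illustrative sketch (not taken from the patch above; the bootstrap server, truststore path and password are placeholders), a client enables hostname verification by setting the same property in its configuration:

import java.util.Properties;

public class SslClientConfigExample {
    // Builds client properties with broker hostname verification enabled.
    // The endpoint and truststore values are placeholders for this sketch.
    public static Properties sslClientProps() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "broker1.example.com:9093");
        props.put("security.protocol", "SSL");
        props.put("ssl.truststore.location", "/var/private/ssl/client.truststore.jks");
        props.put("ssl.truststore.password", "test1234");
        // Verifies the broker certificate's CN/SAN against the hostname being connected to.
        props.put("ssl.endpoint.identification.algorithm", "HTTPS");
        return props;
    }
}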

              After the first step, each machine in the cluster has a public-private key pair, and a certificate to identify the machine. The certificate, however, is unsigned, which means that an attacker can create such a certificate to pretend to be any machine.

              Therefore, it is important to prevent forged certificates by signing them for each machine in the cluster. A certificate authority (CA) is responsible for signing certificates. CA works likes a government that issues passports—the government stamps (signs) each passport so that the passport becomes difficult to forge. Other governments verify the stamps to ensure the passport is authentic. Similarly, the CA signs the certificates, and the cryptography guarantees that a signed certificate is computationally difficult to forge. Thus, as long as the CA is a genuine and trusted authority, the clients have high assurance that they are connecting to the authentic machines. From 127bb7fca2b55b13ceb3e94662b0be1f091b9da1 Mon Sep 17 00:00:00 2001 From: Jendrik Poloczek Date: Wed, 3 Aug 2016 14:01:48 -0700 Subject: [PATCH 240/267] MINOR: Fixed documentation for KStream left join KStream-KTable We are not joining in a window here. Author: Jendrik Poloczek Reviewers: Guozhang Wang Closes #1692 from jpzk/trunk (cherry picked from commit 7d513055671664433a783e4a62e9cbad6c44fc83) Signed-off-by: Guozhang Wang --- .../src/main/java/org/apache/kafka/streams/kstream/KStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java index a1ecfa44ac25..6cf0c4a6a001 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java @@ -453,7 +453,7 @@ KStream leftJoin( * @param the value type of the new stream * * @return a {@link KStream} that contains join-records for each key and values computed by the given {@link ValueJoiner}, - * one for each matched record-pair with the same key and within the joining window intervals + * one for each matched record-pair with the same key */ KStream leftJoin(KTable table, ValueJoiner joiner); From ac994dd7651937c19cfa50e7d4e3685b76eacac5 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Thu, 4 Aug 2016 02:03:57 +0100 Subject: [PATCH 241/267] KAFKA-4018; Streams causing older slf4j-log4j library to be packaged along with newer version This is a regression caused by 0bb1d3ae. After that commit, Streams no longer has a direct dependency on slf4j-log4j12, but zkclient has a dependency on an older version of slf4j-log4j12, so we get a transitive dependency on the older version. The fix is to simply exclude the undesired dependencies from the zkclient dependency. 
Author: Ismael Juma Reviewers: Guozhang Wang Closes #1704 from ijuma/kafka-4018-streams-duplicate-slf4j-log4j (cherry picked from commit 2e3722a234df5c0cd2e91c258cc0e4d825ee0626) Signed-off-by: Ismael Juma --- build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.gradle b/build.gradle index 36647b3468dc..b565200966fa 100644 --- a/build.gradle +++ b/build.gradle @@ -674,6 +674,9 @@ project(':streams') { compile libs.rocksDBJni // this dependency should be removed after KIP-4 compile (libs.zkclient) { + // users should be able to choose the logging implementation (and slf4j bridge) + exclude module: 'slf4j-log4j12' + exclude module: 'log4j' exclude module: 'jline' exclude module: 'netty' } From a7a17cdec9eaa6c57cc634da6a7c766d98d4160e Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Thu, 4 Aug 2016 13:05:59 +0100 Subject: [PATCH 242/267] Bump version to 0.10.0.1 --- gradle.properties | 2 +- tests/kafkatest/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gradle.properties b/gradle.properties index 77977a04820b..590ff5223145 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. -version=0.10.0.1-SNAPSHOT +version=0.10.0.1 scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index 5d697b24ebc3..8eaefb50acfd 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.1.dev0' +__version__ = '0.10.0.1' From 22f82abb5ac342ed94cacbe375495509f395b5cd Mon Sep 17 00:00:00 2001 From: Jan Filipiak Date: Fri, 5 Aug 2016 10:03:22 -0700 Subject: [PATCH 243/267] KAFKA-3817: KTableRepartitionMap publish old Change first, for non-count aggregates I affirm that the contribution is my original work and that I license the work to the project under the project's open source license. This cleans up misbehaviour that was introduce while fixing KAFKA-3817. It is impossible for a non-count aggregate to be build, when the addition happens before the removal. IMHO making sure that these details are correct is very important. This PR has local test errors. It somehow fails the ResetIntegrationTest. It doesn't quite appear to me why but it looks like this PR breaks it, especially because the error appears with the ordering of the events. Still I am unable to find where I could have broken it. Maybe not seems to fail on trunk aswell. 
Author: jfilipiak Reviewers: Guozhang Wang Closes #1705 from Kaiserchen/KAFKA-3817-preserve-order-for-aggreagators (cherry picked from 3dafb81da788294d4c2e9811f49437608e5b9ce8) --- .../internals/KTableRepartitionMap.java | 9 +++-- .../internals/KGroupedTableImplTest.java | 2 +- .../internals/KTableAggregateTest.java | 38 +++++++++++-------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java index ac7c00e1a73a..939a1dfaa0f2 100644 --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableRepartitionMap.java @@ -81,12 +81,15 @@ public void process(K key, Change change) { KeyValue oldPair = change.oldValue == null ? null : mapper.apply(key, change.oldValue); // if the selected repartition key or value is null, skip - if (newPair != null && newPair.key != null && newPair.value != null) { - context().forward(newPair.key, new Change<>(newPair.value, null)); - } + // forward oldPair first, to be consistent with reduce and aggregate if (oldPair != null && oldPair.key != null && oldPair.value != null) { context().forward(oldPair.key, new Change<>(null, oldPair.value)); } + + if (newPair != null && newPair.key != null && newPair.value != null) { + context().forward(newPair.key, new Change<>(newPair.value, null)); + } + } } diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedTableImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedTableImplTest.java index fc0451aea949..c47ae3f5d6da 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedTableImplTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedTableImplTest.java @@ -72,7 +72,7 @@ public KeyValue apply(final String key, final String value) { driver.process(input, "C", "yellow"); driver.process(input, "D", "green"); - final List expected = Arrays.asList("green:1", "green:2", "blue:1", "green:1", "yellow:1", "green:2"); + final List expected = Arrays.asList("green:1", "green:2", "green:1", "blue:1", "yellow:1", "green:2"); final List actual = processorSupplier.processed; assertEquals(expected, actual); } diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java index 75e007dc62d8..7928c38e6641 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableAggregateTest.java @@ -34,6 +34,7 @@ import org.junit.Before; import org.junit.Test; + import java.io.File; import java.io.IOException; @@ -74,8 +75,8 @@ public void testAggBasic() throws Exception { stringSerde, "topic1-Canonized"); - MockProcessorSupplier proc2 = new MockProcessorSupplier<>(); - table2.toStream().process(proc2); + MockProcessorSupplier proc = new MockProcessorSupplier<>(); + table2.toStream().process(proc); driver = new KStreamTestDriver(builder, stateDir); @@ -91,12 +92,12 @@ public void testAggBasic() throws Exception { assertEquals(Utils.mkList( "A:0+1", "B:0+2", - "A:0+1+3", "A:0+1+3-1", - "B:0+2+4", "B:0+2+4-2", + "A:0+1-1", "A:0+1-1+3", + "B:0+2-2", "B:0+2-2+4", "C:0+5", "D:0+6", - 
"B:0+2+4-2+7", "B:0+2+4-2+7-4", - "C:0+5+8", "C:0+5+8-5"), proc2.processed); + "B:0+2-2+4-4", "B:0+2-2+4-4+7", + "C:0+5-5", "C:0+5-5+8"), proc.processed); } @Test @@ -109,11 +110,11 @@ public void testAggRepartition() throws Exception { @Override public KeyValue apply(String key, String value) { if (key.equals("null")) { - return KeyValue.pair(null, value + "s"); + return KeyValue.pair(null, value); } else if (key.equals("NULL")) { return null; } else { - return KeyValue.pair(value, value + "s"); + return KeyValue.pair(value, value); } } }, @@ -126,11 +127,13 @@ public KeyValue apply(String key, String value) { stringSerde, "topic1-Canonized"); - MockProcessorSupplier proc2 = new MockProcessorSupplier<>(); - table2.toStream().process(proc2); + MockProcessorSupplier proc = new MockProcessorSupplier<>(); + table2.toStream().process(proc); driver = new KStreamTestDriver(builder, stateDir); + driver.process(topic1, "A", "1"); + driver.process(topic1, "A", null); driver.process(topic1, "A", "1"); driver.process(topic1, "B", "2"); driver.process(topic1, "null", "3"); @@ -139,11 +142,14 @@ public KeyValue apply(String key, String value) { driver.process(topic1, "B", "7"); assertEquals(Utils.mkList( - "1:0+1s", - "2:0+2s", - "4:0+4s", - "2:0+2s-2s", - "7:0+7s", - "4:0+4s-4s"), proc2.processed); + "1:0+1", + "1:0+1-1", + "1:0+1-1+1", + "2:0+2", + // noop + "2:0+2-2", "4:0+4", + // noop + "4:0+4-4", "7:0+7" + ), proc.processed); } } From aaa52996b9bc531fe8222a11fe732565c90388fb Mon Sep 17 00:00:00 2001 From: Kaufman Ng Date: Sun, 7 Aug 2016 14:29:03 -0700 Subject: [PATCH 244/267] KAFKA-3479: Add new consumer metrics documentation added new consumer metrics section refactored common metrics into new section updated TOC Author: Kaufman Ng Reviewers: Jason Gustafson , Ewen Cheslack-Postava Closes #1361 from coughman/KAFKA-3479-consumer-metrics-doc (cherry picked from commit 6b2564811a6137f1fe639dee236f2538bb7160b1) Signed-off-by: Ewen Cheslack-Postava --- .gitignore | 1 + docs/documentation.html | 5 + docs/ops.html | 399 +++++++++++++++++++++++++++++++--------- 3 files changed, 314 insertions(+), 91 deletions(-) diff --git a/.gitignore b/.gitignore index 73972e6a4f36..b54fcf39fbad 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ kafka.iws .vagrant Vagrantfile.local /logs +.DS_Store config/server-* config/zookeeper-* diff --git a/docs/documentation.html b/docs/documentation.html index 31dc03960c4f..c3425c0deb09 100644 --- a/docs/documentation.html +++ b/docs/documentation.html @@ -110,6 +110,11 @@

              Kafka 0.10.0 Documentation

            • Ext4 Notes
            • 6.6 Monitoring +
            • 6.7 ZooKeeper
              • Stable Version diff --git a/docs/ops.html b/docs/ops.html index d7b87e116ea1..98ce0c313492 100644 --- a/docs/ops.html +++ b/docs/ops.html @@ -689,6 +689,149 @@

                6.6 Monitoring

                +

                Common monitoring metrics for producer/consumer/connect

                + +The following metrics are available on producer/consumer/connector instances. For specific metrics, please see following sections. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Metric/Attribute name | Description | Mbean name
connection-close-rate | Connections closed per second in the window. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
connection-creation-rate | New connections established per second in the window. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
network-io-rate | The average number of network operations (reads or writes) on all connections per second. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
outgoing-byte-rate | The average number of outgoing bytes sent per second to all servers. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
request-rate | The average number of requests sent per second. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
request-size-avg | The average size of all requests in the window. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
request-size-max | The maximum size of any request sent in the window. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
incoming-byte-rate | Bytes/second read off all sockets. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
response-rate | Responses received per second. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
select-rate | Number of times the I/O layer checked for new I/O to perform per second. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
io-wait-time-ns-avg | The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
io-wait-ratio | The fraction of time the I/O thread spent waiting. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
io-time-ns-avg | The average length of time for I/O per select call in nanoseconds. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
io-ratio | The fraction of time the I/O thread spent doing I/O. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
connection-count | The current number of active connections. | kafka.[producer|consumer|connect]:type=[producer|consumer|connect]-metrics,client-id=([-.\w]+)
                + +
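                As an illustration of how these MBean name patterns resolve, the following minimal sketch reads two of the common attributes through the platform MBean server. It is an illustrative sketch only, and assumes a consumer with client.id=my-consumer running in the same JVM:

                import java.lang.management.ManagementFactory;
                import javax.management.MBeanServer;
                import javax.management.ObjectName;

                public class CommonClientMetricsJmx {
                    public static void main(String[] args) throws Exception {
                        // The bracketed alternatives in the table resolve to concrete values;
                        // here they are resolved for a consumer. "my-consumer" is an assumed client.id.
                        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
                        ObjectName name = new ObjectName("kafka.consumer:type=consumer-metrics,client-id=my-consumer");

                        // Attribute names match the "Metric/Attribute name" column above.
                        double connectionCount = (Double) server.getAttribute(name, "connection-count");
                        double ioRatio = (Double) server.getAttribute(name, "io-ratio");
                        System.out.println("connection-count=" + connectionCount + " io-ratio=" + ioRatio);
                    }
                }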

                Common Per-broker metrics for producer/consumer/connect

                + +The following metrics are available on producer/consumer/connector instances. For specific metrics, please see the following sections.
                +
                +<tr><th>Metric/Attribute name</th><th>Description</th><th>Mbean name</th></tr>
                +<tr><td>outgoing-byte-rate</td><td>The average number of outgoing bytes sent per second for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>request-rate</td><td>The average number of requests sent per second for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>request-size-avg</td><td>The average size of all requests in the window for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>request-size-max</td><td>The maximum size of any request sent in the window for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>incoming-byte-rate</td><td>The average number of responses received per second for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>request-latency-avg</td><td>The average request latency in ms for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>request-latency-max</td><td>The maximum request latency in ms for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>response-rate</td><td>Responses received per second for a node.</td><td>kafka.[producer|consumer|connect]:type=[consumer|producer|connect]-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +
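                Because the node id is embedded in the MBean name, the per-broker MBeans can be enumerated with a wildcard ObjectName query. A rough illustrative sketch, assuming a producer with client.id=my-producer running in the same JVM:

                import java.lang.management.ManagementFactory;
                import java.util.Set;
                import javax.management.MBeanServer;
                import javax.management.ObjectName;

                public class PerNodeMetricsJmx {
                    public static void main(String[] args) throws Exception {
                        MBeanServer server = ManagementFactory.getPlatformMBeanServer();
                        // One MBean is registered per broker connection; "my-producer" is an assumed client.id.
                        ObjectName pattern = new ObjectName("kafka.producer:type=producer-node-metrics,client-id=my-producer,node-id=*");
                        Set<ObjectName> names = server.queryNames(pattern, null);
                        for (ObjectName name : names) {
                            // May be NaN if no requests were sent to this node in the current window.
                            double latencyAvg = (Double) server.getAttribute(name, "request-latency-avg");
                            System.out.println(name.getKeyProperty("node-id") + " request-latency-avg=" + latencyAvg);
                        }
                    }
                }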

                New producer monitoring

                The following metrics are available on new producer instances. @@ -794,157 +937,231 @@

                New producer The age in seconds of the current producer metadata being used. kafka.producer:type=producer-metrics,client-id=([-.\w]+) + - connection-close-rate - Connections closed per second in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + record-send-rate + The average number of records sent per second for a topic. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+) - connection-creation-rate - New connections established per second in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + byte-rate + The average number of bytes sent per second for a topic. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+) - network-io-rate - The average number of network operations (reads or writes) on all connections per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + compression-rate + The average compression rate of record batches for a topic. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+) - outgoing-byte-rate - The average number of outgoing bytes sent per second to all servers. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + record-retry-rate + The average per-second number of retried record sends for a topic. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+) - request-rate - The average number of requests sent per second. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + record-error-rate + The average per-second number of record sends that resulted in errors for a topic. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+) - request-size-avg - The average size of all requests in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + produce-throttle-time-max + The maximum time in ms a request was throttled by a broker. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+) - request-size-max - The maximum size of any request sent in the window. - kafka.producer:type=producer-metrics,client-id=([-.\w]+) + produce-throttle-time-avg + The average time in ms a request was throttled by a broker. + kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+) + + + +

                New consumer monitoring

                + +The following metrics are available on new consumer instances. + +

                Consumer Group Metrics
                + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + + +
                -<tr><td>incoming-byte-rate</td><td>Bytes/second read off all sockets.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><th>Metric/Attribute name</th><th>Description</th><th>Mbean name</th></tr>
                -<tr><td>response-rate</td><td>Responses received sent per second.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>commit-latency-avg</td><td>The average time taken for a commit request</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>select-rate</td><td>Number of times the I/O layer checked for new I/O to perform per second.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>commit-latency-max</td><td>The max time taken for a commit request</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>io-wait-time-ns-avg</td><td>The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>commit-rate</td><td>The number of commit calls per second</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>io-wait-ratio</td><td>The fraction of time the I/O thread spent waiting.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>assigned-partitions</td><td>The number of partitions currently assigned to this consumer</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>io-time-ns-avg</td><td>The average length of time for I/O per select call in nanoseconds.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>heartbeat-response-time-max</td><td>The max time taken to receive a response to a heartbeat request</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>io-ratio</td><td>The fraction of time the I/O thread spent doing I/O.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>heartbeat-rate</td><td>The average number of heartbeats per second</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>connection-count</td><td>The current number of active connections.</td><td>kafka.producer:type=producer-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>join-time-avg</td><td>The average time taken for a group rejoin</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>outgoing-byte-rate</td><td>The average number of outgoing bytes sent per second for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>join-time-max</td><td>The max time taken for a group rejoin</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>request-rate</td><td>The average number of requests sent per second for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>join-rate</td><td>The number of group joins per second</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>request-size-avg</td><td>The average size of all requests in the window for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>sync-time-avg</td><td>The average time taken for a group sync</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>request-size-max</td><td>The maximum size of any request sent in the window for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>sync-time-max</td><td>The max time taken for a group sync</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>incoming-byte-rate</td><td>The average number of responses received per second for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>sync-rate</td><td>The number of group syncs per second</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>request-latency-avg</td><td>The average request latency in ms for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>last-heartbeat-seconds-ago</td><td>The number of seconds since the last coordinator heartbeat</td><td>kafka.consumer:type=consumer-coordinator-metrics,client-id=([-.\w]+)</td></tr>
                + +
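                The consumer group metrics above can also be read in-process, without JMX, from KafkaConsumer.metrics(). A minimal illustrative sketch (not an excerpt from this patch), assuming a locally reachable broker and a consumer that commits offsets for group my-group:

                import java.util.Map;
                import java.util.Properties;
                import org.apache.kafka.clients.consumer.KafkaConsumer;
                import org.apache.kafka.common.Metric;
                import org.apache.kafka.common.MetricName;
                import org.apache.kafka.common.serialization.ByteArrayDeserializer;

                public class CoordinatorMetricsInProcess {
                    public static void main(String[] args) {
                        Properties props = new Properties();
                        props.put("bootstrap.servers", "localhost:9092"); // assumed broker address
                        props.put("group.id", "my-group");                // assumed group id
                        KafkaConsumer<byte[], byte[]> consumer =
                            new KafkaConsumer<>(props, new ByteArrayDeserializer(), new ByteArrayDeserializer());

                        // The same values that back the consumer-coordinator-metrics MBean are exposed
                        // programmatically; the metric group name matches the MBean "type" tag.
                        Map<MetricName, ? extends Metric> metrics = consumer.metrics();
                        for (Metric metric : metrics.values()) {
                            MetricName name = metric.metricName();
                            if ("consumer-coordinator-metrics".equals(name.group()) && "commit-latency-avg".equals(name.name()))
                                System.out.println(name.name() + " = " + metric.value());
                        }
                        consumer.close();
                    }
                }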
                Consumer Fetch Metrics
                + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + -
                -<tr><td>request-latency-max</td><td>The maximum request latency in ms for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><th>Metric/Attribute name</th><th>Description</th><th>Mbean name</th></tr>
                -<tr><td>response-rate</td><td>Responses received sent per second for a node.</td><td>kafka.producer:type=producer-node-metrics,client-id=([-.\w]+),node-id=([0-9]+)</td></tr>
                +<tr><td>fetch-size-avg</td><td>The average number of bytes fetched per request</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>record-send-rate</td><td>The average number of records sent per second for a topic.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>fetch-size-max</td><td>The maximum number of bytes fetched per request</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>byte-rate</td><td>The average number of bytes sent per second for a topic.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>bytes-consumed-rate</td><td>The average number of bytes consumed per second</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>compression-rate</td><td>The average compression rate of record batches for a topic.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>records-per-request-avg</td><td>The average number of records in each request</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>record-retry-rate</td><td>The average per-second number of retried record sends for a topic.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>records-consumed-rate</td><td>The average number of records consumed per second</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>record-error-rate</td><td>The average per-second number of record sends that resulted in errors for a topic.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>fetch-latency-avg</td><td>The average time taken for a fetch request</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>produce-throttle-time-max</td><td>The maximum time in ms a request was throttled by a broker.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>fetch-latency-max</td><td>The max time taken for a fetch request</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                -<tr><td>produce-throttle-time-avg</td><td>The average time in ms a request was throttled by a broker.</td><td>kafka.producer:type=producer-topic-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>fetch-rate</td><td>The number of fetch requests per second</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>records-lag-max</td><td>The maximum lag in terms of number of records for any partition in this window</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>fetch-throttle-time-avg</td><td>The average throttle time in ms</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
                +<tr><td>fetch-throttle-time-max</td><td>The maximum throttle time in ms</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+)</td></tr>
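                For a consumer running in a separate process, the same fetch metrics can be polled over a remote JMX connection. An illustrative sketch only, assuming the consumer JVM exposes JMX on localhost:9999 without authentication and uses client.id=my-consumer:

                import javax.management.MBeanServerConnection;
                import javax.management.ObjectName;
                import javax.management.remote.JMXConnector;
                import javax.management.remote.JMXConnectorFactory;
                import javax.management.remote.JMXServiceURL;

                public class RemoteFetchMetricsPoller {
                    public static void main(String[] args) throws Exception {
                        // Assumes the consumer was started with the usual com.sun.management.jmxremote.* properties.
                        JMXServiceURL url = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:9999/jmxrmi");
                        try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
                            MBeanServerConnection connection = connector.getMBeanServerConnection();
                            ObjectName fetchManager = new ObjectName(
                                    "kafka.consumer:type=consumer-fetch-manager-metrics,client-id=my-consumer");
                            // records-lag-max is a common alerting signal: the worst per-partition record lag in the window.
                            double maxLag = (Double) connection.getAttribute(fetchManager, "records-lag-max");
                            System.out.println("records-lag-max=" + maxLag);
                        }
                    }
                }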
                Topic-level Fetch Metrics
                + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                +<tr><th>Metric/Attribute name</th><th>Description</th><th>Mbean name</th></tr>
                +<tr><td>fetch-size-avg</td><td>The average number of bytes fetched per request for a specific topic.</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>fetch-size-max</td><td>The maximum number of bytes fetched per request for a specific topic.</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>bytes-consumed-rate</td><td>The average number of bytes consumed per second for a specific topic.</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>records-per-request-avg</td><td>The average number of records in each request for a specific topic.</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                +<tr><td>records-consumed-rate</td><td>The average number of records consumed per second for a specific topic.</td><td>kafka.consumer:type=consumer-fetch-manager-metrics,client-id=([-.\w]+),topic=([-.\w]+)</td></tr>
                + +
                Others
                We recommend monitoring GC time and other stats and various server stats such as CPU utilization, I/O service time, etc. From 5852bb0e4bf6089fb43f54d81aa4493f7cf58b85 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 8 Aug 2016 15:48:32 +0100 Subject: [PATCH 245/267] Bump version to 0.10.0.2-SNAPSHOT --- gradle.properties | 2 +- tests/kafkatest/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gradle.properties b/gradle.properties index 590ff5223145..667b3fe34152 100644 --- a/gradle.properties +++ b/gradle.properties @@ -16,7 +16,7 @@ group=org.apache.kafka # NOTE: When you change this version number, you should also make sure to update # the version numbers in tests/kafkatest/__init__.py and kafka-merge-pr.py. -version=0.10.0.1 +version=0.10.0.2-SNAPSHOT scalaVersion=2.10.6 task=build org.gradle.jvmargs=-XX:MaxPermSize=512m -Xmx1024m -Xss2m diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py index 8eaefb50acfd..32fb68abdce0 100644 --- a/tests/kafkatest/__init__.py +++ b/tests/kafkatest/__init__.py @@ -23,4 +23,4 @@ # Instead, in trunk, the version should have a suffix of the form ".devN" # # For example, when Kafka is at version 0.9.0.0-SNAPSHOT, this should be something like "0.9.0.0.dev0" -__version__ = '0.10.0.1' +__version__ = '0.10.0.2.dev0' From aef20e8353df22894bb43474cb8e31d1afd5ce6b Mon Sep 17 00:00:00 2001 From: dan norwood Date: Tue, 9 Aug 2016 01:34:04 +0100 Subject: [PATCH 246/267] MINOR: Add `fetchTopicMetadataFromZk` overload that takes `SecurityProtocol` parameter ijuma Author: dan norwood Reviewers: Ismael Juma Closes #1713 from norwood/add-security-protocol-option-for-fetch (cherry picked from commit 7b7f57df6229c02482e5176f2f265e7a890de2a3) Signed-off-by: Ismael Juma --- core/src/main/scala/kafka/admin/AdminUtils.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/admin/AdminUtils.scala b/core/src/main/scala/kafka/admin/AdminUtils.scala index a8a282e2c13a..963d7691f742 100644 --- a/core/src/main/scala/kafka/admin/AdminUtils.scala +++ b/core/src/main/scala/kafka/admin/AdminUtils.scala @@ -554,9 +554,12 @@ object AdminUtils extends Logging { def fetchTopicMetadataFromZk(topic: String, zkUtils: ZkUtils): MetadataResponse.TopicMetadata = fetchTopicMetadataFromZk(topic, zkUtils, new mutable.HashMap[Int, Broker]) - def fetchTopicMetadataFromZk(topics: Set[String], zkUtils: ZkUtils): Set[MetadataResponse.TopicMetadata] = { + def fetchTopicMetadataFromZk(topics: Set[String], zkUtils: ZkUtils): Set[MetadataResponse.TopicMetadata] = + fetchTopicMetadataFromZk(topics, zkUtils, SecurityProtocol.PLAINTEXT) + + def fetchTopicMetadataFromZk(topics: Set[String], zkUtils: ZkUtils, protocol: SecurityProtocol): Set[MetadataResponse.TopicMetadata] = { val cachedBrokerInfo = new mutable.HashMap[Int, Broker]() - topics.map(topic => fetchTopicMetadataFromZk(topic, zkUtils, cachedBrokerInfo)) + topics.map(topic => fetchTopicMetadataFromZk(topic, zkUtils, cachedBrokerInfo, protocol)) } private def fetchTopicMetadataFromZk(topic: String, From 55af7ec6b5a795997a5afc2643b45bd2ac243344 Mon Sep 17 00:00:00 2001 From: Jason Gustafson Date: Fri, 12 Aug 2016 23:26:41 +0100 Subject: [PATCH 247/267] KAFKA-4034; Avoid unnecessary consumer coordinator lookup Author: Jason Gustafson Reviewers: Guozhang Wang , Ismael Juma Closes #1720 from hachikuji/KAFKA-4034 --- .../kafka/clients/consumer/KafkaConsumer.java | 22 +++++++--- .../RetriableCommitFailedException.java | 4 ++ 
.../internals/AbstractCoordinator.java | 25 ++++++++++- .../internals/ConsumerCoordinator.java | 38 +++++++++++++--- .../clients/consumer/internals/Fetcher.java | 13 +++++- .../consumer/internals/SubscriptionState.java | 2 +- .../clients/consumer/KafkaConsumerTest.java | 44 ++++++++++++++++++- .../kafka/api/AuthorizerIntegrationTest.scala | 18 +++++++- 8 files changed, 147 insertions(+), 19 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java index 9ee6c95f4abc..c97da99e4ece 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java @@ -970,9 +970,6 @@ public ConsumerRecords poll(long timeout) { * @return The fetched records (may be empty) */ private Map>> pollOnce(long timeout) { - // TODO: Sub-requests should take into account the poll timeout (KAFKA-1894) - coordinator.ensureCoordinatorReady(); - // ensure we have partitions assigned if we expect to if (subscriptions.partitionsAutoAssigned()) coordinator.ensurePartitionAssignment(); @@ -1402,11 +1399,22 @@ private void close(boolean swallowException) { * defined */ private void updateFetchPositions(Set partitions) { - // refresh commits for all assigned partitions - coordinator.refreshCommittedOffsetsIfNeeded(); + // lookup any positions for partitions which are awaiting reset (which may be the + // case if the user called seekToBeginning or seekToEnd. We do this check first to + // avoid an unnecessary lookup of committed offsets (which typically occurs when + // the user is manually assigning partitions and managing their own offsets). + fetcher.resetOffsetsIfNeeded(partitions); + + if (!subscriptions.hasAllFetchPositions()) { + // if we still don't have offsets for all partitions, then we should either seek + // to the last committed position or reset using the auto reset policy - // then do any offset lookups in case some positions are not known - fetcher.updateFetchPositions(partitions); + // first refresh commits for all assigned partitions + coordinator.refreshCommittedOffsetsIfNeeded(); + + // then do any offset lookups in case some positions are not known + fetcher.updateFetchPositions(partitions); + } } /* diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java b/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java index 459a8acbb553..1c1a2f513cc5 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/RetriableCommitFailedException.java @@ -22,6 +22,10 @@ public class RetriableCommitFailedException extends RetriableException { private static final long serialVersionUID = 1L; + public RetriableCommitFailedException(Throwable t) { + super("Offset commit failed with a retriable exception. 
You should retry committing offsets.", t); + } + public RetriableCommitFailedException(String message) { super(message); } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java index 6bb4406cdb36..e957856536e6 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java @@ -98,6 +98,8 @@ public abstract class AbstractCoordinator implements Closeable { protected String protocol; protected int generation; + private RequestFuture findCoordinatorFuture = null; + /** * Initialize the coordination manager. */ @@ -175,7 +177,7 @@ protected abstract void onJoinComplete(int generation, */ public void ensureCoordinatorReady() { while (coordinatorUnknown()) { - RequestFuture future = sendGroupCoordinatorRequest(); + RequestFuture future = lookupCoordinator(); client.poll(future); if (future.failed()) { @@ -189,8 +191,25 @@ public void ensureCoordinatorReady() { coordinatorDead(); time.sleep(retryBackoffMs); } + } + } + + protected RequestFuture lookupCoordinator() { + if (findCoordinatorFuture == null) { + findCoordinatorFuture = sendGroupCoordinatorRequest(); + findCoordinatorFuture.addListener(new RequestFutureListener() { + @Override + public void onSuccess(Void value) { + findCoordinatorFuture = null; + } + @Override + public void onFailure(RuntimeException e) { + findCoordinatorFuture = null; + } + }); } + return findCoordinatorFuture; } /** @@ -205,6 +224,10 @@ protected boolean needRejoin() { * Ensure that the group is active (i.e. joined and synced) */ public void ensureActiveGroup() { + // always ensure that the coordinator is ready because we may have been disconnected + // when sending heartbeats and does not necessarily require us to rejoin the group. + ensureCoordinatorReady(); + if (!needRejoin()) return; diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index 59f3250e0ccf..2a945e7bbdbd 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -379,7 +379,36 @@ public void close() { } - public void commitOffsetsAsync(final Map offsets, OffsetCommitCallback callback) { + public void commitOffsetsAsync(final Map offsets, final OffsetCommitCallback callback) { + if (!coordinatorUnknown()) { + doCommitOffsetsAsync(offsets, callback); + } else { + // we don't know the current coordinator, so try to find it and then send the commit + // or fail (we don't want recursive retries which can cause offset commits to arrive + // out of order). Note that there may be multiple offset commits chained to the same + // coordinator lookup request. This is fine because the listeners will be invoked in + // the same order that they were added. Note also that AbstractCoordinator prevents + // multiple concurrent coordinator lookup requests. 
+ lookupCoordinator().addListener(new RequestFutureListener() { + @Override + public void onSuccess(Void value) { + doCommitOffsetsAsync(offsets, callback); + } + + @Override + public void onFailure(RuntimeException e) { + callback.onComplete(offsets, new RetriableCommitFailedException(e)); + } + }); + } + + // ensure the commit has a chance to be transmitted (without blocking on its completion). + // Note that commits are treated as heartbeats by the coordinator, so there is no need to + // explicitly allow heartbeats through delayed task execution. + client.pollNoWakeup(); + } + + private void doCommitOffsetsAsync(final Map offsets, final OffsetCommitCallback callback) { this.subscriptions.needRefreshCommits(); RequestFuture future = sendOffsetCommitRequest(offsets); final OffsetCommitCallback cb = callback == null ? defaultOffsetCommitCallback : callback; @@ -394,17 +423,12 @@ public void onSuccess(Void value) { @Override public void onFailure(RuntimeException e) { if (e instanceof RetriableException) { - cb.onComplete(offsets, new RetriableCommitFailedException("Commit offsets failed with retriable exception. You should retry committing offsets.", e)); + cb.onComplete(offsets, new RetriableCommitFailedException(e)); } else { cb.onComplete(offsets, e); } } }); - - // ensure the commit has a chance to be transmitted (without blocking on its completion). - // Note that commits are treated as heartbeats by the coordinator, so there is no need to - // explicitly allow heartbeats through delayed task execution. - client.pollNoWakeup(); } /** diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java index cf2ebc31f170..fec9b6eb0e5d 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java @@ -154,6 +154,18 @@ public void onFailure(RuntimeException e) { } } + /** + * Lookup and set offsets for any partitions which are awaiting an explicit reset. + * @param partitions the partitions to reset + */ + public void resetOffsetsIfNeeded(Set partitions) { + for (TopicPartition tp : partitions) { + // TODO: If there are several offsets to reset, we could submit offset requests in parallel + if (subscriptions.isAssigned(tp) && subscriptions.isOffsetResetNeeded(tp)) + resetOffset(tp); + } + } + /** * Update the fetch positions for the provided partitions. 
* @param partitions the partitions to update positions for @@ -165,7 +177,6 @@ public void updateFetchPositions(Set partitions) { if (!subscriptions.isAssigned(tp) || subscriptions.isFetchable(tp)) continue; - // TODO: If there are several offsets to reset, we could submit offset requests in parallel if (subscriptions.isOffsetResetNeeded(tp)) { resetOffset(tp); } else if (subscriptions.committed(tp) == null) { diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java index 38660e1b9d3b..e9b2eb24ba3e 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java @@ -49,7 +49,7 @@ public class SubscriptionState { private enum SubscriptionType { NONE, AUTO_TOPICS, AUTO_PATTERN, USER_ASSIGNED - }; + } /* the type of subscription */ private SubscriptionType subscriptionType; diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java index d846a69c933e..4d4211747db2 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java @@ -41,6 +41,7 @@ import org.apache.kafka.common.requests.GroupCoordinatorResponse; import org.apache.kafka.common.requests.HeartbeatResponse; import org.apache.kafka.common.requests.JoinGroupResponse; +import org.apache.kafka.common.requests.ListOffsetResponse; import org.apache.kafka.common.requests.OffsetCommitRequest; import org.apache.kafka.common.requests.OffsetCommitResponse; import org.apache.kafka.common.requests.OffsetFetchResponse; @@ -86,7 +87,7 @@ public void testConstructorClose() throws Exception { final int oldInitCount = MockMetricsReporter.INIT_COUNT.get(); final int oldCloseCount = MockMetricsReporter.CLOSE_COUNT.get(); try { - KafkaConsumer consumer = new KafkaConsumer( + KafkaConsumer consumer = new KafkaConsumer<>( props, new ByteArrayDeserializer(), new ByteArrayDeserializer()); } catch (KafkaException e) { assertEquals(oldInitCount + 1, MockMetricsReporter.INIT_COUNT.get()); @@ -324,6 +325,38 @@ public boolean matches(ClientRequest request) { assertTrue(heartbeatReceived.get()); } + @Test + public void verifyNoCoordinatorLookupForManualAssignmentWithSeek() { + String topic = "topic"; + final TopicPartition partition = new TopicPartition(topic, 0); + int sessionTimeoutMs = 3000; + int heartbeatIntervalMs = 2000; + int autoCommitIntervalMs = 1000; + + Time time = new MockTime(); + MockClient client = new MockClient(time); + Cluster cluster = TestUtils.singletonCluster(topic, 1); + Node node = cluster.nodes().get(0); + client.setNode(node); + Metadata metadata = new Metadata(0, Long.MAX_VALUE); + metadata.update(cluster, time.milliseconds()); + PartitionAssignor assignor = new RoundRobinAssignor(); + + final KafkaConsumer consumer = newConsumer(time, client, metadata, assignor, + sessionTimeoutMs, heartbeatIntervalMs, autoCommitIntervalMs); + consumer.assign(Arrays.asList(partition)); + consumer.seekToBeginning(Arrays.asList(partition)); + + // there shouldn't be any need to lookup the coordinator or fetch committed offsets. + // we just lookup the starting position and send the record fetch. 
+ client.prepareResponse(listOffsetsResponse(Collections.singletonMap(partition, 50L), Errors.NONE.code())); + client.prepareResponse(fetchResponse(partition, 50L, 5)); + + ConsumerRecords records = consumer.poll(0); + assertEquals(5, records.count()); + assertEquals(55L, consumer.position(partition)); + } + @Test public void testCommitsFetchedDuringAssign() { String topic = "topic"; @@ -539,6 +572,15 @@ private Struct offsetResponse(Map offsets, short error) { return new OffsetFetchResponse(partitionData).toStruct(); } + private Struct listOffsetsResponse(Map offsets, short error) { + Map partitionData = new HashMap<>(); + for (Map.Entry partitionOffset : offsets.entrySet()) { + partitionData.put(partitionOffset.getKey(), new ListOffsetResponse.PartitionData(error, + Collections.singletonList(partitionOffset.getValue()))); + } + return new ListOffsetResponse(partitionData).toStruct(); + } + private Struct fetchResponse(TopicPartition tp, long fetchOffset, int count) { MemoryRecords records = MemoryRecords.emptyRecords(ByteBuffer.allocate(1024), CompressionType.NONE); for (int i = 0; i < count; i++) diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala index 59ead2fe2ec8..380fa394d9eb 100644 --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala @@ -329,13 +329,15 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { } @Test - def testConsumeWithNoGroupAccess(): Unit = { + def testSimpleConsumeWithOffsetLookupAndNoGroupAccess(): Unit = { addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) sendRecords(1, tp) removeAllAcls() addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), topicResource) try { + // note this still depends on group access because we haven't set offsets explicitly, which means + // they will first be fetched from the consumer coordinator (which requires group access) this.consumers.head.assign(List(tp).asJava) consumeRecords(this.consumers.head) Assert.fail("should have thrown exception") @@ -344,6 +346,20 @@ class AuthorizerIntegrationTest extends KafkaServerTestHarness { } } + @Test + def testSimpleConsumeWithExplicitSeekAndNoGroupAccess(): Unit = { + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) + sendRecords(1, tp) + removeAllAcls() + + addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Read)), topicResource) + + // in this case, we do an explicit seek, so there should be no need to query the coordinator at all + this.consumers.head.assign(List(tp).asJava) + this.consumers.head.seekToBeginning(List(tp).asJava) + consumeRecords(this.consumers.head) + } + @Test def testConsumeWithNoTopicAccess() { addAndVerifyAcls(Set(new Acl(KafkaPrincipal.ANONYMOUS, Allow, Acl.WildCardHost, Write)), topicResource) From 801a706124af16f605abc6141f38f9eed916ffc2 Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Mon, 15 Aug 2016 23:04:40 -0700 Subject: [PATCH 248/267] MINOR: Add application id prefix for copartitionGroups in TopologyBuilder This is bugfix that is already in trunk but not backported to 0.10.0. 
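For context, a small illustrative sketch (not part of the patch itself) of the prefixing behaviour this change targets, using only APIs touched in the diff below; the topic name is hypothetical:

    import org.apache.kafka.streams.processor.TopologyBuilder;

    public class ApplicationIdPrefixSketch {
        public static void main(String[] args) {
            // KafkaStreams now calls setApplicationId() with the configured application.id;
            // calling it directly here only serves to show the prefixing.
            TopologyBuilder builder = new TopologyBuilder().setApplicationId("my-app");
            builder.addInternalTopic("counts-repartition");   // hypothetical internal topic name
            builder.addSource("source", "counts-repartition");

            // Internal topics are decorated with the application id, so this prints
            // [my-app-counts-repartition]; copartitionGroups() uses the same decorated names.
            System.out.println(builder.sourceTopics());
        }
    }
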
Author: Guozhang Wang Reviewers: Damian Guy , Ewen Cheslack-Postava Closes #1735 from guozhangwang/Kminor-topology-applicationID-0.10.0 --- .../apache/kafka/streams/KafkaStreams.java | 2 + .../streams/processor/TopologyBuilder.java | 79 ++++++++++++++----- .../internals/StreamPartitionAssignor.java | 2 +- .../processor/internals/StreamThread.java | 6 +- .../kstream/internals/KStreamImplTest.java | 2 +- .../processor/TopologyBuilderTest.java | 25 +++--- .../internals/ProcessorTopologyTest.java | 2 +- .../StreamPartitionAssignorTest.java | 12 +-- .../processor/internals/StreamThreadTest.java | 6 +- .../apache/kafka/test/KStreamTestDriver.java | 2 +- .../test/ProcessorTopologyTestDriver.java | 2 +- 11 files changed, 91 insertions(+), 49 deletions(-) diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java index 17c760e33c7e..3a311a8f2810 100644 --- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java +++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java @@ -141,6 +141,8 @@ public KafkaStreams(final TopologyBuilder builder, final StreamsConfig config, f // The application ID is a required config and hence should always have value final String applicationId = config.getString(StreamsConfig.APPLICATION_ID_CONFIG); + builder.setApplicationId(applicationId); + String clientId = config.getString(StreamsConfig.CLIENT_ID_CONFIG); if (clientId.length() <= 0) clientId = applicationId + "-" + STREAM_CLIENT_ID_SEQUENCE.getAndIncrement(); diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java index 7161a80c7450..6b57b1711d75 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/TopologyBuilder.java @@ -38,6 +38,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; /** @@ -64,6 +65,7 @@ public class TopologyBuilder { private final HashMap nodeToSourceTopics = new HashMap<>(); private final HashMap nodeToSinkTopic = new HashMap<>(); private Map> nodeGroups = null; + private String applicationId = null; private static class StateStoreFactory { public final Set users; @@ -85,7 +87,7 @@ private static abstract class NodeFactory { this.name = name; } - public abstract ProcessorNode build(String applicationId); + public abstract ProcessorNode build(); } private static class ProcessorNodeFactory extends NodeFactory { @@ -105,7 +107,7 @@ public void addStateStore(String stateStoreName) { @SuppressWarnings("unchecked") @Override - public ProcessorNode build(String applicationId) { + public ProcessorNode build() { return new ProcessorNode(name, supplier.get(), stateStoreNames); } } @@ -124,7 +126,7 @@ private SourceNodeFactory(String name, String[] topics, Deserializer keyDeserial @SuppressWarnings("unchecked") @Override - public ProcessorNode build(String applicationId) { + public ProcessorNode build() { return new SourceNode(name, keyDeserializer, valDeserializer); } } @@ -147,10 +149,10 @@ private SinkNodeFactory(String name, String[] parents, String topic, Serializer @SuppressWarnings("unchecked") @Override - public ProcessorNode build(String applicationId) { + public ProcessorNode build() { if (internalTopicNames.contains(topic)) { // prefix the internal topic name with the application id - return new 
SinkNode(name, applicationId + "-" + topic, keySerializer, valSerializer, partitioner); + return new SinkNode(name, decorateTopic(topic), keySerializer, valSerializer, partitioner); } else { return new SinkNode(name, topic, keySerializer, valSerializer, partitioner); } @@ -192,6 +194,22 @@ public int hashCode() { */ public TopologyBuilder() {} + /** + * Set the applicationId to be used for auto-generated internal topics. + * + * This is required before calling {@link #sourceTopics}, {@link #topicGroups}, + * {@link #copartitionSources} and {@link #build(Integer)}. + * + * @param applicationId the streams applicationId. Should be the same as set by + * {@link org.apache.kafka.streams.StreamsConfig#APPLICATION_ID_CONFIG} + */ + public synchronized final TopologyBuilder setApplicationId(String applicationId) { + Objects.requireNonNull(applicationId, "applicationId can't be null"); + this.applicationId = applicationId; + + return this; + } + /** * Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes. * The source will use the {@link org.apache.kafka.streams.StreamsConfig#KEY_SERDE_CLASS_CONFIG default key deserializer} and @@ -501,7 +519,7 @@ private void connectProcessorAndStateStore(String processorName, String stateSto * * @return groups of topic names */ - public synchronized Map topicGroups(String applicationId) { + public synchronized Map topicGroups() { Map topicGroups = new HashMap<>(); if (nodeGroups == null) @@ -520,7 +538,7 @@ public synchronized Map topicGroups(String applicationId) { for (String topic : topics) { if (this.internalTopicNames.contains(topic)) { // prefix the internal topic name with the application id - String internalTopic = applicationId + "-" + topic; + String internalTopic = decorateTopic(topic); internalSourceTopics.add(internalTopic); sourceTopics.add(internalTopic); } else { @@ -534,7 +552,7 @@ public synchronized Map topicGroups(String applicationId) { if (topic != null) { if (internalTopicNames.contains(topic)) { // prefix the change log topic name with the application id - sinkTopics.add(applicationId + "-" + topic); + sinkTopics.add(decorateTopic(topic)); } else { sinkTopics.add(topic); } @@ -544,7 +562,7 @@ public synchronized Map topicGroups(String applicationId) { for (StateStoreFactory stateFactory : stateFactories.values()) { if (stateFactory.isInternal && stateFactory.users.contains(node)) { // prefix the change log topic name with the application id - stateChangelogTopics.add(applicationId + "-" + stateFactory.supplier.name() + ProcessorStateManager.STATE_CHANGELOG_TOPIC_SUFFIX); + stateChangelogTopics.add(ProcessorStateManager.storeChangelogTopic(applicationId, stateFactory.supplier.name())); } } } @@ -629,7 +647,7 @@ public synchronized Collection> copartitionGroups() { for (String node : nodeNames) { String[] topics = nodeToSourceTopics.get(node); if (topics != null) - copartitionGroup.addAll(Arrays.asList(topics)); + copartitionGroup.addAll(maybeDecorateInternalSourceTopics(topics)); } list.add(Collections.unmodifiableSet(copartitionGroup)); } @@ -642,7 +660,7 @@ public synchronized Collection> copartitionGroups() { * * @see org.apache.kafka.streams.KafkaStreams#KafkaStreams(TopologyBuilder, org.apache.kafka.streams.StreamsConfig) */ - public synchronized ProcessorTopology build(String applicationId, Integer topicGroupId) { + public synchronized ProcessorTopology build(Integer topicGroupId) { Set nodeGroup; if (topicGroupId != null) { nodeGroup = nodeGroups().get(topicGroupId); @@ -650,11 
+668,11 @@ public synchronized ProcessorTopology build(String applicationId, Integer topicG // when nodeGroup is null, we build the full topology. this is used in some tests. nodeGroup = null; } - return build(applicationId, nodeGroup); + return build(nodeGroup); } @SuppressWarnings("unchecked") - private ProcessorTopology build(String applicationId, Set nodeGroup) { + private ProcessorTopology build(Set nodeGroup) { List processorNodes = new ArrayList<>(nodeFactories.size()); Map processorMap = new HashMap<>(); Map topicSourceMap = new HashMap<>(); @@ -663,7 +681,7 @@ private ProcessorTopology build(String applicationId, Set nodeGroup) { // create processor nodes in a topological order ("nodeFactories" is already topologically sorted) for (NodeFactory factory : nodeFactories.values()) { if (nodeGroup == null || nodeGroup.contains(factory.name)) { - ProcessorNode node = factory.build(applicationId); + ProcessorNode node = factory.build(); processorNodes.add(node); processorMap.put(node.name(), node); @@ -680,7 +698,7 @@ private ProcessorTopology build(String applicationId, Set nodeGroup) { for (String topic : ((SourceNodeFactory) factory).topics) { if (internalTopicNames.contains(topic)) { // prefix the internal topic name with the application id - topicSourceMap.put(applicationId + "-" + topic, (SourceNode) node); + topicSourceMap.put(decorateTopic(topic), (SourceNode) node); } else { topicSourceMap.put(topic, (SourceNode) node); } @@ -702,15 +720,34 @@ private ProcessorTopology build(String applicationId, Set nodeGroup) { * Get the names of topics that are to be consumed by the source nodes created by this builder. * @return the unmodifiable set of topic names used by source nodes, which changes as new sources are added; never null */ - public synchronized Set sourceTopics(String applicationId) { - Set topics = new HashSet<>(); - for (String topic : sourceTopicNames) { + public synchronized Set sourceTopics() { + Set topics = maybeDecorateInternalSourceTopics(sourceTopicNames); + return Collections.unmodifiableSet(topics); + } + + private Set maybeDecorateInternalSourceTopics(final Set sourceTopics) { + return maybeDecorateInternalSourceTopics(sourceTopics.toArray(new String[sourceTopics.size()])); + } + + private Set maybeDecorateInternalSourceTopics(String ... sourceTopics) { + final Set decoratedTopics = new HashSet<>(); + for (String topic : sourceTopics) { if (internalTopicNames.contains(topic)) { - topics.add(applicationId + "-" + topic); + decoratedTopics.add(decorateTopic(topic)); } else { - topics.add(topic); + decoratedTopics.add(topic); } } - return Collections.unmodifiableSet(topics); + return decoratedTopics; + } + + private String decorateTopic(String topic) { + if (applicationId == null) { + throw new TopologyBuilderException("there are internal topics and " + + "applicationId hasn't been set. 
Call " + + "setApplicationId first"); + } + + return applicationId + "-" + topic; } } diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java index 085ff94aa0dd..2ddfe43b2b9d 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignor.java @@ -118,7 +118,7 @@ public void configure(Map configs) { streamThread = (StreamThread) o; streamThread.partitionAssignor(this); - this.topicGroups = streamThread.builder.topicGroups(streamThread.applicationId); + this.topicGroups = streamThread.builder.topicGroups(); if (configs.containsKey(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG)) { internalTopicManager = new InternalTopicManager( diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java index 72eeef54266d..bf88d1bbf715 100644 --- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java +++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java @@ -159,7 +159,7 @@ public StreamThread(TopologyBuilder builder, this.applicationId = applicationId; this.config = config; this.builder = builder; - this.sourceTopics = builder.sourceTopics(applicationId); + this.sourceTopics = builder.sourceTopics(); this.clientId = clientId; this.processId = processId; this.partitionGrouper = config.getConfiguredInstance(StreamsConfig.PARTITION_GROUPER_CLASS_CONFIG, PartitionGrouper.class); @@ -545,7 +545,7 @@ public Set cachedTasks() { protected StreamTask createStreamTask(TaskId id, Collection partitions) { sensors.taskCreationSensor.record(); - ProcessorTopology topology = builder.build(applicationId, id.topicGroupId); + ProcessorTopology topology = builder.build(id.topicGroupId); return new StreamTask(id, applicationId, partitions, topology, consumer, producer, restoreConsumer, config, sensors); } @@ -615,7 +615,7 @@ private void closeOne(AbstractTask task) { protected StandbyTask createStandbyTask(TaskId id, Collection partitions) { sensors.taskCreationSensor.record(); - ProcessorTopology topology = builder.build(applicationId, id.topicGroupId); + ProcessorTopology topology = builder.build(id.topicGroupId); if (!topology.stateStoreSuppliers().isEmpty()) { return new StandbyTask(id, applicationId, partitions, topology, consumer, restoreConsumer, config, sensors); diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java index a40c8fb0e794..ff16a799fcb3 100644 --- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java @@ -132,7 +132,7 @@ public Integer apply(Integer value1, Integer value2) { 1 + // to 2 + // through 1, // process - builder.build("X", null).processors().size()); + builder.build(null).processors().size()); } @Test diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/TopologyBuilderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/TopologyBuilderTest.java index 9af313a958fa..a67b4a999e34 100644 --- 
a/streams/src/test/java/org/apache/kafka/streams/processor/TopologyBuilderTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/TopologyBuilderTest.java @@ -144,12 +144,12 @@ public void testSourceTopics() { builder.addSource("source-3", "topic-3"); builder.addInternalTopic("topic-3"); - Set expected = new HashSet(); + Set expected = new HashSet<>(); expected.add("topic-1"); expected.add("topic-2"); expected.add("X-topic-3"); - assertEquals(expected, builder.sourceTopics("X")); + assertEquals(expected, builder.setApplicationId("X").sourceTopics()); } @Test(expected = TopologyBuilderException.class) @@ -190,21 +190,22 @@ public void testAddStateStore() { StateStoreSupplier supplier = new MockStateStoreSupplier("store-1", false); builder.addStateStore(supplier); - suppliers = builder.build("X", null).stateStoreSuppliers(); + suppliers = builder.build(null).stateStoreSuppliers(); assertEquals(0, suppliers.size()); builder.addSource("source-1", "topic-1"); builder.addProcessor("processor-1", new MockProcessorSupplier(), "source-1"); builder.connectProcessorAndStateStores("processor-1", "store-1"); - suppliers = builder.build("X", null).stateStoreSuppliers(); + suppliers = builder.build(null).stateStoreSuppliers(); assertEquals(1, suppliers.size()); assertEquals(supplier.name(), suppliers.get(0).name()); } @Test public void testTopicGroups() { - final TopologyBuilder builder = new TopologyBuilder(); + final TopologyBuilder builder = new TopologyBuilder().setApplicationId("X"); + builder.addInternalTopic("topic-1x"); builder.addSource("source-1", "topic-1", "topic-1x"); builder.addSource("source-2", "topic-2"); builder.addSource("source-3", "topic-3"); @@ -218,10 +219,10 @@ public void testTopicGroups() { builder.addProcessor("processor-3", new MockProcessorSupplier(), "source-3", "source-4"); - Map topicGroups = builder.topicGroups("X"); + Map topicGroups = builder.topicGroups(); Map expectedTopicGroups = new HashMap<>(); - expectedTopicGroups.put(0, new TopicsInfo(Collections.emptySet(), mkSet("topic-1", "topic-1x", "topic-2"), Collections.emptySet(), Collections.emptySet())); + expectedTopicGroups.put(0, new TopicsInfo(Collections.emptySet(), mkSet("topic-1", "X-topic-1x", "topic-2"), Collections.emptySet(), Collections.emptySet())); expectedTopicGroups.put(1, new TopicsInfo(Collections.emptySet(), mkSet("topic-3", "topic-4"), Collections.emptySet(), Collections.emptySet())); expectedTopicGroups.put(2, new TopicsInfo(Collections.emptySet(), mkSet("topic-5"), Collections.emptySet(), Collections.emptySet())); @@ -230,7 +231,7 @@ public void testTopicGroups() { Collection> copartitionGroups = builder.copartitionGroups(); - assertEquals(mkSet(mkSet("topic-1", "topic-1x", "topic-2")), new HashSet<>(copartitionGroups)); + assertEquals(mkSet(mkSet("topic-1", "X-topic-1x", "topic-2")), new HashSet<>(copartitionGroups)); } @Test @@ -256,7 +257,7 @@ public void testTopicGroupsByStateStore() { builder.addStateStore(supplier); builder.connectProcessorAndStateStores("processor-5", "store-3"); - Map topicGroups = builder.topicGroups("X"); + Map topicGroups = builder.setApplicationId("X").topicGroups(); Map expectedTopicGroups = new HashMap<>(); expectedTopicGroups.put(0, new TopicsInfo(Collections.emptySet(), mkSet("topic-1", "topic-1x", "topic-2"), Collections.emptySet(), mkSet(ProcessorStateManager.storeChangelogTopic("X", "store-1")))); @@ -281,9 +282,9 @@ public void testBuild() { builder.addProcessor("processor-2", new MockProcessorSupplier(), "source-2", "processor-1"); 
builder.addProcessor("processor-3", new MockProcessorSupplier(), "source-3", "source-4"); - ProcessorTopology topology0 = builder.build("X", 0); - ProcessorTopology topology1 = builder.build("X", 1); - ProcessorTopology topology2 = builder.build("X", 2); + ProcessorTopology topology0 = builder.build(0); + ProcessorTopology topology1 = builder.build(1); + ProcessorTopology topology2 = builder.build(2); assertEquals(mkSet("source-1", "source-2", "processor-1", "processor-2"), nodeNames(topology0.processors())); assertEquals(mkSet("source-3", "source-4", "processor-3"), nodeNames(topology1.processors())); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java index 62b283aefd94..382e853907df 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorTopologyTest.java @@ -96,7 +96,7 @@ public void testTopologyMetadata() { builder.addSink("sink-1", "topic-3", "processor-1"); builder.addSink("sink-2", "topic-4", "processor-1", "processor-2"); - final ProcessorTopology topology = builder.build("X", null); + final ProcessorTopology topology = builder.build(null); assertEquals(6, topology.processors().size()); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java index 17bda54bea5f..f743631996c4 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamPartitionAssignorTest.java @@ -269,8 +269,9 @@ public void testAssignWithNewTasks() throws Exception { @Test public void testAssignWithStates() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); - + String applicationId = "test"; TopologyBuilder builder = new TopologyBuilder(); + builder.setApplicationId(applicationId); builder.addSource("source1", "topic1"); builder.addSource("source2", "topic2"); @@ -295,10 +296,10 @@ public void testAssignWithStates() throws Exception { UUID uuid2 = UUID.randomUUID(); String client1 = "client1"; - StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder, config, new MockClientSupplier(), applicationId, client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); - partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); + partitionAssignor.configure(config.getConsumerConfigs(thread10, applicationId, client1)); Map subscriptions = new HashMap<>(); subscriptions.put("consumer10", @@ -474,6 +475,7 @@ public void testOnAssignment() throws Exception { @Test public void testAssignWithInternalTopics() throws Exception { StreamsConfig config = new StreamsConfig(configProps()); + String applicationId = "test"; TopologyBuilder builder = new TopologyBuilder(); builder.addInternalTopic("topicX"); @@ -489,10 +491,10 @@ public void testAssignWithInternalTopics() throws Exception { String client1 = "client1"; MockClientSupplier clientSupplier = new MockClientSupplier(); - StreamThread thread10 = new StreamThread(builder, 
config, clientSupplier, "test", client1, uuid1, new Metrics(), new SystemTime()); + StreamThread thread10 = new StreamThread(builder.setApplicationId(applicationId), config, clientSupplier, applicationId, client1, uuid1, new Metrics(), new SystemTime()); StreamPartitionAssignor partitionAssignor = new StreamPartitionAssignor(); - partitionAssignor.configure(config.getConsumerConfigs(thread10, "test", client1)); + partitionAssignor.configure(config.getConsumerConfigs(thread10, applicationId, client1)); MockInternalTopicManager internalTopicManager = new MockInternalTopicManager(clientSupplier.restoreConsumer); partitionAssignor.setInternalTopicManager(internalTopicManager); diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java index 4ae31e4ac655..b6a6bffb651c 100644 --- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java @@ -160,7 +160,7 @@ public void testPartitionAssignmentChange() throws Exception { StreamThread thread = new StreamThread(builder, config, new MockClientSupplier(), applicationId, clientId, processId, new Metrics(), new SystemTime()) { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { - ProcessorTopology topology = builder.build("X", id.topicGroupId); + ProcessorTopology topology = builder.build(id.topicGroupId); return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; @@ -284,7 +284,7 @@ public void maybeClean() { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { - ProcessorTopology topology = builder.build("X", id.topicGroupId); + ProcessorTopology topology = builder.build(id.topicGroupId); return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; @@ -403,7 +403,7 @@ public void maybeCommit() { @Override protected StreamTask createStreamTask(TaskId id, Collection partitionsForTask) { - ProcessorTopology topology = builder.build("X", id.topicGroupId); + ProcessorTopology topology = builder.build(id.topicGroupId); return new TestStreamTask(id, applicationId, partitionsForTask, topology, consumer, producer, restoreConsumer, config); } }; diff --git a/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java b/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java index 73168048f5e1..dfa7f5d04f86 100644 --- a/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java +++ b/streams/src/test/java/org/apache/kafka/test/KStreamTestDriver.java @@ -57,7 +57,7 @@ public KStreamTestDriver(KStreamBuilder builder, File stateDir, Serde keySerde, Serde valSerde) { - this.topology = builder.build("X", null); + this.topology = builder.setApplicationId("KStreamTestDriver").build(null); this.stateDir = stateDir; this.context = new MockProcessorContext(this, stateDir, keySerde, valSerde, new MockRecordCollector()); this.context.setTime(0L); diff --git a/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java b/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java index 4ddbc2a6bdcd..5188f34699b9 100644 --- a/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java +++ 
b/streams/src/test/java/org/apache/kafka/test/ProcessorTopologyTestDriver.java @@ -147,7 +147,7 @@ public class ProcessorTopologyTestDriver { */ public ProcessorTopologyTestDriver(StreamsConfig config, TopologyBuilder builder, String... storeNames) { id = new TaskId(0, 0); - topology = builder.build("X", null); + topology = builder.build(null); // Set up the consumer and producer ... consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); From a707f573de55a97782397f6cb5be90876e20cc3e Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Thu, 18 Aug 2016 15:29:56 -0700 Subject: [PATCH 249/267] KAFKA-4037: Make Connect REST API retries aware of 409 CONFLICT errors Author: Ewen Cheslack-Postava Reviewers: Jason Gustafson , Ismael Juma Closes #1733 from ewencp/rest-api-retries (cherry picked from commit 59cfa84801c67de9729385a8f9b536721e0c37b9) Signed-off-by: Ewen Cheslack-Postava --- tests/kafkatest/services/connect.py | 62 ++++++++++++------- .../tests/connect/connect_rest_test.py | 12 ++-- tests/kafkatest/utils/util.py | 10 --- 3 files changed, 46 insertions(+), 38 deletions(-) diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py index 7f36854f9a7c..ebc19b00f443 100644 --- a/tests/kafkatest/services/connect.py +++ b/tests/kafkatest/services/connect.py @@ -17,12 +17,12 @@ import os.path import random import signal +import time import requests from ducktape.errors import DucktapeError from ducktape.services.service import Service from ducktape.utils.util import wait_until -from kafkatest.utils.util import retry_on_exception from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin @@ -107,45 +107,49 @@ def clean_node(self, node): def config_filenames(self): return [os.path.join(self.PERSISTENT_ROOT, "connect-connector-" + str(idx) + ".properties") for idx, template in enumerate(self.connector_config_templates or [])] - def list_connectors(self, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors', node=node, retries=retries, retry_backoff=retry_backoff) + def list_connectors(self, node=None, **kwargs): + return self._rest_with_retry('/connectors', node=node, **kwargs) - def create_connector(self, config, node=None, retries=0, retry_backoff=.01): + def create_connector(self, config, node=None, **kwargs): create_request = { 'name': config['name'], 'config': config } - return self._rest_with_retry('/connectors', create_request, node=node, method="POST", retries=retries, retry_backoff=retry_backoff) + return self._rest_with_retry('/connectors', create_request, node=node, method="POST", **kwargs) - def get_connector(self, name, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors/' + name, node=node, retries=retries, retry_backoff=retry_backoff) + def get_connector(self, name, node=None, **kwargs): + return self._rest_with_retry('/connectors/' + name, node=node, **kwargs) - def get_connector_config(self, name, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors/' + name + '/config', node=node, retries=retries, retry_backoff=retry_backoff) + def get_connector_config(self, name, node=None, **kwargs): + return self._rest_with_retry('/connectors/' + name + '/config', node=node, **kwargs) - def set_connector_config(self, name, config, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors/' + name + '/config', config, node=node, method="PUT", retries=retries, retry_backoff=retry_backoff) + def 
set_connector_config(self, name, config, node=None, **kwargs): + # Unlike many other calls, a 409 when setting a connector config is expected if the connector already exists. + # However, we also might see 409s for other reasons (e.g. rebalancing). So we still perform retries at the cost + # of tests possibly taking longer to ultimately fail. Tests that care about this can explicitly override the + # number of retries. + return self._rest_with_retry('/connectors/' + name + '/config', config, node=node, method="PUT", **kwargs) - def get_connector_tasks(self, name, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors/' + name + '/tasks', node=node, retries=retries, retry_backoff=retry_backoff) + def get_connector_tasks(self, name, node=None, **kwargs): + return self._rest_with_retry('/connectors/' + name + '/tasks', node=node, **kwargs) - def delete_connector(self, name, node=None, retries=0, retry_backoff=.01): - return self._rest_with_retry('/connectors/' + name, node=node, method="DELETE", retries=retries, retry_backoff=retry_backoff) + def delete_connector(self, name, node=None, **kwargs): + return self._rest_with_retry('/connectors/' + name, node=node, method="DELETE", **kwargs) def get_connector_status(self, name, node=None): return self._rest('/connectors/' + name + '/status', node=node) - def restart_connector(self, name, node=None): - return self._rest('/connectors/' + name + '/restart', method="POST") + def restart_connector(self, name, node=None, **kwargs): + return self._rest_with_retry('/connectors/' + name + '/restart', node=node, method="POST", **kwargs) def restart_task(self, connector_name, task_id, node=None): - return self._rest('/connectors/' + connector_name + '/tasks/' + str(task_id) + '/restart', method="POST") + return self._rest('/connectors/' + connector_name + '/tasks/' + str(task_id) + '/restart', node=node, method="POST") def pause_connector(self, name, node=None): - return self._rest('/connectors/' + name + '/pause', method="PUT") + return self._rest('/connectors/' + name + '/pause', node=node, method="PUT") def resume_connector(self, name, node=None): - return self._rest('/connectors/' + name + '/resume', method="PUT") + return self._rest('/connectors/' + name + '/resume', node=node, method="PUT") def list_connector_plugins(self, node=None): return self._rest('/connector-plugins/', node=node) @@ -163,14 +167,28 @@ def _rest(self, path, body=None, node=None, method="GET"): resp = meth(url, json=body) self.logger.debug("%s %s response: %d", url, method, resp.status_code) if resp.status_code > 400: + self.logger.debug("Connect REST API error for %s: %d %s", resp.url, resp.status_code, resp.text) raise ConnectRestError(resp.status_code, resp.text, resp.url) if resp.status_code == 204 or resp.status_code == 202: return None else: return resp.json() - def _rest_with_retry(self, path, body=None, node=None, method="GET", retries=0, retry_backoff=.01): - return retry_on_exception(lambda: self._rest(path, body, node, method), ConnectRestError, retries, retry_backoff) + def _rest_with_retry(self, path, body=None, node=None, method="GET", retries=40, retry_backoff=.25): + """ + Invokes a REST API with retries for errors that may occur during normal operation (notably 409 CONFLICT + responses that can occur due to rebalancing). 
+ """ + exception_to_throw = None + for i in range(0, retries + 1): + try: + return self._rest(path, body, node, method) + except ConnectRestError as e: + exception_to_throw = e + if e.status != 409: + break + time.sleep(retry_backoff) + raise exception_to_throw def _base_url(self, node): return 'http://' + node.account.externally_routable_ip + ':' + '8083' diff --git a/tests/kafkatest/tests/connect/connect_rest_test.py b/tests/kafkatest/tests/connect/connect_rest_test.py index c32b8e179c91..0b004996cafd 100644 --- a/tests/kafkatest/tests/connect/connect_rest_test.py +++ b/tests/kafkatest/tests/connect/connect_rest_test.py @@ -84,11 +84,11 @@ def test_rest_api(self): self.verify_config(self.FILE_SINK_CONNECTOR, self.FILE_SINK_CONFIGS, configs) self.logger.info("Creating connectors") - self.cc.create_connector(source_connector_config, retries=120, retry_backoff=1) - self.cc.create_connector(sink_connector_config, retries=120, retry_backoff=1) + self.cc.create_connector(source_connector_config) + self.cc.create_connector(sink_connector_config) # We should see the connectors appear - wait_until(lambda: set(self.cc.list_connectors(retries=5, retry_backoff=1)) == set(["local-file-source", "local-file-sink"]), + wait_until(lambda: set(self.cc.list_connectors()) == set(["local-file-source", "local-file-sink"]), timeout_sec=10, err_msg="Connectors that were just created did not appear in connector listing") # We'll only do very simple validation that the connectors and tasks really ran. @@ -157,9 +157,9 @@ def test_rest_api(self): node.account.ssh("echo -e -n " + repr(self.LONER_INPUTS) + " >> " + self.INPUT_FILE2) wait_until(lambda: self.validate_output(self.LONGER_INPUT_LIST), timeout_sec=120, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.") - self.cc.delete_connector("local-file-source", retries=120, retry_backoff=1) - self.cc.delete_connector("local-file-sink", retries=120, retry_backoff=1) - wait_until(lambda: len(self.cc.list_connectors(retries=5, retry_backoff=1)) == 0, timeout_sec=10, err_msg="Deleted connectors did not disappear from REST listing") + self.cc.delete_connector("local-file-source") + self.cc.delete_connector("local-file-sink") + wait_until(lambda: len(self.cc.list_connectors()) == 0, timeout_sec=10, err_msg="Deleted connectors did not disappear from REST listing") def validate_output(self, input): input_set = set(input) diff --git a/tests/kafkatest/utils/util.py b/tests/kafkatest/utils/util.py index c043bec743fb..f004ece25d7f 100644 --- a/tests/kafkatest/utils/util.py +++ b/tests/kafkatest/utils/util.py @@ -73,13 +73,3 @@ def is_int_with_prefix(msg): "prefix dot integer value, but one of the two parts (before or after dot) " "are not integers. Message: %s" % (msg)) - -def retry_on_exception(fun, exception, retries, retry_backoff=.01): - exception_to_throw = None - for i in range(0, retries + 1): - try: - return fun() - except exception as e: - exception_to_throw = e - time.sleep(retry_backoff) - raise exception_to_throw From afb65688af643f72ccf1ab7414bdfc0adbb880e9 Mon Sep 17 00:00:00 2001 From: Todd Palino Date: Fri, 19 Aug 2016 11:05:39 -0700 Subject: [PATCH 250/267] KAFKA-4050; Allow configuration of the PRNG used for SSL Add an optional configuration for the SecureRandom PRNG implementation, with the default behavior being the same (use the default implementation in the JDK/JRE). 
Author: Todd Palino Reviewers: Grant Henke , Ismael Juma , Joel Koshy , Jiangjie Qin , Rajini Sivaram Closes #1747 from toddpalino/trunk (cherry picked from commit 104d2154b635c50efc80331ee2a4779cc3658414) Signed-off-by: Joel Koshy --- .../apache/kafka/common/config/SslConfigs.java | 6 +++++- .../kafka/common/security/ssl/SslFactory.java | 13 ++++++++++++- .../common/network/SslTransportLayerTest.java | 17 ++++++++++++++++- .../main/scala/kafka/server/KafkaConfig.scala | 3 +++ .../unit/kafka/server/KafkaConfigTest.scala | 1 + docs/security.html | 9 +++++++++ .../kafkatest/services/kafka/config_property.py | 1 + 7 files changed, 47 insertions(+), 3 deletions(-) diff --git a/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java b/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java index 1ccd039fb12b..ba1ff6b23126 100644 --- a/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java +++ b/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java @@ -85,6 +85,9 @@ public class SslConfigs { public static final String SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG = "ssl.endpoint.identification.algorithm"; public static final String SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_DOC = "The endpoint identification algorithm to validate server hostname using server certificate. "; + public static final String SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG = "ssl.secure.random.implementation"; + public static final String SSL_SECURE_RANDOM_IMPLEMENTATION_DOC = "The SecureRandom PRNG implementation to use for SSL cryptography operations. "; + public static final String SSL_CLIENT_AUTH_CONFIG = "ssl.client.auth"; public static final String SSL_CLIENT_AUTH_DOC = "Configures kafka broker to request client authentication." + " The following settings are common: " @@ -109,6 +112,7 @@ public static void addClientSslSupport(ConfigDef config) { .define(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, ConfigDef.Type.PASSWORD, null, ConfigDef.Importance.HIGH, SslConfigs.SSL_TRUSTSTORE_PASSWORD_DOC) .define(SslConfigs.SSL_KEYMANAGER_ALGORITHM_CONFIG, ConfigDef.Type.STRING, SslConfigs.DEFAULT_SSL_KEYMANGER_ALGORITHM, ConfigDef.Importance.LOW, SslConfigs.SSL_KEYMANAGER_ALGORITHM_DOC) .define(SslConfigs.SSL_TRUSTMANAGER_ALGORITHM_CONFIG, ConfigDef.Type.STRING, SslConfigs.DEFAULT_SSL_TRUSTMANAGER_ALGORITHM, ConfigDef.Importance.LOW, SslConfigs.SSL_TRUSTMANAGER_ALGORITHM_DOC) - .define(SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG, ConfigDef.Type.STRING, null, ConfigDef.Importance.LOW, SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_DOC); + .define(SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG, ConfigDef.Type.STRING, null, ConfigDef.Importance.LOW, SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_DOC) + .define(SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG, ConfigDef.Type.STRING, null, ConfigDef.Importance.LOW, SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_DOC); } } diff --git a/clients/src/main/java/org/apache/kafka/common/security/ssl/SslFactory.java b/clients/src/main/java/org/apache/kafka/common/security/ssl/SslFactory.java index d0fe2e8694fc..ee7b65b2c680 100644 --- a/clients/src/main/java/org/apache/kafka/common/security/ssl/SslFactory.java +++ b/clients/src/main/java/org/apache/kafka/common/security/ssl/SslFactory.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.security.GeneralSecurityException; import java.security.KeyStore; +import java.security.SecureRandom; import java.util.List; import java.util.Map; @@ -51,6 +52,7 @@ public class 
SslFactory implements Configurable { private String[] cipherSuites; private String[] enabledProtocols; private String endpointIdentification; + private SecureRandom secureRandomImplementation; private SSLContext sslContext; private boolean needClientAuth; private boolean wantClientAuth; @@ -83,6 +85,15 @@ public void configure(Map configs) throws KafkaException { if (endpointIdentification != null) this.endpointIdentification = endpointIdentification; + String secureRandomImplementation = (String) configs.get(SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG); + if (secureRandomImplementation != null) { + try { + this.secureRandomImplementation = SecureRandom.getInstance(secureRandomImplementation); + } catch (GeneralSecurityException e) { + throw new KafkaException(e); + } + } + String clientAuthConfig = clientAuthConfigOverride; if (clientAuthConfig == null) clientAuthConfig = (String) configs.get(SslConfigs.SSL_CLIENT_AUTH_CONFIG); @@ -134,7 +145,7 @@ private SSLContext createSSLContext() throws GeneralSecurityException, IOExcepti KeyStore ts = truststore == null ? null : truststore.load(); tmf.init(ts); - sslContext.init(keyManagers, tmf.getTrustManagers(), null); + sslContext.init(keyManagers, tmf.getTrustManagers(), this.secureRandomImplementation); return sslContext; } diff --git a/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java b/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java index 4e96411c362a..a044dc97b1b2 100644 --- a/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java +++ b/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java @@ -252,7 +252,22 @@ public void testClientAuthenticationRequestedNotProvided() throws Exception { NetworkTestUtils.checkClientConnection(selector, node, 100, 10); } - + + /** + * Tests that an invalid SecureRandom implementation cannot be configured + */ + @Test + public void testInvalidSecureRandomImplementation() throws Exception { + SslChannelBuilder channelBuilder = new SslChannelBuilder(Mode.CLIENT); + try { + sslClientConfigs.put(SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG, "invalid"); + channelBuilder.configure(sslClientConfigs); + fail("SSL channel configured with invalid SecureRandom implementation"); + } catch (KafkaException e) { + // Expected exception + } + } + /** * Tests that channels cannot be created if truststore cannot be loaded */ diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala index d3d8c9e574fb..34024f90ba42 100755 --- a/core/src/main/scala/kafka/server/KafkaConfig.scala +++ b/core/src/main/scala/kafka/server/KafkaConfig.scala @@ -336,6 +336,7 @@ object KafkaConfig { val SslKeyManagerAlgorithmProp = SslConfigs.SSL_KEYMANAGER_ALGORITHM_CONFIG val SslTrustManagerAlgorithmProp = SslConfigs.SSL_TRUSTMANAGER_ALGORITHM_CONFIG val SslEndpointIdentificationAlgorithmProp = SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG + val SslSecureRandomImplementationProp = SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG val SslClientAuthProp = SslConfigs.SSL_CLIENT_AUTH_CONFIG /** ********* SASL Configuration ****************/ @@ -534,6 +535,7 @@ object KafkaConfig { val SslKeyManagerAlgorithmDoc = SslConfigs.SSL_KEYMANAGER_ALGORITHM_DOC val SslTrustManagerAlgorithmDoc = SslConfigs.SSL_TRUSTMANAGER_ALGORITHM_DOC val SslEndpointIdentificationAlgorithmDoc = SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_DOC + val SslSecureRandomImplementationDoc 
= SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_DOC val SslClientAuthDoc = SslConfigs.SSL_CLIENT_AUTH_DOC /** ********* Sasl Configuration ****************/ @@ -706,6 +708,7 @@ object KafkaConfig { .define(SslKeyManagerAlgorithmProp, STRING, Defaults.SslKeyManagerAlgorithm, MEDIUM, SslKeyManagerAlgorithmDoc) .define(SslTrustManagerAlgorithmProp, STRING, Defaults.SslTrustManagerAlgorithm, MEDIUM, SslTrustManagerAlgorithmDoc) .define(SslEndpointIdentificationAlgorithmProp, STRING, null, LOW, SslEndpointIdentificationAlgorithmDoc) + .define(SslSecureRandomImplementationProp, STRING, null, LOW, SslSecureRandomImplementationDoc) .define(SslClientAuthProp, STRING, Defaults.SslClientAuth, in(Defaults.SslClientAuthRequired, Defaults.SslClientAuthRequested, Defaults.SslClientAuthNone), MEDIUM, SslClientAuthDoc) .define(SslCipherSuitesProp, LIST, null, MEDIUM, SslCipherSuitesDoc) diff --git a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala index f8476cd303be..eb4c0eab4fd1 100755 --- a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala +++ b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala @@ -547,6 +547,7 @@ class KafkaConfigTest { case KafkaConfig.SslTrustManagerAlgorithmProp => case KafkaConfig.SslClientAuthProp => // ignore string case KafkaConfig.SslEndpointIdentificationAlgorithmProp => // ignore string + case KafkaConfig.SslSecureRandomImplementationProp => // ignore string case KafkaConfig.SslCipherSuitesProp => // ignore string //Sasl Configs diff --git a/docs/security.html b/docs/security.html index 0bd5f87bffa8..c2113151a688 100644 --- a/docs/security.html +++ b/docs/security.html @@ -145,6 +145,7 @@

                7.2 Encryption and Authentication
              • ssl.enabled.protocols=TLSv1.2,TLSv1.1,TLSv1 (list out the SSL protocols that you are going to accept from clients. Do note that SSL is deprecated in favor of TLS and using SSL in production is not recommended)
              • ssl.keystore.type=JKS
              • ssl.truststore.type=JKS
              • + ssl.secure.random.implementation=SHA1PRNG
              • If you want to enable SSL for inter-broker communication, add the following to the broker properties file (it defaults to PLAINTEXT)
                @@ -155,6 +156,14 @@ 

                7.2 Encryption and Authentication
                JCA Providers Documentation for more information.

                +

+ The JRE/JDK will have a default pseudo-random number generator (PRNG) that is used for cryptography operations, so it is not required to configure the
+ implementation used with the ssl.secure.random.implementation. However, there are performance issues with some implementations (notably, the
+ default chosen on Linux systems, NativePRNG, utilizes a global lock). In cases where performance of SSL connections becomes an issue,
+ consider explicitly setting the implementation to be used. The SHA1PRNG implementation is non-blocking, and has shown very good performance
+ characteristics under heavy load (50 MB/sec of produced messages, plus replication traffic, per-broker).
+

                + Once you start the broker you should be able to see in the server.log
                         with addresses: PLAINTEXT -> EndPoint(192.168.64.1,9092,PLAINTEXT),SSL -> EndPoint(192.168.64.1,9093,SSL)
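
For illustration only (this sketch is not part of the patch): a minimal Java client that opts into the new setting once this change is applied. The bootstrap address, truststore path and password are placeholders, not values from the patch.

    import java.util.Properties;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerConfig;
    import org.apache.kafka.common.config.SslConfigs;
    import org.apache.kafka.common.serialization.StringSerializer;

    public class Sha1PrngProducerExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "broker1:9093");                             // placeholder
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            props.put("security.protocol", "SSL");
            props.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, "/var/private/ssl/client.truststore.jks"); // placeholder
            props.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "changeit");                               // placeholder
            // New config introduced by this patch: choose a non-blocking PRNG for SSL operations.
            props.put(SslConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG, "SHA1PRNG");

            // The producer creates its SSL engines with the configured SecureRandom implementation.
            KafkaProducer<String, String> producer = new KafkaProducer<>(props);
            producer.close();
        }
    }

On the broker side the equivalent property (ssl.secure.random.implementation=SHA1PRNG) goes into server.properties, as shown in the documentation hunk above.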
                diff --git a/tests/kafkatest/services/kafka/config_property.py b/tests/kafkatest/services/kafka/config_property.py index e1801efa9293..217e9703e883 100644 --- a/tests/kafkatest/services/kafka/config_property.py +++ b/tests/kafkatest/services/kafka/config_property.py @@ -173,6 +173,7 @@ val SSLKeyManagerAlgorithmProp = SSLConfigs.SSL_KEYMANAGER_ALGORITHM_CONFIG val SSLTrustManagerAlgorithmProp = SSLConfigs.SSL_TRUSTMANAGER_ALGORITHM_CONFIG val SSLEndpointIdentificationAlgorithmProp = SSLConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG + val SSLSecureRandomImplementationProp = SSLConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG val SSLClientAuthProp = SSLConfigs.SSL_CLIENT_AUTH_CONFIG """ From 45259d11aa77bda708a2e9422c3dc3899668965d Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Sat, 20 Aug 2016 12:06:58 -0700 Subject: [PATCH 251/267] MINOR: improve Streams application reset tool to make sure application is down guozhangwang miguno dguy enothereska hjafarpour See #1764 Author: Matthias J. Sax Reviewers: Guozhang Wang Closes #1765 from mjsax/improveResetTool-0.10.0 --- .../main/scala/kafka/tools/StreamsResetter.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/kafka/tools/StreamsResetter.java b/core/src/main/scala/kafka/tools/StreamsResetter.java index 8e463d125498..8d9cd5e347db 100644 --- a/core/src/main/scala/kafka/tools/StreamsResetter.java +++ b/core/src/main/scala/kafka/tools/StreamsResetter.java @@ -20,6 +20,7 @@ import joptsimple.OptionParser; import joptsimple.OptionSet; import joptsimple.OptionSpec; +import kafka.admin.AdminClient; import kafka.admin.TopicCommand; import kafka.utils.ZkUtils; import org.apache.kafka.clients.consumer.ConsumerConfig; @@ -82,10 +83,18 @@ public int run(final String[] args, final Properties config) { int exitCode = EXIT_CODE_SUCCESS; + AdminClient adminClient = null; ZkUtils zkUtils = null; try { parseArguments(args); + adminClient = AdminClient.createSimplePlaintext(this.options.valueOf(bootstrapServerOption)); + final String groupId = this.options.valueOf(applicationIdOption); + if (adminClient.describeConsumerGroup(groupId).size() != 0) { + throw new IllegalStateException("Consumer group '" + groupId + "' is still active. 
" + + "Make sure to stop all running application instances before running the reset tool."); + } + zkUtils = ZkUtils.apply(this.options.valueOf(zookeeperOption), 30000, 30000, @@ -97,10 +106,13 @@ public int run(final String[] args, final Properties config) { resetInputAndInternalTopicOffsets(); seekToEndIntermediateTopics(); deleteInternalTopics(zkUtils); - } catch (final Exception e) { + } catch (final Throwable e) { exitCode = EXIT_CODE_ERROR; System.err.println("ERROR: " + e.getMessage()); } finally { + if (adminClient != null) { + adminClient.close(); + } if (zkUtils != null) { zkUtils.close(); } From 8bf9addd25a0cccdd79a68ef792c106f671f842f Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Mon, 22 Aug 2016 23:52:50 +0100 Subject: [PATCH 252/267] MINOR: add slf4jlog4j to streams example Author: Guozhang Wang Reviewers: Ismael Juma Closes #1731 from guozhangwang/Kminor-log4j-streams-examples (cherry picked from commit d903babb726bdf725bd473c0590f9826b6fa2f14) Signed-off-by: Ismael Juma --- build.gradle | 8 +++++++- gradle/dependencies.gradle | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index b565200966fa..aa90fc7b32ae 100644 --- a/build.gradle +++ b/build.gradle @@ -680,7 +680,12 @@ project(':streams') { exclude module: 'jline' exclude module: 'netty' } - compile libs.jacksonDatabind // this dependency should be removed after KIP-4 + + // zkclient has a hard log4j dependency, we set it here as well to avoid copying an older + // version to the binary package; the following two dependencies should be removed after KIP-4 + // (along with the zkclient dependency) + compile libs.log4j + compile libs.jacksonDatabind testCompile project(':clients').sourceSets.test.output testCompile project(':core') @@ -728,6 +733,7 @@ project(':streams:examples') { dependencies { compile project(':streams') compile project(':connect:json') // this dependency should be removed after we unify data API + compile libs.slf4jlog4j // this dependency should be removed after KIP-4 } javadoc { diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index f1b4ecda57ed..83a2081d58f9 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -31,6 +31,7 @@ versions += [ jackson: "2.6.3", jetty: "9.2.15.v20160210", jersey: "2.22.2", + log4j: "1.2.17", jopt: "4.9", junit: "4.12", lz4: "1.3.0", @@ -85,6 +86,7 @@ libs += [ jettyServlets: "org.eclipse.jetty:jetty-servlets:$versions.jetty", jerseyContainerServlet: "org.glassfish.jersey.containers:jersey-container-servlet:$versions.jersey", junit: "junit:junit:$versions.junit", + log4j: "log4j:log4j:$versions.log4j", joptSimple: "net.sf.jopt-simple:jopt-simple:$versions.jopt", lz4: "net.jpountz.lz4:lz4:$versions.lz4", metrics: "com.yammer.metrics:metrics-core:$versions.metrics", From 4e4e2fb5085758ee9ccf6307433ad531a33198d3 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Mon, 22 Aug 2016 21:49:40 -0700 Subject: [PATCH 253/267] KAFKA-4073; MirrorMaker should handle messages without timestamp correctly Author: Ismael Juma Reviewers: Jun Rao Closes #1773 from ijuma/kafka-4073-mirror-maker-timestamps (cherry picked from commit a1e0b2240dba0740135621d959441eefa6fd3124) Signed-off-by: Jun Rao --- .../main/scala/kafka/tools/MirrorMaker.scala | 4 +++- .../unit/kafka/tools/MirrorMakerTest.scala | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/kafka/tools/MirrorMaker.scala b/core/src/main/scala/kafka/tools/MirrorMaker.scala index 
7d6b5fbae527..5de20383b766 100755 --- a/core/src/main/scala/kafka/tools/MirrorMaker.scala +++ b/core/src/main/scala/kafka/tools/MirrorMaker.scala @@ -38,6 +38,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.ByteArrayDeserializer import org.apache.kafka.common.utils.Utils import org.apache.kafka.common.errors.WakeupException +import org.apache.kafka.common.record.Record import scala.collection.JavaConversions._ import scala.collection.mutable.HashMap @@ -675,7 +676,8 @@ object MirrorMaker extends Logging with KafkaMetricsGroup { private[tools] object defaultMirrorMakerMessageHandler extends MirrorMakerMessageHandler { override def handle(record: BaseConsumerRecord): util.List[ProducerRecord[Array[Byte], Array[Byte]]] = { - Collections.singletonList(new ProducerRecord[Array[Byte], Array[Byte]](record.topic, null, record.timestamp, record.key, record.value)) + val timestamp: java.lang.Long = if (record.timestamp == Record.NO_TIMESTAMP) null else record.timestamp + Collections.singletonList(new ProducerRecord[Array[Byte], Array[Byte]](record.topic, null, timestamp, record.key, record.value)) } } diff --git a/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala b/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala index 39a0ac9d6d12..d6a5470ce43f 100644 --- a/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala +++ b/core/src/test/scala/unit/kafka/tools/MirrorMakerTest.scala @@ -18,7 +18,7 @@ package kafka.tools import kafka.consumer.BaseConsumerRecord -import org.apache.kafka.common.record.TimestampType +import org.apache.kafka.common.record.{Record, TimestampType} import org.junit.Assert._ import org.junit.Test @@ -39,4 +39,20 @@ class MirrorMakerTest { assertEquals("key", new String(producerRecord.key)) assertEquals("value", new String(producerRecord.value)) } + + @Test + def testDefaultMirrorMakerMessageHandlerWithNoTimestampInSourceMessage() { + val consumerRecord = BaseConsumerRecord("topic", 0, 1L, Record.NO_TIMESTAMP, TimestampType.CREATE_TIME, "key".getBytes, "value".getBytes) + + val result = MirrorMaker.defaultMirrorMakerMessageHandler.handle(consumerRecord) + assertEquals(1, result.size) + + val producerRecord = result.get(0) + assertNull(producerRecord.timestamp) + assertEquals("topic", producerRecord.topic) + assertNull(producerRecord.partition) + assertEquals("key", new String(producerRecord.key)) + assertEquals("value", new String(producerRecord.value)) + } + } From 9b3e9f3dfeecdfa9a52a844ba133b7f9d3f4dd7a Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Wed, 24 Aug 2016 13:27:14 -0700 Subject: [PATCH 254/267] KAFKA-4082; Support usage of Gradle 3.0 for bootstrapping gradlew in 0.10.0 The main requirement is to remove the usage of `useAnt` and we need to upgrade scoverage because the older version refers to `useAnt`. 
Author: Ismael Juma Reviewers: Ewen Cheslack-Postava Closes #1781 from ijuma/kafka-4082-support-gradle-3.0-0.10 --- build.gradle | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index aa90fc7b32ae..d46c8b315559 100644 --- a/build.gradle +++ b/build.gradle @@ -26,7 +26,7 @@ buildscript { // For Apache Rat plugin to ignore non-Git files classpath "org.ajoberstar:grgit:1.5.0" classpath 'com.github.ben-manes:gradle-versions-plugin:0.12.0' - classpath 'org.scoverage:gradle-scoverage:2.0.1' + classpath 'org.scoverage:gradle-scoverage:2.1.0' } } @@ -230,8 +230,6 @@ subprojects { } tasks.withType(ScalaCompile) { - scalaCompileOptions.useAnt = false - scalaCompileOptions.additionalParameters = [ "-deprecation", "-unchecked", From ca45bd0317c024b3ec06fc235cf6a0377733c6c2 Mon Sep 17 00:00:00 2001 From: Ismael Juma Date: Fri, 26 Aug 2016 11:12:25 -0700 Subject: [PATCH 255/267] MINOR: Include request header in exception when correlation of request/response fails Author: Ismael Juma Reviewers: Guozhang Wang Closes #1793 from ijuma/include-request-header-if-request-correlation-fails (cherry picked from commit d4c379832b525d1a9c20c5cb84f0e586b9d977e0) Signed-off-by: Guozhang Wang --- .../src/main/java/org/apache/kafka/clients/NetworkClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java index b134631bd8b1..bd54a29ea9a7 100644 --- a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java @@ -484,7 +484,7 @@ private void handleConnections() { private static void correlate(RequestHeader requestHeader, ResponseHeader responseHeader) { if (requestHeader.correlationId() != responseHeader.correlationId()) throw new IllegalStateException("Correlation id for response (" + responseHeader.correlationId() - + ") does not match request (" + requestHeader.correlationId() + ")"); + + ") does not match request (" + requestHeader.correlationId() + "), request header: " + requestHeader); } /** From bff5349a49c2bf7c3b30c8ddc126e53cb6e06dca Mon Sep 17 00:00:00 2001 From: "Matthias J. Sax" Date: Tue, 30 Aug 2016 11:59:41 -0700 Subject: [PATCH 256/267] KAFKA-4058: Failure in org.apache.kafka.streams.integration.ResetIntegrationTest.testReprocessingFromScratchAfterReset - use AdminTool to check for active consumer group Author: Matthias J. Sax Reviewers: Ismael Juma, Guozhang Wang Closes #1756 from mjsax/kafka-4058-reset-tool-test --- .../org/apache/kafka/test/TestCondition.java | 26 +++++++ .../java/org/apache/kafka/test/TestUtils.java | 76 ++++++++++++------- .../scala/kafka/tools/StreamsResetter.java | 2 +- .../integration/ResetIntegrationTest.java | 40 +++++++++- 4 files changed, 112 insertions(+), 32 deletions(-) create mode 100644 clients/src/test/java/org/apache/kafka/test/TestCondition.java diff --git a/clients/src/test/java/org/apache/kafka/test/TestCondition.java b/clients/src/test/java/org/apache/kafka/test/TestCondition.java new file mode 100644 index 000000000000..f78c91bf5a2a --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/test/TestCondition.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. 
The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + *

                + * http://www.apache.org/licenses/LICENSE-2.0 + *

                + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + + +package org.apache.kafka.test; + +/** + * Interface to wrap actions that are required to wait until a condition is met + * for testing purposes. Note that this is not intended to do any assertions. + */ +public interface TestCondition { + + boolean conditionMet(); +} diff --git a/clients/src/test/java/org/apache/kafka/test/TestUtils.java b/clients/src/test/java/org/apache/kafka/test/TestUtils.java index 1bfe578eb00b..ef3b6bc9559c 100644 --- a/clients/src/test/java/org/apache/kafka/test/TestUtils.java +++ b/clients/src/test/java/org/apache/kafka/test/TestUtils.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

                + * http://www.apache.org/licenses/LICENSE-2.0 + *

                * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,7 +16,10 @@ */ package org.apache.kafka.test; -import static java.util.Arrays.asList; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.utils.Utils; import java.io.File; import java.io.IOException; @@ -28,10 +31,7 @@ import java.util.Map; import java.util.Random; -import org.apache.kafka.common.Cluster; -import org.apache.kafka.common.Node; -import org.apache.kafka.common.PartitionInfo; -import org.apache.kafka.common.utils.Utils; +import static java.util.Arrays.asList; /** @@ -49,51 +49,51 @@ public class TestUtils { public static final Random SEEDED_RANDOM = new Random(192348092834L); public static final Random RANDOM = new Random(); - public static Cluster singletonCluster(Map topicPartitionCounts) { + public static Cluster singletonCluster(final Map topicPartitionCounts) { return clusterWith(1, topicPartitionCounts); } - public static Cluster singletonCluster(String topic, int partitions) { + public static Cluster singletonCluster(final String topic, final int partitions) { return clusterWith(1, topic, partitions); } - public static Cluster clusterWith(int nodes, Map topicPartitionCounts) { - Node[] ns = new Node[nodes]; + public static Cluster clusterWith(final int nodes, final Map topicPartitionCounts) { + final Node[] ns = new Node[nodes]; for (int i = 0; i < nodes; i++) ns[i] = new Node(i, "localhost", 1969); - List parts = new ArrayList<>(); - for (Map.Entry topicPartition : topicPartitionCounts.entrySet()) { - String topic = topicPartition.getKey(); - int partitions = topicPartition.getValue(); + final List parts = new ArrayList<>(); + for (final Map.Entry topicPartition : topicPartitionCounts.entrySet()) { + final String topic = topicPartition.getKey(); + final int partitions = topicPartition.getValue(); for (int i = 0; i < partitions; i++) parts.add(new PartitionInfo(topic, i, ns[i % ns.length], ns, ns)); } return new Cluster(asList(ns), parts, Collections.emptySet()); } - public static Cluster clusterWith(int nodes, String topic, int partitions) { + public static Cluster clusterWith(final int nodes, final String topic, final int partitions) { return clusterWith(nodes, Collections.singletonMap(topic, partitions)); } /** * Generate an array of random bytes - * + * * @param size The size of the array */ - public static byte[] randomBytes(int size) { - byte[] bytes = new byte[size]; + public static byte[] randomBytes(final int size) { + final byte[] bytes = new byte[size]; SEEDED_RANDOM.nextBytes(bytes); return bytes; } /** * Generate a random string of letters and digits of the given length - * + * * @param len The length of the string * @return The random string */ - public static String randomString(int len) { - StringBuilder b = new StringBuilder(); + public static String randomString(final int len) { + final StringBuilder b = new StringBuilder(); for (int i = 0; i < len; i++) b.append(LETTERS_AND_DIGITS.charAt(SEEDED_RANDOM.nextInt(LETTERS_AND_DIGITS.length()))); return b.toString(); @@ -104,7 +104,7 @@ public static String randomString(int len) { * suffix to generate its name. 
*/ public static File tempFile() throws IOException { - File file = File.createTempFile("kafka", ".tmp"); + final File file = File.createTempFile("kafka", ".tmp"); file.deleteOnExit(); return file; @@ -115,7 +115,7 @@ public static File tempFile() throws IOException { * * @param prefix The prefix of the temporary directory, if null using "kafka-" as default prefix */ - public static File tempDirectory(String prefix) throws IOException { + public static File tempDirectory(final String prefix) throws IOException { return tempDirectory(null, prefix); } @@ -125,10 +125,10 @@ public static File tempDirectory(String prefix) throws IOException { * @param parent The parent folder path name, if null using the default temporary-file directory * @param prefix The prefix of the temporary directory, if null using "kafka-" as default prefix */ - public static File tempDirectory(Path parent, String prefix) throws IOException { + public static File tempDirectory(final Path parent, final String prefix) throws IOException { final File file = parent == null ? - Files.createTempDirectory(prefix == null ? "kafka-" : prefix).toFile() : - Files.createTempDirectory(parent, prefix == null ? "kafka-" : prefix).toFile(); + Files.createTempDirectory(prefix == null ? "kafka-" : prefix).toFile() : + Files.createTempDirectory(parent, prefix == null ? "kafka-" : prefix).toFile(); file.deleteOnExit(); Runtime.getRuntime().addShutdownHook(new Thread() { @@ -141,4 +141,24 @@ public void run() { return file; } + /** + * Wait for condition to be met for at most {@code maxWaitMs} and throw assertion failure otherwise. + * This should be used instead of {@code Thread.sleep} whenever possible as it allows a longer timeout to be used + * without unnecessarily increasing test time (as the condition is checked frequently). The longer timeout is needed to + * avoid transient failures due to slow or overloaded machines. + */ + public static void waitForCondition(final TestCondition testCondition, final long maxWaitMs, String conditionDetails) throws InterruptedException { + final long startTime = System.currentTimeMillis(); + + + while (!testCondition.conditionMet() && ((System.currentTimeMillis() - startTime) < maxWaitMs)) { + Thread.sleep(Math.min(maxWaitMs, 100L)); + } + + if (!testCondition.conditionMet()) { + conditionDetails = conditionDetails != null ? conditionDetails : ""; + throw new AssertionError("Condition not met within timeout " + maxWaitMs + ". " + conditionDetails); + } + } + } diff --git a/core/src/main/scala/kafka/tools/StreamsResetter.java b/core/src/main/scala/kafka/tools/StreamsResetter.java index 8d9cd5e347db..7153790ab80b 100644 --- a/core/src/main/scala/kafka/tools/StreamsResetter.java +++ b/core/src/main/scala/kafka/tools/StreamsResetter.java @@ -90,7 +90,7 @@ public int run(final String[] args, final Properties config) { adminClient = AdminClient.createSimplePlaintext(this.options.valueOf(bootstrapServerOption)); final String groupId = this.options.valueOf(applicationIdOption); - if (adminClient.describeConsumerGroup(groupId).size() != 0) { + if (!adminClient.describeGroup(groupId).members().isEmpty()) { throw new IllegalStateException("Consumer group '" + groupId + "' is still active. 
" + "Make sure to stop all running application instances before running the reset tool."); } diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java index 8dd1f098dd7c..0e4129e09eaf 100644 --- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java +++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java @@ -16,6 +16,7 @@ */ package org.apache.kafka.streams.integration; +import kafka.admin.AdminClient; import kafka.tools.StreamsResetter; import kafka.utils.ZkUtils; import org.apache.kafka.clients.consumer.ConsumerConfig; @@ -37,8 +38,11 @@ import org.apache.kafka.streams.kstream.KeyValueMapper; import org.apache.kafka.streams.kstream.TimeWindows; import org.apache.kafka.streams.kstream.Windowed; +import org.apache.kafka.test.TestCondition; import org.apache.kafka.test.TestUtils; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -70,6 +74,10 @@ public class ResetIntegrationTest { private static final long STREAMS_CONSUMER_TIMEOUT = 2000L; private static final long CLEANUP_CONSUMER_TIMEOUT = 2000L; + private final WaitUntilConsumerGroupGotClosed consumerGroupInactive = new WaitUntilConsumerGroupGotClosed(); + + private AdminClient adminClient = null; + @BeforeClass public static void startKafkaCluster() throws Exception { CLUSTER.createTopic(INPUT_TOPIC); @@ -79,6 +87,19 @@ public static void startKafkaCluster() throws Exception { CLUSTER.createTopic(INTERMEDIATE_USER_TOPIC); } + @Before + public void prepare() { + this.adminClient = AdminClient.createSimplePlaintext(CLUSTER.bootstrapServers()); + } + + @After + public void cleanup() { + if (this.adminClient != null) { + this.adminClient.close(); + this.adminClient = null; + } + } + @Test public void testReprocessingFromScratchAfterReset() throws Exception { final Properties streamsConfiguration = prepareTest(); @@ -96,13 +117,16 @@ public void testReprocessingFromScratchAfterReset() throws Exception { final KeyValue result2 = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultTopicConsumerConfig, OUTPUT_TOPIC_2, 1).get(0); streams.close(); + TestUtils.waitForCondition(this.consumerGroupInactive, 5 * STREAMS_CONSUMER_TIMEOUT, + "Streams Application consumer group did not time out after " + (5 * STREAMS_CONSUMER_TIMEOUT) + " ms."); // RESET - Utils.sleep(STREAMS_CONSUMER_TIMEOUT); streams.cleanUp(); cleanGlobal(); + TestUtils.waitForCondition(this.consumerGroupInactive, 5 * CLEANUP_CONSUMER_TIMEOUT, + "Reset Tool consumer group did not time out after " + (5 * CLEANUP_CONSUMER_TIMEOUT) + " ms."); + assertInternalTopicsGotDeleted(); - Utils.sleep(CLEANUP_CONSUMER_TIMEOUT); // RE-RUN streams = new KafkaStreams(setupTopology(OUTPUT_TOPIC_2_RERUN), streamsConfiguration); @@ -184,12 +208,15 @@ public KeyValue apply(final Long key, final String value) { final KStream windowedCounts = input .through(INTERMEDIATE_USER_TOPIC) .map(new KeyValueMapper>() { + private long sleep = 1000; + @Override public KeyValue apply(final Long key, final String value) { // must sleep long enough to avoid processing the whole intermediate topic before application gets stopped // => want to test "skip over" unprocessed records // increasing the sleep time only has disadvantage that test run time is increased - Utils.sleep(1000); + Utils.sleep(this.sleep); + 
this.sleep *= 2; return new KeyValue<>(key, value); } }) @@ -253,4 +280,11 @@ private void assertInternalTopicsGotDeleted() { assertThat(allTopics, equalTo(expectedRemainingTopicsAfterCleanup)); } + private class WaitUntilConsumerGroupGotClosed implements TestCondition { + @Override + public boolean conditionMet() { + return ResetIntegrationTest.this.adminClient.describeGroup(APP_ID).members().isEmpty(); + } + } + } From 5a03416fe21f3e5f3425f5a62c35b0859d5baf6e Mon Sep 17 00:00:00 2001 From: Damian Guy Date: Thu, 1 Sep 2016 12:21:55 -0700 Subject: [PATCH 257/267] KAFKA-4112: Remove alpha quality label from Kafka Streams in docs --- docs/api.html | 15 ++++++++------- docs/streams.html | 6 +++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/api.html b/docs/api.html index c4572411c472..2cb9d8641044 100644 --- a/docs/api.html +++ b/docs/api.html @@ -168,19 +168,20 @@

                2.2.3 New Consumer API

                2.3 Streams API

-As of the 0.10.0 release we have added a new client library named Kafka Streams to let users implement their stream processing
-applications with data stored in Kafka topics. Kafka Streams is considered alpha quality and its public APIs are likely to change in
-future releases.
-You can use Kafka Streams by adding a dependency on the streams jar using
-the following example maven co-ordinates (you can change the version numbers with new releases):
+As of the 0.10.0 release we have added a stream processing engine to Apache Kafka called Kafka Streams, which is a client library that lets users implement their own stream processing applications for data stored in Kafka topics.
+You can use Kafka Streams from within your Java applications by adding a dependency on the kafka-streams jar using the following maven co-ordinates:
                 	<dependency>
                 	    <groupId>org.apache.kafka</groupId>
                 	    <artifactId>kafka-streams</artifactId>
                -	    <version>0.10.0.0</version>
                +	    <version>0.10.0.1</version>
                 	</dependency>
                 
Examples showing how to use this library are given in the
-javadocs (note those classes annotated with @InterfaceStability.Unstable, indicating their public APIs may change without backward-compatibility in future releases).
\ No newline at end of file
+javadocs and kafka streams overview.
+

+ Please note that Kafka Streams is a new component of Kafka, and its public APIs may change in future releases.
+ We use the @InterfaceStability.Unstable annotation to denote classes whose APIs may change without backward-compatibility in future releases.
+

                \ No newline at end of file diff --git a/docs/streams.html b/docs/streams.html index 82319753bd98..6b760104e3ae 100644 --- a/docs/streams.html +++ b/docs/streams.html @@ -260,7 +260,7 @@
                Create Source Streams
                     KStreamBuilder builder = new KStreamBuilder();
                     KStream source1 = builder.stream("topic1", "topic2");
-                    KTable source2 = builder.table("topic3");
+                    KTable source2 = builder.table("topic3", "stateStoreName");
                Transform a stream
                @@ -298,7 +298,7 @@
                
                Transform a stream
                     // written in Java 8+, using lambda expressions
                     KTable, Long> counts = source1.aggregateByKey(
                         () -> 0L,  // initial value
                         (aggKey, value, aggregate) -> aggregate + 1L,   // aggregating value
                         TimeWindows.of("counts",5000L).advanceBy(1000L), // intervals in milliseconds
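
As a companion to the DSL fragments above, here is a self-contained sketch (written for this document, not taken from the patch) of wiring and starting a simple 0.10.0-style topology. The application id, bootstrap/ZooKeeper addresses and topic names are placeholders.

    import java.util.Properties;
    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.KafkaStreams;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KStreamBuilder;

    public class SimpleStreamsExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-doc-example");   // placeholder
            props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // placeholder
            props.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");     // placeholder
            props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
            props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

            KStreamBuilder builder = new KStreamBuilder();
            // Read from two source topics and forward the upper-cased values to an output topic.
            KStream<String, String> source = builder.stream("topic1", "topic2");
            source.mapValues(value -> value.toUpperCase()).to("output-topic");

            KafkaStreams streams = new KafkaStreams(builder, props);
            streams.start();
        }
    }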
                
                From 068f93511a93917c3894feecfd0810248461985a Mon Sep 17 00:00:00 2001
                From: David Chen 
                Date: Wed, 14 Sep 2016 10:38:40 -0700
                Subject: [PATCH 258/267] KAFKA-4162: Fixed typo "rebalance"
                
                Author: David Chen 
                
                Reviewers: Ewen Cheslack-Postava 
                
                Closes #1853 from mvj3/KAFKA-4162
                
                (cherry picked from commit c1bce2d757c5a93bebc1abaf1304bf84375a4cc9)
                Signed-off-by: Ewen Cheslack-Postava 
                ---
                 README.md                                                       | 2 +-
                 .../kafka/connect/runtime/distributed/DistributedHerder.java    | 2 +-
                 2 files changed, 2 insertions(+), 2 deletions(-)
                
                diff --git a/README.md b/README.md
                index 91c09805e749..063defe9c53e 100644
                --- a/README.md
                +++ b/README.md
                @@ -15,7 +15,7 @@ Java 7 should be used for building in order to support both Java 7 and Java 8 at
                 Now everything else will work.
                 
                 ### Building a jar and running it ###
                -    ./gradlew jar  
                +    ./gradlew jar
                 
                 Follow instructions in http://kafka.apache.org/documentation.html#quickstart
                 
                diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
                index afabbeb1e37f..8eb4c58e0f01 100644
                --- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
                +++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
                @@ -1027,7 +1027,7 @@ public void onAssigned(ConnectProtocol.Assignment assignment, int generation) {
                                 rebalanceResolved = false;
                             }
                 
                -            // Delete the statuses of all connectors removed prior to the start of this reblaance. This has to
                +            // Delete the statuses of all connectors removed prior to the start of this rebalance. This has to
                             // be done after the rebalance completes to avoid race conditions as the previous generation attempts
                             // to change the state to UNASSIGNED after tasks have been stopped.
                             if (isLeader())
                
                From 3262ae29ae57966850b8dc064111ba30714b2f08 Mon Sep 17 00:00:00 2001
                From: Guozhang Wang 
                Date: Thu, 15 Sep 2016 08:42:21 -0700
                Subject: [PATCH 259/267] HOTFIX: set sourceNodes to null for selectKey
                
                To indicate its source topic is no longer guaranteed to be partitioned on key.
                
                Author: Guozhang Wang 
                
                Reviewers: Matthias J. Sax, Damian Guy, Eno Thereska
                
                Closes #1858 from guozhangwang/KHotfix-set-null-sourceNodes-selectKey
                ---
                 .../org/apache/kafka/streams/kstream/internals/KStreamImpl.java | 2 +-
                 1 file changed, 1 insertion(+), 1 deletion(-)
                
                diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
                index 91bcef94eb35..60d2366e8db8 100644
                --- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
                +++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
                @@ -132,7 +132,7 @@ public KeyValue apply(K key, V value) {
                                 return new KeyValue(mapper.apply(key, value), value);
                             }
                         }), this.name);
                -        return new KStreamImpl<>(topology, name, sourceNodes);
                +        return new KStreamImpl<>(topology, name, null);
                     }
                 
                     @Override
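
To make the consequence of this change concrete, a hypothetical usage sketch (not part of the patch; topic names and the key-extraction logic are invented):

    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KStreamBuilder;

    public class SelectKeyExample {
        public static void main(String[] args) {
            KStreamBuilder builder = new KStreamBuilder();
            KStream<String, String> orders = builder.stream("orders");   // partitioned by its original key

            // selectKey() re-keys each record, so the result is no longer guaranteed to be
            // partitioned on its key. With sourceNodes set to null, downstream key-based
            // operations (e.g. joins) can no longer assume co-partitioning with "orders";
            // an explicit through() over a properly partitioned topic may be needed first.
            KStream<String, String> byCustomer =
                    orders.selectKey((orderId, value) -> value.split(",")[0]);
            byCustomer.to("orders-by-customer");
        }
    }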
                
                From ea8c1b394205342e988e4afd318c6b8338d04887 Mon Sep 17 00:00:00 2001
                From: "Matthias J. Sax" 
                Date: Sat, 17 Sep 2016 22:45:29 +0100
                Subject: [PATCH 260/267] HOTFIX: changed quickstart donwload from 0.10.0.0 to
                 0.10.0.1
                
                Author: Matthias J. Sax 
                
                Reviewers: Ismael Juma 
                
                Closes #1869 from mjsax/hotfix-doc
                
                (cherry picked from commit b8683026ea7ebfddcec41274b12e8a579dc4ca71)
                Signed-off-by: Ismael Juma 
                ---
                 docs/quickstart.html | 6 +++---
                 1 file changed, 3 insertions(+), 3 deletions(-)
                
                diff --git a/docs/quickstart.html b/docs/quickstart.html
                index 6c090d0d619d..3784556c33a7 100644
                --- a/docs/quickstart.html
                +++ b/docs/quickstart.html
                @@ -21,11 +21,11 @@ 

                1.3 Quick Start

                Step 1: Download the code

-Download the 0.10.0.0 release and un-tar it.
+Download the 0.10.0.1 release and un-tar it.
                -> tar -xzf kafka_2.11-0.10.0.0.tgz
                -> cd kafka_2.11-0.10.0.0
                +> tar -xzf kafka_2.11-0.10.0.1.tgz
                +> cd kafka_2.11-0.10.0.1
                 

                Step 2: Start the server

                From ee6e50024d8a09960b2b72628c62c6532c09d84d Mon Sep 17 00:00:00 2001 From: Grant Henke Date: Sat, 17 Sep 2016 22:47:56 +0100 Subject: [PATCH 261/267] KAFKA-4157; Transient system test failure in replica_verification_test.test_replica_lags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …t.test_replica_lags Author: Grant Henke Reviewers: Ashish Singh , Ismael Juma Closes #1849 from granthenke/replica-verification-fix (cherry picked from commit b5bc1a1caaa8a4ad3b10bfdfa9c2250ef8e09b1d) Signed-off-by: Ismael Juma --- tests/kafkatest/services/replica_verification_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/kafkatest/services/replica_verification_tool.py b/tests/kafkatest/services/replica_verification_tool.py index 20332427f6a4..2f29d1631092 100644 --- a/tests/kafkatest/services/replica_verification_tool.py +++ b/tests/kafkatest/services/replica_verification_tool.py @@ -63,7 +63,7 @@ def get_lag_for_partition(self, topic, partition): partition: a partition of the topic """ topic_partition = topic + ',' + str(partition) - lag = self.partition_lag[topic_partition] + lag = self.partition_lag.get(topic_partition, -1) self.logger.debug("Retuning lag for {} as {}".format(topic_partition, lag)) return lag @@ -84,4 +84,4 @@ def stop_node(self, node): def clean_node(self, node): node.account.kill_process("java", clean_shutdown=False, allow_fail=True) - node.account.ssh("rm -rf /mnt/replica_verification_tool.log", allow_fail=False) \ No newline at end of file + node.account.ssh("rm -rf /mnt/replica_verification_tool.log", allow_fail=False) From 1e5bf02ab338ba05342709666e47d76bdb5670b0 Mon Sep 17 00:00:00 2001 From: Randall Hauch Date: Fri, 16 Sep 2016 14:55:46 -0700 Subject: [PATCH 262/267] KAFKA-4183; Corrected Kafka Connect's JSON Converter to properly convert from null to logical values The `JsonConverter` class has `LogicalTypeConverter` implementations for Date, Time, Timestamp, and Decimal, but these implementations fail when the input literal value (deserialized from the message) is null. Test cases were added to check for these cases, and these failed before the `LogicalTypeConverter` implementations were fixed to consider whether the schema has a default value or is optional, similarly to how the `JsonToConnectTypeConverter` implementations do this. Once the fixes were made, the new tests pass. 
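
An illustrative sketch of the behavior this fix enables (not taken from the patch; the topic name is a placeholder and the JSON literal mirrors the new dateToConnectOptional test): deserializing a null payload for an optional logical type now yields a null value instead of throwing.

    import java.util.Collections;
    import org.apache.kafka.connect.data.SchemaAndValue;
    import org.apache.kafka.connect.json.JsonConverter;

    public class NullLogicalValueExample {
        public static void main(String[] args) {
            JsonConverter converter = new JsonConverter();
            converter.configure(Collections.<String, Object>emptyMap(), false);   // configure as a value converter

            // Optional Date schema with a null payload.
            String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Date\", "
                    + "\"version\": 1, \"optional\": true }, \"payload\": null }";

            SchemaAndValue schemaAndValue = converter.toConnectData("example-topic", msg.getBytes());
            System.out.println(schemaAndValue.value());   // prints "null" once the fix is applied
        }
    }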
Author: Randall Hauch Reviewers: Shikhar Bhushan , Jason Gustafson Closes #1867 from rhauch/kafka-4183 --- .../kafka/connect/json/JsonConverter.java | 4 ++ .../kafka/connect/json/JsonConverterTest.java | 38 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java index d9a685953d2d..b35b24aeec38 100644 --- a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java +++ b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java @@ -211,6 +211,7 @@ public Object convert(Schema schema, JsonNode value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Decimal.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { + if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof byte[])) throw new DataException("Invalid type for Decimal, underlying representation should be bytes but was " + value.getClass()); return Decimal.toLogical(schema, (byte[]) value); @@ -220,6 +221,7 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Date.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { + if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Integer)) throw new DataException("Invalid type for Date, underlying representation should be int32 but was " + value.getClass()); return Date.toLogical(schema, (int) value); @@ -229,6 +231,7 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Time.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { + if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Integer)) throw new DataException("Invalid type for Time, underlying representation should be int32 but was " + value.getClass()); return Time.toLogical(schema, (int) value); @@ -238,6 +241,7 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Timestamp.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { + if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Long)) throw new DataException("Invalid type for Timestamp, underlying representation should be int64 but was " + value.getClass()); return Timestamp.toLogical(schema, (long) value); diff --git a/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java b/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java index c92328530677..7700f18c6dcb 100644 --- a/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java +++ b/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java @@ -54,6 +54,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -216,6 +217,16 @@ public void decimalToConnect() { assertEquals(reference, converted); } + @Test + public void decimalToConnectOptional() { + Schema schema = Decimal.builder(2).optional().schema(); + // Payload is base64 encoded byte[]{0, -100}, which is the two's complement encoding of 156. 
+ String msg = "{ \"schema\": { \"type\": \"bytes\", \"name\": \"org.apache.kafka.connect.data.Decimal\", \"version\": 1, \"optional\": true, \"parameters\": { \"scale\": \"2\" } }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertNull(schemaAndValue.value()); + } + @Test public void dateToConnect() { Schema schema = Date.SCHEMA; @@ -230,6 +241,15 @@ public void dateToConnect() { assertEquals(reference, converted); } + @Test + public void dateToConnectOptional() { + Schema schema = Date.builder().optional().schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Date\", \"version\": 1, \"optional\": true }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertNull(schemaAndValue.value()); + } + @Test public void timeToConnect() { Schema schema = Time.SCHEMA; @@ -244,6 +264,15 @@ public void timeToConnect() { assertEquals(reference, converted); } + @Test + public void timeToConnectOptional() { + Schema schema = Time.builder().optional().schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Time\", \"version\": 1, \"optional\": true }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertNull(schemaAndValue.value()); + } + @Test public void timestampToConnect() { Schema schema = Timestamp.SCHEMA; @@ -259,6 +288,15 @@ public void timestampToConnect() { assertEquals(reference, converted); } + @Test + public void timestampToConnectOptional() { + Schema schema = Timestamp.builder().optional().schema(); + String msg = "{ \"schema\": { \"type\": \"int64\", \"name\": \"org.apache.kafka.connect.data.Timestamp\", \"version\": 1, \"optional\": true }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertNull(schemaAndValue.value()); + } + // Schema metadata @Test From fbcff9b50788497ca548fdfb44c7dc1eb53af1ee Mon Sep 17 00:00:00 2001 From: Shikhar Bhushan Date: Fri, 16 Sep 2016 15:54:33 -0700 Subject: [PATCH 263/267] KAFKA-4173; SchemaProjector should successfully project missing Struct field when target field is optional Author: Shikhar Bhushan Reviewers: Konstantine Karantasis , Jason Gustafson Closes #1865 from shikhar/kafka-4173 --- .../kafka/connect/data/SchemaProjector.java | 12 +++++------ .../connect/data/SchemaProjectorTest.java | 21 +++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaProjector.java b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaProjector.java index ad0caf85f83f..6277e44c4e0d 100644 --- a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaProjector.java +++ b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaProjector.java @@ -111,14 +111,12 @@ private static Object projectStruct(Schema source, Struct sourceStruct, Schema t } catch (SchemaProjectorException e) { throw new SchemaProjectorException("Error projecting " + sourceField.name(), e); } + } else if (targetField.schema().isOptional()) { + // Ignore missing field + } else if (targetField.schema().defaultValue() != null) { + targetStruct.put(fieldName, 
targetField.schema().defaultValue()); } else { - Object targetDefault; - if (targetField.schema().defaultValue() != null) { - targetDefault = targetField.schema().defaultValue(); - } else { - throw new SchemaProjectorException("Cannot project " + source.schema() + " to " + target.schema()); - } - targetStruct.put(fieldName, targetDefault); + throw new SchemaProjectorException("Required field `" + fieldName + "` is missing from source schema: " + source); } } return targetStruct; diff --git a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaProjectorTest.java b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaProjectorTest.java index 0b1760be7a2a..101be0432267 100644 --- a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaProjectorTest.java +++ b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaProjectorTest.java @@ -469,6 +469,27 @@ public void testMaybeCompatible() throws Exception { } } + @Test + public void testProjectMissingDefaultValuedStructField() { + final Schema source = SchemaBuilder.struct().build(); + final Schema target = SchemaBuilder.struct().field("id", SchemaBuilder.int64().defaultValue(42L).build()).build(); + assertEquals(42L, (long) ((Struct) SchemaProjector.project(source, new Struct(source), target)).getInt64("id")); + } + + @Test + public void testProjectMissingOptionalStructField() { + final Schema source = SchemaBuilder.struct().build(); + final Schema target = SchemaBuilder.struct().field("id", SchemaBuilder.OPTIONAL_INT64_SCHEMA).build(); + assertEquals(null, ((Struct) SchemaProjector.project(source, new Struct(source), target)).getInt64("id")); + } + + @Test(expected = SchemaProjectorException.class) + public void testProjectMissingRequiredField() { + final Schema source = SchemaBuilder.struct().build(); + final Schema target = SchemaBuilder.struct().field("id", SchemaBuilder.INT64_SCHEMA).build(); + SchemaProjector.project(source, new Struct(source), target); + } + private void verifyOptionalProjection(Schema source, Type targetType, Object value, Object defaultValue, Object expectedProjected, boolean optional) { Schema target; assert source.isOptional(); From d37b036090ce296809774bfaa1739aab39c6eeb3 Mon Sep 17 00:00:00 2001 From: Shikhar Bhushan Date: Mon, 19 Sep 2016 12:49:38 -0700 Subject: [PATCH 264/267] KAFKA-4183; Centralize checking for optional and default values in JsonConverter Cleaner to just check once for optional & default value from the `convertToConnect()` function. It also helps address an issue with conversions for logical type schemas that have default values and null as the included value. That test case is _probably_ not an issue in practice, since when using the `JsonConverter` to serialize a missing field with a default value, it will serialize the default value for the field. But in the face of JSON data streaming in from a topic being [generous on input, strict on output](http://tedwise.com/2009/05/27/generous-on-input-strict-on-output) seems best. 
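A minimal sketch of the default-value case the centralized check now handles
(again assuming connect-json on the classpath; the topic and class names are
illustrative only):

    import java.nio.charset.StandardCharsets;
    import java.util.Collections;

    import org.apache.kafka.connect.data.SchemaAndValue;
    import org.apache.kafka.connect.json.JsonConverter;

    public class LogicalDefaultValueSketch {
        public static void main(String[] args) {
            JsonConverter converter = new JsonConverter();
            converter.configure(Collections.<String, Object>emptyMap(), false);

            // A required Date logical type with a default of 0 and a null payload,
            // mirroring the dateToConnectWithDefaultValue test added in this patch.
            String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Date\","
                    + " \"version\": 1, \"default\": 0 }, \"payload\": null }";

            SchemaAndValue result = converter.toConnectData("example-topic", msg.getBytes(StandardCharsets.UTF_8));
            // convertToConnect() now returns the schema's default, already converted to the
            // logical type, i.e. new java.util.Date(0).
            System.out.println(result.value());
        }
    }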
Author: Shikhar Bhushan Reviewers: Randall Hauch , Jason Gustafson Closes #1872 from shikhar/kafka-4183 --- .../kafka/connect/json/JsonConverter.java | 41 +++------- .../kafka/connect/json/JsonConverterTest.java | 79 ++++++++++++++++++- 2 files changed, 88 insertions(+), 32 deletions(-) diff --git a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java index b35b24aeec38..55871add927d 100644 --- a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java +++ b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java @@ -58,61 +58,46 @@ public class JsonConverter implements Converter { private static final HashMap TO_CONNECT_CONVERTERS = new HashMap<>(); - private static Object checkOptionalAndDefault(Schema schema) { - if (schema.defaultValue() != null) - return schema.defaultValue(); - if (schema.isOptional()) - return null; - throw new DataException("Invalid null value for required field"); - } - static { TO_CONNECT_CONVERTERS.put(Schema.Type.BOOLEAN, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.booleanValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.INT8, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return (byte) value.intValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.INT16, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return (short) value.intValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.INT32, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.intValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.INT64, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.longValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.FLOAT32, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.floatValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.FLOAT64, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.doubleValue(); } }); @@ -120,7 +105,6 @@ public Object convert(Schema schema, JsonNode value) { @Override public Object convert(Schema schema, JsonNode value) { try { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.binaryValue(); } catch (IOException e) { throw new DataException("Invalid bytes field", e); @@ -130,15 +114,12 @@ public Object convert(Schema schema, JsonNode value) { TO_CONNECT_CONVERTERS.put(Schema.Type.STRING, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); return value.textValue(); } }); TO_CONNECT_CONVERTERS.put(Schema.Type.ARRAY, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) 
return checkOptionalAndDefault(schema); - Schema elemSchema = schema == null ? null : schema.valueSchema(); ArrayList result = new ArrayList<>(); for (JsonNode elem : value) { @@ -150,8 +131,6 @@ public Object convert(Schema schema, JsonNode value) { TO_CONNECT_CONVERTERS.put(Schema.Type.MAP, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); - Schema keySchema = schema == null ? null : schema.keySchema(); Schema valueSchema = schema == null ? null : schema.valueSchema(); @@ -185,8 +164,6 @@ public Object convert(Schema schema, JsonNode value) { TO_CONNECT_CONVERTERS.put(Schema.Type.STRUCT, new JsonToConnectTypeConverter() { @Override public Object convert(Schema schema, JsonNode value) { - if (value.isNull()) return checkOptionalAndDefault(schema); - if (!value.isObject()) throw new DataException("Structs should be encoded as JSON objects, but found " + value.getNodeType()); @@ -211,7 +188,6 @@ public Object convert(Schema schema, JsonNode value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Decimal.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { - if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof byte[])) throw new DataException("Invalid type for Decimal, underlying representation should be bytes but was " + value.getClass()); return Decimal.toLogical(schema, (byte[]) value); @@ -221,7 +197,6 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Date.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { - if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Integer)) throw new DataException("Invalid type for Date, underlying representation should be int32 but was " + value.getClass()); return Date.toLogical(schema, (int) value); @@ -231,7 +206,6 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Time.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { - if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Integer)) throw new DataException("Invalid type for Time, underlying representation should be int32 but was " + value.getClass()); return Time.toLogical(schema, (int) value); @@ -241,7 +215,6 @@ public Object convert(Schema schema, Object value) { TO_CONNECT_LOGICAL_CONVERTERS.put(Timestamp.LOGICAL_NAME, new LogicalTypeConverter() { @Override public Object convert(Schema schema, Object value) { - if (value == null) return checkOptionalAndDefault(schema); if (!(value instanceof Long)) throw new DataException("Invalid type for Timestamp, underlying representation should be int64 but was " + value.getClass()); return Timestamp.toLogical(schema, (long) value); @@ -688,10 +661,16 @@ else if (value instanceof ByteBuffer) private static Object convertToConnect(Schema schema, JsonNode jsonValue) { - JsonToConnectTypeConverter typeConverter; final Schema.Type schemaType; if (schema != null) { schemaType = schema.type(); + if (jsonValue.isNull()) { + if (schema.defaultValue() != null) + return schema.defaultValue(); // any logical type conversions should already have been applied + if (schema.isOptional()) + return null; + throw new DataException("Invalid null value for required " + schemaType + " field"); + } } else { switch (jsonValue.getNodeType()) { case NULL: @@ -724,9 
+703,10 @@ private static Object convertToConnect(Schema schema, JsonNode jsonValue) { break; } } - typeConverter = TO_CONNECT_CONVERTERS.get(schemaType); + + final JsonToConnectTypeConverter typeConverter = TO_CONNECT_CONVERTERS.get(schemaType); if (typeConverter == null) - throw new DataException("Unknown schema type: " + schema.type()); + throw new DataException("Unknown schema type: " + String.valueOf(schemaType)); Object converted = typeConverter.convert(schema, jsonValue); if (schema != null && schema.name() != null) { @@ -737,7 +717,6 @@ private static Object convertToConnect(Schema schema, JsonNode jsonValue) { return converted; } - private interface JsonToConnectTypeConverter { Object convert(Schema schema, JsonNode value); } diff --git a/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java b/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java index 7700f18c6dcb..4a6ae2281a16 100644 --- a/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java +++ b/connect/json/src/test/java/org/apache/kafka/connect/json/JsonConverterTest.java @@ -220,13 +220,32 @@ public void decimalToConnect() { @Test public void decimalToConnectOptional() { Schema schema = Decimal.builder(2).optional().schema(); - // Payload is base64 encoded byte[]{0, -100}, which is the two's complement encoding of 156. String msg = "{ \"schema\": { \"type\": \"bytes\", \"name\": \"org.apache.kafka.connect.data.Decimal\", \"version\": 1, \"optional\": true, \"parameters\": { \"scale\": \"2\" } }, \"payload\": null }"; SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); assertEquals(schema, schemaAndValue.schema()); assertNull(schemaAndValue.value()); } + @Test + public void decimalToConnectWithDefaultValue() { + BigDecimal reference = new BigDecimal(new BigInteger("156"), 2); + Schema schema = Decimal.builder(2).defaultValue(reference).build(); + String msg = "{ \"schema\": { \"type\": \"bytes\", \"name\": \"org.apache.kafka.connect.data.Decimal\", \"version\": 1, \"default\": \"AJw=\", \"parameters\": { \"scale\": \"2\" } }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + + @Test + public void decimalToConnectOptionalWithDefaultValue() { + BigDecimal reference = new BigDecimal(new BigInteger("156"), 2); + Schema schema = Decimal.builder(2).optional().defaultValue(reference).build(); + String msg = "{ \"schema\": { \"type\": \"bytes\", \"name\": \"org.apache.kafka.connect.data.Decimal\", \"version\": 1, \"optional\": true, \"default\": \"AJw=\", \"parameters\": { \"scale\": \"2\" } }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + @Test public void dateToConnect() { Schema schema = Date.SCHEMA; @@ -250,6 +269,26 @@ public void dateToConnectOptional() { assertNull(schemaAndValue.value()); } + @Test + public void dateToConnectWithDefaultValue() { + java.util.Date reference = new java.util.Date(0); + Schema schema = Date.builder().defaultValue(reference).schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Date\", \"version\": 1, \"default\": 0 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + 
assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + + @Test + public void dateToConnectOptionalWithDefaultValue() { + java.util.Date reference = new java.util.Date(0); + Schema schema = Date.builder().optional().defaultValue(reference).schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Date\", \"version\": 1, \"optional\": true, \"default\": 0 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + @Test public void timeToConnect() { Schema schema = Time.SCHEMA; @@ -273,6 +312,26 @@ public void timeToConnectOptional() { assertNull(schemaAndValue.value()); } + @Test + public void timeToConnectWithDefaultValue() { + java.util.Date reference = new java.util.Date(0); + Schema schema = Time.builder().defaultValue(reference).schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Time\", \"version\": 1, \"default\": 0 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + + @Test + public void timeToConnectOptionalWithDefaultValue() { + java.util.Date reference = new java.util.Date(0); + Schema schema = Time.builder().optional().defaultValue(reference).schema(); + String msg = "{ \"schema\": { \"type\": \"int32\", \"name\": \"org.apache.kafka.connect.data.Time\", \"version\": 1, \"optional\": true, \"default\": 0 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(reference, schemaAndValue.value()); + } + @Test public void timestampToConnect() { Schema schema = Timestamp.SCHEMA; @@ -297,6 +356,24 @@ public void timestampToConnectOptional() { assertNull(schemaAndValue.value()); } + @Test + public void timestampToConnectWithDefaultValue() { + Schema schema = Timestamp.builder().defaultValue(new java.util.Date(42)).schema(); + String msg = "{ \"schema\": { \"type\": \"int64\", \"name\": \"org.apache.kafka.connect.data.Timestamp\", \"version\": 1, \"default\": 42 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(new java.util.Date(42), schemaAndValue.value()); + } + + @Test + public void timestampToConnectOptionalWithDefaultValue() { + Schema schema = Timestamp.builder().optional().defaultValue(new java.util.Date(42)).schema(); + String msg = "{ \"schema\": { \"type\": \"int64\", \"name\": \"org.apache.kafka.connect.data.Timestamp\", \"version\": 1, \"optional\": true, \"default\": 42 }, \"payload\": null }"; + SchemaAndValue schemaAndValue = converter.toConnectData(TOPIC, msg.getBytes()); + assertEquals(schema, schemaAndValue.schema()); + assertEquals(new java.util.Date(42), schemaAndValue.value()); + } + // Schema metadata @Test From 64acd90859af0c3ec0a1b7a042a95e53fe2ea6df Mon Sep 17 00:00:00 2001 From: Shikhar Bhushan Date: Mon, 29 Aug 2016 19:08:52 -0700 Subject: [PATCH 265/267] KAFKA-4100: Ensure 'fields' and 'fieldsByName' are not null for Struct schemas Author: Shikhar Bhushan Reviewers: Ewen Cheslack-Postava Closes #1800 from shikhar/kafka-4100 --- 
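A short sketch of the guarantee this change adds (requires only connect-api; the
class name is illustrative):

    import org.apache.kafka.connect.data.Schema;
    import org.apache.kafka.connect.data.SchemaBuilder;
    import org.apache.kafka.connect.data.Struct;

    public class EmptyStructSketch {
        public static void main(String[] args) {
            Schema emptyStruct = SchemaBuilder.struct().build();
            // fields() is now an empty list rather than null for field-less STRUCT schemas...
            System.out.println(emptyStruct.fields().size()); // 0
            // ...so a Struct can be built from such a schema without a NullPointerException.
            new Struct(emptyStruct);
        }
    }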
.../org/apache/kafka/connect/data/ConnectSchema.java | 10 ++++++---- .../org/apache/kafka/connect/data/SchemaBuilder.java | 5 +++-- .../apache/kafka/connect/data/ConnectSchemaTest.java | 7 +++++++ .../apache/kafka/connect/data/SchemaBuilderTest.java | 11 ++++++++++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/connect/api/src/main/java/org/apache/kafka/connect/data/ConnectSchema.java b/connect/api/src/main/java/org/apache/kafka/connect/data/ConnectSchema.java index 591644c1cbd3..d1fd9cdaa7aa 100644 --- a/connect/api/src/main/java/org/apache/kafka/connect/data/ConnectSchema.java +++ b/connect/api/src/main/java/org/apache/kafka/connect/data/ConnectSchema.java @@ -22,6 +22,7 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -106,12 +107,13 @@ public ConnectSchema(Type type, boolean optional, Object defaultValue, String na this.doc = doc; this.parameters = parameters; - this.fields = fields; - if (this.fields != null && this.type == Type.STRUCT) { - this.fieldsByName = new HashMap<>(); - for (Field field : fields) + if (this.type == Type.STRUCT) { + this.fields = fields == null ? Collections.emptyList() : fields; + this.fieldsByName = new HashMap<>(this.fields.size()); + for (Field field : this.fields) fieldsByName.put(field.name(), field); } else { + this.fields = null; this.fieldsByName = null; } diff --git a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java index 32045f965122..2ec1cf3d2a1a 100644 --- a/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java +++ b/connect/api/src/main/java/org/apache/kafka/connect/data/SchemaBuilder.java @@ -78,6 +78,9 @@ public class SchemaBuilder implements Schema { private SchemaBuilder(Type type) { this.type = type; + if (type == Type.STRUCT) { + fields = new ArrayList<>(); + } } // Common/metadata fields @@ -317,8 +320,6 @@ public static SchemaBuilder struct() { public SchemaBuilder field(String fieldName, Schema fieldSchema) { if (type != Type.STRUCT) throw new SchemaBuilderException("Cannot create fields on type " + type); - if (fields == null) - fields = new ArrayList<>(); int fieldIndex = fields.size(); fields.add(new Field(fieldName, fieldIndex, fieldSchema)); return this; diff --git a/connect/api/src/test/java/org/apache/kafka/connect/data/ConnectSchemaTest.java b/connect/api/src/test/java/org/apache/kafka/connect/data/ConnectSchemaTest.java index 4388ade9f21b..f5c6e2fc9805 100644 --- a/connect/api/src/test/java/org/apache/kafka/connect/data/ConnectSchemaTest.java +++ b/connect/api/src/test/java/org/apache/kafka/connect/data/ConnectSchemaTest.java @@ -300,4 +300,11 @@ public void testStructEquality() { assertNotEquals(s1, differentField); } + @Test + public void testEmptyStruct() { + final ConnectSchema emptyStruct = new ConnectSchema(Schema.Type.STRUCT, false, null, null, null, null); + assertEquals(0, emptyStruct.fields().size()); + new Struct(emptyStruct); + } + } diff --git a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java index fdbaa0ae15e2..37cc47c73d8e 100644 --- a/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java +++ b/connect/api/src/test/java/org/apache/kafka/connect/data/SchemaBuilderTest.java @@ -283,7 +283,16 @@ public 
void testMapBuilderInvalidDefault() { .defaultValue(defMap).build(); } - + @Test + public void testEmptyStruct() { + final SchemaBuilder emptyStructSchemaBuilder = SchemaBuilder.struct(); + assertEquals(0, emptyStructSchemaBuilder.fields().size()); + new Struct(emptyStructSchemaBuilder); + + final Schema emptyStructSchema = emptyStructSchemaBuilder.build(); + assertEquals(0, emptyStructSchema.fields().size()); + new Struct(emptyStructSchema); + } private void assertTypeAndDefault(Schema schema, Schema.Type type, boolean optional, Object defaultValue) { assertEquals(type, schema.type()); From 8bbb0b216fe2573c1f9716bcdb1021daf784968a Mon Sep 17 00:00:00 2001 From: Guozhang Wang Date: Wed, 28 Sep 2016 02:31:58 +0100 Subject: [PATCH 266/267] HOTFIX: Remove git conflict markers in streams.html Author: Guozhang Wang Reviewers: Ismael Juma Closes #1922 from guozhangwang/KHotfix-streams-html --- docs/streams.html | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/streams.html b/docs/streams.html index 6b760104e3ae..7321ca9cedaf 100644 --- a/docs/streams.html +++ b/docs/streams.html @@ -298,11 +298,8 @@
                 Transform a stream
                     // written in Java 8+, using lambda expressions
                -<<<<<<< HEAD
                +
                     KTable, Long> counts = source1.aggregateByKey(
                -=======
                -    KTable, Long> counts = source1.groupByKey().aggregate(
                ->>>>>>> 268cff7... KAFKA-4112: Remove alpha quality label from Kafka Streams in docs
                         () -> 0L,  // initial value
                         (aggKey, value, aggregate) -> aggregate + 1L,   // aggregating value
                         TimeWindows.of("counts",5000L).advanceBy(1000L), // intervals in milliseconds
                
                From fe57fe6943338b136e722e85fa9c860646a06da7 Mon Sep 17 00:00:00 2001
                From: Ismael Juma 
                Date: Wed, 28 Sep 2016 19:15:00 -0700
                Subject: [PATCH 267/267] MINOR: Set JVM parameters for the Gradle Test
                 executor processes
                
                We suspect that the test suite hangs we have been seeing are
                due to PermGen exhaustion. It is a common reason for
                hard JVM lock-ups.
                
                Author: Ismael Juma 
                
                Reviewers: Jason Gustafson 
                
                Closes #1926 from ijuma/test-jvm-params
                
                (cherry picked from commit 67e99d0869dd49358d7ca549ac715b722fda89f5)
                Signed-off-by: Jason Gustafson 
                ---
                 build.gradle | 12 +++++++++++-
                 1 file changed, 11 insertions(+), 1 deletion(-)
                
                diff --git a/build.gradle b/build.gradle
                index d46c8b315559..a24a786ee5ae 100644
                --- a/build.gradle
                +++ b/build.gradle
                @@ -71,6 +71,10 @@ ext {
                   gradleVersion = "2.13"
                   buildVersionFileName = "kafka-version.properties"
                 
                +  maxPermSizeArgs = []
                +  if (!JavaVersion.current().isJava8Compatible())
                +    maxPermSizeArgs = ['-XX:MaxPermSize=512m']
                +
                   userMaxForks = project.hasProperty('maxParallelForks') ? maxParallelForks.toInteger() : null
                 
                   skipSigning = project.hasProperty('skipSigning') && skipSigning.toBoolean()
                @@ -157,11 +161,17 @@ subprojects {
                 
                   test {
                     maxParallelForks = userMaxForks ?: Runtime.runtime.availableProcessors()
                +
                +    minHeapSize = "256m"
                +    maxHeapSize = "2048m"
                +    jvmArgs = maxPermSizeArgs
                +
                     testLogging {
                       events = userTestLoggingEvents ?: ["passed", "skipped", "failed"]
                       showStandardStreams = userShowStandardStreams ?: false
                       exceptionFormat = 'full'
                     }
                +
                   }
                 
                   jar {
                @@ -244,7 +254,7 @@ subprojects {
                 
                     configure(scalaCompileOptions.forkOptions) {
                       memoryMaximumSize = '1g'
                -      jvmArgs = ['-XX:MaxPermSize=512m', '-Xss2m']
                +      jvmArgs = ['-Xss2m'] + maxPermSizeArgs
                     }
                   }
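                 One way to confirm that the forked test JVMs actually receive these flags is a
                 throwaway JUnit test like the sketch below (class and method names are
                 illustrative); run it with the existing showStandardStreams project property
                 enabled so its output is printed:

                     import java.lang.management.ManagementFactory;

                     import org.junit.Test;

                     public class TestJvmArgsCheck {
                         @Test
                         public void printJvmArgs() {
                             // When executed by the Gradle test executor, this prints the forked JVM's
                             // startup flags, which should now include -Xms256m, -Xmx2048m and, on
                             // pre-Java-8 JVMs, -XX:MaxPermSize=512m.
                             for (String arg : ManagementFactory.getRuntimeMXBean().getInputArguments())
                                 System.out.println(arg);
                         }
                     }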