From ed8207bee29a49d666f9bb6e9e35199ec92d2ff1 Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Wed, 14 Dec 2016 14:23:21 -0500 Subject: [PATCH] NIFI-3198: Refactored how PublishKafka and PublishKafka_0_10 work to improve throughput and resilience. Fixed bug in StreamDemarcator. Slight refactoring of consume processors to simplify code. --- .../io/exception/TokenTooLargeException.java | 26 + .../nifi/stream/io/util/StreamDemarcator.java | 39 +- .../stream/io/util/StreamDemarcatorTest.java | 84 ++- .../util/StandardProcessorTestRunner.java | 31 +- .../java/org/apache/nifi/util/TestRunner.java | 16 +- .../kafka/pubsub/ConsumeKafka_0_10.java | 315 +++------ .../kafka/pubsub/ConsumerLease.java | 367 +++++++++- .../processors/kafka/pubsub/ConsumerPool.java | 287 ++++---- .../kafka/pubsub/InFlightMessageTracker.java | 178 +++++ .../kafka/pubsub/KafkaProcessorUtils.java | 37 +- .../kafka/pubsub/KafkaPublisher.java | 236 ------- .../kafka/pubsub/PublishKafka_0_10.java | 646 +++++++----------- .../kafka/pubsub/PublishResult.java | 56 ++ .../kafka/pubsub/PublisherLease.java | 132 ++++ .../kafka/pubsub/PublisherPool.java | 98 +++ .../kafka/pubsub/PublishingContext.java | 124 ---- .../kafka/pubsub/ConsumeKafkaTest.java | 548 ++------------- .../kafka/pubsub/ConsumerPoolTest.java | 172 +++-- .../kafka/pubsub/KafkaPublisherTest.java | 306 --------- .../kafka/pubsub/PublishKafkaTest.java | 375 ---------- .../kafka/pubsub/PublishingContextTest.java | 91 --- .../kafka/pubsub/StubPublishKafka.java | 143 ---- .../pubsub/TestInFlightMessageTracker.java | 87 +++ .../kafka/pubsub/TestPublishKafka.java | 262 +++++++ .../kafka/pubsub/TestPublisherLease.java | 194 ++++++ .../kafka/pubsub/TestPublisherPool.java | 68 ++ .../nifi/processors/kafka/KafkaPublisher.java | 4 +- .../processors/kafka/pubsub/ConsumeKafka.java | 313 +++------ .../kafka/pubsub/ConsumerLease.java | 367 +++++++++- .../processors/kafka/pubsub/ConsumerPool.java | 288 ++++---- .../kafka/pubsub/InFlightMessageTracker.java | 
178 +++++ .../kafka/pubsub/KafkaProcessorUtils.java | 37 +- .../kafka/pubsub/KafkaPublisher.java | 236 ------- .../processors/kafka/pubsub/PublishKafka.java | 641 +++++++---------- .../kafka/pubsub/PublishResult.java | 56 ++ .../kafka/pubsub/PublisherLease.java | 132 ++++ .../kafka/pubsub/PublisherPool.java | 98 +++ .../kafka/pubsub/PublishingContext.java | 124 ---- .../kafka/pubsub/ConsumeKafkaTest.java | 555 ++------------- .../kafka/pubsub/ConsumerPoolTest.java | 172 +++-- .../kafka/pubsub/KafkaPublisherTest.java | 306 --------- .../kafka/pubsub/PublishKafkaTest.java | 375 ---------- .../kafka/pubsub/PublishingContextTest.java | 91 --- .../kafka/pubsub/StubPublishKafka.java | 144 ---- .../pubsub/TestInFlightMessageTracker.java | 87 +++ .../kafka/pubsub/TestPublishKafka.java | 262 +++++++ .../kafka/pubsub/TestPublisherLease.java | 194 ++++++ .../kafka/pubsub/TestPublisherPool.java | 68 ++ 48 files changed, 4338 insertions(+), 5308 deletions(-) create mode 100644 nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/exception/TokenTooLargeException.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java delete mode 100644 
nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java create mode 100644 
nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java delete mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java create mode 100644 nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/exception/TokenTooLargeException.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/exception/TokenTooLargeException.java new file mode 100644 index 000000000000..7024f34ccf60 --- /dev/null 
+++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/exception/TokenTooLargeException.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.stream.io.exception; + +import java.io.IOException; + +public class TokenTooLargeException extends IOException { + public TokenTooLargeException(final String message) { + super(message); + } +} diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/StreamDemarcator.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/StreamDemarcator.java index 3064f1c4a84a..840bdb04a195 100644 --- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/StreamDemarcator.java +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/util/StreamDemarcator.java @@ -16,9 +16,12 @@ */ package org.apache.nifi.stream.io.util; +import java.io.Closeable; import java.io.IOException; import java.io.InputStream; +import org.apache.nifi.stream.io.exception.TokenTooLargeException; + /** * The StreamDemarcator class takes an input stream and demarcates * it so it could be read (see {@link #nextToken()}) as individual byte[] @@ -26,7 +29,7 @@ * stream will be read into a 
single token which may result in * {@link OutOfMemoryError} if stream is too large. */ -public class StreamDemarcator { +public class StreamDemarcator implements Closeable { private final static int INIT_BUFFER_SIZE = 8192; @@ -95,8 +98,10 @@ public StreamDemarcator(InputStream is, byte[] delimiterBytes, int maxDataSize, /** * Will read the next data token from the {@link InputStream} returning null * when it reaches the end of the stream. + * + * @throws IOException if unable to read from the stream */ - public byte[] nextToken() { + public byte[] nextToken() throws IOException { byte[] data = null; int j = 0; @@ -126,8 +131,10 @@ public byte[] nextToken() { /** * Will fill the current buffer from current 'index' position, expanding it * and or shuffling it if necessary + * + * @throws IOException if unable to read from the stream */ - private void fill() { + private void fill() throws IOException { if (this.index >= this.buffer.length) { if (this.mark == 0) { // expand byte[] newBuff = new byte[this.buffer.length + this.initialBufferSize]; @@ -138,23 +145,20 @@ private void fill() { System.arraycopy(this.buffer, this.mark, this.buffer, 0, length); this.index = length; this.mark = 0; + this.readAheadLength = length; } } - try { - int bytesRead; - do { - bytesRead = this.is.read(this.buffer, this.index, this.buffer.length - this.index); - } while (bytesRead == 0); + int bytesRead; + do { + bytesRead = this.is.read(this.buffer, this.index, this.buffer.length - this.index); + } while (bytesRead == 0); - if (bytesRead != -1) { - this.readAheadLength = this.index + bytesRead; - if (this.readAheadLength > this.maxDataSize) { - throw new IllegalStateException("Maximum allowed data size of " + this.maxDataSize + " exceeded."); - } + if (bytesRead != -1) { + this.readAheadLength = this.index + bytesRead; + if (this.readAheadLength > this.maxDataSize) { + throw new TokenTooLargeException("A message in the stream exceeds the maximum allowed message size of " + 
this.maxDataSize + " bytes."); } - } catch (IOException e) { - throw new IllegalStateException("Failed while reading InputStream", e); } } @@ -188,4 +192,9 @@ private void validateInput(InputStream is, byte[] delimiterBytes, int maxDataSiz throw new IllegalArgumentException("'delimiterBytes' is an optional argument, but when provided its length must be > 0"); } } + + @Override + public void close() throws IOException { + is.close(); + } } diff --git a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/StreamDemarcatorTest.java b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/StreamDemarcatorTest.java index 93082a2b7010..66d266848a04 100644 --- a/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/StreamDemarcatorTest.java +++ b/nifi-commons/nifi-utils/src/test/java/org/apache/nifi/stream/io/util/StreamDemarcatorTest.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.mock; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -65,7 +66,7 @@ public void validateInitializationFailure() { } @Test - public void validateNoDelimiter() { + public void validateNoDelimiter() throws IOException { String data = "Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, null, 1000); @@ -76,7 +77,7 @@ public void validateNoDelimiter() { } @Test - public void validateNoDelimiterSmallInitialBuffer() { + public void validateNoDelimiterSmallInitialBuffer() throws IOException { String data = "Learn from yesterday, live for today, hope for tomorrow. 
The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, null, 1000, 1); @@ -84,7 +85,7 @@ public void validateNoDelimiterSmallInitialBuffer() { } @Test - public void validateSingleByteDelimiter() { + public void validateSingleByteDelimiter() throws IOException { String data = "Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, ",".getBytes(StandardCharsets.UTF_8), 1000); @@ -95,7 +96,7 @@ public void validateSingleByteDelimiter() { } @Test - public void validateDelimiterAtTheBeginning() { + public void validateDelimiterAtTheBeginning() throws IOException { String data = ",Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, ",".getBytes(StandardCharsets.UTF_8), 1000); @@ -106,7 +107,7 @@ public void validateDelimiterAtTheBeginning() { } @Test - public void validateEmptyDelimiterSegments() { + public void validateEmptyDelimiterSegments() throws IOException { String data = ",,,,,Learn from yesterday, live for today, hope for tomorrow. 
The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, ",".getBytes(StandardCharsets.UTF_8), 1000); @@ -117,7 +118,7 @@ public void validateEmptyDelimiterSegments() { } @Test - public void validateSingleByteDelimiterSmallInitialBuffer() { + public void validateSingleByteDelimiterSmallInitialBuffer() throws IOException { String data = "Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, ",".getBytes(StandardCharsets.UTF_8), 1000, 2); @@ -128,7 +129,7 @@ public void validateSingleByteDelimiterSmallInitialBuffer() { } @Test - public void validateWithMultiByteDelimiter() { + public void validateWithMultiByteDelimiter() throws IOException { String data = "foodaabardaabazzz"; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, "daa".getBytes(StandardCharsets.UTF_8), 1000); @@ -139,7 +140,7 @@ public void validateWithMultiByteDelimiter() { } @Test - public void validateWithMultiByteDelimiterAtTheBeginning() { + public void validateWithMultiByteDelimiterAtTheBeginning() throws IOException { String data = "daafoodaabardaabazzz"; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, "daa".getBytes(StandardCharsets.UTF_8), 1000); @@ -150,7 +151,7 @@ public void validateWithMultiByteDelimiterAtTheBeginning() { } @Test - public void validateWithMultiByteDelimiterSmallInitialBuffer() { + public void validateWithMultiByteDelimiterSmallInitialBuffer() throws IOException { String data = "foodaabarffdaabazz"; ByteArrayInputStream is = new 
ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, "daa".getBytes(StandardCharsets.UTF_8), 1000, 1); @@ -161,7 +162,7 @@ public void validateWithMultiByteDelimiterSmallInitialBuffer() { } @Test - public void validateWithMultiByteCharsNoDelimiter() { + public void validateWithMultiByteCharsNoDelimiter() throws IOException { String data = "僠THIS IS MY NEW TEXT.僠IT HAS A NEWLINE."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, null, 1000); @@ -172,7 +173,7 @@ public void validateWithMultiByteCharsNoDelimiter() { } @Test - public void validateWithMultiByteCharsNoDelimiterSmallInitialBuffer() { + public void validateWithMultiByteCharsNoDelimiterSmallInitialBuffer() throws IOException { String data = "僠THIS IS MY NEW TEXT.僠IT HAS A NEWLINE."; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); StreamDemarcator scanner = new StreamDemarcator(is, null, 1000, 2); @@ -183,7 +184,7 @@ public void validateWithMultiByteCharsNoDelimiterSmallInitialBuffer() { } @Test - public void validateWithComplexDelimiter() { + public void validateWithComplexDelimiter() throws IOException { String data = "THIS IS MY TEXTTHIS IS MY NEW TEXTTHIS IS MY NEWEST TEXT"; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes()); StreamDemarcator scanner = new StreamDemarcator(is, "".getBytes(StandardCharsets.UTF_8), 1000); @@ -193,8 +194,8 @@ public void validateWithComplexDelimiter() { assertNull(scanner.nextToken()); } - @Test(expected = IllegalStateException.class) - public void validateMaxBufferSize() { + @Test(expected = IOException.class) + public void validateMaxBufferSize() throws IOException { String data = "THIS IS MY TEXTTHIS IS MY NEW TEXTTHIS IS MY NEWEST TEXT"; ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes()); StreamDemarcator scanner = new StreamDemarcator(is, 
"".getBytes(StandardCharsets.UTF_8), 20); @@ -202,7 +203,7 @@ public void validateMaxBufferSize() { } @Test - public void validateScannerHandlesNegativeOneByteInputsNoDelimiter() { + public void validateScannerHandlesNegativeOneByteInputsNoDelimiter() throws IOException { ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 0, 0, 0, 0, -1, 0, 0, 0 }); StreamDemarcator scanner = new StreamDemarcator(is, null, 20); byte[] b = scanner.nextToken(); @@ -210,7 +211,7 @@ public void validateScannerHandlesNegativeOneByteInputsNoDelimiter() { } @Test - public void validateScannerHandlesNegativeOneByteInputs() { + public void validateScannerHandlesNegativeOneByteInputs() throws IOException { ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 0, 0, 0, 0, -1, 0, 0, 0 }); StreamDemarcator scanner = new StreamDemarcator(is, "water".getBytes(StandardCharsets.UTF_8), 20, 1024); byte[] b = scanner.nextToken(); @@ -218,10 +219,59 @@ public void validateScannerHandlesNegativeOneByteInputs() { } @Test - public void verifyScannerHandlesNegativeOneByteDelimiter() { + public void verifyScannerHandlesNegativeOneByteDelimiter() throws IOException { ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 0, 0, 0, 0, -1, 0, 0, 0 }); StreamDemarcator scanner = new StreamDemarcator(is, new byte[] { -1 }, 20, 1024); assertArrayEquals(scanner.nextToken(), new byte[] { 0, 0, 0, 0 }); assertArrayEquals(scanner.nextToken(), new byte[] { 0, 0, 0 }); } + + @Test + public void testWithoutTrailingDelimiter() throws IOException { + final byte[] inputData = "Larger Message First\nSmall".getBytes(StandardCharsets.UTF_8); + ByteArrayInputStream is = new ByteArrayInputStream(inputData); + StreamDemarcator scanner = new StreamDemarcator(is, "\n".getBytes(), 1000); + + final byte[] first = scanner.nextToken(); + final byte[] second = scanner.nextToken(); + assertNotNull(first); + assertNotNull(second); + + assertEquals("Larger Message First", new String(first, 
StandardCharsets.UTF_8)); + assertEquals("Small", new String(second, StandardCharsets.UTF_8)); + } + + @Test + public void testOnBufferSplitNoTrailingDelimiter() throws IOException { + final byte[] inputData = "Yes\nNo".getBytes(StandardCharsets.UTF_8); + ByteArrayInputStream is = new ByteArrayInputStream(inputData); + StreamDemarcator scanner = new StreamDemarcator(is, "\n".getBytes(), 1000, 3); + + final byte[] first = scanner.nextToken(); + final byte[] second = scanner.nextToken(); + assertNotNull(first); + assertNotNull(second); + + assertArrayEquals(first, new byte[] {'Y', 'e', 's'}); + assertArrayEquals(second, new byte[] {'N', 'o'}); + } + + @Test + public void testOnBufferSplit() throws IOException { + final byte[] inputData = "123\n456\n789".getBytes(StandardCharsets.UTF_8); + ByteArrayInputStream is = new ByteArrayInputStream(inputData); + StreamDemarcator scanner = new StreamDemarcator(is, "\n".getBytes(), 1000, 3); + + final byte[] first = scanner.nextToken(); + final byte[] second = scanner.nextToken(); + final byte[] third = scanner.nextToken(); + assertNotNull(first); + assertNotNull(second); + assertNotNull(third); + + assertArrayEquals(first, new byte[] {'1', '2', '3'}); + assertArrayEquals(second, new byte[] {'4', '5', '6'}); + assertArrayEquals(third, new byte[] {'7', '8', '9'}); + } + } diff --git a/nifi-mock/src/main/java/org/apache/nifi/util/StandardProcessorTestRunner.java b/nifi-mock/src/main/java/org/apache/nifi/util/StandardProcessorTestRunner.java index 69118dbccb2a..e1de2b9ea342 100644 --- a/nifi-mock/src/main/java/org/apache/nifi/util/StandardProcessorTestRunner.java +++ b/nifi-mock/src/main/java/org/apache/nifi/util/StandardProcessorTestRunner.java @@ -368,54 +368,55 @@ public void enqueue(final FlowFile... 
flowFiles) { } @Override - public void enqueue(final Path path) throws IOException { - enqueue(path, new HashMap()); + public MockFlowFile enqueue(final Path path) throws IOException { + return enqueue(path, new HashMap()); } @Override - public void enqueue(final Path path, final Map attributes) throws IOException { + public MockFlowFile enqueue(final Path path, final Map attributes) throws IOException { final Map modifiedAttributes = new HashMap<>(attributes); if (!modifiedAttributes.containsKey(CoreAttributes.FILENAME.key())) { modifiedAttributes.put(CoreAttributes.FILENAME.key(), path.toFile().getName()); } try (final InputStream in = Files.newInputStream(path)) { - enqueue(in, modifiedAttributes); + return enqueue(in, modifiedAttributes); } } @Override - public void enqueue(final byte[] data) { - enqueue(data, new HashMap()); + public MockFlowFile enqueue(final byte[] data) { + return enqueue(data, new HashMap()); } @Override - public void enqueue(final String data) { - enqueue(data.getBytes(StandardCharsets.UTF_8), Collections. emptyMap()); + public MockFlowFile enqueue(final String data) { + return enqueue(data.getBytes(StandardCharsets.UTF_8), Collections. 
emptyMap()); } @Override - public void enqueue(final byte[] data, final Map attributes) { - enqueue(new ByteArrayInputStream(data), attributes); + public MockFlowFile enqueue(final byte[] data, final Map attributes) { + return enqueue(new ByteArrayInputStream(data), attributes); } @Override - public void enqueue(final String data, final Map attributes) { - enqueue(data.getBytes(StandardCharsets.UTF_8), attributes); + public MockFlowFile enqueue(final String data, final Map attributes) { + return enqueue(data.getBytes(StandardCharsets.UTF_8), attributes); } @Override - public void enqueue(final InputStream data) { - enqueue(data, new HashMap()); + public MockFlowFile enqueue(final InputStream data) { + return enqueue(data, new HashMap()); } @Override - public void enqueue(final InputStream data, final Map attributes) { + public MockFlowFile enqueue(final InputStream data, final Map attributes) { final MockProcessSession session = new MockProcessSession(new SharedSessionState(processor, idGenerator), processor); MockFlowFile flowFile = session.create(); flowFile = session.importFrom(data, flowFile); flowFile = session.putAllAttributes(flowFile, attributes); enqueue(flowFile); + return flowFile; } @Override diff --git a/nifi-mock/src/main/java/org/apache/nifi/util/TestRunner.java b/nifi-mock/src/main/java/org/apache/nifi/util/TestRunner.java index 78d4d008f8e9..5d8b494aab06 100644 --- a/nifi-mock/src/main/java/org/apache/nifi/util/TestRunner.java +++ b/nifi-mock/src/main/java/org/apache/nifi/util/TestRunner.java @@ -382,7 +382,7 @@ public interface TestRunner { * @param path to read content from * @throws IOException if unable to read content */ - void enqueue(Path path) throws IOException; + MockFlowFile enqueue(Path path) throws IOException; /** * Reads the content from the given {@link Path} into memory and creates a @@ -393,7 +393,7 @@ public interface TestRunner { * @param attributes attributes to use for new flow file * @throws IOException if unable to read 
content */ - void enqueue(Path path, Map attributes) throws IOException; + MockFlowFile enqueue(Path path, Map attributes) throws IOException; /** * Copies the content from the given byte array into memory and creates a @@ -402,7 +402,7 @@ public interface TestRunner { * * @param data to enqueue */ - void enqueue(byte[] data); + MockFlowFile enqueue(byte[] data); /** * Creates a FlowFile with the content set to the given string (in UTF-8 format), with no attributes, @@ -410,7 +410,7 @@ public interface TestRunner { * * @param data to enqueue */ - void enqueue(String data); + MockFlowFile enqueue(String data); /** * Copies the content from the given byte array into memory and creates a @@ -420,7 +420,7 @@ public interface TestRunner { * @param data to enqueue * @param attributes to use for enqueued item */ - void enqueue(byte[] data, Map attributes); + MockFlowFile enqueue(byte[] data, Map attributes); /** * Creates a FlowFile with the content set to the given string (in UTF-8 format), with the given attributes, @@ -429,7 +429,7 @@ public interface TestRunner { * @param data to enqueue * @param attributes to use for enqueued item */ - void enqueue(String data, Map attributes); + MockFlowFile enqueue(String data, Map attributes); /** * Reads the content from the given {@link InputStream} into memory and @@ -438,7 +438,7 @@ public interface TestRunner { * * @param data to source data from */ - void enqueue(InputStream data); + MockFlowFile enqueue(InputStream data); /** * Reads the content from the given {@link InputStream} into memory and @@ -448,7 +448,7 @@ public interface TestRunner { * @param data source of data * @param attributes to use for flow files */ - void enqueue(InputStream data, Map attributes); + MockFlowFile enqueue(InputStream data, Map attributes); /** * Copies the contents of the given {@link MockFlowFile} into a byte array diff --git 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_0_10.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_0_10.java index 847f8a451632..e859f94ce55b 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_0_10.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_0_10.java @@ -21,17 +21,14 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; -import javax.xml.bind.DatatypeConverter; import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; @@ -39,13 +36,12 @@ import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.lifecycle.OnStopped; +import org.apache.nifi.annotation.lifecycle.OnUnscheduled; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import 
org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; @@ -53,17 +49,18 @@ import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.util.StandardValidators; -import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.SECURITY_PROTOCOL; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING; -@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 0.10 Consumer API. " +@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 0.10.x Consumer API. " + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" - + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the mean time" - + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on.") -@Tags({"Kafka", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "0.10"}) + + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the meantime" + + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on. 
The complementary NiFi processor for sending messages is PublishKafka_0_10.") +@Tags({"Kafka", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "0.10.x"}) @WritesAttributes({ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_COUNT, description = "The number of messages written if more than one"), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_KEY, description = "The key of message if present and if single message. " - + "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."), + + "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_OFFSET, description = "The offset of the message in the partition of the topic."), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_PARTITION, description = "The partition of the topic the message or message bundle is from"), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_TOPIC, description = "The topic the message or message bundle is from") @@ -75,22 +72,16 @@ + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. 
") public class ConsumeKafka_0_10 extends AbstractProcessor { - private static final long FIVE_MB = 5L * 1024L * 1024L; - static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset"); static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset"); static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group"); - static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); - static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", - "The key is interpreted as arbitrary binary data and is encoded using hexadecimal characters with uppercase letters"); - static final PropertyDescriptor TOPICS = new PropertyDescriptor.Builder() .name("topic") .displayName("Topic Name(s)") - .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma seperated.") + .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.") .required(true) .addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(true) @@ -136,6 +127,7 @@ public class ConsumeKafka_0_10 extends AbstractProcessor { + "will result in a single FlowFile which " + "time it is triggered. 
To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on the OS") .build(); + static final PropertyDescriptor MAX_POLL_RECORDS = new PropertyDescriptor.Builder() .name("max.poll.records") .displayName("Max Poll Records") @@ -145,6 +137,20 @@ public class ConsumeKafka_0_10 extends AbstractProcessor { .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .build(); + static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new PropertyDescriptor.Builder() + .name("max-uncommit-offset-wait") + .displayName("Max Uncommitted Time") + .description("Specifies the maximum amount of time allowed to pass before offsets must be committed. " + + "This value impacts how often offsets will be committed. Committing offsets less often increases " + + "throughput but also increases the window of potential data duplication in the event of a rebalance " + + "or JVM restart between commits. This value is also related to maximum poll records and the use " + + "of a message demarcator. When using a message demarcator we can have far more uncommitted messages " + + "than when we're not as there is much less for us to keep track of in memory.") + .required(false) + .defaultValue("1 secs") + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .build(); + static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("FlowFiles received from Kafka. 
Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.") @@ -153,8 +159,8 @@ public class ConsumeKafka_0_10 extends AbstractProcessor { static final List DESCRIPTORS; static final Set RELATIONSHIPS; - private volatile byte[] demarcatorBytes = null; private volatile ConsumerPool consumerPool = null; + private final Set activeLeases = Collections.synchronizedSet(new HashSet<>()); static { List descriptors = new ArrayList<>(); @@ -165,6 +171,7 @@ public class ConsumeKafka_0_10 extends AbstractProcessor { descriptors.add(KEY_ATTRIBUTE_ENCODING); descriptors.add(MESSAGE_DEMARCATOR); descriptors.add(MAX_POLL_RECORDS); + descriptors.add(MAX_UNCOMMITTED_TIME); DESCRIPTORS = Collections.unmodifiableList(descriptors); RELATIONSHIPS = Collections.singleton(REL_SUCCESS); } @@ -179,16 +186,8 @@ protected List getSupportedPropertyDescriptors() { return DESCRIPTORS; } - @OnScheduled - public void prepareProcessing(final ProcessContext context) { - this.demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).isSet() - ? context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8) - : null; - } - @OnStopped public void close() { - demarcatorBytes = null; final ConsumerPool pool = consumerPool; consumerPool = null; if (pool != null) { @@ -215,9 +214,21 @@ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) return pool; } - final Map props = new HashMap<>(); + return consumerPool = createConsumerPool(context, getLogger()); + } + + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + final int maxLeases = context.getMaxConcurrentTasks(); + final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS); + final byte[] demarcator = context.getProperty(ConsumeKafka_0_10.MESSAGE_DEMARCATOR).isSet() + ? 
context.getProperty(ConsumeKafka_0_10.MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8) + : null; + final Map props = new HashMap<>(); KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props); - final String topicListing = context.getProperty(TOPICS).evaluateAttributeExpressions().getValue(); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + final String topicListing = context.getProperty(ConsumeKafka_0_10.TOPICS).evaluateAttributeExpressions().getValue(); final List topics = new ArrayList<>(); for (final String topic : topicListing.split(",", 100)) { final String trimmedName = topic.trim(); @@ -225,212 +236,78 @@ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) topics.add(trimmedName); } } - props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); - props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); - return consumerPool = createConsumerPool(context.getMaxConcurrentTasks(), topics, props, getLogger()); - } + final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue(); + final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(); + final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).getValue(); - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return new ConsumerPool(maxLeases, topics, props, log); + return new ConsumerPool(maxLeases, demarcator, props, topics, maxUncommittedTime, keyEncoding, securityProtocol, 
bootstrapServers, log); } - @Override - public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { - final long startTimeNanos = System.nanoTime(); - final ConsumerPool pool = getConsumerPool(context); - if (pool == null) { - context.yield(); - return; - } - final Map>> partitionRecordMap = new HashMap<>(); - - try (final ConsumerLease lease = pool.obtainConsumer()) { + @OnUnscheduled + public void interruptActiveThreads() { + // There are known issues with the Kafka client library that result in the client code hanging + // indefinitely when unable to communicate with the broker. In order to address this, we will wait + // up to 30 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the + // thread to wakeup when it is blocked, waiting on a response. + final long nanosToWait = TimeUnit.SECONDS.toNanos(5L); + final long start = System.nanoTime(); + while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) { try { - if (lease == null) { - context.yield(); - return; - } - - final boolean foundData = gatherDataFromKafka(lease, partitionRecordMap, context); - if (!foundData) { - session.rollback(); - return; - } - - writeSessionData(context, session, partitionRecordMap, startTimeNanos); - //At-least once commit handling (if order is reversed it is at-most once) - session.commit(); - commitOffsets(lease, partitionRecordMap); - } catch (final KafkaException ke) { - lease.poison(); - getLogger().error("Problem while accessing kafka consumer " + ke, ke); - context.yield(); - session.rollback(); + Thread.sleep(100L); + } catch (final InterruptedException ie) { + Thread.currentThread().interrupt(); + return; } } - } - private void commitOffsets(final ConsumerLease lease, final Map>> partitionRecordMap) { - final Map partOffsetMap = new HashMap<>(); - partitionRecordMap.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .forEach((entry) -> { - long maxOffset = 
entry.getValue().stream() - .mapToLong(record -> record.offset()) - .max() - .getAsLong(); - partOffsetMap.put(entry.getKey(), new OffsetAndMetadata(maxOffset + 1L)); - }); - lease.commitOffsets(partOffsetMap); - } + if (!activeLeases.isEmpty()) { + int count = 0; + for (final ConsumerLease lease : activeLeases) { + getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", new Object[] {lease}); + lease.wakeup(); + count++; + } - private void writeSessionData( - final ProcessContext context, final ProcessSession session, - final Map>> partitionRecordMap, - final long startTimeNanos) { - if (demarcatorBytes != null) { - partitionRecordMap.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .forEach(entry -> { - writeData(context, session, entry.getValue(), startTimeNanos); - }); - } else { - partitionRecordMap.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .flatMap(entry -> entry.getValue().stream()) - .forEach(record -> { - writeData(context, session, Collections.singletonList(record), startTimeNanos); - }); + getLogger().info("Woke up {} consumers", new Object[] {count}); } - } - private String encodeKafkaKey(final byte[] key, final String encoding) { - if (key == null) { - return null; - } + activeLeases.clear(); + } - if (HEX_ENCODING.getValue().equals(encoding)) { - return DatatypeConverter.printHexBinary(key); - } else if (UTF8_ENCODING.getValue().equals(encoding)) { - return new String(key, StandardCharsets.UTF_8); - } else { - return null; // won't happen because it is guaranteed by the Allowable Values + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + final ConsumerPool pool = getConsumerPool(context); + if (pool == null) { + context.yield(); + return; } - } - private void writeData(final ProcessContext context, final ProcessSession session, final List> records, final long startTimeNanos) { - final 
ConsumerRecord firstRecord = records.get(0); - final String offset = String.valueOf(firstRecord.offset()); - final String keyValue = encodeKafkaKey(firstRecord.key(), context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue()); - final String topic = firstRecord.topic(); - final String partition = String.valueOf(firstRecord.partition()); - FlowFile flowFile = session.create(); - flowFile = session.write(flowFile, out -> { - boolean useDemarcator = false; - for (final ConsumerRecord record : records) { - if (useDemarcator) { - out.write(demarcatorBytes); - } - out.write(record.value()); - useDemarcator = true; + try (final ConsumerLease lease = pool.obtainConsumer(session)) { + if (lease == null) { + context.yield(); + return; } - }); - final Map kafkaAttrs = new HashMap<>(); - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, offset); - if (keyValue != null && records.size() == 1) { - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, keyValue); - } - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, partition); - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, topic); - if (records.size() > 1) { - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(records.size())); - } - flowFile = session.putAllAttributes(flowFile, kafkaAttrs); - final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNanos); - final String transitUri = KafkaProcessorUtils.buildTransitURI( - context.getProperty(SECURITY_PROTOCOL).getValue(), - context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).getValue(), - topic); - session.getProvenanceReporter().receive(flowFile, transitUri, executionDurationMillis); - this.getLogger().debug("Created {} containing {} messages from Kafka topic {}, partition {}, starting offset {} in {} millis", - new Object[]{flowFile, records.size(), topic, partition, offset, executionDurationMillis}); - session.transfer(flowFile, REL_SUCCESS); - } - /** - * Populates the given partitionRecordMap with new records until we 
poll - * that returns no records or until we have enough data. It is important to - * ensure we keep items grouped by their topic and partition so that when we - * bundle them we bundle them intelligently and so that we can set offsets - * properly even across multiple poll calls. - */ - private boolean gatherDataFromKafka(final ConsumerLease lease, final Map>> partitionRecordMap, ProcessContext context) { - final long startNanos = System.nanoTime(); - boolean foundData = false; - ConsumerRecords records; - final int maxRecords = context.getProperty(MAX_POLL_RECORDS).asInteger(); - do { - records = lease.poll(); - - for (final TopicPartition partition : records.partitions()) { - List> currList = partitionRecordMap.get(partition); - if (currList == null) { - currList = new ArrayList<>(); - partitionRecordMap.put(partition, currList); + activeLeases.add(lease); + try { + while (this.isScheduled() && lease.continuePolling()) { + lease.poll(); } - currList.addAll(records.records(partition)); - if (currList.size() > 0) { - foundData = true; + if (this.isScheduled() && !lease.commit()) { + context.yield(); } + } catch (final WakeupException we) { + getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. " + + "Will roll back session and discard any partially received data.", new Object[] {lease}); + } catch (final KafkaException kex) { + getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}", + new Object[]{lease, kex}, kex); + } catch (final Throwable t) { + getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}", + new Object[]{lease, t}, t); + } finally { + activeLeases.remove(lease); } - //If we received data and we still want to get more - } while (!records.isEmpty() && !checkIfGatheredEnoughData(partitionRecordMap, maxRecords, startNanos)); - return foundData; - } - - /** - * Determines if we have enough data as-is and should move on. 
- * - * @return true if we've been gathering for more than 500 ms or if we're - * demarcating and have more than 50 flowfiles worth or if we're per message - * and have more than 2000 flowfiles or if totalMessageSize is greater than - * two megabytes; false otherwise - * - * Implementation note: 500 millis and 5 MB are magic numbers. These may - * need to be tuned. They get at how often offsets will get committed to - * kafka relative to how many records will get buffered into memory in a - * poll call before writing to repos. - */ - private boolean checkIfGatheredEnoughData(final Map>> partitionRecordMap, final int maxRecords, final long startTimeNanos) { - - final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNanos); - - if (durationMillis > 500) { - return true; - } - - int topicPartitionsFilled = 0; - int totalRecords = 0; - long totalRecordSize = 0; - - for (final List> recordList : partitionRecordMap.values()) { - if (!recordList.isEmpty()) { - topicPartitionsFilled++; - } - totalRecords += recordList.size(); - for (final ConsumerRecord rec : recordList) { - totalRecordSize += rec.value().length; - } - } - - if (demarcatorBytes != null && demarcatorBytes.length > 0) { - return topicPartitionsFilled > 50; - } else if (totalRecordSize > FIVE_MB) { - return true; - } else { - return totalRecords > maxRecords; } } - } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index b954eba344c1..97ebfc6ee7d1 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -17,11 
+17,28 @@ package org.apache.nifi.processors.kafka.pubsub; import java.io.Closeable; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; +import javax.xml.bind.DatatypeConverter; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; +import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessSession; +import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_10.REL_SUCCESS; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING; /** * This class represents a lease to access a Kafka Consumer object. The lease is @@ -30,15 +47,108 @@ * the lease will be returned to the pool for future use by others. A given * lease may only belong to a single thread a time. 
*/ -public interface ConsumerLease extends Closeable { +public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListener { + + private final long maxWaitMillis; + private final Consumer kafkaConsumer; + private final ComponentLog logger; + private final byte[] demarcatorBytes; + private final String keyEncoding; + private final String securityProtocol; + private final String bootstrapServers; + private boolean poisoned = false; + //used for tracking demarcated flowfiles to their TopicPartition so we can append + //to them on subsequent poll calls + private final Map bundleMap = new HashMap<>(); + private final Map uncommittedOffsetsMap = new HashMap<>(); + private long leaseStartNanos = -1; + private boolean lastPollEmpty = false; + private int totalFlowFiles = 0; + + ConsumerLease( + final long maxWaitMillis, + final Consumer kafkaConsumer, + final byte[] demarcatorBytes, + final String keyEncoding, + final String securityProtocol, + final String bootstrapServers, + final ComponentLog logger) { + this.maxWaitMillis = maxWaitMillis; + this.kafkaConsumer = kafkaConsumer; + this.demarcatorBytes = demarcatorBytes; + this.keyEncoding = keyEncoding; + this.securityProtocol = securityProtocol; + this.bootstrapServers = bootstrapServers; + this.logger = logger; + } + + /** + * clears out internal state elements excluding session and consumer as + * those are managed by the pool itself + */ + private void resetInternalState() { + bundleMap.clear(); + uncommittedOffsetsMap.clear(); + leaseStartNanos = -1; + lastPollEmpty = false; + totalFlowFiles = 0; + } /** - * Executes a poll on the underlying Kafka Consumer. + * Kafka will call this method whenever it is about to rebalance the + * consumers for the given partitions. We'll simply take this to mean that + * we need to quickly commit what we've got and will return the consumer to + * the pool. 
This method will be called during the poll() method call of + * this class and will be called by the same thread calling poll according + * to the Kafka API docs. After this method executes the session and kafka + * offsets are committed and this lease is closed. * - * @return ConsumerRecords retrieved in the poll. - * @throws KafkaException if issue occurs talking to underlying resource. + * @param partitions partitions being reassigned + */ + @Override + public void onPartitionsRevoked(final Collection partitions) { + logger.debug("Rebalance Alert: Paritions '{}' revoked for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer}); + //force a commit here. Can reuse the session and consumer after this but must commit now to avoid duplicates if kafka reassigns partition + commit(); + } + + /** + * This will be called by Kafka when the rebalance has completed. We don't + * need to do anything with this information other than optionally log it as + * by this point we've committed what we've got and moved on. + * + * @param partitions topic partition set being reassigned + */ + @Override + public void onPartitionsAssigned(final Collection partitions) { + logger.debug("Rebalance Alert: Paritions '{}' assigned for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer}); + } + + /** + * Executes a poll on the underlying Kafka Consumer and creates any new + * flowfiles necessary or appends to existing ones if in demarcation mode. */ - ConsumerRecords poll() throws KafkaException; + void poll() { + /** + * Implementation note: If we take too long (30 secs?) between kafka + * poll calls and our own record processing to any subsequent poll calls + * or the commit we can run into a situation where the commit will + * succeed to the session but fail on committing offsets. This is + * apparently different than the Kafka scenario of electing to rebalance + * for other reasons but in this case is due a session timeout. 
It + * appears Kafka KIP-62 aims to offer more control over the meaning of + * various timeouts. If we do run into this case it could result in + * duplicates. + */ + try { + final ConsumerRecords records = kafkaConsumer.poll(10); + lastPollEmpty = records.count() == 0; + processRecords(records); + } catch (final Throwable t) { + this.poison(); + throw t; + } + } /** * Notifies Kafka to commit the offsets for the specified topic/partition @@ -47,22 +157,251 @@ public interface ConsumerLease extends Closeable { * kafka client to collect more data from Kafka before committing the * offsets. * - * @param offsets offsets - * @throws KafkaException if issue occurs talking to underlying resource. + * if false then we didn't do anything and should probably yield if true + * then we committed new data + * */ - void commitOffsets(Map offsets) throws KafkaException; + boolean commit() { + if (uncommittedOffsetsMap.isEmpty()) { + resetInternalState(); + return false; + } + try { + /** + * Committing the nifi session then the offsets means we have an at + * least once guarantee here. If we reversed the order we'd have at + * most once. + */ + final Collection bundledFlowFiles = getBundles(); + if (!bundledFlowFiles.isEmpty()) { + getProcessSession().transfer(bundledFlowFiles, REL_SUCCESS); + } + getProcessSession().commit(); + kafkaConsumer.commitSync(uncommittedOffsetsMap); + resetInternalState(); + return true; + } catch (final KafkaException kex) { + poison(); + logger.warn("Duplicates are likely as we were able to commit the process" + + " session but received an exception from Kafka while committing" + + " offsets."); + throw kex; + } catch (final Throwable t) { + poison(); + throw t; + } + } /** - * Notifies that this lease is poisoned and should not be reused. + * Indicates whether we should continue polling for data. 
If we are not + * writing data with a demarcator then we're writing individual flow files + * per kafka message therefore we must be very mindful of memory usage for + * the flow file objects (not their content) being held in memory. The + * content of kafka messages will be written to the content repository + * immediately upon each poll call but we must still be mindful of how much + * memory can be used in each poll call. We will indicate that we should + * stop polling our last poll call produced no new results or if we've + * polling and processing data longer than the specified maximum polling + * time or if we have reached out specified max flow file limit or if a + * rebalance has been initiated for one of the partitions we're watching; + * otherwise true. + * + * @return true if should keep polling; false otherwise */ - void poison(); + boolean continuePolling() { + //stop if the last poll produced new no data + if (lastPollEmpty) { + return false; + } + + //stop if we've gone past our desired max uncommitted wait time + if (leaseStartNanos < 0) { + leaseStartNanos = System.nanoTime(); + } + final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos); + if (durationMillis > maxWaitMillis) { + return false; + } + + //stop if we've generated enough flowfiles that we need to be concerned about memory usage for the objects + if (bundleMap.size() > 200) { //a magic number - the number of simultaneous bundles to track + return false; + } else { + return totalFlowFiles < 15000;//admittedlly a magic number - good candidate for processor property + } + } /** - * Notifies that this lease is to be returned. The pool may optionally reuse - * this lease with another client. No further references by the caller - * should occur after calling close. + * Indicates that the underlying session and consumer should be immediately + * considered invalid. 
Once closed the session will be rolled back and the + pool should destroy the underlying consumer. This is useful if due to + external reasons, such as the processor no longer being scheduled, this + lease should be terminated immediately. + */ + private void poison() { + poisoned = true; + } + + /** + * @return true if this lease has been poisoned; false otherwise + */ + boolean isPoisoned() { + return poisoned; + } + + /** + * Trigger the consumer's {@link KafkaConsumer#wakeup() wakeup()} method. + */ + public void wakeup() { + kafkaConsumer.wakeup(); + } + + /** + * Abstract method that is intended to be extended by the pool that created + * this ConsumerLease object. It should ensure that the session given to + * create this lease is rolled back and that the underlying kafka consumer + * is either returned to the pool for continued use or destroyed if this + * lease has been poisoned. It can only be called once. Calling it more than + * once can result in undefined and non-threadsafe behavior. 
*/ @Override - void close(); + public void close() { + resetInternalState(); + } + + public abstract ProcessSession getProcessSession(); + + private void processRecords(final ConsumerRecords records) { + + records.partitions().stream().forEach(partition -> { + List> messages = records.records(partition); + if (!messages.isEmpty()) { + //update maximum offset map for this topic partition + long maxOffset = messages.stream() + .mapToLong(record -> record.offset()) + .max() + .getAsLong(); + uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L)); + + //write records to content repository and session + if (demarcatorBytes == null) { + totalFlowFiles += messages.size(); + messages.stream().forEach(message -> { + writeData(getProcessSession(), message, partition); + }); + } else { + writeData(getProcessSession(), messages, partition); + } + } + }); + } + + private static String encodeKafkaKey(final byte[] key, final String encoding) { + if (key == null) { + return null; + } + + if (HEX_ENCODING.getValue().equals(encoding)) { + return DatatypeConverter.printHexBinary(key); + } else if (UTF8_ENCODING.getValue().equals(encoding)) { + return new String(key, StandardCharsets.UTF_8); + } else { + return null; // won't happen because it is guaranteed by the Allowable Values + } + } + + private Collection getBundles() { + final List flowFiles = new ArrayList<>(); + for (final BundleTracker tracker : bundleMap.values()) { + populateAttributes(tracker); + flowFiles.add(tracker.flowFile); + } + return flowFiles; + } + + private void writeData(final ProcessSession session, ConsumerRecord record, final TopicPartition topicPartition) { + FlowFile flowFile = session.create(); + final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding); + tracker.incrementRecordCount(1); + flowFile = session.write(flowFile, out -> { + out.write(record.value()); + }); + tracker.updateFlowFile(flowFile); + populateAttributes(tracker); + 
session.transfer(tracker.flowFile, REL_SUCCESS); + } + + private void writeData(final ProcessSession session, final List> records, final TopicPartition topicPartition) { + final ConsumerRecord firstRecord = records.get(0); + final boolean demarcateFirstRecord; + BundleTracker tracker = bundleMap.get(topicPartition); + FlowFile flowFile; + if (tracker == null) { + tracker = new BundleTracker(firstRecord, topicPartition, keyEncoding); + flowFile = session.create(); + tracker.updateFlowFile(flowFile); + demarcateFirstRecord = false; //have not yet written records for this topic/partition in this lease + } else { + demarcateFirstRecord = true; //have already been writing records for this topic/partition in this lease + } + flowFile = tracker.flowFile; + tracker.incrementRecordCount(records.size()); + flowFile = session.append(flowFile, out -> { + boolean useDemarcator = demarcateFirstRecord; + for (final ConsumerRecord record : records) { + if (useDemarcator) { + out.write(demarcatorBytes); + } + out.write(record.value()); + useDemarcator = true; + } + }); + tracker.updateFlowFile(flowFile); + bundleMap.put(topicPartition, tracker); + } + + private void populateAttributes(final BundleTracker tracker) { + final Map kafkaAttrs = new HashMap<>(); + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(tracker.initialOffset)); + if (tracker.key != null && tracker.totalRecords == 1) { + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, tracker.key); + } + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(tracker.partition)); + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, tracker.topic); + if (tracker.totalRecords > 1) { + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(tracker.totalRecords)); + } + final FlowFile newFlowFile = getProcessSession().putAllAttributes(tracker.flowFile, kafkaAttrs); + final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos); + final String transitUri = 
KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, tracker.topic); + getProcessSession().getProvenanceReporter().receive(newFlowFile, transitUri, executionDurationMillis); + tracker.updateFlowFile(newFlowFile); + } + + private static class BundleTracker { + + final long initialOffset; + final int partition; + final String topic; + final String key; + FlowFile flowFile; + long totalRecords = 0; + + private BundleTracker(final ConsumerRecord initialRecord, final TopicPartition topicPartition, final String keyEncoding) { + this.initialOffset = initialRecord.offset(); + this.partition = topicPartition.partition(); + this.topic = topicPartition.topic(); + this.key = encodeKafkaKey(initialRecord.key(), keyEncoding); + } + + private void incrementRecordCount(final long count) { + totalRecords += count; + } + + private void updateFlowFile(final FlowFile flowFile) { + this.flowFile = flowFile; + } + + } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java index 3f20b8f4cb2b..baacdc7619ba 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java @@ -21,18 +21,15 @@ import org.apache.nifi.logging.ComponentLog; import java.io.Closeable; -import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.HashMap; +import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Queue; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; import 
java.util.concurrent.atomic.AtomicLong; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; +import org.apache.nifi.processor.ProcessSession; /** * A pool of Kafka Consumers for a given topic. Consumers can be obtained by @@ -41,176 +38,118 @@ */ public class ConsumerPool implements Closeable { - private final AtomicInteger activeLeaseCount = new AtomicInteger(0); - private final int maxLeases; - private final Queue consumerLeases; + private final BlockingQueue pooledLeases; private final List topics; private final Map kafkaProperties; + private final long maxWaitMillis; private final ComponentLog logger; - + private final byte[] demarcatorBytes; + private final String keyEncoding; + private final String securityProtocol; + private final String bootstrapServers; private final AtomicLong consumerCreatedCountRef = new AtomicLong(); private final AtomicLong consumerClosedCountRef = new AtomicLong(); private final AtomicLong leasesObtainedCountRef = new AtomicLong(); - private final AtomicLong productivePollCountRef = new AtomicLong(); - private final AtomicLong unproductivePollCountRef = new AtomicLong(); /** * Creates a pool of KafkaConsumer objects that will grow up to the maximum - * indicated leases. Consumers are lazily initialized. + * indicated threads from the given context. Consumers are lazily + * initialized. We may elect to not create up to the maximum number of + * configured consumers if the broker reported lag time for all topics is + * below a certain threshold. 
* - * @param maxLeases maximum number of active leases in the pool - * @param topics the topics to consume from - * @param kafkaProperties the properties for each consumer + * @param maxConcurrentLeases max allowable consumers at once + * @param demarcator bytes to use as demarcator between messages; null or + * empty means no demarcator + * @param kafkaProperties properties to use to initialize kafka consumers + * @param topics the topics to subscribe to + * @param maxWaitMillis maximum time to wait for a given lease to acquire + * data before committing + * @param keyEncoding the encoding to use for the key of a kafka message if + * found + * @param securityProtocol the security protocol used + * @param bootstrapServers the bootstrap servers * @param logger the logger to report any errors/warnings */ - public ConsumerPool(final int maxLeases, final List topics, final Map kafkaProperties, final ComponentLog logger) { - this.maxLeases = maxLeases; - if (maxLeases <= 0) { - throw new IllegalArgumentException("Max leases value must be greather than zero."); - } + public ConsumerPool( + final int maxConcurrentLeases, + final byte[] demarcator, + final Map kafkaProperties, + final List topics, + final long maxWaitMillis, + final String keyEncoding, + final String securityProtocol, + final String bootstrapServers, + final ComponentLog logger) { + this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases); + this.maxWaitMillis = maxWaitMillis; this.logger = logger; - if (topics == null || topics.isEmpty()) { - throw new IllegalArgumentException("Must have a list of one or more topics"); - } - this.topics = topics; - this.kafkaProperties = new HashMap<>(kafkaProperties); - this.consumerLeases = new ArrayDeque<>(); + this.demarcatorBytes = demarcator; + this.keyEncoding = keyEncoding; + this.securityProtocol = securityProtocol; + this.bootstrapServers = bootstrapServers; + this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties); + this.topics = 
Collections.unmodifiableList(topics); } /** - * Obtains a consumer from the pool if one is available + * Obtains a consumer from the pool if one is available or lazily + * initializes a new one if deemed necessary. * - * @return consumer from the pool - * @throws IllegalArgumentException if pool already contains + * @param session the session for which the consumer lease will be + * associated + * @return consumer to use or null if not available or necessary */ - public ConsumerLease obtainConsumer() { - final ConsumerLease lease; - final int activeLeases; - synchronized (this) { - lease = consumerLeases.poll(); - activeLeases = activeLeaseCount.get(); - } - if (lease == null && activeLeases >= maxLeases) { - logger.warn("No available consumers and cannot create any as max consumer leases limit reached - verify pool settings"); - return null; + public ConsumerLease obtainConsumer(final ProcessSession session) { + SimpleConsumerLease lease = pooledLeases.poll(); + if (lease == null) { + final Consumer consumer = createKafkaConsumer(); + consumerCreatedCountRef.incrementAndGet(); + /** + * For now return a new consumer lease. But we could later elect to + * have this return null if we determine the broker indicates that + * the lag time on all topics being monitored is sufficiently low. + * For now we should encourage conservative use of threads because + * having too many means we'll have at best useless threads sitting + * around doing frequent network calls and at worst having consumers + * sitting idle which could prompt excessive rebalances. + */ + lease = new SimpleConsumerLease(consumer); + /** + * This subscription tightly couples the lease to the given + * consumer. They cannot be separated from then on. + */ + consumer.subscribe(topics, lease); } + lease.setProcessSession(session); leasesObtainedCountRef.incrementAndGet(); - return (lease == null) ? 
createConsumer() : lease; + return lease; } + /** + * Exposed as protected method for easier unit testing + * + * @return consumer + * @throws KafkaException if unable to subscribe to the given topics + */ protected Consumer createKafkaConsumer() { return new KafkaConsumer<>(kafkaProperties); } - private ConsumerLease createConsumer() { - final Consumer kafkaConsumer = createKafkaConsumer(); - consumerCreatedCountRef.incrementAndGet(); - try { - kafkaConsumer.subscribe(topics); - } catch (final KafkaException kex) { - try { - kafkaConsumer.close(); - consumerClosedCountRef.incrementAndGet(); - } catch (final Exception ex) { - consumerClosedCountRef.incrementAndGet(); - //ignore - } - throw kex; - } - - final ConsumerLease lease = new ConsumerLease() { - - private volatile boolean poisoned = false; - private volatile boolean closed = false; - - @Override - public ConsumerRecords poll() { - - if (poisoned) { - throw new KafkaException("The consumer is poisoned and should no longer be used"); - } - - try { - final ConsumerRecords records = kafkaConsumer.poll(50); - if (records.isEmpty()) { - unproductivePollCountRef.incrementAndGet(); - } else { - productivePollCountRef.incrementAndGet(); - } - return records; - } catch (final KafkaException kex) { - logger.warn("Unable to poll from Kafka consumer so will poison and close this " + kafkaConsumer, kex); - poison(); - close(); - throw kex; - } - } - - @Override - public void commitOffsets(final Map offsets) { - - if (poisoned) { - throw new KafkaException("The consumer is poisoned and should no longer be used"); - } - try { - kafkaConsumer.commitSync(offsets); - } catch (final KafkaException kex) { - logger.warn("Unable to commit kafka consumer offsets so will poison and close this " + kafkaConsumer, kex); - poison(); - close(); - throw kex; - } - } - - @Override - public void close() { - if (closed) { - return; - } - if (poisoned || activeLeaseCount.get() > maxLeases) { - closeConsumer(kafkaConsumer); - 
activeLeaseCount.decrementAndGet(); - closed = true; - } else { - final boolean added; - synchronized (ConsumerPool.this) { - added = consumerLeases.offer(this); - } - if (!added) { - closeConsumer(kafkaConsumer); - activeLeaseCount.decrementAndGet(); - } - } - } - - @Override - public void poison() { - poisoned = true; - } - }; - activeLeaseCount.incrementAndGet(); - return lease; - } - /** - * Closes all consumers in the pool. Can be safely recalled. + * Closes all consumers in the pool. Can be safely called repeatedly. */ @Override public void close() { - final List leases = new ArrayList<>(); - synchronized (this) { - ConsumerLease lease = null; - while ((lease = consumerLeases.poll()) != null) { - leases.add(lease); - } - } - for (final ConsumerLease lease : leases) { - lease.poison(); - lease.close(); - } + final List leases = new ArrayList<>(); + pooledLeases.drainTo(leases); + leases.stream().forEach((lease) -> { + lease.close(true); + }); } - private void closeConsumer(final Consumer consumer) { + private void closeConsumer(final Consumer consumer) { + consumerClosedCountRef.incrementAndGet(); try { consumer.unsubscribe(); } catch (Exception e) { @@ -219,15 +158,55 @@ private void closeConsumer(final Consumer consumer) { try { consumer.close(); - consumerClosedCountRef.incrementAndGet(); } catch (Exception e) { - consumerClosedCountRef.incrementAndGet(); logger.warn("Failed while closing " + consumer, e); } } PoolStats getPoolStats() { - return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get(), productivePollCountRef.get(), unproductivePollCountRef.get()); + return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get()); + } + + private class SimpleConsumerLease extends ConsumerLease { + + private final Consumer consumer; + private volatile ProcessSession session; + private volatile boolean closedConsumer; + + private SimpleConsumerLease(final Consumer 
consumer) { + super(maxWaitMillis, consumer, demarcatorBytes, keyEncoding, securityProtocol, bootstrapServers, logger); + this.consumer = consumer; + } + + void setProcessSession(final ProcessSession session) { + this.session = session; + } + + @Override + public ProcessSession getProcessSession() { + return session; + } + + @Override + public void close() { + super.close(); + close(false); + } + + public void close(final boolean forceClose) { + if (closedConsumer) { + return; + } + super.close(); + if (session != null) { + session.rollback(); + setProcessSession(null); + } + if (forceClose || isPoisoned() || !pooledLeases.offer(this)) { + closedConsumer = true; + closeConsumer(consumer); + } + } } static final class PoolStats { @@ -235,30 +214,22 @@ static final class PoolStats { final long consumerCreatedCount; final long consumerClosedCount; final long leasesObtainedCount; - final long productivePollCount; - final long unproductivePollCount; PoolStats( final long consumerCreatedCount, final long consumerClosedCount, - final long leasesObtainedCount, - final long productivePollCount, - final long unproductivePollCount + final long leasesObtainedCount ) { this.consumerCreatedCount = consumerCreatedCount; this.consumerClosedCount = consumerClosedCount; this.leasesObtainedCount = leasesObtainedCount; - this.productivePollCount = productivePollCount; - this.unproductivePollCount = unproductivePollCount; } @Override public String toString() { return "Created Consumers [" + consumerCreatedCount + "]\n" + "Closed Consumers [" + consumerClosedCount + "]\n" - + "Leases Obtained [" + leasesObtainedCount + "]\n" - + "Productive Polls [" + productivePollCount + "]\n" - + "Unproductive Polls [" + unproductivePollCount + "]\n"; + + "Leases Obtained [" + leasesObtainedCount + "]\n"; } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java new file mode 100644 index 000000000000..e7d5cb71634e --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.nifi.flowfile.FlowFile; + +public class InFlightMessageTracker { + private final ConcurrentMap messageCountsByFlowFile = new ConcurrentHashMap<>(); + private final ConcurrentMap failures = new ConcurrentHashMap<>(); + private final Object progressMutex = new Object(); + + public void incrementAcknowledgedCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts()); + counter.incrementAcknowledgedCount(); + + synchronized (progressMutex) { + progressMutex.notify(); + } + } + + public int getAcknowledgedCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.get(flowFile); + return (counter == null) ? 0 : counter.getAcknowledgedCount(); + } + + public void incrementSentCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts()); + counter.incrementSentCount(); + } + + public int getSentCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.get(flowFile); + return (counter == null) ? 
0 : counter.getSentCount(); + } + + public void fail(final FlowFile flowFile, final Exception exception) { + failures.putIfAbsent(flowFile, exception); + + synchronized (progressMutex) { + progressMutex.notify(); + } + } + + public Exception getFailure(final FlowFile flowFile) { + return failures.get(flowFile); + } + + public boolean isFailed(final FlowFile flowFile) { + return getFailure(flowFile) != null; + } + + public void reset() { + messageCountsByFlowFile.clear(); + failures.clear(); + } + + public PublishResult failOutstanding(final Exception exception) { + messageCountsByFlowFile.keySet().stream() + .filter(ff -> !isComplete(ff)) + .filter(ff -> !failures.containsKey(ff)) + .forEach(ff -> failures.put(ff, exception)); + + return createPublishResult(); + } + + private boolean isComplete(final FlowFile flowFile) { + final Counts counts = messageCountsByFlowFile.get(flowFile); + if (counts.getAcknowledgedCount() == counts.getSentCount()) { + // all messages received successfully. + return true; + } + + if (failures.containsKey(flowFile)) { + // FlowFile failed so is complete + return true; + } + + return false; + } + + private boolean isComplete() { + return messageCountsByFlowFile.keySet().stream() + .allMatch(flowFile -> isComplete(flowFile)); + } + + void awaitCompletion(final long millis) throws InterruptedException, TimeoutException { + final long startTime = System.nanoTime(); + final long maxTime = startTime + TimeUnit.MILLISECONDS.toNanos(millis); + + while (System.nanoTime() < maxTime) { + synchronized (progressMutex) { + if (isComplete()) { + return; + } + + progressMutex.wait(millis); + } + } + + throw new TimeoutException(); + } + + + PublishResult createPublishResult() { + return new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + if (failures.isEmpty()) { + return messageCountsByFlowFile.keySet(); + } + + final Set flowFiles = new HashSet<>(messageCountsByFlowFile.keySet()); + 
flowFiles.removeAll(failures.keySet()); + return flowFiles; + } + + @Override + public Collection getFailedFlowFiles() { + return failures.keySet(); + } + + @Override + public int getSuccessfulMessageCount(final FlowFile flowFile) { + return getAcknowledgedCount(flowFile); + } + + @Override + public Exception getReasonForFailure(final FlowFile flowFile) { + return getFailure(flowFile); + } + }; + } + + public static class Counts { + private final AtomicInteger sentCount = new AtomicInteger(0); + private final AtomicInteger acknowledgedCount = new AtomicInteger(0); + + public void incrementSentCount() { + sentCount.incrementAndGet(); + } + + public void incrementAcknowledgedCount() { + acknowledgedCount.incrementAndGet(); + } + + public int getAcknowledgedCount() { + return acknowledgedCount.get(); + } + + public int getSentCount() { + return sentCount.get(); + } + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java index 3ae749544c9b..3d09f2df03c0 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java @@ -27,8 +27,9 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; -import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.commons.lang3.StringUtils; +import org.apache.kafka.clients.CommonClientConfigs; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.config.SaslConfigs; @@ -55,6 +56,10 @@ final class KafkaProcessorUtils { 
private static final String BROKER_REGEX = SINGLE_BROKER_REGEX + "(?:,\\s*" + SINGLE_BROKER_REGEX + ")*"; + static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); + static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", + "The key is interpreted as arbitrary binary data and is encoded using hexadecimal characters with uppercase letters"); + static final Pattern HEX_KEY_PATTERN = Pattern.compile("(?:[0123456789abcdefABCDEF]{2})+"); static final String KAFKA_KEY = "kafka.key"; @@ -182,7 +187,7 @@ static final class KafkaConfigValidator implements Validator { final Class classType; - public KafkaConfigValidator(final Class classType) { + public KafkaConfigValidator(final Class classType) { this.classType = classType; } @@ -207,7 +212,8 @@ static String buildTransitURI(String securityProtocol, String brokers, String to return builder.toString(); } - static void buildCommonKafkaProperties(final ProcessContext context, final Class kafkaConfigClass, final Map mapToPopulate) { + + static void buildCommonKafkaProperties(final ProcessContext context, final Class kafkaConfigClass, final Map mapToPopulate) { for (PropertyDescriptor propertyDescriptor : context.getProperties().keySet()) { if (propertyDescriptor.equals(SSL_CONTEXT_SERVICE)) { // Translate SSLContext Service configuration into Kafka properties @@ -226,28 +232,33 @@ static void buildCommonKafkaProperties(final ProcessContext context, final Class mapToPopulate.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, sslContextService.getTrustStoreType()); } } - String pName = propertyDescriptor.getName(); - String pValue = propertyDescriptor.isExpressionLanguageSupported() + + String propertyName = propertyDescriptor.getName(); + String propertyValue = propertyDescriptor.isExpressionLanguageSupported() ? 
context.getProperty(propertyDescriptor).evaluateAttributeExpressions().getValue() : context.getProperty(propertyDescriptor).getValue(); - if (pValue != null) { - if (pName.endsWith(".ms")) { // kafka standard time notation - pValue = String.valueOf(FormatUtils.getTimeDuration(pValue.trim(), TimeUnit.MILLISECONDS)); + + if (propertyValue != null) { + // If the property name ends in ".ms" then it is a time period. We want to accept either an integer as number of milliseconds + // or the standard NiFi time period such as "5 secs" + if (propertyName.endsWith(".ms") && !StringUtils.isNumeric(propertyValue.trim())) { // kafka standard time notation + propertyValue = String.valueOf(FormatUtils.getTimeDuration(propertyValue.trim(), TimeUnit.MILLISECONDS)); } - if (isStaticStringFieldNamePresent(pName, kafkaConfigClass, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class)) { - mapToPopulate.put(pName, pValue); + + if (isStaticStringFieldNamePresent(propertyName, kafkaConfigClass, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class)) { + mapToPopulate.put(propertyName, propertyValue); } } } } - private static boolean isStaticStringFieldNamePresent(final String name, final Class... classes) { + private static boolean isStaticStringFieldNamePresent(final String name, final Class... classes) { return KafkaProcessorUtils.getPublicStaticStringFieldValues(classes).contains(name); } - private static Set getPublicStaticStringFieldValues(final Class... classes) { + private static Set getPublicStaticStringFieldValues(final Class... 
classes) { final Set strings = new HashSet<>(); - for (final Class classType : classes) { + for (final Class classType : classes) { for (final Field field : classType.getDeclaredFields()) { if (Modifier.isPublic(field.getModifiers()) && Modifier.isStatic(field.getModifiers()) && field.getType().equals(String.class)) { try { diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java deleted file mode 100644 index 31a084f133d6..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.io.Closeable; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.stream.io.util.StreamDemarcator; - -/** - * Wrapper over {@link KafkaProducer} to assist {@link PublishKafka} processor - * with sending contents of the {@link FlowFile}s to Kafka. - */ -class KafkaPublisher implements Closeable { - - private final Producer kafkaProducer; - - private volatile long ackWaitTime = 30000; - - private final ComponentLog componentLog; - - private final int ackCheckSize; - - KafkaPublisher(Properties kafkaProperties, ComponentLog componentLog) { - this(kafkaProperties, 100, componentLog); - } - - /** - * Creates an instance of this class as well as the instance of the - * corresponding Kafka {@link KafkaProducer} using provided Kafka - * configuration properties. - * - * @param kafkaProperties instance of {@link Properties} used to bootstrap - * {@link KafkaProducer} - */ - KafkaPublisher(Properties kafkaProperties, int ackCheckSize, ComponentLog componentLog) { - this.kafkaProducer = new KafkaProducer<>(kafkaProperties); - this.ackCheckSize = ackCheckSize; - this.componentLog = componentLog; - } - - /** - * Publishes messages to Kafka topic. It uses {@link StreamDemarcator} to - * determine how many messages to Kafka will be sent from a provided - * {@link InputStream} (see {@link PublishingContext#getContentStream()}). 
- * It supports two publishing modes: - *
    - *
  • Sending all messages constructed from - * {@link StreamDemarcator#nextToken()} operation.
  • - *
  • Sending only unacknowledged messages constructed from - * {@link StreamDemarcator#nextToken()} operation.
  • - *
- * The unacknowledged messages are determined from the value of - * {@link PublishingContext#getLastAckedMessageIndex()}. - *
- * This method assumes content stream affinity where it is expected that the - * content stream that represents the same Kafka message(s) will remain the - * same across possible retries. This is required specifically for cases - * where delimiter is used and a single content stream may represent - * multiple Kafka messages. The - * {@link PublishingContext#getLastAckedMessageIndex()} will provide the - * index of the last ACKed message, so upon retry only messages with the - * higher index are sent. - * - * @param publishingContext instance of {@link PublishingContext} which hold - * context information about the message(s) to be sent. - * @return The index of the last successful offset. - */ - KafkaPublisherResult publish(PublishingContext publishingContext) { - StreamDemarcator streamTokenizer = new StreamDemarcator(publishingContext.getContentStream(), - publishingContext.getDelimiterBytes(), publishingContext.getMaxRequestSize()); - - int prevLastAckedMessageIndex = publishingContext.getLastAckedMessageIndex(); - List> resultFutures = new ArrayList<>(); - - byte[] messageBytes; - int tokenCounter = 0; - boolean continueSending = true; - KafkaPublisherResult result = null; - for (; continueSending && (messageBytes = streamTokenizer.nextToken()) != null; tokenCounter++) { - if (prevLastAckedMessageIndex < tokenCounter) { - ProducerRecord message = new ProducerRecord<>(publishingContext.getTopic(), publishingContext.getKeyBytes(), messageBytes); - resultFutures.add(this.kafkaProducer.send(message)); - - if (tokenCounter % this.ackCheckSize == 0) { - int lastAckedMessageIndex = this.processAcks(resultFutures, prevLastAckedMessageIndex); - resultFutures.clear(); - if (lastAckedMessageIndex % this.ackCheckSize != 0) { - continueSending = false; - result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex); - } - prevLastAckedMessageIndex = lastAckedMessageIndex; - } - } - } - - if (result == null) { - int lastAckedMessageIndex = 
this.processAcks(resultFutures, prevLastAckedMessageIndex); - resultFutures.clear(); - result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex); - } - return result; - } - - /** - * Sets the time this publisher will wait for the {@link Future#get()} - * operation (the Future returned by - * {@link KafkaProducer#send(ProducerRecord)}) to complete before timing - * out. - * - * This value will also be used as a timeout when closing the underlying - * {@link KafkaProducer}. See {@link #close()}. - */ - void setAckWaitTime(long ackWaitTime) { - this.ackWaitTime = ackWaitTime; - } - - /** - * This operation will process ACKs from Kafka in the order in which - * {@link KafkaProducer#send(ProducerRecord)} invocation were made returning - * the index of the last ACKed message. Within this operation processing ACK - * simply means successful invocation of 'get()' operation on the - * {@link Future} returned by {@link KafkaProducer#send(ProducerRecord)} - * operation. Upon encountering any type of error while interrogating such - * {@link Future} the ACK loop will end. Messages that were not ACKed would - * be considered non-delivered and therefore could be resent at the later - * time. - * - * @param sendFutures list of {@link Future}s representing results of - * publishing to Kafka - * - * @param lastAckMessageIndex the index of the last ACKed message. It is - * important to provide the last ACKed message especially while re-trying so - * the proper index is maintained. 
- */ - private int processAcks(List> sendFutures, int lastAckMessageIndex) { - boolean exceptionThrown = false; - for (int segmentCounter = 0; segmentCounter < sendFutures.size() && !exceptionThrown; segmentCounter++) { - Future future = sendFutures.get(segmentCounter); - try { - future.get(this.ackWaitTime, TimeUnit.MILLISECONDS); - lastAckMessageIndex++; - } catch (InterruptedException e) { - exceptionThrown = true; - Thread.currentThread().interrupt(); - this.warnOrError("Interrupted while waiting for acks from Kafka", null); - } catch (ExecutionException e) { - exceptionThrown = true; - this.warnOrError("Failed while waiting for acks from Kafka", e); - } catch (TimeoutException e) { - exceptionThrown = true; - this.warnOrError("Timed out while waiting for acks from Kafka", null); - } - } - - return lastAckMessageIndex; - } - - /** - * Will close the underlying {@link KafkaProducer} waiting if necessary for - * the same duration as supplied {@link #setAckWaitTime(long)} - */ - @Override - public void close() { - this.kafkaProducer.close(this.ackWaitTime, TimeUnit.MILLISECONDS); - } - - /** - * - */ - private void warnOrError(String message, Exception e) { - if (e == null) { - this.componentLog.warn(message); - } else { - this.componentLog.error(message, e); - } - } - - /** - * Encapsulates the result received from publishing messages to Kafka - */ - static class KafkaPublisherResult { - - private final int messagesSent; - private final int lastMessageAcked; - - KafkaPublisherResult(int messagesSent, int lastMessageAcked) { - this.messagesSent = messagesSent; - this.lastMessageAcked = lastMessageAcked; - } - - public int getMessagesSent() { - return this.messagesSent; - } - - public int getLastMessageAcked() { - return this.lastMessageAcked; - } - - public boolean isAllAcked() { - return this.lastMessageAcked > -1 && this.messagesSent - 1 == this.lastMessageAcked; - } - - @Override - public String toString() { - return "Sent:" + this.messagesSent + "; Last ACK:" 
+ this.lastMessageAcked; - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka_0_10.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka_0_10.java index 18f3018f0e9e..bb0bed257052 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka_0_10.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka_0_10.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.nifi.processors.kafka.pubsub; -import java.io.Closeable; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -27,17 +28,16 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; + import javax.xml.bind.DatatypeConverter; -import org.apache.kafka.clients.consumer.KafkaConsumer; + import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnStopped; @@ -46,202 +46,192 @@ import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; 
import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.AbstractSessionFactoryProcessor; +import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.DataUnit; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.FlowFileFilters; import org.apache.nifi.processor.util.StandardValidators; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Tags({"Apache", "Kafka", "Put", "Send", "Message", "PubSub", "0.10"}) -@CapabilityDescription("Sends the contents of a FlowFile as a message to Apache Kafka using the Kafka 0.10 producer. " - + "The messages to send may be individual FlowFiles or may be delimited, using a " - + "user-specified delimiter, such as a new-line. " - + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" - + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the mean time" - + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on.") + +@Tags({"Apache", "Kafka", "Put", "Send", "Message", "PubSub", "0.10.x"}) +@CapabilityDescription("Sends the contents of a FlowFile as a message to Apache Kafka using the Kafka 0.10.x Producer API." + + "The messages to send may be individual FlowFiles or may be delimited, using a " + + "user-specified delimiter, such as a new-line. " + + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" + + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. 
In the meantime" + + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on. The complementary NiFi processor for fetching messages is ConsumeKafka_0_10.") @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.", - description = "These properties will be added on the Kafka configuration after loading any provided configuration properties." + description = "These properties will be added on the Kafka configuration after loading any provided configuration properties." + " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged." + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ") -public class PublishKafka_0_10 extends AbstractSessionFactoryProcessor { - - private final Logger logger = LoggerFactory.getLogger(this.getClass()); - - protected static final String FAILED_PROC_ID_ATTR = "failed.proc.id"; - - protected static final String FAILED_LAST_ACK_IDX = "failed.last.idx"; - - protected static final String FAILED_TOPIC_ATTR = "failed.topic"; - - protected static final String FAILED_KEY_ATTR = "failed.key"; - - protected static final String FAILED_DELIMITER_ATTR = "failed.delimiter"; - +@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Kafka for this FlowFile. This attribute is added only to " + + "FlowFiles that are routed to success. 
If the Property is not set, this will always be 1, but if the Property is set, it may " + + "be greater than 1.") +public class PublishKafka_0_10 extends AbstractProcessor { protected static final String MSG_COUNT = "msg.count"; static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery", - "FlowFile will be routed to failure unless the message is replicated to the appropriate " + "FlowFile will be routed to failure unless the message is replicated to the appropriate " + "number of Kafka Nodes according to the Topic configuration"); static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery", - "FlowFile will be routed to success if the message is received by a single Kafka node, " + "FlowFile will be routed to success if the message is received by a single Kafka node, " + "whether or not it is replicated. This is faster than " + "but can result in data loss if a Kafka node crashes"); static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort", - "FlowFile will be routed to success after successfully writing the content to a Kafka node, " + "FlowFile will be routed to success after successfully writing the content to a Kafka node, " + "without waiting for a response. 
This provides the best performance but may result in data loss."); static final AllowableValue ROUND_ROBIN_PARTITIONING = new AllowableValue(Partitioners.RoundRobinPartitioner.class.getName(), - Partitioners.RoundRobinPartitioner.class.getSimpleName(), - "Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, " + Partitioners.RoundRobinPartitioner.class.getSimpleName(), + "Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, " + "the next Partition to Partition 2, and so on, wrapping as necessary."); static final AllowableValue RANDOM_PARTITIONING = new AllowableValue("org.apache.kafka.clients.producer.internals.DefaultPartitioner", - "DefaultPartitioner", "Messages will be assigned to random partitions."); + "DefaultPartitioner", "Messages will be assigned to random partitions."); static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", "The key is interpreted as arbitrary binary data that is encoded using hexadecimal characters with uppercase letters."); static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder() - .name("topic") - .displayName("Topic Name") - .description("The name of the Kafka Topic to publish to.") - .required(true) - .addValidator(StandardValidators.NON_BLANK_VALIDATOR) - .expressionLanguageSupported(true) - .build(); + .name("topic") + .displayName("Topic Name") + .description("The name of the Kafka Topic to publish to.") + .required(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .expressionLanguageSupported(true) + .build(); static final PropertyDescriptor DELIVERY_GUARANTEE = new PropertyDescriptor.Builder() - .name(ProducerConfig.ACKS_CONFIG) - .displayName("Delivery Guarantee") - .description("Specifies the requirement for guaranteeing that 
a message is sent to Kafka. Corresponds to Kafka's 'acks' property.") - .required(true) - .expressionLanguageSupported(false) - .allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED) - .defaultValue(DELIVERY_BEST_EFFORT.getValue()) - .build(); - - static final PropertyDescriptor META_WAIT_TIME = new PropertyDescriptor.Builder() - .name(ProducerConfig.MAX_BLOCK_MS_CONFIG) - .displayName("Meta Data Wait Time") - .description("The amount of time KafkaConsumer will wait to obtain metadata during the 'send' call before failing the " - + "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property") - .required(true) - .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(true) - .defaultValue("30 sec") - .build(); + .name(ProducerConfig.ACKS_CONFIG) + .displayName("Delivery Guarantee") + .description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.") + .required(true) + .expressionLanguageSupported(false) + .allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED) + .defaultValue(DELIVERY_BEST_EFFORT.getValue()) + .build(); + + static final PropertyDescriptor METADATA_WAIT_TIME = new PropertyDescriptor.Builder() + .name(ProducerConfig.MAX_BLOCK_MS_CONFIG) + .displayName("Max Metadata Wait Time") + .description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the " + + "entire 'send' call. 
Corresponds to Kafka's 'max.block.ms' property") + .required(true) + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .expressionLanguageSupported(true) + .defaultValue("5 sec") + .build(); + + static final PropertyDescriptor ACK_WAIT_TIME = new PropertyDescriptor.Builder() + .name("ack.wait.time") + .displayName("Acknowledgment Wait Time") + .description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. " + + "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.") + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .expressionLanguageSupported(false) + .required(true) + .defaultValue("5 secs") + .build(); static final PropertyDescriptor MAX_REQUEST_SIZE = new PropertyDescriptor.Builder() - .name("max.request.size") - .displayName("Max Request Size") - .description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).") - .required(true) - .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) - .defaultValue("1 MB") - .build(); + .name("max.request.size") + .displayName("Max Request Size") + .description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).") + .required(true) + .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) + .defaultValue("1 MB") + .build(); static final PropertyDescriptor KEY = new PropertyDescriptor.Builder() - .name("kafka-key") - .displayName("Kafka Key") - .description("The Key to use for the Message. It will be serialized as UTF-8 bytes. " - + "If not specified then the flow file attribute kafka.key is used if present " - + "and we're not demarcating. 
In that case the hex string is coverted to its byte" - + "form and written as a byte[] key.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(true) - .build(); + .name("kafka-key") + .displayName("Kafka Key") + .description("The Key to use for the Message. " + + "If not specified, the flow file attribute 'kafka.key' is used as the message key, if it is present " + + "and we're not demarcating.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new PropertyDescriptor.Builder() - .name("key-attribute-encoding") - .displayName("Key Attribute Encoding") - .description("FlowFiles that are emitted have an attribute named '" + KafkaProcessorUtils.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.") - .required(true) - .defaultValue(UTF8_ENCODING.getValue()) - .allowableValues(UTF8_ENCODING, HEX_ENCODING) - .build(); + .name("key-attribute-encoding") + .displayName("Key Attribute Encoding") + .description("FlowFiles that are emitted have an attribute named '" + KafkaProcessorUtils.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.") + .required(true) + .defaultValue(UTF8_ENCODING.getValue()) + .allowableValues(UTF8_ENCODING, HEX_ENCODING) + .build(); static final PropertyDescriptor MESSAGE_DEMARCATOR = new PropertyDescriptor.Builder() - .name("message-demarcator") - .displayName("Message Demarcator") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(true) - .description("Specifies the string (interpreted as UTF-8) to use for demarcating multiple messages within " - + "a single FlowFile. If not specified, the entire content of the FlowFile will be used as a single message. 
If specified, the " - + "contents of the FlowFile will be split on this delimiter and each section sent as a separate Kafka message. " - + "To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on your OS.") - .build(); + .name("message-demarcator") + .displayName("Message Demarcator") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .description("Specifies the string (interpreted as UTF-8) to use for demarcating multiple messages within " + + "a single FlowFile. If not specified, the entire content of the FlowFile will be used as a single message. If specified, the " + + "contents of the FlowFile will be split on this delimiter and each section sent as a separate Kafka message. " + + "To enter special character such as 'new line' use CTRL+Enter or Shift+Enter, depending on your OS.") + .build(); static final PropertyDescriptor PARTITION_CLASS = new PropertyDescriptor.Builder() - .name(ProducerConfig.PARTITIONER_CLASS_CONFIG) - .displayName("Partitioner class") - .description("Specifies which class to use to compute a partition id for a message. Corresponds to Kafka's 'partitioner.class' property.") - .allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING) - .defaultValue(RANDOM_PARTITIONING.getValue()) - .required(false) - .build(); + .name(ProducerConfig.PARTITIONER_CLASS_CONFIG) + .displayName("Partitioner class") + .description("Specifies which class to use to compute a partition id for a message. 
Corresponds to Kafka's 'partitioner.class' property.") + .allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING) + .defaultValue(RANDOM_PARTITIONING.getValue()) + .required(false) + .build(); static final PropertyDescriptor COMPRESSION_CODEC = new PropertyDescriptor.Builder() - .name(ProducerConfig.COMPRESSION_TYPE_CONFIG) - .displayName("Compression Type") - .description("This parameter allows you to specify the compression codec for all data generated by this producer.") - .required(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .allowableValues("none", "gzip", "snappy", "lz4") - .defaultValue("none") - .build(); + .name(ProducerConfig.COMPRESSION_TYPE_CONFIG) + .displayName("Compression Type") + .description("This parameter allows you to specify the compression codec for all data generated by this producer.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .allowableValues("none", "gzip", "snappy", "lz4") + .defaultValue("none") + .build(); static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("FlowFiles for which all content was sent to Kafka.") - .build(); + .name("success") + .description("FlowFiles for which all content was sent to Kafka.") + .build(); static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship") - .build(); - - static final List DESCRIPTORS; - - static final Set RELATIONSHIPS; + .name("failure") + .description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship") + .build(); - private volatile String brokers; + private static final List PROPERTIES; + private static final Set RELATIONSHIPS; - private final AtomicInteger taskCounter = new AtomicInteger(); + private volatile PublisherPool publisherPool = null; - private volatile boolean acceptTask = true; - - /* - * Will ensure that list of 
PropertyDescriptors is build only once, since - * all other lifecycle methods are invoked multiple times. - */ static { - final List _descriptors = new ArrayList<>(); - _descriptors.addAll(KafkaProcessorUtils.getCommonPropertyDescriptors()); - _descriptors.add(TOPIC); - _descriptors.add(DELIVERY_GUARANTEE); - _descriptors.add(KEY); - _descriptors.add(KEY_ATTRIBUTE_ENCODING); - _descriptors.add(MESSAGE_DEMARCATOR); - _descriptors.add(MAX_REQUEST_SIZE); - _descriptors.add(META_WAIT_TIME); - _descriptors.add(PARTITION_CLASS); - _descriptors.add(COMPRESSION_CODEC); - - DESCRIPTORS = Collections.unmodifiableList(_descriptors); - - final Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - RELATIONSHIPS = Collections.unmodifiableSet(_relationships); + final List properties = new ArrayList<>(); + properties.addAll(KafkaProcessorUtils.getCommonPropertyDescriptors()); + properties.add(TOPIC); + properties.add(DELIVERY_GUARANTEE); + properties.add(KEY); + properties.add(KEY_ATTRIBUTE_ENCODING); + properties.add(MESSAGE_DEMARCATOR); + properties.add(MAX_REQUEST_SIZE); + properties.add(ACK_WAIT_TIME); + properties.add(METADATA_WAIT_TIME); + properties.add(PARTITION_CLASS); + properties.add(COMPRESSION_CODEC); + + PROPERTIES = Collections.unmodifiableList(properties); + + final Set relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + RELATIONSHIPS = Collections.unmodifiableSet(relationships); } @Override @@ -251,15 +241,17 @@ public Set getRelationships() { @Override protected List getSupportedPropertyDescriptors() { - return DESCRIPTORS; + return PROPERTIES; } @Override protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { return new PropertyDescriptor.Builder() - .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.") - .name(propertyDescriptorName).addValidator(new 
KafkaProcessorUtils.KafkaConfigValidator(ProducerConfig.class)).dynamic(true) - .build(); + .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.") + .name(propertyDescriptorName) + .addValidator(new KafkaProcessorUtils.KafkaConfigValidator(ProducerConfig.class)) + .dynamic(true) + .build(); } @Override @@ -267,226 +259,123 @@ protected Collection customValidate(final ValidationContext va return KafkaProcessorUtils.validateCommonProperties(validationContext); } - volatile KafkaPublisher kafkaPublisher; - - /** - * This thread-safe operation will delegate to - * {@link #rendezvousWithKafka(ProcessContext, ProcessSession)} after first - * checking and creating (if necessary) Kafka resource which could be either - * {@link KafkaPublisher} or {@link KafkaConsumer}. It will also close and - * destroy the underlying Kafka resource upon catching an {@link Exception} - * raised by {@link #rendezvousWithKafka(ProcessContext, ProcessSession)}. - * After Kafka resource is destroyed it will be re-created upon the next - * invocation of this operation essentially providing a self healing - * mechanism to deal with potentially corrupted resource. - *

- * Keep in mind that upon catching an exception the state of this processor - * will be set to no longer accept any more tasks, until Kafka resource is - * reset. This means that in a multi-threaded situation currently executing - * tasks will be given a chance to complete while no new tasks will be - * accepted. - * - * @param context context - * @param sessionFactory factory - */ - @Override - public final void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException { - if (this.acceptTask) { // acts as a circuit breaker to allow existing tasks to wind down so 'kafkaPublisher' can be reset before new tasks are accepted. - this.taskCounter.incrementAndGet(); - final ProcessSession session = sessionFactory.createSession(); - try { - /* - * We can't be doing double null check here since as a pattern - * it only works for lazy init but not reset, which is what we - * are doing here. In fact the first null check is dangerous - * since 'kafkaPublisher' can become null right after its null - * check passed causing subsequent NPE. - */ - synchronized (this) { - if (this.kafkaPublisher == null) { - this.kafkaPublisher = this.buildKafkaResource(context, session); - } - } - - /* - * The 'processed' boolean flag does not imply any failure or success. 
It simply states that: - * - ConsumeKafka - some messages were received form Kafka and 1_ FlowFile were generated - * - PublishKafka0_10 - some messages were sent to Kafka based on existence of the input FlowFile - */ - boolean processed = this.rendezvousWithKafka(context, session); - session.commit(); - if (!processed) { - context.yield(); - } - } catch (Throwable e) { - this.acceptTask = false; - session.rollback(true); - this.getLogger().error("{} failed to process due to {}; rolling back session", new Object[]{this, e}); - } finally { - synchronized (this) { - if (this.taskCounter.decrementAndGet() == 0 && !this.acceptTask) { - this.close(); - this.acceptTask = true; - } - } - } - } else { - this.logger.debug("Task was not accepted due to the processor being in 'reset' state. It will be re-submitted upon completion of the reset."); - this.getLogger().debug("Task was not accepted due to the processor being in 'reset' state. It will be re-submitted upon completion of the reset."); - context.yield(); + private synchronized PublisherPool getPublisherPool(final ProcessContext context) { + PublisherPool pool = publisherPool; + if (pool != null) { + return pool; } + + return publisherPool = createPublisherPool(context); + } + + protected PublisherPool createPublisherPool(final ProcessContext context) { + final int maxMessageSize = context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue(); + final long maxAckWaitMillis = context.getProperty(ACK_WAIT_TIME).asTimePeriod(TimeUnit.MILLISECONDS).longValue(); + + final Map kafkaProperties = new HashMap<>(); + KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProperties); + kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + kafkaProperties.put("max.request.size", String.valueOf(maxMessageSize)); + + return new 
PublisherPool(kafkaProperties, getLogger(), maxMessageSize, maxAckWaitMillis); } - /** - * Will call {@link Closeable#close()} on the target resource after which - * the target resource will be set to null. Should only be called when there - * are no more threads being executed on this processor or when it has been - * verified that only a single thread remains. - * - * @see KafkaPublisher - * @see KafkaConsumer - */ @OnStopped - public void close() { - try { - if (this.kafkaPublisher != null) { - try { - this.kafkaPublisher.close(); - } catch (Exception e) { - this.getLogger().warn("Failed while closing " + this.kafkaPublisher, e); - } - } - } finally { - this.kafkaPublisher = null; + public void closePool() { + if (publisherPool != null) { + publisherPool.close(); } + + publisherPool = null; } - /** - * Will rendezvous with Kafka if {@link ProcessSession} contains - * {@link FlowFile} producing a result {@link FlowFile}. - *
- * The result {@link FlowFile} that is successful is then transfered to - * {@link #REL_SUCCESS} - *
- * The result {@link FlowFile} that is failed is then transfered to - * {@link #REL_FAILURE} - * - */ - protected boolean rendezvousWithKafka(ProcessContext context, ProcessSession session) { - FlowFile flowFile = session.get(); - if (flowFile != null) { - long start = System.nanoTime(); - flowFile = this.doRendezvousWithKafka(flowFile, context, session); - Relationship relationship = REL_SUCCESS; - if (!this.isFailedFlowFile(flowFile)) { - String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); - long executionDuration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start); - String transitUri = KafkaProcessorUtils.buildTransitURI(context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(), this.brokers, topic); - session.getProvenanceReporter().send(flowFile, transitUri, "Sent " + flowFile.getAttribute(MSG_COUNT) + " Kafka messages", executionDuration); - this.getLogger().debug("Successfully sent {} to Kafka as {} message(s) in {} millis", - new Object[]{flowFile, flowFile.getAttribute(MSG_COUNT), executionDuration}); - } else { - relationship = REL_FAILURE; - flowFile = session.penalize(flowFile); - } - session.transfer(flowFile, relationship); + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final boolean useDemarcator = context.getProperty(MESSAGE_DEMARCATOR).isSet(); + + final List flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(250, DataUnit.KB, 500)); + if (flowFiles.isEmpty()) { + return; } - return flowFile != null; - } - /** - * Builds and instance of {@link KafkaPublisher}. 
- */ - protected KafkaPublisher buildKafkaResource(ProcessContext context, ProcessSession session) { - final Map kafkaProps = new HashMap<>(); - KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProps); - kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProps.put("max.request.size", String.valueOf(context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue())); - this.brokers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue(); - final Properties props = new Properties(); - props.putAll(kafkaProps); - KafkaPublisher publisher = new KafkaPublisher(props, this.getLogger()); - return publisher; - } + final PublisherPool pool = getPublisherPool(context); + if (pool == null) { + context.yield(); + return; + } + + final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(); + final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue(); + + final long startTime = System.nanoTime(); + try (final PublisherLease lease = pool.obtainPublisher()) { + // Send each FlowFile to Kafka asynchronously. + for (final FlowFile flowFile : flowFiles) { + if (!isScheduled()) { + // If stopped, re-queue FlowFile instead of sending it + session.transfer(flowFile); + continue; + } - /** - * Will rendezvous with {@link KafkaPublisher} after building - * {@link PublishingContext} and will produce the resulting - * {@link FlowFile}. The resulting FlowFile contains all required - * information to determine if message publishing originated from the - * provided FlowFile has actually succeeded fully, partially or failed - * completely (see {@link #isFailedFlowFile(FlowFile)}. 
- */ - private FlowFile doRendezvousWithKafka(final FlowFile flowFile, final ProcessContext context, final ProcessSession session) { - final AtomicReference publishResultRef = new AtomicReference<>(); - session.read(flowFile, new InputStreamCallback() { - @Override - public void process(InputStream contentStream) throws IOException { - PublishingContext publishingContext = PublishKafka_0_10.this.buildPublishingContext(flowFile, context, contentStream); - KafkaPublisher.KafkaPublisherResult result = PublishKafka_0_10.this.kafkaPublisher.publish(publishingContext); - publishResultRef.set(result); + final byte[] messageKey = getMessageKey(flowFile, context); + final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); + final byte[] demarcatorBytes; + if (useDemarcator) { + demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8); + } else { + demarcatorBytes = null; + } + + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream rawIn) throws IOException { + try (final InputStream in = new BufferedInputStream(rawIn)) { + lease.publish(flowFile, in, messageKey, demarcatorBytes, topic); + } + } + }); } - }); - FlowFile resultFile = publishResultRef.get().isAllAcked() - ? this.cleanUpFlowFileIfNecessary(flowFile, session) - : session.putAllAttributes(flowFile, this.buildFailedFlowFileAttributes(publishResultRef.get().getLastMessageAcked(), flowFile, context)); + // Complete the send + final PublishResult publishResult = lease.complete(); - if (!this.isFailedFlowFile(resultFile)) { - resultFile = session.putAttribute(resultFile, MSG_COUNT, String.valueOf(publishResultRef.get().getMessagesSent())); - } - return resultFile; - } + // Transfer any successful FlowFiles. 
+ final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime); + for (FlowFile success : publishResult.getSuccessfulFlowFiles()) { + final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue(); - /** - * Builds {@link PublishingContext} for message(s) to be sent to Kafka. - * {@link PublishingContext} contains all contextual information required by - * {@link KafkaPublisher} to publish to Kafka. Such information contains - * things like topic name, content stream, delimiter, key and last ACKed - * message for cases where provided FlowFile is being retried (failed in the - * past). - *
- * For the clean FlowFile (file that has been sent for the first time), - * PublishingContext will be built form {@link ProcessContext} associated - * with this invocation. - *
- * For the failed FlowFile, {@link PublishingContext} will be built from - * attributes of that FlowFile which by then will already contain required - * information (e.g., topic, key, delimiter etc.). This is required to - * ensure the affinity of the retry in the even where processor - * configuration has changed. However keep in mind that failed FlowFile is - * only considered a failed FlowFile if it is being re-processed by the same - * processor (determined via {@link #FAILED_PROC_ID_ATTR}, see - * {@link #isFailedFlowFile(FlowFile)}). If failed FlowFile is being sent to - * another PublishKafka0_10 processor it is treated as a fresh FlowFile - * regardless if it has #FAILED* attributes set. - */ - private PublishingContext buildPublishingContext(FlowFile flowFile, ProcessContext context, InputStream contentStream) { - final byte[] keyBytes = getMessageKey(flowFile, context); - - final String topicName; - final byte[] delimiterBytes; - int lastAckedMessageIndex = -1; - if (this.isFailedFlowFile(flowFile)) { - lastAckedMessageIndex = Integer.valueOf(flowFile.getAttribute(FAILED_LAST_ACK_IDX)); - topicName = flowFile.getAttribute(FAILED_TOPIC_ATTR); - delimiterBytes = flowFile.getAttribute(FAILED_DELIMITER_ATTR) != null - ? flowFile.getAttribute(FAILED_DELIMITER_ATTR).getBytes(StandardCharsets.UTF_8) : null; - } else { - topicName = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); - delimiterBytes = context.getProperty(MESSAGE_DEMARCATOR).isSet() ? 
context.getProperty(MESSAGE_DEMARCATOR) - .evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8) : null; - } + final int msgCount = publishResult.getSuccessfulMessageCount(success); + success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount)); + session.adjustCounter("Messages Sent", msgCount, true); + + final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic); + session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis); + session.transfer(success, REL_SUCCESS); + } + + // Transfer any failures. + for (final FlowFile failure : publishResult.getFailedFlowFiles()) { + final int successCount = publishResult.getSuccessfulMessageCount(failure); + if (successCount > 0) { + getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}", + new Object[] {failure, successCount, publishResult.getReasonForFailure(failure)}); + } else { + getLogger().error("Failed to send all message for {} to Kafka; routing to failure due to {}", + new Object[] {failure, publishResult.getReasonForFailure(failure)}); + } - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex, - context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue()); - publishingContext.setKeyBytes(keyBytes); - publishingContext.setDelimiterBytes(delimiterBytes); - return publishingContext; + session.transfer(failure, REL_FAILURE); + } + } } + private byte[] getMessageKey(final FlowFile flowFile, final ProcessContext context) { + if (context.getProperty(MESSAGE_DEMARCATOR).isSet()) { + return null; + } + final String uninterpretedKey; if (context.getProperty(KEY).isSet()) { uninterpretedKey = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue(); @@ -505,51 +394,4 @@ private byte[] getMessageKey(final FlowFile 
flowFile, final ProcessContext conte return DatatypeConverter.parseHexBinary(uninterpretedKey); } - - /** - * Will remove FAILED_* attributes if FlowFile is no longer considered a - * failed FlowFile - * - * @see #isFailedFlowFile(FlowFile) - */ - private FlowFile cleanUpFlowFileIfNecessary(FlowFile flowFile, ProcessSession session) { - if (this.isFailedFlowFile(flowFile)) { - Set keysToRemove = new HashSet<>(); - keysToRemove.add(FAILED_DELIMITER_ATTR); - keysToRemove.add(FAILED_KEY_ATTR); - keysToRemove.add(FAILED_TOPIC_ATTR); - keysToRemove.add(FAILED_PROC_ID_ATTR); - keysToRemove.add(FAILED_LAST_ACK_IDX); - flowFile = session.removeAllAttributes(flowFile, keysToRemove); - } - return flowFile; - } - - /** - * Builds a {@link Map} of FAILED_* attributes - * - * @see #FAILED_PROC_ID_ATTR - * @see #FAILED_LAST_ACK_IDX - * @see #FAILED_TOPIC_ATTR - * @see #FAILED_KEY_ATTR - * @see #FAILED_DELIMITER_ATTR - */ - private Map buildFailedFlowFileAttributes(int lastAckedMessageIndex, FlowFile sourceFlowFile, ProcessContext context) { - Map attributes = new HashMap<>(); - attributes.put(FAILED_PROC_ID_ATTR, this.getIdentifier()); - attributes.put(FAILED_LAST_ACK_IDX, String.valueOf(lastAckedMessageIndex)); - attributes.put(FAILED_TOPIC_ATTR, context.getProperty(TOPIC).evaluateAttributeExpressions(sourceFlowFile).getValue()); - attributes.put(FAILED_KEY_ATTR, context.getProperty(KEY).evaluateAttributeExpressions(sourceFlowFile).getValue()); - attributes.put(FAILED_DELIMITER_ATTR, context.getProperty(MESSAGE_DEMARCATOR).isSet() - ? context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(sourceFlowFile).getValue() : null); - return attributes; - } - - /** - * Returns 'true' if provided FlowFile is a failed FlowFile. A failed - * FlowFile contains {@link #FAILED_PROC_ID_ATTR}. 
- */ - private boolean isFailedFlowFile(FlowFile flowFile) { - return this.getIdentifier().equals(flowFile.getAttribute(FAILED_PROC_ID_ATTR)); - } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java new file mode 100644 index 000000000000..b68526501f13 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.Collection; +import java.util.Collections; + +import org.apache.nifi.flowfile.FlowFile; + +public interface PublishResult { + Collection getSuccessfulFlowFiles(); + + Collection getFailedFlowFiles(); + + int getSuccessfulMessageCount(FlowFile flowFile); + + Exception getReasonForFailure(FlowFile flowFile); + + + public static final PublishResult EMPTY = new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + return Collections.emptyList(); + } + + @Override + public Collection getFailedFlowFiles() { + return Collections.emptyList(); + } + + @Override + public int getSuccessfulMessageCount(FlowFile flowFile) { + return 0; + } + + @Override + public Exception getReasonForFailure(FlowFile flowFile) { + return null; + } + }; +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java new file mode 100644 index 000000000000..b67e8a8614d8 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.stream.io.exception.TokenTooLargeException; +import org.apache.nifi.stream.io.util.StreamDemarcator; + +public class PublisherLease implements Closeable { + private final ComponentLog logger; + private final Producer producer; + private final int maxMessageSize; + private final long maxAckWaitMillis; + private volatile boolean poisoned = false; + + private InFlightMessageTracker tracker; + + public PublisherLease(final Producer producer, final int maxMessageSize, final long maxAckWaitMillis, final ComponentLog logger) { + this.producer = producer; + this.maxMessageSize = maxMessageSize; + this.logger = logger; + this.maxAckWaitMillis = maxAckWaitMillis; + } + + protected void poison() { + this.poisoned = true; + } + + public boolean isPoisoned() { + return poisoned; + } + + void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic) throws IOException { + if (tracker == null) { + tracker = new 
InFlightMessageTracker(); + } + + try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { + byte[] messageContent; + try { + while ((messageContent = demarcator.nextToken()) != null) { + // We do not want to use any key if we have a demarcator because that would result in + // the key being the same for multiple messages + final byte[] keyToUse = demarcatorBytes == null ? messageKey : null; + publish(flowFile, keyToUse, messageContent, topic, tracker); + + if (tracker.isFailed(flowFile)) { + // If we have a failure, don't try to send anything else. + return; + } + } + } catch (final TokenTooLargeException ttle) { + tracker.fail(flowFile, ttle); + } + } catch (final Exception e) { + tracker.fail(flowFile, e); + poison(); + throw e; + } + } + + private void publish(final FlowFile flowFile, final byte[] messageKey, final byte[] messageContent, final String topic, final InFlightMessageTracker tracker) { + final ProducerRecord record = new ProducerRecord<>(topic, null, messageKey, messageContent); + producer.send(record, new Callback() { + @Override + public void onCompletion(final RecordMetadata metadata, final Exception exception) { + if (exception == null) { + tracker.incrementAcknowledgedCount(flowFile); + } else { + tracker.fail(flowFile, exception); + poison(); + } + } + }); + + tracker.incrementSentCount(flowFile); + } + + public PublishResult complete() { + if (tracker == null) { + throw new IllegalStateException("Cannot complete publishing to Kafka because Publisher Lease was already closed"); + } + + producer.flush(); + + try { + tracker.awaitCompletion(maxAckWaitMillis); + return tracker.createPublishResult(); + } catch (final InterruptedException e) { + logger.warn("Interrupted while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka"); + Thread.currentThread().interrupt(); + return tracker.failOutstanding(e); + } catch 
(final TimeoutException e) { + logger.warn("Timed out while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka"); + return tracker.failOutstanding(e); + } finally { + tracker = null; + } + } + + @Override + public void close() { + producer.close(maxAckWaitMillis, TimeUnit.MILLISECONDS); + tracker = null; + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java new file mode 100644 index 000000000000..5902b038c61d --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.io.Closeable; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.nifi.logging.ComponentLog; + +public class PublisherPool implements Closeable { + private final ComponentLog logger; + private final BlockingQueue publisherQueue; + private final Map kafkaProperties; + private final int maxMessageSize; + private final long maxAckWaitMillis; + + private volatile boolean closed = false; + + PublisherPool(final Map kafkaProperties, final ComponentLog logger, final int maxMessageSize, final long maxAckWaitMillis) { + this.logger = logger; + this.publisherQueue = new LinkedBlockingQueue<>(); + this.kafkaProperties = kafkaProperties; + this.maxMessageSize = maxMessageSize; + this.maxAckWaitMillis = maxAckWaitMillis; + } + + public PublisherLease obtainPublisher() { + if (isClosed()) { + throw new IllegalStateException("Connection Pool is closed"); + } + + PublisherLease lease = publisherQueue.poll(); + if (lease != null) { + return lease; + } + + lease = createLease(); + return lease; + } + + private PublisherLease createLease() { + final Producer producer = new KafkaProducer<>(kafkaProperties); + final PublisherLease lease = new PublisherLease(producer, maxMessageSize, maxAckWaitMillis, logger) { + @Override + public void close() { + if (isPoisoned() || isClosed()) { + super.close(); + } else { + publisherQueue.offer(this); + } + } + }; + + return lease; + } + + public synchronized boolean isClosed() { + return closed; + } + + @Override + public synchronized void close() { + closed = true; + + PublisherLease lease; + while ((lease = publisherQueue.poll()) != null) { + lease.close(); + } + } + + /** + * Returns the number of leases that are currently available + * + * @return the number of leases currently 
available + */ + protected int available() { + return publisherQueue.size(); + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java deleted file mode 100644 index 1513481df136..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -/** - * Holder of context information used by {@link KafkaPublisher} required to - * publish messages to Kafka. 
- */ -class PublishingContext { - - private final InputStream contentStream; - - private final String topic; - - private final int lastAckedMessageIndex; - - private final int maxRequestSize; - - private byte[] keyBytes; - - private byte[] delimiterBytes; - - PublishingContext(InputStream contentStream, String topic) { - this(contentStream, topic, -1); - } - - PublishingContext(InputStream contentStream, String topic, int lastAckedMessageIndex) { - this(contentStream, topic, lastAckedMessageIndex, 1048576); - } - - PublishingContext(InputStream contentStream, String topic, int lastAckedMessageIndex, int maxRequestSize) { - this.validateInput(contentStream, topic, lastAckedMessageIndex); - this.contentStream = contentStream; - this.topic = topic; - this.lastAckedMessageIndex = lastAckedMessageIndex; - this.maxRequestSize = maxRequestSize; - } - - @Override - public String toString() { - return "topic: '" + this.topic + "'; delimiter: '" + new String(this.delimiterBytes, StandardCharsets.UTF_8) + "'"; - } - - int getLastAckedMessageIndex() { - return this.lastAckedMessageIndex; - } - - int getMaxRequestSize() { - return this.maxRequestSize; - } - - byte[] getKeyBytes() { - return this.keyBytes; - } - - byte[] getDelimiterBytes() { - return this.delimiterBytes; - } - - InputStream getContentStream() { - return this.contentStream; - } - - String getTopic() { - return this.topic; - } - - void setKeyBytes(byte[] keyBytes) { - if (this.keyBytes == null) { - if (keyBytes != null) { - this.assertBytesValid(keyBytes); - this.keyBytes = keyBytes; - } - } else { - throw new IllegalArgumentException("'keyBytes' can only be set once per instance"); - } - } - - void setDelimiterBytes(byte[] delimiterBytes) { - if (this.delimiterBytes == null) { - if (delimiterBytes != null) { - this.assertBytesValid(delimiterBytes); - this.delimiterBytes = delimiterBytes; - } - } else { - throw new IllegalArgumentException("'delimiterBytes' can only be set once per instance"); - } - } - - private 
void assertBytesValid(byte[] bytes) { - if (bytes != null) { - if (bytes.length == 0) { - throw new IllegalArgumentException("'bytes' must not be empty"); - } - } - } - - private void validateInput(InputStream contentStream, String topic, int lastAckedMessageIndex) { - if (contentStream == null) { - throw new IllegalArgumentException("'contentStream' must not be null"); - } else if (topic == null || topic.trim().length() == 0) { - throw new IllegalArgumentException("'topic' must not be null or empty"); - } else if (lastAckedMessageIndex < -1) { - throw new IllegalArgumentException("'lastAckedMessageIndex' must be >= -1"); - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java index a85563d98020..6fd90534f00d 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java @@ -16,104 +16,36 @@ */ package org.apache.nifi.processors.kafka.pubsub; -import java.nio.charset.StandardCharsets; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.UUID; +import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; -import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; import 
org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import org.junit.Before; +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; public class ConsumeKafkaTest { - static class MockConsumerPool extends ConsumerPool { - - final int actualMaxLeases; - final List actualTopics; - final Map actualKafkaProperties; - boolean throwKafkaExceptionOnPoll = false; - boolean throwKafkaExceptionOnCommit = false; - Queue> nextPlannedRecordsQueue = new ArrayDeque<>(); - Map nextExpectedCommitOffsets = null; - Map actualCommitOffsets = null; - boolean wasConsumerLeasePoisoned = false; - boolean wasConsumerLeaseClosed = false; - boolean wasPoolClosed = false; - - public MockConsumerPool(int maxLeases, List topics, Map kafkaProperties, ComponentLog logger) { - super(maxLeases, topics, kafkaProperties, null); - actualMaxLeases = maxLeases; - actualTopics = topics; - actualKafkaProperties = kafkaProperties; - } - - @Override - public ConsumerLease obtainConsumer() { - return new ConsumerLease() { - @Override - public ConsumerRecords poll() { - if (throwKafkaExceptionOnPoll) { - throw new KafkaException("i planned to fail"); - } - final ConsumerRecords records = nextPlannedRecordsQueue.poll(); - return (records == null) ? 
ConsumerRecords.empty() : records; - } - - @Override - public void commitOffsets(Map offsets) { - if (throwKafkaExceptionOnCommit) { - throw new KafkaException("i planned to fail"); - } - actualCommitOffsets = offsets; - } - - @Override - public void poison() { - wasConsumerLeasePoisoned = true; - } - - @Override - public void close() { - wasConsumerLeaseClosed = true; - } - }; - } - - @Override - public void close() { - wasPoolClosed = true; - } - - void resetState() { - throwKafkaExceptionOnPoll = false; - throwKafkaExceptionOnCommit = false; - nextPlannedRecordsQueue = null; - nextExpectedCommitOffsets = null; - wasConsumerLeasePoisoned = false; - wasConsumerLeaseClosed = false; - wasPoolClosed = false; - } + Consumer mockConsumer = null; + ConsumerLease mockLease = null; + ConsumerPool mockConsumerPool = null; + @Before + public void setup() { + mockConsumer = mock(Consumer.class); + mockLease = mock(ConsumerLease.class); + mockConsumerPool = mock(ConsumerPool.class); } @Test @@ -174,31 +106,14 @@ public void validatePropertiesValidation() throws Exception { public void validateGetAllMessages() throws Exception { String groupName = "validateGetAllMessages"; - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords secondRecs = createConsumerRecords("bar", 1, 1L, secondPassValues); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - 
mockPool.nextPlannedRecordsQueue.add(firstRecs); - mockPool.nextPlannedRecordsQueue.add(secondRecs); + when(mockConsumerPool.obtainConsumer(anyObject())).thenReturn(mockLease); + when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE); + when(mockLease.commit()).thenReturn(Boolean.TRUE); ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + return mockConsumerPool; } }; final TestRunner runner = TestRunners.newTestRunner(proc); @@ -207,69 +122,29 @@ protected ConsumerPool createConsumerPool(final int maxLeases, final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - if (mockPool.nextPlannedRecordsQueue.isEmpty()) { - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-4")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-5")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-6")).count()); - assertEquals(2, mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - assertEquals(4L, 
mockPool.actualCommitOffsets.get(new TopicPartition("bar", 1)).offset()); - } else { - assertEquals(2, mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - } - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - + verify(mockConsumerPool, times(1)).obtainConsumer(anyObject()); + verify(mockLease, times(3)).continuePolling(); + verify(mockLease, times(2)).poll(); + verify(mockLease, times(1)).commit(); + verify(mockLease, times(1)).close(); + verifyNoMoreInteractions(mockConsumerPool); + verifyNoMoreInteractions(mockLease); } @Test - public void validateGetLotsOfMessages() throws Exception { - String groupName = "validateGetLotsOfMessages"; - - final byte[][] firstPassValues = new byte[10010][1]; - for (final byte[] value : firstPassValues) { - value[0] = 0x12; - } - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); + public void validateGetErrorMessages() throws Exception { + String groupName = "validateGetErrorMessages"; - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords secondRecs = createConsumerRecords("bar", 1, 1L, secondPassValues); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); - 
mockPool.nextPlannedRecordsQueue.add(secondRecs); + when(mockConsumerPool.obtainConsumer(anyObject())).thenReturn(mockLease); + when(mockLease.continuePolling()).thenReturn(true, false); + when(mockLease.commit()).thenReturn(Boolean.FALSE); ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + return mockConsumerPool; } }; final TestRunner runner = TestRunners.newTestRunner(proc); @@ -278,352 +153,15 @@ protected ConsumerPool createConsumerPool(final int maxLeases, final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(10010, flowFiles.stream().map(ff -> ff.toByteArray()).filter(content -> content.length == 1 && content[0] == 0x12).count()); - assertEquals(1, mockPool.nextPlannedRecordsQueue.size()); - - assertEquals(1, mockPool.actualCommitOffsets.size()); - assertEquals(10011L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - + verify(mockConsumerPool, times(1)).obtainConsumer(anyObject()); + verify(mockLease, times(2)).continuePolling(); + verify(mockLease, times(1)).poll(); + verify(mockLease, times(1)).commit(); + verify(mockLease, times(1)).close(); + verifyNoMoreInteractions(mockConsumerPool); + verifyNoMoreInteractions(mockLease); } - @SuppressWarnings({"rawtypes", "unchecked"}) - private ConsumerRecords createConsumerRecords(final 
String topic, final int partition, final long startingOffset, final byte[][] rawRecords) { - final Map>> map = new HashMap<>(); - final TopicPartition tPart = new TopicPartition(topic, partition); - final List> records = new ArrayList<>(); - long offset = startingOffset; - for (final byte[] rawRecord : rawRecords) { - final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord); - records.add(rec); - } - map.put(tPart, records); - return new ConsumerRecords(map); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private ConsumerRecords createConsumerRecords(final String topic, final int partition, final long startingOffset, final Map rawRecords) { - final Map>> map = new HashMap<>(); - final TopicPartition tPart = new TopicPartition(topic, partition); - final List> records = new ArrayList<>(); - long offset = startingOffset; - - for (final Map.Entry entry : rawRecords.entrySet()) { - final byte[] key = entry.getKey(); - final byte[] rawRecord = entry.getValue(); - final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, key, rawRecord); - records.add(rec); - } - map.put(tPart, records); - return new ConsumerRecords(map); - } - - private ConsumerRecords mergeRecords(final ConsumerRecords... 
records) { - final Map>> map = new HashMap<>(); - for (final ConsumerRecords rec : records) { - rec.partitions().stream().forEach((part) -> { - final List> conRecs = rec.records(part); - if (map.get(part) != null) { - throw new IllegalStateException("already have that topic/partition in the record map"); - } - map.put(part, conRecs); - }); - } - return new ConsumerRecords<>(map); - } - - @Test - public void validateGetAllMessagesWithProvidedDemarcator() throws Exception { - String groupName = "validateGetAllMessagesWithProvidedDemarcator"; - - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords consumerRecs = mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues), - createConsumerRecords("bar", 1, 1L, secondPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); - - ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka_0_10.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka_0_10.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka_0_10.AUTO_OFFSET_RESET, ConsumeKafka_0_10.OFFSET_EARLIEST); - 
runner.setProperty(ConsumeKafka_0_10.MESSAGE_DEMARCATOR, "blah"); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(2, flowFiles.size()); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1blahHello-2blahHello-3")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-4blahHello-5blahHello-6")).count()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - assertEquals(2, mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("bar", 1)).offset()); - } - - @Test - public void validatePollException() throws Exception { - String groupName = "validatePollException"; - - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final ConsumerRecords consumerRecs = mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); - mockPool.throwKafkaExceptionOnPoll = true; - - ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { - @Override - protected ConsumerPool 
createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka_0_10.TOPICS, "foo"); - runner.setProperty(ConsumeKafka_0_10.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka_0_10.AUTO_OFFSET_RESET, ConsumeKafka_0_10.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka_0_10.MESSAGE_DEMARCATOR, "blah"); - - runner.run(1, true); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(0, flowFiles.size()); - assertNull(null, mockPool.actualCommitOffsets); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertTrue(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - } - - @Test - public void validateCommitOffsetException() throws Exception { - String groupName = "validateCommitOffsetException"; - - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final ConsumerRecords consumerRecs = mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); - mockPool.throwKafkaExceptionOnCommit = true; - - ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - - 
final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka_0_10.TOPICS, "foo"); - runner.setProperty(ConsumeKafka_0_10.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka_0_10.AUTO_OFFSET_RESET, ConsumeKafka_0_10.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka_0_10.MESSAGE_DEMARCATOR, "blah"); - - runner.run(1, true); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(1, flowFiles.size()); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1blahHello-2blahHello-3")).count()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertTrue(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - assertNull(null, mockPool.actualCommitOffsets); - } - - @Test - public void validateUtf8Key() { - String groupName = "validateGetAllMessages"; - - final Map rawRecords = new HashMap<>(); - rawRecords.put("key1".getBytes(), "Hello-1".getBytes()); - rawRecords.put(new byte[0], "Hello-2".getBytes()); - rawRecords.put(null, "Hello-3".getBytes()); - - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, rawRecords); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.emptyMap(), null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); - - ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - final TestRunner runner = 
TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka_0_10.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka_0_10.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka_0_10.AUTO_OFFSET_RESET, ConsumeKafka_0_10.OFFSET_EARLIEST); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "key1".equals(key)).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> key == null).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "".equals(key)).count()); - - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - } - - @Test - public void validateHexKey() { - String groupName = "validateGetAllMessages"; - - final Map rawRecords = new HashMap<>(); - rawRecords.put("key1".getBytes(), "Hello-1".getBytes()); - rawRecords.put(new byte[0], 
"Hello-2".getBytes()); - rawRecords.put(null, "Hello-3".getBytes()); - - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, rawRecords); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.emptyMap(), null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); - - ConsumeKafka_0_10 proc = new ConsumeKafka_0_10() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka_0_10.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka_0_10.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka_0_10.AUTO_OFFSET_RESET, ConsumeKafka_0_10.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka_0_10.KEY_ATTRIBUTE_ENCODING, ConsumeKafka_0_10.HEX_ENCODING); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka_0_10.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - final String expectedHex = (Integer.toHexString('k') + Integer.toHexString('e') + Integer.toHexString('y') + Integer.toHexString('1')).toUpperCase(); - - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> 
expectedHex.equals(key)).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> key == null).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "".equals(key)).count()); - - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java index 7f88ea2b2bb1..0ebf2b3ce337 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java @@ -16,109 +16,203 @@ */ package org.apache.nifi.processors.kafka.pubsub; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicPartition; import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessSession; +import 
org.apache.nifi.provenance.ProvenanceReporter; import org.apache.nifi.processors.kafka.pubsub.ConsumerPool.PoolStats; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import org.junit.Before; import org.junit.Test; -import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyLong; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class ConsumerPoolTest { Consumer consumer = null; + ProcessSession mockSession = null; + ProvenanceReporter mockReporter = null; + ConsumerPool testPool = null; + ConsumerPool testDemarcatedPool = null; ComponentLog logger = null; @Before public void setup() { consumer = mock(Consumer.class); logger = mock(ComponentLog.class); - } - - @Test - public void validatePoolSimpleCreateClose() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(1, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { + mockSession = mock(ProcessSession.class); + mockReporter = mock(ProvenanceReporter.class); + when(mockSession.getProvenanceReporter()).thenReturn(mockReporter); + testPool = new ConsumerPool( + 1, + null, + Collections.emptyMap(), + Collections.singletonList("nifi"), + 100L, + "utf-8", + "ssl", + "localhost", + logger) { @Override protected Consumer createKafkaConsumer() { return consumer; } }; + testDemarcatedPool = new ConsumerPool( + 1, + "--demarcator--".getBytes(StandardCharsets.UTF_8), + Collections.emptyMap(), + Collections.singletonList("nifi"), + 100L, + "utf-8", + "ssl", + "localhost", + logger) { + @Override + protected Consumer createKafkaConsumer() { + return consumer; + } + }; + } - when(consumer.poll(anyInt())).thenReturn(ConsumerRecords.empty()); + @Test + public void validatePoolSimpleCreateClose() throws Exception { - try (final ConsumerLease lease = testPool.obtainConsumer()) { + 
when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { lease.poll(); - lease.commitOffsets(Collections.emptyMap()); } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats stats = testPool.getPoolStats(); assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); - assertEquals(1, stats.leasesObtainedCount); - assertEquals(1, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); + assertEquals(4, stats.leasesObtainedCount); } @Test - public void validatePoolSimpleBatchCreateClose() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(5, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { - @Override - protected Consumer createKafkaConsumer() { - return consumer; - } + public void validatePoolSimpleCreatePollClose() throws Exception { + final byte[][] firstPassValues = new byte[][]{ + "Hello-1".getBytes(StandardCharsets.UTF_8), + "Hello-2".getBytes(StandardCharsets.UTF_8), + "Hello-3".getBytes(StandardCharsets.UTF_8) }; + final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - when(consumer.poll(anyInt())).thenReturn(ConsumerRecords.empty()); + when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + lease.commit(); + } + testPool.close(); + verify(mockSession, times(3)).create(); + verify(mockSession, times(1)).commit(); + final PoolStats stats = 
testPool.getPoolStats(); + assertEquals(1, stats.consumerCreatedCount); + assertEquals(1, stats.consumerClosedCount); + assertEquals(1, stats.leasesObtainedCount); + } + @Test + public void validatePoolSimpleBatchCreateClose() throws Exception { + when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{})); for (int i = 0; i < 100; i++) { - try (final ConsumerLease lease = testPool.obtainConsumer()) { + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { for (int j = 0; j < 100; j++) { lease.poll(); } - lease.commitOffsets(Collections.emptyMap()); } } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats stats = testPool.getPoolStats(); assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); assertEquals(100, stats.leasesObtainedCount); - assertEquals(10000, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); } @Test - public void validatePoolConsumerFails() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(1, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { - @Override - protected Consumer createKafkaConsumer() { - return consumer; - } + public void validatePoolBatchCreatePollClose() throws Exception { + final byte[][] firstPassValues = new byte[][]{ + "Hello-1".getBytes(StandardCharsets.UTF_8), + "Hello-2".getBytes(StandardCharsets.UTF_8), + "Hello-3".getBytes(StandardCharsets.UTF_8) }; + final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - when(consumer.poll(anyInt())).thenThrow(new KafkaException()); - - try (final ConsumerLease lease = testPool.obtainConsumer()) { + when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testDemarcatedPool.obtainConsumer(mockSession)) { lease.poll(); - fail(); - } catch (final KafkaException 
ke) { + lease.commit(); + } + testDemarcatedPool.close(); + verify(mockSession, times(1)).create(); + verify(mockSession, times(1)).commit(); + final PoolStats stats = testDemarcatedPool.getPoolStats(); + assertEquals(1, stats.consumerCreatedCount); + assertEquals(1, stats.consumerClosedCount); + assertEquals(1, stats.leasesObtainedCount); + } + + @Test + public void validatePoolConsumerFails() throws Exception { + + when(consumer.poll(anyLong())).thenThrow(new KafkaException("oops")); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + try { + lease.poll(); + fail(); + } catch (final KafkaException ke) { + } } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats stats = testPool.getPoolStats(); assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); assertEquals(1, stats.leasesObtainedCount); - assertEquals(0, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); } + + @SuppressWarnings({"rawtypes", "unchecked"}) + static ConsumerRecords createConsumerRecords(final String topic, final int partition, final long startingOffset, final byte[][] rawRecords) { + final Map>> map = new HashMap<>(); + final TopicPartition tPart = new TopicPartition(topic, partition); + final List> records = new ArrayList<>(); + long offset = startingOffset; + for (final byte[] rawRecord : rawRecords) { + final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord); + records.add(rec); + } + map.put(tPart, records); + return new ConsumerRecords(map); + } + } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java deleted file mode 100644 index 
19c64af1af48..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.kafka.pubsub; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.kafka.clients.producer.Partitioner; -import org.apache.kafka.common.Cluster; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processors.kafka.pubsub.KafkaPublisher.KafkaPublisherResult; -import org.apache.nifi.processors.kafka.test.EmbeddedKafka; -import org.apache.nifi.processors.kafka.test.EmbeddedKafkaProducerHelper; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -import kafka.consumer.Consumer; -import kafka.consumer.ConsumerConfig; -import kafka.consumer.ConsumerIterator; -import kafka.consumer.ConsumerTimeoutException; -import kafka.consumer.KafkaStream; -import kafka.javaapi.consumer.ConsumerConnector; -import org.apache.kafka.clients.producer.ProducerConfig; - -public class KafkaPublisherTest { - - private static EmbeddedKafka kafkaLocal; - - private static EmbeddedKafkaProducerHelper producerHelper; - - @BeforeClass - public static void beforeClass() { - kafkaLocal = new EmbeddedKafka(); - kafkaLocal.start(); - producerHelper = new EmbeddedKafkaProducerHelper(kafkaLocal); - } - - @AfterClass - public static void afterClass() throws Exception { - producerHelper.close(); - kafkaLocal.stop(); - } - - @Test - public void validateSuccessfulSendAsWhole() throws Exception { - InputStream contentStream = new ByteArrayInputStream("Hello Kafka".getBytes(StandardCharsets.UTF_8)); - String topicName = "validateSuccessfulSendAsWhole"; - - 
Properties kafkaProperties = this.buildProducerProperties(); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - KafkaPublisherResult result = publisher.publish(publishingContext); - - assertEquals(0, result.getLastMessageAcked()); - assertEquals(1, result.getMessagesSent()); - contentStream.close(); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - assertNotNull(iter.next()); - try { - iter.next(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - } - - @Test - public void validateSuccessfulSendAsDelimited() throws Exception { - InputStream contentStream = new ByteArrayInputStream( - "Hello Kafka\nHello Kafka\nHello Kafka\nHello Kafka\n".getBytes(StandardCharsets.UTF_8)); - String topicName = "validateSuccessfulSendAsDelimited"; - - Properties kafkaProperties = this.buildProducerProperties(); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - KafkaPublisherResult result = publisher.publish(publishingContext); - - assertEquals(3, result.getLastMessageAcked()); - assertEquals(4, result.getMessagesSent()); - contentStream.close(); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - assertNotNull(iter.next()); - assertNotNull(iter.next()); - assertNotNull(iter.next()); - assertNotNull(iter.next()); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - } - - /* - * This test simulates the condition where not all messages were ACKed by - * Kafka - */ - @Test - public void validateRetries() throws Exception { - byte[] testValue = 
"Hello Kafka1\nHello Kafka2\nHello Kafka3\nHello Kafka4\n".getBytes(StandardCharsets.UTF_8); - InputStream contentStream = new ByteArrayInputStream(testValue); - String topicName = "validateSuccessfulReSendOfFailedSegments"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - // simulates the first re-try - int lastAckedMessageIndex = 1; - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - - publisher.publish(publishingContext); - - ConsumerIterator iter = this.buildConsumer(topicName); - String m1 = new String(iter.next().message()); - String m2 = new String(iter.next().message()); - assertEquals("Hello Kafka3", m1); - assertEquals("Hello Kafka4", m2); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - // simulates the second re-try - lastAckedMessageIndex = 2; - contentStream = new ByteArrayInputStream(testValue); - publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - publisher.publish(publishingContext); - - m1 = new String(iter.next().message()); - assertEquals("Hello Kafka4", m1); - - publisher.close(); - } - - /* - * Similar to the above test, but it sets the first retry index to the last - * possible message index and second index to an out of bound index. 
The - * expectation is that no messages will be sent to Kafka - */ - @Test - public void validateRetriesWithWrongIndex() throws Exception { - byte[] testValue = "Hello Kafka1\nHello Kafka2\nHello Kafka3\nHello Kafka4\n".getBytes(StandardCharsets.UTF_8); - InputStream contentStream = new ByteArrayInputStream(testValue); - String topicName = "validateRetriesWithWrongIndex"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - // simulates the first re-try - int lastAckedMessageIndex = 3; - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - - publisher.publish(publishingContext); - - ConsumerIterator iter = this.buildConsumer(topicName); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - // simulates the second re-try - lastAckedMessageIndex = 6; - contentStream = new ByteArrayInputStream(testValue); - publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - publisher.publish(publishingContext); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - publisher.close(); - } - - @Test - public void validateWithMultiByteCharactersNoDelimiter() throws Exception { - String data = "僠THIS IS MY NEW TEXT.僠IT HAS A NEWLINE."; - InputStream contentStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); - String topicName = "validateWithMultiByteCharacters"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - 
PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - - publisher.publish(publishingContext); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - String r = new String(iter.next().message(), StandardCharsets.UTF_8); - assertEquals(data, r); - } - - @Test - public void validateWithNonDefaultPartitioner() throws Exception { - String data = "fooandbarandbaz"; - InputStream contentStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); - String topicName = "validateWithNonDefaultPartitioner"; - - Properties kafkaProperties = this.buildProducerProperties(); - kafkaProperties.setProperty("partitioner.class", TestPartitioner.class.getName()); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - publishingContext.setDelimiterBytes("and".getBytes(StandardCharsets.UTF_8)); - - try { - publisher.publish(publishingContext); - // partitioner should be invoked 3 times - assertTrue(TestPartitioner.counter == 3); - publisher.close(); - } finally { - TestPartitioner.counter = 0; - } - } - - private Properties buildProducerProperties() { - Properties kafkaProperties = new Properties(); - kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + kafkaLocal.getKafkaPort()); - kafkaProperties.put("auto.create.topics.enable", "true"); - return kafkaProperties; - } - - private ConsumerIterator buildConsumer(String topic) { - Properties props = new Properties(); - props.put("zookeeper.connect", "localhost:" + kafkaLocal.getZookeeperPort()); - props.put("group.id", "test"); - props.put("consumer.timeout.ms", "500"); - props.put("auto.offset.reset", 
"smallest"); - ConsumerConfig consumerConfig = new ConsumerConfig(props); - ConsumerConnector consumer = Consumer.createJavaConsumerConnector(consumerConfig); - Map topicCountMap = new HashMap<>(1); - topicCountMap.put(topic, 1); - Map>> consumerMap = consumer.createMessageStreams(topicCountMap); - List> streams = consumerMap.get(topic); - ConsumerIterator iter = streams.get(0).iterator(); - return iter; - } - - public static class TestPartitioner implements Partitioner { - - static int counter; - - @Override - public void configure(Map configs) { - // nothing to do, test - } - - @Override - public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, - Cluster cluster) { - counter++; - return 0; - } - - @Override - public void close() { - counter = 0; - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java deleted file mode 100644 index af0d343ad55d..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.Map; - -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.Test; -import org.mockito.Mockito; -import static org.mockito.Mockito.times; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.verify; - -public class PublishKafkaTest { - - @Test - public void validateCustomSerilaizerDeserializerSettings() throws Exception { - PublishKafka_0_10 publishKafka = new PublishKafka_0_10(); - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234"); - runner.setProperty(PublishKafka_0_10.TOPIC, "foo"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "3 sec"); - runner.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - runner.assertValid(); - runner.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "Foo"); - runner.assertNotValid(); - } - - @Test - public void 
validatePropertiesValidation() throws Exception { - PublishKafka_0_10 publishKafka = new PublishKafka_0_10(); - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234"); - runner.setProperty(PublishKafka_0_10.TOPIC, "foo"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "foo"); - - try { - runner.assertValid(); - fail(); - } catch (AssertionError e) { - assertTrue(e.getMessage().contains("'max.block.ms' validated against 'foo' is invalid")); - } - } - - @Test - public void validateCustomValidation() { - String topicName = "validateCustomValidation"; - PublishKafka_0_10 publishKafka = new PublishKafka_0_10(); - - /* - * Validates that Kerberos principle is required if one of SASL set for - * secirity protocol - */ - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(KafkaProcessorUtils.SECURITY_PROTOCOL, KafkaProcessorUtils.SEC_SASL_PLAINTEXT); - try { - runner.run(); - fail(); - } catch (Throwable e) { - assertTrue(e.getMessage().contains("'Kerberos Service Name' is invalid because")); - } - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateSingleCharacterDemarcatedMessages() { - String topicName = "validateSingleCharacterDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - - runner.enqueue("Hello World\nGoodbye\n1\n2\n3\n4\n5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(0, 
runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(7)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateMultiCharacterDemarcatedMessagesAndCustomPartitionerA() { - String topicName = "validateMultiCharacterDemarcatedMessagesAndCustomPartitioner"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.PARTITION_CLASS, Partitioners.RoundRobinPartitioner.class.getName()); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "foo"); - - runner.enqueue("Hello WorldfooGoodbyefoo1foo2foo3foo4foo5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(7)).send(Mockito.any(ProducerRecord.class)); - - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateMultiCharacterDemarcatedMessagesAndCustomPartitionerB() { - String topicName = "validateMultiCharacterDemarcatedMessagesAndCustomPartitioner"; - StubPublishKafka putKafka = new StubPublishKafka(1); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.PARTITION_CLASS, Partitioners.RoundRobinPartitioner.class.getName()); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "foo"); - - runner.enqueue("Hello WorldfooGoodbyefoo1foo2foo3foo4foo5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, 
false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(7)).send(Mockito.any(ProducerRecord.class)); - - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnSendFailureAndThenResendSuccessA() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "3000 millis"); - - final String text = "Hello World\nGoodbye\nfail\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(1, runner.getQueueSize().getObjectCount()); // due to failure - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - putKafka.destroy(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnSendFailureAndThenResendSuccessB() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(1); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "500 millis"); - - final String text = "Hello World\nGoodbye\nfail\n2"; - 
runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(1, runner.getQueueSize().getObjectCount()); // due to failure - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnFutureGetFailureAndThenResendSuccessFirstMessageFail() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "500 millis"); - - final String text = "futurefail\nHello World\nGoodbye\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - MockFlowFile ff = runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_FAILURE).get(0); - assertNotNull(ff); - runner.enqueue(ff); - - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - // 6 sends due to duplication - verify(producer, times(5)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnFutureGetFailureAndThenResendSuccess() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - 
runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka_0_10.META_WAIT_TIME, "500 millis"); - - final String text = "Hello World\nGoodbye\nfuturefail\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - MockFlowFile ff = runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_FAILURE).get(0); - assertNotNull(ff); - runner.enqueue(ff); - - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - // 6 sends due to duplication - verify(producer, times(6)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateDemarcationIntoEmptyMessages() { - String topicName = "validateDemarcationIntoEmptyMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - final TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(PublishKafka_0_10.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "\n"); - - final byte[] bytes = "\n\n\n1\n2\n\n\n\n3\n4\n\n\n".getBytes(StandardCharsets.UTF_8); - runner.enqueue(bytes); - runner.run(1); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexRightPartialDemarcatedMessages() { - String topicName = "validateComplexRightPartialDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - 
runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDSTUFF僠>僠I Mean IT!僠<僠WILDSTUFF僠>".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - Producer producer = putKafka.getProducer(); - verify(producer, times(3)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexLeftPartialDemarcatedMessages() { - String topicName = "validateComplexLeftPartialDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDSTUFF僠>僠I Mean IT!僠<僠WILDSTUFF僠>僠<僠WILDSTUFF僠>僠".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 1); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexPartialMatchDemarcatedMessages() { - String topicName = "validateComplexPartialMatchDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDBOOMSTUFF僠>僠".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 1); - Producer 
producer = putKafka.getProducer(); - verify(producer, times(2)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @Test - public void validateUtf8Key() { - String topicName = "validateUtf8Key"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.KEY, "${myKey}"); - - final Map attributes = Collections.singletonMap("myKey", "key1"); - runner.enqueue("Hello World".getBytes(StandardCharsets.UTF_8), attributes); - runner.run(1); - - runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 1); - final Map msgs = putKafka.getMessagesSent(); - assertEquals(1, msgs.size()); - final byte[] msgKey = (byte[]) msgs.keySet().iterator().next(); - assertTrue(Arrays.equals("key1".getBytes(StandardCharsets.UTF_8), msgKey)); - } - - @Test - public void validateHexKey() { - String topicName = "validateUtf8Key"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka_0_10.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka_0_10.KEY_ATTRIBUTE_ENCODING, PublishKafka_0_10.HEX_ENCODING); - runner.setProperty(PublishKafka_0_10.KEY, "${myKey}"); - - final Map attributes = Collections.singletonMap("myKey", "6B657931"); - runner.enqueue("Hello World".getBytes(StandardCharsets.UTF_8), attributes); - runner.run(1); - - runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 1); - final Map msgs = putKafka.getMessagesSent(); - assertEquals(1, msgs.size()); - final byte[] msgKey = (byte[]) msgs.keySet().iterator().next(); - - assertTrue(Arrays.equals(new byte[] {0x6B, 0x65, 0x79, 0x31}, msgKey)); - } -} diff --git 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java deleted file mode 100644 index 76c29cdd97f9..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.kafka.pubsub; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; - -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -import org.junit.Test; - -public class PublishingContextTest { - - @Test - public void failInvalidConstructorArgs() { - try { - new PublishingContext(null, null); - fail(); - } catch (IllegalArgumentException e) { - // success - } - try { - new PublishingContext(mock(InputStream.class), null); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - try { - new PublishingContext(mock(InputStream.class), ""); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - try { - new PublishingContext(mock(InputStream.class), "mytopic", -3); - fail(); - } catch (IllegalArgumentException e) { - // success - } - } - - @Test - public void validateFullSetting() { - PublishingContext publishingContext = new PublishingContext(mock(InputStream.class), "topic", 3); - publishingContext.setDelimiterBytes("delimiter".getBytes(StandardCharsets.UTF_8)); - publishingContext.setKeyBytes("key".getBytes(StandardCharsets.UTF_8)); - - assertEquals("delimiter", new String(publishingContext.getDelimiterBytes(), StandardCharsets.UTF_8)); - assertEquals("key", new String(publishingContext.getKeyBytes(), StandardCharsets.UTF_8)); - assertEquals("topic", publishingContext.getTopic()); - assertEquals("topic: 'topic'; delimiter: 'delimiter'", publishingContext.toString()); - } - - @Test - public void validateOnlyOnceSetPerInstance() { - PublishingContext publishingContext = new PublishingContext(mock(InputStream.class), "topic"); - publishingContext.setKeyBytes(new byte[]{0}); - try { - publishingContext.setKeyBytes(new byte[]{0}); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - publishingContext.setDelimiterBytes(new byte[]{0}); - try { - publishingContext.setDelimiterBytes(new byte[]{0}); - 
fail(); - } catch (IllegalArgumentException e) { - // success - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java deleted file mode 100644 index c009014a866b..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.lang.reflect.Field; -import static org.mockito.Mockito.when; - -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.TopicAuthorizationException; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.exception.ProcessException; -import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.BOOTSTRAP_SERVERS; -import org.mockito.Mockito; -import static org.mockito.Mockito.mock; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -public class StubPublishKafka extends PublishKafka_0_10 { - - private volatile Producer producer; - - private volatile boolean failed; - - private final int ackCheckSize; - - private final ExecutorService executor = Executors.newCachedThreadPool(); - private final Map msgsSent = new ConcurrentHashMap<>(); - - StubPublishKafka(int ackCheckSize) { - this.ackCheckSize = ackCheckSize; - } - - public Producer getProducer() { - return producer; - } - - public void destroy() { - this.executor.shutdownNow(); - } - - public Map getMessagesSent() { - return new HashMap<>(msgsSent); - } - - @SuppressWarnings("unchecked") - @Override - protected KafkaPublisher buildKafkaResource(ProcessContext 
context, ProcessSession session) - throws ProcessException { - final Map kafkaProperties = new HashMap<>(); - KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProperties); - kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - KafkaPublisher publisher; - try { - Field f = PublishKafka_0_10.class.getDeclaredField("brokers"); - f.setAccessible(true); - f.set(this, context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue()); - publisher = (KafkaPublisher) TestUtils.getUnsafe().allocateInstance(KafkaPublisher.class); - publisher.setAckWaitTime(15000); - producer = mock(Producer.class); - this.instrumentProducer(producer, false); - Field kf = KafkaPublisher.class.getDeclaredField("kafkaProducer"); - kf.setAccessible(true); - kf.set(publisher, producer); - - Field componentLogF = KafkaPublisher.class.getDeclaredField("componentLog"); - componentLogF.setAccessible(true); - componentLogF.set(publisher, mock(ComponentLog.class)); - - Field ackCheckSizeField = KafkaPublisher.class.getDeclaredField("ackCheckSize"); - ackCheckSizeField.setAccessible(true); - ackCheckSizeField.set(publisher, this.ackCheckSize); - } catch (Exception e) { - e.printStackTrace(); - throw new IllegalStateException(e); - } - return publisher; - } - - @SuppressWarnings("unchecked") - private void instrumentProducer(Producer producer, boolean failRandomly) { - - when(producer.send(Mockito.any(ProducerRecord.class))).then(new Answer>() { - @Override - public Future answer(InvocationOnMock invocation) throws Throwable { - final ProducerRecord record = invocation.getArgumentAt(0, ProducerRecord.class); - if (record != null && record.key() != null) { - msgsSent.put(record.key(), record.value()); - } - - String value = new String(record.value(), StandardCharsets.UTF_8); - if ("fail".equals(value) 
&& !StubPublishKafka.this.failed) { - StubPublishKafka.this.failed = true; - throw new RuntimeException("intentional"); - } - Future future = executor.submit(new Callable() { - @Override - public RecordMetadata call() throws Exception { - if ("futurefail".equals(value) && !StubPublishKafka.this.failed) { - StubPublishKafka.this.failed = true; - throw new TopicAuthorizationException("Unauthorized"); - } else { - TopicPartition partition = new TopicPartition("foo", 0); - RecordMetadata meta = new RecordMetadata(partition, 0, 0); - return meta; - } - } - }); - return future; - } - }); - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java new file mode 100644 index 000000000000..e54a10c85e77 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeoutException; + +import org.apache.nifi.util.MockFlowFile; +import org.junit.Assert; +import org.junit.Test; + +public class TestInFlightMessageTracker { + + @Test(timeout = 5000L) + public void testAwaitCompletionWhenComplete() throws InterruptedException, TimeoutException { + final MockFlowFile flowFile = new MockFlowFile(1L); + + final InFlightMessageTracker tracker = new InFlightMessageTracker(); + tracker.incrementSentCount(flowFile); + + verifyNotComplete(tracker); + + tracker.incrementSentCount(flowFile); + verifyNotComplete(tracker); + + tracker.incrementAcknowledgedCount(flowFile); + verifyNotComplete(tracker); + + tracker.incrementAcknowledgedCount(flowFile); + tracker.awaitCompletion(1L); + } + + @Test(timeout = 5000L) + public void testAwaitCompletionWhileWaiting() throws InterruptedException, ExecutionException { + final MockFlowFile flowFile = new MockFlowFile(1L); + + final InFlightMessageTracker tracker = new InFlightMessageTracker(); + tracker.incrementSentCount(flowFile); + + verifyNotComplete(tracker); + + tracker.incrementSentCount(flowFile); + verifyNotComplete(tracker); + + final ExecutorService exec = Executors.newFixedThreadPool(1); + final Future future = exec.submit(() -> { + try { + tracker.awaitCompletion(10000L); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + tracker.incrementAcknowledgedCount(flowFile); + tracker.incrementAcknowledgedCount(flowFile); + + future.get(); + } + + private void verifyNotComplete(final InFlightMessageTracker tracker) throws InterruptedException { + try { + tracker.awaitCompletion(10L); + Assert.fail("Expected timeout"); + } catch (final TimeoutException te) { + // expected + } + } + +} diff --git 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java new file mode 100644 index 000000000000..c7d1a60365eb --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.Before; +import org.junit.Test; + +public class TestPublishKafka { + private static final String TOPIC_NAME = "unit-test"; + + private PublisherPool mockPool; + private PublisherLease mockLease; + private TestRunner runner; + + @Before + public void setup() { + mockPool = mock(PublisherPool.class); + mockLease = mock(PublisherLease.class); + + when(mockPool.obtainPublisher()).thenReturn(mockLease); + + runner = TestRunners.newTestRunner(new PublishKafka_0_10() { + @Override + protected PublisherPool createPublisherPool(final ProcessContext context) { + return mockPool; + } + }); + + runner.setProperty(PublishKafka_0_10.TOPIC, TOPIC_NAME); + } + + @Test + public void testSingleSuccess() throws IOException { + final MockFlowFile flowFile = runner.enqueue("hello world"); + + when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 1); + + verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), 
eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testMultipleSuccess() throws IOException { + final Set flowFiles = new HashSet<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + + when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 3); + + verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testSingleFailure() throws IOException { + final MockFlowFile flowFile = runner.enqueue("hello world"); + + when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_FAILURE, 1); + + verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testMultipleFailures() throws IOException { + final Set flowFiles = new HashSet<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_FAILURE, 3); + + verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + } + + @Test + public void 
testMultipleMessagesPerFlowFile() throws IOException { + final List flowFiles = new ArrayList<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + final Map msgCounts = new HashMap<>(); + msgCounts.put(flowFiles.get(0), 10); + msgCounts.put(flowFiles.get(1), 20); + + final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap()); + + when(mockLease.complete()).thenReturn(result); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka_0_10.REL_SUCCESS, 2); + + verify(mockLease, times(2)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + + runner.assertAllFlowFilesContainAttribute("msg.count"); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_SUCCESS).stream() + .filter(ff -> ff.getAttribute("msg.count").equals("10")) + .count()); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_SUCCESS).stream() + .filter(ff -> ff.getAttribute("msg.count").equals("20")) + .count()); + } + + + @Test + public void testSomeSuccessSomeFailure() throws IOException { + final List flowFiles = new ArrayList<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + final Map msgCounts = new HashMap<>(); + msgCounts.put(flowFiles.get(0), 10); + msgCounts.put(flowFiles.get(1), 20); + + final Map failureMap = new HashMap<>(); + failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception")); + failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception")); + + final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap); + + 
when(mockLease.complete()).thenReturn(result); + + runner.run(); + runner.assertTransferCount(PublishKafka_0_10.REL_SUCCESS, 2); + runner.assertTransferCount(PublishKafka_0_10.REL_FAILURE, 2); + + verify(mockLease, times(4)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_SUCCESS).stream() + .filter(ff -> "10".equals(ff.getAttribute("msg.count"))) + .count()); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_SUCCESS).stream() + .filter(ff -> "20".equals(ff.getAttribute("msg.count"))) + .count()); + + assertTrue(runner.getFlowFilesForRelationship(PublishKafka_0_10.REL_FAILURE).stream() + .noneMatch(ff -> ff.getAttribute("msg.count") != null)); + } + + + private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) { + return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount); + } + + private PublishResult createAllSuccessPublishResult(final Set successfulFlowFiles, final int msgCountPerFlowFile) { + final Map msgCounts = new HashMap<>(); + for (final FlowFile ff : successfulFlowFiles) { + msgCounts.put(ff, msgCountPerFlowFile); + } + return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap()); + } + + private PublishResult createFailurePublishResult(final FlowFile failure) { + return createFailurePublishResult(Collections.singleton(failure)); + } + + private PublishResult createFailurePublishResult(final Set failures) { + final Map failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception"))); + return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap); + } + + private PublishResult createPublishResult(final Map msgCounts, final Set successFlowFiles, 
final Map failures) { + // sanity check. + for (final FlowFile success : successFlowFiles) { + if (failures.containsKey(success)) { + throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success); + } + } + + return new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + return successFlowFiles; + } + + @Override + public Collection getFailedFlowFiles() { + return failures.keySet(); + } + + @Override + public int getSuccessfulMessageCount(FlowFile flowFile) { + Integer count = msgCounts.get(flowFile); + return count == null ? 0 : count.intValue(); + } + + @Override + public Exception getReasonForFailure(FlowFile flowFile) { + return failures.get(flowFile); + } + }; + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java new file mode 100644 index 000000000000..c2d143cf3155 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.util.MockFlowFile; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + + +public class TestPublisherLease { + private ComponentLog logger; + private Producer producer; + + @Before + @SuppressWarnings("unchecked") + public void setup() { + logger = Mockito.mock(ComponentLog.class); + producer = Mockito.mock(Producer.class); + } + + @Test + public void testPoisonOnException() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger) { + @Override + public void poison() { + poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = null; + + final InputStream failureInputStream = new InputStream() { + @Override + public int read() throws 
IOException { + throw new IOException("Intentional Unit Test Exception"); + } + }; + + try { + lease.publish(flowFile, failureInputStream, messageKey, demarcatorBytes, topic); + Assert.fail("Expected IOException"); + } catch (final IOException ioe) { + // expected + } + + assertEquals(1, poisonCount.get()); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + } + + @Test + @SuppressWarnings("unchecked") + public void testPoisonOnFailure() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger) { + @Override + public void poison() { + poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = null; + + doAnswer(new Answer() { + @Override + public Object answer(final InvocationOnMock invocation) throws Throwable { + final Callback callback = invocation.getArgumentAt(1, Callback.class); + callback.onCompletion(null, new RuntimeException("Unit Test Intentional Exception")); + return null; + } + }).when(producer).send(any(ProducerRecord.class), any(Callback.class)); + + lease.publish(flowFile, new ByteArrayInputStream(new byte[1]), messageKey, demarcatorBytes, topic); + + assertEquals(1, poisonCount.get()); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + } + + @Test + @SuppressWarnings("unchecked") + public void testAllDelimitedMessagesSent() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 10L, logger) { + @Override + protected void poison() { + 
poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final AtomicInteger correctMessages = new AtomicInteger(0); + final AtomicInteger incorrectMessages = new AtomicInteger(0); + doAnswer(new Answer() { + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + final ProducerRecord record = invocation.getArgumentAt(0, ProducerRecord.class); + final byte[] value = record.value(); + final String valueString = new String(value, StandardCharsets.UTF_8); + if ("1234567890".equals(valueString)) { + correctMessages.incrementAndGet(); + } else { + incorrectMessages.incrementAndGet(); + } + + return null; + } + }).when(producer).send(any(ProducerRecord.class), any(Callback.class)); + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = "\n".getBytes(StandardCharsets.UTF_8); + + final byte[] flowFileContent = "1234567890\n1234567890\n1234567890\n\n\n\n1234567890\n\n\n1234567890\n\n\n\n".getBytes(StandardCharsets.UTF_8); + lease.publish(flowFile, new ByteArrayInputStream(flowFileContent), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent2 = new byte[0]; + lease.publish(new MockFlowFile(2L), new ByteArrayInputStream(flowFileContent2), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent3 = "1234567890\n1234567890".getBytes(StandardCharsets.UTF_8); // no trailing new line + lease.publish(new MockFlowFile(3L), new ByteArrayInputStream(flowFileContent3), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent4 = "\n\n\n".getBytes(StandardCharsets.UTF_8); + lease.publish(new MockFlowFile(4L), new ByteArrayInputStream(flowFileContent4), messageKey, demarcatorBytes, topic); + + assertEquals(0, poisonCount.get()); + + verify(producer, times(0)).flush(); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + 
assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + + assertEquals(7, correctMessages.get()); + assertEquals(0, incorrectMessages.get()); + + verify(producer, times(1)).flush(); + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java new file mode 100644 index 000000000000..7c701944c8e6 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.nifi.logging.ComponentLog; +import org.junit.Test; +import org.mockito.Mockito; + + +public class TestPublisherPool { + + @Test + public void testLeaseCloseReturnsToPool() { + final Map kafkaProperties = new HashMap<>(); + kafkaProperties.put("bootstrap.servers", "localhost:1111"); + kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName()); + kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName()); + + final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L); + assertEquals(0, pool.available()); + + final PublisherLease lease = pool.obtainPublisher(); + assertEquals(0, pool.available()); + + lease.close(); + assertEquals(1, pool.available()); + } + + @Test + public void testPoisonedLeaseNotReturnedToPool() { + final Map kafkaProperties = new HashMap<>(); + kafkaProperties.put("bootstrap.servers", "localhost:1111"); + kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName()); + kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName()); + + final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L); + assertEquals(0, pool.available()); + + final PublisherLease lease = pool.obtainPublisher(); + assertEquals(0, pool.available()); + + lease.poison(); + lease.close(); + assertEquals(0, pool.available()); + } + +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-8-processors/src/main/java/org/apache/nifi/processors/kafka/KafkaPublisher.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-8-processors/src/main/java/org/apache/nifi/processors/kafka/KafkaPublisher.java index 5bc0e0e5b5c7..e524589c051f 100644 --- 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-8-processors/src/main/java/org/apache/nifi/processors/kafka/KafkaPublisher.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-8-processors/src/main/java/org/apache/nifi/processors/kafka/KafkaPublisher.java @@ -17,6 +17,7 @@ package org.apache.nifi.processors.kafka; import java.io.Closeable; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; @@ -111,8 +112,9 @@ class KafkaPublisher implements Closeable { * instance of {@link PublishingContext} which hold context * information about the message(s) to be sent. * @return The index of the last successful offset. + * @throws IOException if unable to read from the Input Stream */ - KafkaPublisherResult publish(PublishingContext publishingContext) { + KafkaPublisherResult publish(PublishingContext publishingContext) throws IOException { StreamDemarcator streamTokenizer = new StreamDemarcator(publishingContext.getContentStream(), publishingContext.getDelimiterBytes(), publishingContext.getMaxRequestSize()); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka.java index 0a3fe5d96953..3e01e51ea7af 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka.java @@ -21,17 +21,14 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; -import javax.xml.bind.DatatypeConverter; import 
org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; @@ -39,13 +36,12 @@ import org.apache.nifi.annotation.behavior.WritesAttributes; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.lifecycle.OnStopped; +import org.apache.nifi.annotation.lifecycle.OnUnscheduled; import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.ProcessContext; @@ -53,17 +49,18 @@ import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.util.StandardValidators; -import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.SECURITY_PROTOCOL; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING; -@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 0.9 Consumer API. 
" +@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 0.9.x Consumer API. " + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the mean time" - + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on.") + + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on. The complementary NiFi processor for sending messages is PublishKafka.") @Tags({"Kafka", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "0.9.x"}) @WritesAttributes({ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_COUNT, description = "The number of messages written if more than one"), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_KEY, description = "The key of message if present and if single message. " - + "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."), + + "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_OFFSET, description = "The offset of the message in the partition of the topic."), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_PARTITION, description = "The partition of the topic the message or message bundle is from"), @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_TOPIC, description = "The topic the message or message bundle is from") @@ -75,22 +72,16 @@ + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. 
") public class ConsumeKafka extends AbstractProcessor { - private static final long TWO_MB = 2L * 1024L * 1024L; - static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset"); static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset"); static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group"); - static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); - static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", - "The key is interpreted as arbitrary binary data and is encoded using hexadecimal characters with uppercase letters"); - static final PropertyDescriptor TOPICS = new PropertyDescriptor.Builder() .name("topic") .displayName("Topic Name(s)") - .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma seperated.") + .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.") .required(true) .addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(true) @@ -136,6 +127,7 @@ public class ConsumeKafka extends AbstractProcessor { + "will result in a single FlowFile which " + "time it is triggered. 
To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on the OS") .build(); + static final PropertyDescriptor MAX_POLL_RECORDS = new PropertyDescriptor.Builder() .name("max.poll.records") .displayName("Max Poll Records") @@ -145,6 +137,20 @@ public class ConsumeKafka extends AbstractProcessor { .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .build(); + static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new PropertyDescriptor.Builder() + .name("max-uncommit-offset-wait") + .displayName("Max Uncommitted Time") + .description("Specifies the maximum amount of time allowed to pass before offsets must be committed. " + + "This value impacts how often offsets will be committed. Committing offsets less often increases " + + "throughput but also increases the window of potential data duplication in the event of a rebalance " + + "or JVM restart between commits. This value is also related to maximum poll records and the use " + + "of a message demarcator. When using a message demarcator we can have far more uncommitted messages " + + "than when we're not as there is much less for us to keep track of in memory.") + .required(false) + .defaultValue("1 secs") + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .build(); + static final Relationship REL_SUCCESS = new Relationship.Builder() .name("success") .description("FlowFiles received from Kafka. 
Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.") @@ -153,8 +159,8 @@ public class ConsumeKafka extends AbstractProcessor { static final List DESCRIPTORS; static final Set RELATIONSHIPS; - private volatile byte[] demarcatorBytes = null; private volatile ConsumerPool consumerPool = null; + private final Set activeLeases = Collections.synchronizedSet(new HashSet<>()); static { List descriptors = new ArrayList<>(); @@ -165,6 +171,7 @@ public class ConsumeKafka extends AbstractProcessor { descriptors.add(KEY_ATTRIBUTE_ENCODING); descriptors.add(MESSAGE_DEMARCATOR); descriptors.add(MAX_POLL_RECORDS); + descriptors.add(MAX_UNCOMMITTED_TIME); DESCRIPTORS = Collections.unmodifiableList(descriptors); RELATIONSHIPS = Collections.singleton(REL_SUCCESS); } @@ -179,16 +186,8 @@ protected List getSupportedPropertyDescriptors() { return DESCRIPTORS; } - @OnScheduled - public void prepareProcessing(final ProcessContext context) { - this.demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).isSet() - ? context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8) - : null; - } - @OnStopped public void close() { - demarcatorBytes = null; final ConsumerPool pool = consumerPool; consumerPool = null; if (pool != null) { @@ -215,9 +214,22 @@ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) return pool; } - final Map props = new HashMap<>(); + return consumerPool = createConsumerPool(context, getLogger()); + } + + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + final int maxLeases = context.getMaxConcurrentTasks(); + final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS); + final byte[] demarcator = context.getProperty(ConsumeKafka.MESSAGE_DEMARCATOR).isSet() + ? 
context.getProperty(ConsumeKafka.MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8) + : null; + + final Map props = new HashMap<>(); KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props); - final String topicListing = context.getProperty(TOPICS).evaluateAttributeExpressions().getValue(); + props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); + props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); + final String topicListing = context.getProperty(ConsumeKafka.TOPICS).evaluateAttributeExpressions().getValue(); final List topics = new ArrayList<>(); for (final String topic : topicListing.split(",", 100)) { final String trimmedName = topic.trim(); @@ -225,213 +237,78 @@ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) topics.add(trimmedName); } } - props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); - props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); - props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName()); - return consumerPool = createConsumerPool(context.getMaxConcurrentTasks(), topics, props, getLogger()); - } + final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue(); + final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(); + final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).getValue(); - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return new ConsumerPool(maxLeases, topics, props, log); + return new ConsumerPool(maxLeases, demarcator, props, topics, maxUncommittedTime, keyEncoding, securityProtocol, bootstrapServers, log); 
} - @Override - public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { - final long startTimeNanos = System.nanoTime(); - final ConsumerPool pool = getConsumerPool(context); - if (pool == null) { - context.yield(); - return; - } - final Map>> partitionRecordMap = new HashMap<>(); - - try (final ConsumerLease lease = pool.obtainConsumer()) { + @OnUnscheduled + public void interruptActiveThreads() { + // There are known issues with the Kafka client library that result in the client code hanging + // indefinitely when unable to communicate with the broker. In order to address this, we will wait + // up to 5 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the + // thread to wake up when it is blocked, waiting on a response. + final long nanosToWait = TimeUnit.SECONDS.toNanos(5L); + final long start = System.nanoTime(); + while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) { try { - if (lease == null) { - context.yield(); - return; - } - - final boolean foundData = gatherDataFromKafka(lease, partitionRecordMap, context); - if (!foundData) { - session.rollback(); - return; - } - - writeSessionData(context, session, partitionRecordMap, startTimeNanos); - //At-least once commit handling (if order is reversed it is at-most once) - session.commit(); - commitOffsets(lease, partitionRecordMap); - } catch (final KafkaException ke) { - lease.poison(); - getLogger().error("Problem while accessing kafka consumer " + ke, ke); - context.yield(); - session.rollback(); + Thread.sleep(100L); + } catch (final InterruptedException ie) { + Thread.currentThread().interrupt(); + return; } } - }
.mapToLong(record -> record.offset()) - .max() - .getAsLong(); - partOffsetMap.put(entry.getKey(), new OffsetAndMetadata(maxOffset + 1L)); - }); - lease.commitOffsets(partOffsetMap); - } + if (!activeLeases.isEmpty()) { + int count = 0; + for (final ConsumerLease lease : activeLeases) { + getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", new Object[] {lease}); + lease.wakeup(); + count++; + } - private void writeSessionData( - final ProcessContext context, final ProcessSession session, - final Map>> partitionRecordMap, - final long startTimeNanos) { - if (demarcatorBytes != null) { - partitionRecordMap.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .forEach(entry -> { - writeData(context, session, entry.getValue(), startTimeNanos); - }); - } else { - partitionRecordMap.entrySet().stream() - .filter(entry -> !entry.getValue().isEmpty()) - .flatMap(entry -> entry.getValue().stream()) - .forEach(record -> { - writeData(context, session, Collections.singletonList(record), startTimeNanos); - }); + getLogger().info("Woke up {} consumers", new Object[] {count}); } - } - private String encodeKafkaKey(final byte[] key, final String encoding) { - if (key == null) { - return null; - } + activeLeases.clear(); + } - if (HEX_ENCODING.getValue().equals(encoding)) { - return DatatypeConverter.printHexBinary(key); - } else if (UTF8_ENCODING.getValue().equals(encoding)) { - return new String(key, StandardCharsets.UTF_8); - } else { - return null; // won't happen because it is guaranteed by the Allowable Values + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + final ConsumerPool pool = getConsumerPool(context); + if (pool == null) { + context.yield(); + return; } - } - private void writeData(final ProcessContext context, final ProcessSession session, final List> records, final long startTimeNanos) { - final ConsumerRecord firstRecord = 
records.get(0); - final String offset = String.valueOf(firstRecord.offset()); - final String keyValue = encodeKafkaKey(firstRecord.key(), context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue()); - final String topic = firstRecord.topic(); - final String partition = String.valueOf(firstRecord.partition()); - FlowFile flowFile = session.create(); - flowFile = session.write(flowFile, out -> { - boolean useDemarcator = false; - for (final ConsumerRecord record : records) { - if (useDemarcator) { - out.write(demarcatorBytes); - } - out.write(record.value()); - useDemarcator = true; + try (final ConsumerLease lease = pool.obtainConsumer(session)) { + if (lease == null) { + context.yield(); + return; } - }); - final Map kafkaAttrs = new HashMap<>(); - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, offset); - if (keyValue != null && records.size() == 1) { - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, keyValue); - } - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, partition); - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, topic); - if (records.size() > 1) { - kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(records.size())); - } - flowFile = session.putAllAttributes(flowFile, kafkaAttrs); - final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNanos); - final String transitUri = KafkaProcessorUtils.buildTransitURI( - context.getProperty(SECURITY_PROTOCOL).getValue(), - context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).getValue(), - topic); - session.getProvenanceReporter().receive(flowFile, transitUri, executionDurationMillis); - this.getLogger().debug("Created {} containing {} messages from Kafka topic {}, partition {}, starting offset {} in {} millis", - new Object[]{flowFile, records.size(), topic, partition, offset, executionDurationMillis}); - session.transfer(flowFile, REL_SUCCESS); - } - /** - * Populates the given partitionRecordMap with new records until we poll - * that returns no records 
or until we have enough data. It is important to - * ensure we keep items grouped by their topic and partition so that when we - * bundle them we bundle them intelligently and so that we can set offsets - * properly even across multiple poll calls. - */ - private boolean gatherDataFromKafka(final ConsumerLease lease, final Map>> partitionRecordMap, ProcessContext context) { - final long startNanos = System.nanoTime(); - boolean foundData = false; - ConsumerRecords records; - final int maxRecords = context.getProperty(MAX_POLL_RECORDS).asInteger(); - - do { - records = lease.poll(); - - for (final TopicPartition partition : records.partitions()) { - List> currList = partitionRecordMap.get(partition); - if (currList == null) { - currList = new ArrayList<>(); - partitionRecordMap.put(partition, currList); + activeLeases.add(lease); + try { + while (this.isScheduled() && lease.continuePolling()) { + lease.poll(); } - currList.addAll(records.records(partition)); - if (currList.size() > 0) { - foundData = true; + if (this.isScheduled() && !lease.commit()) { + context.yield(); } + } catch (final WakeupException we) { + getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. " + + "Will roll back session and discard any partially received data.", new Object[] {lease}); + } catch (final KafkaException kex) { + getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}", + new Object[] {lease, kex}, kex); + } catch (final Throwable t) { + getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}", + new Object[] {lease, t}, t); + } finally { + activeLeases.remove(lease); } - //If we received data and we still want to get more - } while (!records.isEmpty() && !checkIfGatheredEnoughData(partitionRecordMap, maxRecords, startNanos)); - return foundData; - } - - /** - * Determines if we have enough data as-is and should move on. 
- * - * @return true if we've been gathering for more than 500 ms or if we're - * demarcating and have more than 50 flowfiles worth or if we're per message - * and have more than 2000 flowfiles or if totalMessageSize is greater than - * two megabytes; false otherwise - * - * Implementation note: 500 millis and 5 MB are magic numbers. These may - * need to be tuned. They get at how often offsets will get committed to - * kafka relative to how many records will get buffered into memory in a - * poll call before writing to repos. - */ - private boolean checkIfGatheredEnoughData(final Map>> partitionRecordMap, final long maxRecords, final long startTimeNanos) { - - final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNanos); - - if (durationMillis > 500) { - return true; - } - - int topicPartitionsFilled = 0; - int totalRecords = 0; - long totalRecordSize = 0; - - for (final List> recordList : partitionRecordMap.values()) { - if (!recordList.isEmpty()) { - topicPartitionsFilled++; - } - totalRecords += recordList.size(); - for (final ConsumerRecord rec : recordList) { - totalRecordSize += rec.value().length; - } - } - - if (demarcatorBytes != null && demarcatorBytes.length > 0) { - return topicPartitionsFilled > 50; - } else if (totalRecordSize > TWO_MB) { - return true; - } else { - return totalRecords > maxRecords; } } - } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index b954eba344c1..cd9365dbe739 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -17,11 +17,28 @@ 
package org.apache.nifi.processors.kafka.pubsub; import java.io.Closeable; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; +import javax.xml.bind.DatatypeConverter; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; +import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessSession; +import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafka.REL_SUCCESS; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING; +import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING; /** * This class represents a lease to access a Kafka Consumer object. The lease is @@ -30,15 +47,108 @@ * the lease will be returned to the pool for future use by others. A given * lease may only belong to a single thread a time. 
*/ -public interface ConsumerLease extends Closeable { +public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListener { + + private final long maxWaitMillis; + private final Consumer kafkaConsumer; + private final ComponentLog logger; + private final byte[] demarcatorBytes; + private final String keyEncoding; + private final String securityProtocol; + private final String bootstrapServers; + private boolean poisoned = false; + //used for tracking demarcated flowfiles to their TopicPartition so we can append + //to them on subsequent poll calls + private final Map bundleMap = new HashMap<>(); + private final Map uncommittedOffsetsMap = new HashMap<>(); + private long leaseStartNanos = -1; + private boolean lastPollEmpty = false; + private int totalFlowFiles = 0; + + ConsumerLease( + final long maxWaitMillis, + final Consumer kafkaConsumer, + final byte[] demarcatorBytes, + final String keyEncoding, + final String securityProtocol, + final String bootstrapServers, + final ComponentLog logger) { + this.maxWaitMillis = maxWaitMillis; + this.kafkaConsumer = kafkaConsumer; + this.demarcatorBytes = demarcatorBytes; + this.keyEncoding = keyEncoding; + this.securityProtocol = securityProtocol; + this.bootstrapServers = bootstrapServers; + this.logger = logger; + } + + /** + * clears out internal state elements excluding session and consumer as + * those are managed by the pool itself + */ + private void resetInternalState() { + bundleMap.clear(); + uncommittedOffsetsMap.clear(); + leaseStartNanos = -1; + lastPollEmpty = false; + totalFlowFiles = 0; + } /** - * Executes a poll on the underlying Kafka Consumer. + * Kafka will call this method whenever it is about to rebalance the + * consumers for the given partitions. We'll simply take this to mean that + * we need to quickly commit what we've got and will return the consumer to + * the pool. 
This method will be called during the poll() method call of + * this class and will be called by the same thread calling poll according + * to the Kafka API docs. After this method executes the session and kafka + * offsets are committed and this lease is closed. * - * @return ConsumerRecords retrieved in the poll. - * @throws KafkaException if issue occurs talking to underlying resource. + * @param partitions partitions being reassigned + */ + @Override + public void onPartitionsRevoked(final Collection partitions) { + logger.debug("Rebalance Alert: Paritions '{}' revoked for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer}); + //force a commit here. Can reuse the session and consumer after this but must commit now to avoid duplicates if kafka reassigns parittion + commit(); + } + + /** + * This will be called by Kafka when the rebalance has completed. We don't + * need to do anything with this information other than optionally log it as + * by this point we've committed what we've got and moved on. + * + * @param partitions topic partition set being reassigned + */ + @Override + public void onPartitionsAssigned(final Collection partitions) { + logger.debug("Rebalance Alert: Paritions '{}' assigned for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer}); + } + + /** + * Executes a poll on the underlying Kafka Consumer and creates any new + * flowfiles necessary or appends to existing ones if in demarcation mode. */ - ConsumerRecords poll() throws KafkaException; + void poll() { + /** + * Implementation note: If we take too long (30 secs?) between kafka + * poll calls and our own record processing to any subsequent poll calls + * or the commit we can run into a situation where the commit will + * succeed to the session but fail on committing offsets. This is + * apparently different than the Kafka scenario of electing to rebalance + * for other reasons but in this case is due a session timeout. 
It + * appears Kafka KIP-62 aims to offer more control over the meaning of + * various timeouts. If we do run into this case it could result in + * duplicates. + */ + try { + final ConsumerRecords records = kafkaConsumer.poll(10); + lastPollEmpty = records.count() == 0; + processRecords(records); + } catch (final Throwable t) { + this.poison(); + throw t; + } + } /** * Notifies Kafka to commit the offsets for the specified topic/partition @@ -47,22 +157,251 @@ public interface ConsumerLease extends Closeable { * kafka client to collect more data from Kafka before committing the * offsets. * - * @param offsets offsets - * @throws KafkaException if issue occurs talking to underlying resource. + * if false then we didn't do anything and should probably yield if true + * then we committed new data + * */ - void commitOffsets(Map offsets) throws KafkaException; + boolean commit() { + if (uncommittedOffsetsMap.isEmpty()) { + resetInternalState(); + return false; + } + try { + /** + * Committing the nifi session then the offsets means we have an at + * least once guarantee here. If we reversed the order we'd have at + * most once. + */ + final Collection bundledFlowFiles = getBundles(); + if (!bundledFlowFiles.isEmpty()) { + getProcessSession().transfer(bundledFlowFiles, REL_SUCCESS); + } + getProcessSession().commit(); + kafkaConsumer.commitSync(uncommittedOffsetsMap); + resetInternalState(); + return true; + } catch (final KafkaException kex) { + poison(); + logger.warn("Duplicates are likely as we were able to commit the process" + + " session but received an exception from Kafka while committing" + + " offsets."); + throw kex; + } catch (final Throwable t) { + poison(); + throw t; + } + } /** - * Notifies that this lease is poisoned and should not be reused. + * Indicates whether we should continue polling for data. 
If we are not + * writing data with a demarcator then we're writing individual flow files + * per kafka message therefore we must be very mindful of memory usage for + * the flow file objects (not their content) being held in memory. The + * content of kafka messages will be written to the content repository + * immediately upon each poll call but we must still be mindful of how much + * memory can be used in each poll call. We will indicate that we should + * stop polling our last poll call produced no new results or if we've + * polling and processing data longer than the specified maximum polling + * time or if we have reached out specified max flow file limit or if a + * rebalance has been initiated for one of the partitions we're watching; + * otherwise true. + * + * @return true if should keep polling; false otherwise */ - void poison(); + boolean continuePolling() { + //stop if the last poll produced new no data + if (lastPollEmpty) { + return false; + } + + //stop if we've gone past our desired max uncommitted wait time + if (leaseStartNanos < 0) { + leaseStartNanos = System.nanoTime(); + } + final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos); + if (durationMillis > maxWaitMillis) { + return false; + } + + //stop if we've generated enough flowfiles that we need to be concerned about memory usage for the objects + if (bundleMap.size() > 200) { //a magic number - the number of simultaneous bundles to track + return false; + } else { + return totalFlowFiles < 15000;//admittedly a magic number - good candidate for processor property + } + } /** - * Notifies that this lease is to be returned. The pool may optionally reuse - * this lease with another client. No further references by the caller - * should occur after calling close. + * Indicates that the underlying session and consumer should be immediately + * considered invalid. 
Once closed the session will be rolled back and the + * pool should destroy the underlying consumer. This is useful if due to + * external reasons, such as the processor no longer being scheduled, this + * lease should be terminated immediately. + */ + private void poison() { + poisoned = true; + } + + /** + * @return true if this lease has been poisoned; false otherwise + */ + boolean isPoisoned() { + return poisoned; + } + + /** + * Trigger the consumer's {@link KafkaConsumer#wakeup() wakeup()} method. + */ + public void wakeup() { + kafkaConsumer.wakeup(); + } + + /** + * Abstract method that is intended to be extended by the pool that created + * this ConsumerLease object. It should ensure that the session given to + * create this session is rolled back and that the underlying kafka consumer + * is either returned to the pool for continued use or destroyed if this + * lease has been poisoned. It can only be called once. Calling it more than + * once can result in undefined and non threadsafe behavior. 
*/ @Override - void close(); + public void close() { + resetInternalState(); + } + + public abstract ProcessSession getProcessSession(); + + private void processRecords(final ConsumerRecords records) { + + records.partitions().stream().forEach(partition -> { + List> messages = records.records(partition); + if (!messages.isEmpty()) { + //update maximum offset map for this topic partition + long maxOffset = messages.stream() + .mapToLong(record -> record.offset()) + .max() + .getAsLong(); + uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L)); + + //write records to content repository and session + if (demarcatorBytes == null) { + totalFlowFiles += messages.size(); + messages.stream().forEach(message -> { + writeData(getProcessSession(), message, partition); + }); + } else { + writeData(getProcessSession(), messages, partition); + } + } + }); + } + + private static String encodeKafkaKey(final byte[] key, final String encoding) { + if (key == null) { + return null; + } + + if (HEX_ENCODING.getValue().equals(encoding)) { + return DatatypeConverter.printHexBinary(key); + } else if (UTF8_ENCODING.getValue().equals(encoding)) { + return new String(key, StandardCharsets.UTF_8); + } else { + return null; // won't happen because it is guaranteed by the Allowable Values + } + } + + private Collection getBundles() { + final List flowFiles = new ArrayList<>(); + for (final BundleTracker tracker : bundleMap.values()) { + populateAttributes(tracker); + flowFiles.add(tracker.flowFile); + } + return flowFiles; + } + + private void writeData(final ProcessSession session, ConsumerRecord record, final TopicPartition topicPartition) { + FlowFile flowFile = session.create(); + final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding); + tracker.incrementRecordCount(1); + flowFile = session.write(flowFile, out -> { + out.write(record.value()); + }); + tracker.updateFlowFile(flowFile); + populateAttributes(tracker); + 
session.transfer(tracker.flowFile, REL_SUCCESS); + } + + private void writeData(final ProcessSession session, final List> records, final TopicPartition topicPartition) { + final ConsumerRecord firstRecord = records.get(0); + final boolean demarcateFirstRecord; + BundleTracker tracker = bundleMap.get(topicPartition); + FlowFile flowFile; + if (tracker == null) { + tracker = new BundleTracker(firstRecord, topicPartition, keyEncoding); + flowFile = session.create(); + tracker.updateFlowFile(flowFile); + demarcateFirstRecord = false; //have not yet written records for this topic/partition in this lease + } else { + demarcateFirstRecord = true; //have already been writing records for this topic/partition in this lease + } + flowFile = tracker.flowFile; + tracker.incrementRecordCount(records.size()); + flowFile = session.append(flowFile, out -> { + boolean useDemarcator = demarcateFirstRecord; + for (final ConsumerRecord record : records) { + if (useDemarcator) { + out.write(demarcatorBytes); + } + out.write(record.value()); + useDemarcator = true; + } + }); + tracker.updateFlowFile(flowFile); + bundleMap.put(topicPartition, tracker); + } + + private void populateAttributes(final BundleTracker tracker) { + final Map kafkaAttrs = new HashMap<>(); + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(tracker.initialOffset)); + if (tracker.key != null && tracker.totalRecords == 1) { + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, tracker.key); + } + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(tracker.partition)); + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, tracker.topic); + if (tracker.totalRecords > 1) { + kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(tracker.totalRecords)); + } + final FlowFile newFlowFile = getProcessSession().putAllAttributes(tracker.flowFile, kafkaAttrs); + final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos); + final String transitUri = 
KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, tracker.topic); + getProcessSession().getProvenanceReporter().receive(newFlowFile, transitUri, executionDurationMillis); + tracker.updateFlowFile(newFlowFile); + } + + private static class BundleTracker { + + final long initialOffset; + final int partition; + final String topic; + final String key; + FlowFile flowFile; + long totalRecords = 0; + + private BundleTracker(final ConsumerRecord initialRecord, final TopicPartition topicPartition, final String keyEncoding) { + this.initialOffset = initialRecord.offset(); + this.partition = topicPartition.partition(); + this.topic = topicPartition.topic(); + this.key = encodeKafkaKey(initialRecord.key(), keyEncoding); + } + + private void incrementRecordCount(final long count) { + totalRecords += count; + } + + private void updateFlowFile(final FlowFile flowFile) { + this.flowFile = flowFile; + } + + } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java index 3f20b8f4cb2b..e13a8c3c4d1a 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java @@ -21,18 +21,15 @@ import org.apache.nifi.logging.ComponentLog; import java.io.Closeable; -import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.HashMap; +import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Queue; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicLong; 
-import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; +import org.apache.nifi.processor.ProcessSession; /** * A pool of Kafka Consumers for a given topic. Consumers can be obtained by @@ -41,176 +38,119 @@ */ public class ConsumerPool implements Closeable { - private final AtomicInteger activeLeaseCount = new AtomicInteger(0); - private final int maxLeases; - private final Queue consumerLeases; + private final BlockingQueue pooledLeases; private final List topics; private final Map kafkaProperties; + private final long maxWaitMillis; private final ComponentLog logger; - + private final byte[] demarcatorBytes; + private final String keyEncoding; + private final String securityProtocol; + private final String bootstrapServers; private final AtomicLong consumerCreatedCountRef = new AtomicLong(); private final AtomicLong consumerClosedCountRef = new AtomicLong(); private final AtomicLong leasesObtainedCountRef = new AtomicLong(); - private final AtomicLong productivePollCountRef = new AtomicLong(); - private final AtomicLong unproductivePollCountRef = new AtomicLong(); /** * Creates a pool of KafkaConsumer objects that will grow up to the maximum - * indicated leases. Consumers are lazily initialized. + * indicated threads from the given context. Consumers are lazily + * initialized. We may elect to not create up to the maximum number of + * configured consumers if the broker reported lag time for all topics is + * below a certain threshold. 
* - * @param maxLeases maximum number of active leases in the pool - * @param topics the topics to consume from - * @param kafkaProperties the properties for each consumer + * @param maxConcurrentLeases max allowable consumers at once + * @param demarcator bytes to use as demarcator between messages; null or + * empty means no demarcator + * @param kafkaProperties properties to use to initialize kafka consumers + * @param topics the topics to subscribe to + * @param maxWaitMillis maximum time to wait for a given lease to acquire + * data before committing + * @param keyEncoding the encoding to use for the key of a kafka message if + * found + * @param securityProtocol the security protocol used + * @param bootstrapServers the bootstrap servers * @param logger the logger to report any errors/warnings */ - public ConsumerPool(final int maxLeases, final List topics, final Map kafkaProperties, final ComponentLog logger) { - this.maxLeases = maxLeases; - if (maxLeases <= 0) { - throw new IllegalArgumentException("Max leases value must be greather than zero."); - } + public ConsumerPool( + final int maxConcurrentLeases, + final byte[] demarcator, + final Map kafkaProperties, + final List topics, + final long maxWaitMillis, + final String keyEncoding, + final String securityProtocol, + final String bootstrapServers, + final ComponentLog logger) { + this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases); + this.maxWaitMillis = maxWaitMillis; this.logger = logger; - if (topics == null || topics.isEmpty()) { - throw new IllegalArgumentException("Must have a list of one or more topics"); - } - this.topics = topics; - this.kafkaProperties = new HashMap<>(kafkaProperties); - this.consumerLeases = new ArrayDeque<>(); + this.demarcatorBytes = demarcator; + this.keyEncoding = keyEncoding; + this.securityProtocol = securityProtocol; + this.bootstrapServers = bootstrapServers; + this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties); + this.topics = 
Collections.unmodifiableList(topics); } /** - * Obtains a consumer from the pool if one is available + * Obtains a consumer from the pool if one is available or lazily + * initializes a new one if deemed necessary. * - * @return consumer from the pool - * @throws IllegalArgumentException if pool already contains + * @param session the session for which the consumer lease will be + * associated + * @return consumer to use or null if not available or necessary */ - public ConsumerLease obtainConsumer() { - final ConsumerLease lease; - final int activeLeases; - synchronized (this) { - lease = consumerLeases.poll(); - activeLeases = activeLeaseCount.get(); - } - if (lease == null && activeLeases >= maxLeases) { - logger.warn("No available consumers and cannot create any as max consumer leases limit reached - verify pool settings"); - return null; + public ConsumerLease obtainConsumer(final ProcessSession session) { + SimpleConsumerLease lease = pooledLeases.poll(); + if (lease == null) { + final Consumer consumer = createKafkaConsumer(); + consumerCreatedCountRef.incrementAndGet(); + /** + * For now return a new consumer lease. But we could later elect to + * have this return null if we determine the broker indicates that + * the lag time on all topics being monitored is sufficiently low. + * For now we should encourage conservative use of threads because + * having too many means we'll have at best useless threads sitting + * around doing frequent network calls and at worst having consumers + * sitting idle which could prompt excessive rebalances. + */ + lease = new SimpleConsumerLease(consumer); + + /** + * This subscription tightly couples the lease to the given + * consumer. They cannot be separated from then on. + */ + consumer.subscribe(topics, lease); } + lease.setProcessSession(session); leasesObtainedCountRef.incrementAndGet(); - return (lease == null) ? 
createConsumer() : lease; + return lease; } + /** + * Exposed as protected method for easier unit testing + * + * @return consumer + * @throws KafkaException if unable to subscribe to the given topics + */ protected Consumer createKafkaConsumer() { return new KafkaConsumer<>(kafkaProperties); } - private ConsumerLease createConsumer() { - final Consumer kafkaConsumer = createKafkaConsumer(); - consumerCreatedCountRef.incrementAndGet(); - try { - kafkaConsumer.subscribe(topics); - } catch (final KafkaException kex) { - try { - kafkaConsumer.close(); - consumerClosedCountRef.incrementAndGet(); - } catch (final Exception ex) { - consumerClosedCountRef.incrementAndGet(); - //ignore - } - throw kex; - } - - final ConsumerLease lease = new ConsumerLease() { - - private volatile boolean poisoned = false; - private volatile boolean closed = false; - - @Override - public ConsumerRecords poll() { - - if (poisoned) { - throw new KafkaException("The consumer is poisoned and should no longer be used"); - } - - try { - final ConsumerRecords records = kafkaConsumer.poll(50); - if (records.isEmpty()) { - unproductivePollCountRef.incrementAndGet(); - } else { - productivePollCountRef.incrementAndGet(); - } - return records; - } catch (final KafkaException kex) { - logger.warn("Unable to poll from Kafka consumer so will poison and close this " + kafkaConsumer, kex); - poison(); - close(); - throw kex; - } - } - - @Override - public void commitOffsets(final Map offsets) { - - if (poisoned) { - throw new KafkaException("The consumer is poisoned and should no longer be used"); - } - try { - kafkaConsumer.commitSync(offsets); - } catch (final KafkaException kex) { - logger.warn("Unable to commit kafka consumer offsets so will poison and close this " + kafkaConsumer, kex); - poison(); - close(); - throw kex; - } - } - - @Override - public void close() { - if (closed) { - return; - } - if (poisoned || activeLeaseCount.get() > maxLeases) { - closeConsumer(kafkaConsumer); - 
activeLeaseCount.decrementAndGet(); - closed = true; - } else { - final boolean added; - synchronized (ConsumerPool.this) { - added = consumerLeases.offer(this); - } - if (!added) { - closeConsumer(kafkaConsumer); - activeLeaseCount.decrementAndGet(); - } - } - } - - @Override - public void poison() { - poisoned = true; - } - }; - activeLeaseCount.incrementAndGet(); - return lease; - } - /** - * Closes all consumers in the pool. Can be safely recalled. + * Closes all consumers in the pool. Can be safely called repeatedly. */ @Override public void close() { - final List leases = new ArrayList<>(); - synchronized (this) { - ConsumerLease lease = null; - while ((lease = consumerLeases.poll()) != null) { - leases.add(lease); - } - } - for (final ConsumerLease lease : leases) { - lease.poison(); - lease.close(); - } + final List leases = new ArrayList<>(); + pooledLeases.drainTo(leases); + leases.stream().forEach((lease) -> { + lease.close(true); + }); } - private void closeConsumer(final Consumer consumer) { + private void closeConsumer(final Consumer consumer) { + consumerClosedCountRef.incrementAndGet(); try { consumer.unsubscribe(); } catch (Exception e) { @@ -219,15 +159,55 @@ private void closeConsumer(final Consumer consumer) { try { consumer.close(); - consumerClosedCountRef.incrementAndGet(); } catch (Exception e) { - consumerClosedCountRef.incrementAndGet(); logger.warn("Failed while closing " + consumer, e); } } PoolStats getPoolStats() { - return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get(), productivePollCountRef.get(), unproductivePollCountRef.get()); + return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get()); + } + + private class SimpleConsumerLease extends ConsumerLease { + + private final Consumer consumer; + private volatile ProcessSession session; + private volatile boolean closedConsumer; + + private SimpleConsumerLease(final Consumer 
consumer) { + super(maxWaitMillis, consumer, demarcatorBytes, keyEncoding, securityProtocol, bootstrapServers, logger); + this.consumer = consumer; + } + + void setProcessSession(final ProcessSession session) { + this.session = session; + } + + @Override + public ProcessSession getProcessSession() { + return session; + } + + @Override + public void close() { + super.close(); + close(false); + } + + public void close(final boolean forceClose) { + if (closedConsumer) { + return; + } + super.close(); + if (session != null) { + session.rollback(); + setProcessSession(null); + } + if (forceClose || isPoisoned() || !pooledLeases.offer(this)) { + closedConsumer = true; + closeConsumer(consumer); + } + } } static final class PoolStats { @@ -235,30 +215,22 @@ static final class PoolStats { final long consumerCreatedCount; final long consumerClosedCount; final long leasesObtainedCount; - final long productivePollCount; - final long unproductivePollCount; PoolStats( final long consumerCreatedCount, final long consumerClosedCount, - final long leasesObtainedCount, - final long productivePollCount, - final long unproductivePollCount + final long leasesObtainedCount ) { this.consumerCreatedCount = consumerCreatedCount; this.consumerClosedCount = consumerClosedCount; this.leasesObtainedCount = leasesObtainedCount; - this.productivePollCount = productivePollCount; - this.unproductivePollCount = unproductivePollCount; } @Override public String toString() { return "Created Consumers [" + consumerCreatedCount + "]\n" + "Closed Consumers [" + consumerClosedCount + "]\n" - + "Leases Obtained [" + leasesObtainedCount + "]\n" - + "Productive Polls [" + productivePollCount + "]\n" - + "Unproductive Polls [" + unproductivePollCount + "]\n"; + + "Leases Obtained [" + leasesObtainedCount + "]\n"; } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java new file mode 100644 index 000000000000..e7d5cb71634e --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.nifi.flowfile.FlowFile; + +public class InFlightMessageTracker { + private final ConcurrentMap messageCountsByFlowFile = new ConcurrentHashMap<>(); + private final ConcurrentMap failures = new ConcurrentHashMap<>(); + private final Object progressMutex = new Object(); + + public void incrementAcknowledgedCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts()); + counter.incrementAcknowledgedCount(); + + synchronized (progressMutex) { + progressMutex.notify(); + } + } + + public int getAcknowledgedCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.get(flowFile); + return (counter == null) ? 0 : counter.getAcknowledgedCount(); + } + + public void incrementSentCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts()); + counter.incrementSentCount(); + } + + public int getSentCount(final FlowFile flowFile) { + final Counts counter = messageCountsByFlowFile.get(flowFile); + return (counter == null) ? 
0 : counter.getSentCount(); + } + + public void fail(final FlowFile flowFile, final Exception exception) { + failures.putIfAbsent(flowFile, exception); + + synchronized (progressMutex) { + progressMutex.notify(); + } + } + + public Exception getFailure(final FlowFile flowFile) { + return failures.get(flowFile); + } + + public boolean isFailed(final FlowFile flowFile) { + return getFailure(flowFile) != null; + } + + public void reset() { + messageCountsByFlowFile.clear(); + failures.clear(); + } + + public PublishResult failOutstanding(final Exception exception) { + messageCountsByFlowFile.keySet().stream() + .filter(ff -> !isComplete(ff)) + .filter(ff -> !failures.containsKey(ff)) + .forEach(ff -> failures.put(ff, exception)); + + return createPublishResult(); + } + + private boolean isComplete(final FlowFile flowFile) { + final Counts counts = messageCountsByFlowFile.get(flowFile); + if (counts.getAcknowledgedCount() == counts.getSentCount()) { + // all messages received successfully. + return true; + } + + if (failures.containsKey(flowFile)) { + // FlowFile failed so is complete + return true; + } + + return false; + } + + private boolean isComplete() { + return messageCountsByFlowFile.keySet().stream() + .allMatch(flowFile -> isComplete(flowFile)); + } + + void awaitCompletion(final long millis) throws InterruptedException, TimeoutException { + final long startTime = System.nanoTime(); + final long maxTime = startTime + TimeUnit.MILLISECONDS.toNanos(millis); + + while (System.nanoTime() < maxTime) { + synchronized (progressMutex) { + if (isComplete()) { + return; + } + + progressMutex.wait(millis); + } + } + + throw new TimeoutException(); + } + + + PublishResult createPublishResult() { + return new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + if (failures.isEmpty()) { + return messageCountsByFlowFile.keySet(); + } + + final Set flowFiles = new HashSet<>(messageCountsByFlowFile.keySet()); + 
flowFiles.removeAll(failures.keySet()); + return flowFiles; + } + + @Override + public Collection getFailedFlowFiles() { + return failures.keySet(); + } + + @Override + public int getSuccessfulMessageCount(final FlowFile flowFile) { + return getAcknowledgedCount(flowFile); + } + + @Override + public Exception getReasonForFailure(final FlowFile flowFile) { + return getFailure(flowFile); + } + }; + } + + public static class Counts { + private final AtomicInteger sentCount = new AtomicInteger(0); + private final AtomicInteger acknowledgedCount = new AtomicInteger(0); + + public void incrementSentCount() { + sentCount.incrementAndGet(); + } + + public void incrementAcknowledgedCount() { + acknowledgedCount.incrementAndGet(); + } + + public int getAcknowledgedCount() { + return acknowledgedCount.get(); + } + + public int getSentCount() { + return sentCount.get(); + } + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java index c74ad1859644..3d09f2df03c0 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java @@ -27,8 +27,9 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; -import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.commons.lang3.StringUtils; +import org.apache.kafka.clients.CommonClientConfigs; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.config.SaslConfigs; @@ -55,6 +56,10 @@ final class KafkaProcessorUtils { private 
static final String BROKER_REGEX = SINGLE_BROKER_REGEX + "(?:,\\s*" + SINGLE_BROKER_REGEX + ")*"; + static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); + static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", + "The key is interpreted as arbitrary binary data and is encoded using hexadecimal characters with uppercase letters"); + static final Pattern HEX_KEY_PATTERN = Pattern.compile("(?:[0123456789abcdefABCDEF]{2})+"); static final String KAFKA_KEY = "kafka.key"; @@ -96,7 +101,6 @@ final class KafkaProcessorUtils { .addValidator(StandardValidators.NON_BLANK_VALIDATOR) .expressionLanguageSupported(false) .build(); - static final PropertyDescriptor SSL_CONTEXT_SERVICE = new PropertyDescriptor.Builder() .name("ssl.context.service") .displayName("SSL Context Service") @@ -183,7 +187,7 @@ static final class KafkaConfigValidator implements Validator { final Class classType; - public KafkaConfigValidator(final Class classType) { + public KafkaConfigValidator(final Class classType) { this.classType = classType; } @@ -208,7 +212,8 @@ static String buildTransitURI(String securityProtocol, String brokers, String to return builder.toString(); } - static void buildCommonKafkaProperties(final ProcessContext context, final Class kafkaConfigClass, final Map mapToPopulate) { + + static void buildCommonKafkaProperties(final ProcessContext context, final Class kafkaConfigClass, final Map mapToPopulate) { for (PropertyDescriptor propertyDescriptor : context.getProperties().keySet()) { if (propertyDescriptor.equals(SSL_CONTEXT_SERVICE)) { // Translate SSLContext Service configuration into Kafka properties @@ -228,28 +233,32 @@ static void buildCommonKafkaProperties(final ProcessContext context, final Class } } - String pName = propertyDescriptor.getName(); - String pValue = propertyDescriptor.isExpressionLanguageSupported() + String propertyName = 
propertyDescriptor.getName(); + String propertyValue = propertyDescriptor.isExpressionLanguageSupported() ? context.getProperty(propertyDescriptor).evaluateAttributeExpressions().getValue() : context.getProperty(propertyDescriptor).getValue(); - if (pValue != null) { - if (pName.endsWith(".ms")) { // kafka standard time notation - pValue = String.valueOf(FormatUtils.getTimeDuration(pValue.trim(), TimeUnit.MILLISECONDS)); + + if (propertyValue != null) { + // If the property name ends in ".ms" then it is a time period. We want to accept either an integer as number of milliseconds + // or the standard NiFi time period such as "5 secs" + if (propertyName.endsWith(".ms") && !StringUtils.isNumeric(propertyValue.trim())) { // kafka standard time notation + propertyValue = String.valueOf(FormatUtils.getTimeDuration(propertyValue.trim(), TimeUnit.MILLISECONDS)); } - if (isStaticStringFieldNamePresent(pName, kafkaConfigClass, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class)) { - mapToPopulate.put(pName, pValue); + + if (isStaticStringFieldNamePresent(propertyName, kafkaConfigClass, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class)) { + mapToPopulate.put(propertyName, propertyValue); } } } } - private static boolean isStaticStringFieldNamePresent(final String name, final Class... classes) { + private static boolean isStaticStringFieldNamePresent(final String name, final Class... classes) { return KafkaProcessorUtils.getPublicStaticStringFieldValues(classes).contains(name); } - private static Set getPublicStaticStringFieldValues(final Class... classes) { + private static Set getPublicStaticStringFieldValues(final Class... 
classes) { final Set strings = new HashSet<>(); - for (final Class classType : classes) { + for (final Class classType : classes) { for (final Field field : classType.getDeclaredFields()) { if (Modifier.isPublic(field.getModifiers()) && Modifier.isStatic(field.getModifiers()) && field.getType().equals(String.class)) { try { diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java deleted file mode 100644 index 31a084f133d6..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisher.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.io.Closeable; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.stream.io.util.StreamDemarcator; - -/** - * Wrapper over {@link KafkaProducer} to assist {@link PublishKafka} processor - * with sending contents of the {@link FlowFile}s to Kafka. - */ -class KafkaPublisher implements Closeable { - - private final Producer kafkaProducer; - - private volatile long ackWaitTime = 30000; - - private final ComponentLog componentLog; - - private final int ackCheckSize; - - KafkaPublisher(Properties kafkaProperties, ComponentLog componentLog) { - this(kafkaProperties, 100, componentLog); - } - - /** - * Creates an instance of this class as well as the instance of the - * corresponding Kafka {@link KafkaProducer} using provided Kafka - * configuration properties. - * - * @param kafkaProperties instance of {@link Properties} used to bootstrap - * {@link KafkaProducer} - */ - KafkaPublisher(Properties kafkaProperties, int ackCheckSize, ComponentLog componentLog) { - this.kafkaProducer = new KafkaProducer<>(kafkaProperties); - this.ackCheckSize = ackCheckSize; - this.componentLog = componentLog; - } - - /** - * Publishes messages to Kafka topic. It uses {@link StreamDemarcator} to - * determine how many messages to Kafka will be sent from a provided - * {@link InputStream} (see {@link PublishingContext#getContentStream()}). 
- * It supports two publishing modes: - *
    - *
  • Sending all messages constructed from - * {@link StreamDemarcator#nextToken()} operation.
  • - *
  • Sending only unacknowledged messages constructed from - * {@link StreamDemarcator#nextToken()} operation.
  • - *
- * The unacknowledged messages are determined from the value of - * {@link PublishingContext#getLastAckedMessageIndex()}. - *
- * This method assumes content stream affinity where it is expected that the - * content stream that represents the same Kafka message(s) will remain the - * same across possible retries. This is required specifically for cases - * where delimiter is used and a single content stream may represent - * multiple Kafka messages. The - * {@link PublishingContext#getLastAckedMessageIndex()} will provide the - * index of the last ACKed message, so upon retry only messages with the - * higher index are sent. - * - * @param publishingContext instance of {@link PublishingContext} which hold - * context information about the message(s) to be sent. - * @return The index of the last successful offset. - */ - KafkaPublisherResult publish(PublishingContext publishingContext) { - StreamDemarcator streamTokenizer = new StreamDemarcator(publishingContext.getContentStream(), - publishingContext.getDelimiterBytes(), publishingContext.getMaxRequestSize()); - - int prevLastAckedMessageIndex = publishingContext.getLastAckedMessageIndex(); - List> resultFutures = new ArrayList<>(); - - byte[] messageBytes; - int tokenCounter = 0; - boolean continueSending = true; - KafkaPublisherResult result = null; - for (; continueSending && (messageBytes = streamTokenizer.nextToken()) != null; tokenCounter++) { - if (prevLastAckedMessageIndex < tokenCounter) { - ProducerRecord message = new ProducerRecord<>(publishingContext.getTopic(), publishingContext.getKeyBytes(), messageBytes); - resultFutures.add(this.kafkaProducer.send(message)); - - if (tokenCounter % this.ackCheckSize == 0) { - int lastAckedMessageIndex = this.processAcks(resultFutures, prevLastAckedMessageIndex); - resultFutures.clear(); - if (lastAckedMessageIndex % this.ackCheckSize != 0) { - continueSending = false; - result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex); - } - prevLastAckedMessageIndex = lastAckedMessageIndex; - } - } - } - - if (result == null) { - int lastAckedMessageIndex = 
this.processAcks(resultFutures, prevLastAckedMessageIndex); - resultFutures.clear(); - result = new KafkaPublisherResult(tokenCounter, lastAckedMessageIndex); - } - return result; - } - - /** - * Sets the time this publisher will wait for the {@link Future#get()} - * operation (the Future returned by - * {@link KafkaProducer#send(ProducerRecord)}) to complete before timing - * out. - * - * This value will also be used as a timeout when closing the underlying - * {@link KafkaProducer}. See {@link #close()}. - */ - void setAckWaitTime(long ackWaitTime) { - this.ackWaitTime = ackWaitTime; - } - - /** - * This operation will process ACKs from Kafka in the order in which - * {@link KafkaProducer#send(ProducerRecord)} invocation were made returning - * the index of the last ACKed message. Within this operation processing ACK - * simply means successful invocation of 'get()' operation on the - * {@link Future} returned by {@link KafkaProducer#send(ProducerRecord)} - * operation. Upon encountering any type of error while interrogating such - * {@link Future} the ACK loop will end. Messages that were not ACKed would - * be considered non-delivered and therefore could be resent at the later - * time. - * - * @param sendFutures list of {@link Future}s representing results of - * publishing to Kafka - * - * @param lastAckMessageIndex the index of the last ACKed message. It is - * important to provide the last ACKed message especially while re-trying so - * the proper index is maintained. 
- */ - private int processAcks(List> sendFutures, int lastAckMessageIndex) { - boolean exceptionThrown = false; - for (int segmentCounter = 0; segmentCounter < sendFutures.size() && !exceptionThrown; segmentCounter++) { - Future future = sendFutures.get(segmentCounter); - try { - future.get(this.ackWaitTime, TimeUnit.MILLISECONDS); - lastAckMessageIndex++; - } catch (InterruptedException e) { - exceptionThrown = true; - Thread.currentThread().interrupt(); - this.warnOrError("Interrupted while waiting for acks from Kafka", null); - } catch (ExecutionException e) { - exceptionThrown = true; - this.warnOrError("Failed while waiting for acks from Kafka", e); - } catch (TimeoutException e) { - exceptionThrown = true; - this.warnOrError("Timed out while waiting for acks from Kafka", null); - } - } - - return lastAckMessageIndex; - } - - /** - * Will close the underlying {@link KafkaProducer} waiting if necessary for - * the same duration as supplied {@link #setAckWaitTime(long)} - */ - @Override - public void close() { - this.kafkaProducer.close(this.ackWaitTime, TimeUnit.MILLISECONDS); - } - - /** - * - */ - private void warnOrError(String message, Exception e) { - if (e == null) { - this.componentLog.warn(message); - } else { - this.componentLog.error(message, e); - } - } - - /** - * Encapsulates the result received from publishing messages to Kafka - */ - static class KafkaPublisherResult { - - private final int messagesSent; - private final int lastMessageAcked; - - KafkaPublisherResult(int messagesSent, int lastMessageAcked) { - this.messagesSent = messagesSent; - this.lastMessageAcked = lastMessageAcked; - } - - public int getMessagesSent() { - return this.messagesSent; - } - - public int getLastMessageAcked() { - return this.lastMessageAcked; - } - - public boolean isAllAcked() { - return this.lastMessageAcked > -1 && this.messagesSent - 1 == this.lastMessageAcked; - } - - @Override - public String toString() { - return "Sent:" + this.messagesSent + "; Last ACK:" 
+ this.lastMessageAcked; - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka.java index 4e1403dbf4ab..af171bb38d4d 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafka.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.nifi.processors.kafka.pubsub; -import java.io.Closeable; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -27,17 +28,16 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; + import javax.xml.bind.DatatypeConverter; -import org.apache.kafka.clients.consumer.KafkaConsumer; + import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.WritesAttribute; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnStopped; @@ -46,201 +46,192 @@ import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import 
org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.AbstractSessionFactoryProcessor; +import org.apache.nifi.processor.AbstractProcessor; import org.apache.nifi.processor.DataUnit; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.FlowFileFilters; import org.apache.nifi.processor.util.StandardValidators; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; @Tags({"Apache", "Kafka", "Put", "Send", "Message", "PubSub", "0.9.x"}) -@CapabilityDescription("Sends the contents of a FlowFile as a message to Apache Kafka using the Kafka 0.9 producer. " - + "The messages to send may be individual FlowFiles or may be delimited, using a " - + "user-specified delimiter, such as a new-line. " - + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" - + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the mean time" - + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on.") +@CapabilityDescription("Sends the contents of a FlowFile as a message to Apache Kafka using the Kafka 0.9.x Producer. " + + "The messages to send may be individual FlowFiles or may be delimited, using a " + + "user-specified delimiter, such as a new-line. " + + " Please note there are cases where the publisher can get into an indefinite stuck state. We are closely monitoring" + + " how this evolves in the Kafka community and will take advantage of those fixes as soon as we can. In the mean time" + + " it is possible to enter states where the only resolution will be to restart the JVM NiFi runs on. 
The complementary NiFi processor for fetching messages is ConsumeKafka.") @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.", - description = "These properties will be added on the Kafka configuration after loading any provided configuration properties." + description = "These properties will be added on the Kafka configuration after loading any provided configuration properties." + " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged." + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ") -public class PublishKafka extends AbstractSessionFactoryProcessor { - - private final Logger logger = LoggerFactory.getLogger(this.getClass()); - - protected static final String FAILED_PROC_ID_ATTR = "failed.proc.id"; - - protected static final String FAILED_LAST_ACK_IDX = "failed.last.idx"; - - protected static final String FAILED_TOPIC_ATTR = "failed.topic"; - - protected static final String FAILED_KEY_ATTR = "failed.key"; - - protected static final String FAILED_DELIMITER_ATTR = "failed.delimiter"; - +@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Kafka for this FlowFile. This attribute is added only to " + + "FlowFiles that are routed to success. 
If the Property is not set, this will always be 1, but if the Property is set, it may " + + "be greater than 1.") +public class PublishKafka extends AbstractProcessor { protected static final String MSG_COUNT = "msg.count"; static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery", - "FlowFile will be routed to failure unless the message is replicated to the appropriate " + "FlowFile will be routed to failure unless the message is replicated to the appropriate " + "number of Kafka Nodes according to the Topic configuration"); static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery", - "FlowFile will be routed to success if the message is received by a single Kafka node, " + "FlowFile will be routed to success if the message is received by a single Kafka node, " + "whether or not it is replicated. This is faster than " + "but can result in data loss if a Kafka node crashes"); static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort", - "FlowFile will be routed to success after successfully writing the content to a Kafka node, " + "FlowFile will be routed to success after successfully writing the content to a Kafka node, " + "without waiting for a response. 
This provides the best performance but may result in data loss."); static final AllowableValue ROUND_ROBIN_PARTITIONING = new AllowableValue(Partitioners.RoundRobinPartitioner.class.getName(), - Partitioners.RoundRobinPartitioner.class.getSimpleName(), - "Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, " + Partitioners.RoundRobinPartitioner.class.getSimpleName(), + "Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, " + "the next Partition to Partition 2, and so on, wrapping as necessary."); static final AllowableValue RANDOM_PARTITIONING = new AllowableValue("org.apache.kafka.clients.producer.internals.DefaultPartitioner", - "DefaultPartitioner", "Messages will be assigned to random partitions."); + "DefaultPartitioner", "Messages will be assigned to random partitions."); static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string."); static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded", "The key is interpreted as arbitrary binary data that is encoded using hexadecimal characters with uppercase letters."); static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder() - .name("topic") - .displayName("Topic Name") - .description("The name of the Kafka Topic to publish to.") - .required(true) - .addValidator(StandardValidators.NON_BLANK_VALIDATOR) - .expressionLanguageSupported(true) - .build(); + .name("topic") + .displayName("Topic Name") + .description("The name of the Kafka Topic to publish to.") + .required(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .expressionLanguageSupported(true) + .build(); static final PropertyDescriptor DELIVERY_GUARANTEE = new PropertyDescriptor.Builder() - .name(ProducerConfig.ACKS_CONFIG) - .displayName("Delivery Guarantee") - .description("Specifies the requirement for guaranteeing that 
a message is sent to Kafka. Corresponds to Kafka's 'acks' property.") - .required(true) - .expressionLanguageSupported(false) - .allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED) - .defaultValue(DELIVERY_BEST_EFFORT.getValue()) - .build(); - - static final PropertyDescriptor META_WAIT_TIME = new PropertyDescriptor.Builder() - .name(ProducerConfig.MAX_BLOCK_MS_CONFIG) - .displayName("Meta Data Wait Time") - .description("The amount of time KafkaConsumer will wait to obtain metadata during the 'send' call before failing the " - + "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property") - .required(true) - .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(true) - .defaultValue("30 sec") - .build(); + .name(ProducerConfig.ACKS_CONFIG) + .displayName("Delivery Guarantee") + .description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.") + .required(true) + .expressionLanguageSupported(false) + .allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED) + .defaultValue(DELIVERY_BEST_EFFORT.getValue()) + .build(); + + static final PropertyDescriptor METADATA_WAIT_TIME = new PropertyDescriptor.Builder() + .name(ProducerConfig.MAX_BLOCK_MS_CONFIG) + .displayName("Max Metadata Wait Time") + .description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the " + + "entire 'send' call. 
Corresponds to Kafka's 'max.block.ms' property") + .required(true) + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .expressionLanguageSupported(true) + .defaultValue("5 sec") + .build(); + + static final PropertyDescriptor ACK_WAIT_TIME = new PropertyDescriptor.Builder() + .name("ack.wait.time") + .displayName("Acknowledgment Wait Time") + .description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. " + + "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.") + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .expressionLanguageSupported(false) + .required(true) + .defaultValue("5 secs") + .build(); static final PropertyDescriptor MAX_REQUEST_SIZE = new PropertyDescriptor.Builder() - .name("max.request.size") - .displayName("Max Request Size") - .description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).") - .required(true) - .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) - .defaultValue("1 MB") - .build(); + .name("max.request.size") + .displayName("Max Request Size") + .description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).") + .required(true) + .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) + .defaultValue("1 MB") + .build(); static final PropertyDescriptor KEY = new PropertyDescriptor.Builder() - .name("kafka-key") - .displayName("Kafka Key") - .description("The Key to use for the Message. 
" - + "If not specified, the flow file attribute 'kafka.key' is used as the message key, if it is present " - + "and we're not demarcating.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(true) - .build(); + .name("kafka-key") + .displayName("Kafka Key") + .description("The Key to use for the Message. " + + "If not specified, the flow file attribute 'kafka.key' is used as the message key, if it is present " + + "and we're not demarcating.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new PropertyDescriptor.Builder() - .name("key-attribute-encoding") - .displayName("Key Attribute Encoding") - .description("FlowFiles that are emitted have an attribute named '" + KafkaProcessorUtils.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.") - .required(true) - .defaultValue(UTF8_ENCODING.getValue()) - .allowableValues(UTF8_ENCODING, HEX_ENCODING) - .build(); + .name("key-attribute-encoding") + .displayName("Key Attribute Encoding") + .description("FlowFiles that are emitted have an attribute named '" + KafkaProcessorUtils.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.") + .required(true) + .defaultValue(UTF8_ENCODING.getValue()) + .allowableValues(UTF8_ENCODING, HEX_ENCODING) + .build(); static final PropertyDescriptor MESSAGE_DEMARCATOR = new PropertyDescriptor.Builder() - .name("message-demarcator") - .displayName("Message Demarcator") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(true) - .description("Specifies the string (interpreted as UTF-8) to use for demarcating multiple messages within " - + "a single FlowFile. If not specified, the entire content of the FlowFile will be used as a single message. 
If specified, the " - + "contents of the FlowFile will be split on this delimiter and each section sent as a separate Kafka message. " - + "To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on your OS.") - .build(); + .name("message-demarcator") + .displayName("Message Demarcator") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .description("Specifies the string (interpreted as UTF-8) to use for demarcating multiple messages within " + + "a single FlowFile. If not specified, the entire content of the FlowFile will be used as a single message. If specified, the " + + "contents of the FlowFile will be split on this delimiter and each section sent as a separate Kafka message. " + + "To enter special character such as 'new line' use CTRL+Enter or Shift+Enter, depending on your OS.") + .build(); static final PropertyDescriptor PARTITION_CLASS = new PropertyDescriptor.Builder() - .name(ProducerConfig.PARTITIONER_CLASS_CONFIG) - .displayName("Partitioner class") - .description("Specifies which class to use to compute a partition id for a message. Corresponds to Kafka's 'partitioner.class' property.") - .allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING) - .defaultValue(RANDOM_PARTITIONING.getValue()) - .required(false) - .build(); + .name(ProducerConfig.PARTITIONER_CLASS_CONFIG) + .displayName("Partitioner class") + .description("Specifies which class to use to compute a partition id for a message. 
Corresponds to Kafka's 'partitioner.class' property.") + .allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING) + .defaultValue(RANDOM_PARTITIONING.getValue()) + .required(false) + .build(); static final PropertyDescriptor COMPRESSION_CODEC = new PropertyDescriptor.Builder() - .name(ProducerConfig.COMPRESSION_TYPE_CONFIG) - .displayName("Compression Type") - .description("This parameter allows you to specify the compression codec for all data generated by this producer.") - .required(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .allowableValues("none", "gzip", "snappy", "lz4") - .defaultValue("none") - .build(); + .name(ProducerConfig.COMPRESSION_TYPE_CONFIG) + .displayName("Compression Type") + .description("This parameter allows you to specify the compression codec for all data generated by this producer.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .allowableValues("none", "gzip", "snappy", "lz4") + .defaultValue("none") + .build(); static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("FlowFiles for which all content was sent to Kafka.") - .build(); + .name("success") + .description("FlowFiles for which all content was sent to Kafka.") + .build(); static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship") - .build(); - - static final List DESCRIPTORS; - - static final Set RELATIONSHIPS; - - private volatile String brokers; + .name("failure") + .description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship") + .build(); - private final AtomicInteger taskCounter = new AtomicInteger(); + private static final List PROPERTIES; + private static final Set RELATIONSHIPS; - private volatile boolean acceptTask = true; + private volatile PublisherPool publisherPool = null; - /* - * Will ensure that list of 
PropertyDescriptors is build only once, since - * all other lifecycle methods are invoked multiple times. - */ static { - final List _descriptors = new ArrayList<>(); - _descriptors.addAll(KafkaProcessorUtils.getCommonPropertyDescriptors()); - _descriptors.add(TOPIC); - _descriptors.add(DELIVERY_GUARANTEE); - _descriptors.add(KEY); - _descriptors.add(KEY_ATTRIBUTE_ENCODING); - _descriptors.add(MESSAGE_DEMARCATOR); - _descriptors.add(MAX_REQUEST_SIZE); - _descriptors.add(META_WAIT_TIME); - _descriptors.add(PARTITION_CLASS); - _descriptors.add(COMPRESSION_CODEC); - - DESCRIPTORS = Collections.unmodifiableList(_descriptors); - - final Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - RELATIONSHIPS = Collections.unmodifiableSet(_relationships); + final List properties = new ArrayList<>(); + properties.addAll(KafkaProcessorUtils.getCommonPropertyDescriptors()); + properties.add(TOPIC); + properties.add(DELIVERY_GUARANTEE); + properties.add(KEY); + properties.add(KEY_ATTRIBUTE_ENCODING); + properties.add(MESSAGE_DEMARCATOR); + properties.add(MAX_REQUEST_SIZE); + properties.add(ACK_WAIT_TIME); + properties.add(METADATA_WAIT_TIME); + properties.add(PARTITION_CLASS); + properties.add(COMPRESSION_CODEC); + + PROPERTIES = Collections.unmodifiableList(properties); + + final Set relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + RELATIONSHIPS = Collections.unmodifiableSet(relationships); } @Override @@ -250,15 +241,17 @@ public Set getRelationships() { @Override protected List getSupportedPropertyDescriptors() { - return DESCRIPTORS; + return PROPERTIES; } @Override protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { return new PropertyDescriptor.Builder() - .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.") - .name(propertyDescriptorName).addValidator(new 
KafkaProcessorUtils.KafkaConfigValidator(ProducerConfig.class)).dynamic(true) - .build(); + .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.") + .name(propertyDescriptorName) + .addValidator(new KafkaProcessorUtils.KafkaConfigValidator(ProducerConfig.class)) + .dynamic(true) + .build(); } @Override @@ -266,226 +259,123 @@ protected Collection customValidate(final ValidationContext va return KafkaProcessorUtils.validateCommonProperties(validationContext); } - volatile KafkaPublisher kafkaPublisher; - - /** - * This thread-safe operation will delegate to - * {@link #rendezvousWithKafka(ProcessContext, ProcessSession)} after first - * checking and creating (if necessary) Kafka resource which could be either - * {@link KafkaPublisher} or {@link KafkaConsumer}. It will also close and - * destroy the underlying Kafka resource upon catching an {@link Exception} - * raised by {@link #rendezvousWithKafka(ProcessContext, ProcessSession)}. - * After Kafka resource is destroyed it will be re-created upon the next - * invocation of this operation essentially providing a self healing - * mechanism to deal with potentially corrupted resource. - *

- * Keep in mind that upon catching an exception the state of this processor - * will be set to no longer accept any more tasks, until Kafka resource is - * reset. This means that in a multi-threaded situation currently executing - * tasks will be given a chance to complete while no new tasks will be - * accepted. - * - * @param context context - * @param sessionFactory factory - */ - @Override - public final void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException { - if (this.acceptTask) { // acts as a circuit breaker to allow existing tasks to wind down so 'kafkaPublisher' can be reset before new tasks are accepted. - this.taskCounter.incrementAndGet(); - final ProcessSession session = sessionFactory.createSession(); - try { - /* - * We can't be doing double null check here since as a pattern - * it only works for lazy init but not reset, which is what we - * are doing here. In fact the first null check is dangerous - * since 'kafkaPublisher' can become null right after its null - * check passed causing subsequent NPE. - */ - synchronized (this) { - if (this.kafkaPublisher == null) { - this.kafkaPublisher = this.buildKafkaResource(context, session); - } - } - - /* - * The 'processed' boolean flag does not imply any failure or success. 
It simply states that: - * - ConsumeKafka - some messages were received form Kafka and 1_ FlowFile were generated - * - PublishKafka0_10 - some messages were sent to Kafka based on existence of the input FlowFile - */ - boolean processed = this.rendezvousWithKafka(context, session); - session.commit(); - if (!processed) { - context.yield(); - } - } catch (Throwable e) { - this.acceptTask = false; - session.rollback(true); - this.getLogger().error("{} failed to process due to {}; rolling back session", new Object[]{this, e}); - } finally { - synchronized (this) { - if (this.taskCounter.decrementAndGet() == 0 && !this.acceptTask) { - this.close(); - this.acceptTask = true; - } - } - } - } else { - this.logger.debug("Task was not accepted due to the processor being in 'reset' state. It will be re-submitted upon completion of the reset."); - this.getLogger().debug("Task was not accepted due to the processor being in 'reset' state. It will be re-submitted upon completion of the reset."); - context.yield(); + private synchronized PublisherPool getPublisherPool(final ProcessContext context) { + PublisherPool pool = publisherPool; + if (pool != null) { + return pool; } + + return publisherPool = createPublisherPool(context); + } + + protected PublisherPool createPublisherPool(final ProcessContext context) { + final int maxMessageSize = context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue(); + final long maxAckWaitMillis = context.getProperty(ACK_WAIT_TIME).asTimePeriod(TimeUnit.MILLISECONDS).longValue(); + + final Map kafkaProperties = new HashMap<>(); + KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProperties); + kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + kafkaProperties.put("max.request.size", String.valueOf(maxMessageSize)); + + return new 
PublisherPool(kafkaProperties, getLogger(), maxMessageSize, maxAckWaitMillis); } - /** - * Will call {@link Closeable#close()} on the target resource after which - * the target resource will be set to null. Should only be called when there - * are no more threads being executed on this processor or when it has been - * verified that only a single thread remains. - * - * @see KafkaPublisher - * @see KafkaConsumer - */ @OnStopped - public void close() { - try { - if (this.kafkaPublisher != null) { - try { - this.kafkaPublisher.close(); - } catch (Exception e) { - this.getLogger().warn("Failed while closing " + this.kafkaPublisher, e); - } - } - } finally { - this.kafkaPublisher = null; + public void closePool() { + if (publisherPool != null) { + publisherPool.close(); } + + publisherPool = null; } - /** - * Will rendezvous with Kafka if {@link ProcessSession} contains - * {@link FlowFile} producing a result {@link FlowFile}. - *
- * The result {@link FlowFile} that is successful is then transfered to - * {@link #REL_SUCCESS} - *
- * The result {@link FlowFile} that is failed is then transfered to - * {@link #REL_FAILURE} - * - */ - protected boolean rendezvousWithKafka(ProcessContext context, ProcessSession session) { - FlowFile flowFile = session.get(); - if (flowFile != null) { - long start = System.nanoTime(); - flowFile = this.doRendezvousWithKafka(flowFile, context, session); - Relationship relationship = REL_SUCCESS; - if (!this.isFailedFlowFile(flowFile)) { - String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); - long executionDuration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start); - String transitUri = KafkaProcessorUtils.buildTransitURI(context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(), this.brokers, topic); - session.getProvenanceReporter().send(flowFile, transitUri, "Sent " + flowFile.getAttribute(MSG_COUNT) + " Kafka messages", executionDuration); - this.getLogger().debug("Successfully sent {} to Kafka as {} message(s) in {} millis", - new Object[]{flowFile, flowFile.getAttribute(MSG_COUNT), executionDuration}); - } else { - relationship = REL_FAILURE; - flowFile = session.penalize(flowFile); - } - session.transfer(flowFile, relationship); + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final boolean useDemarcator = context.getProperty(MESSAGE_DEMARCATOR).isSet(); + + final List flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(250, DataUnit.KB, 500)); + if (flowFiles.isEmpty()) { + return; } - return flowFile != null; - } - /** - * Builds and instance of {@link KafkaPublisher}. 
- */ - protected KafkaPublisher buildKafkaResource(ProcessContext context, ProcessSession session) { - final Map kafkaProps = new HashMap<>(); - KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProps); - kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProps.put("max.request.size", String.valueOf(context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue())); - this.brokers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue(); - final Properties props = new Properties(); - props.putAll(kafkaProps); - KafkaPublisher publisher = new KafkaPublisher(props, this.getLogger()); - return publisher; - } + final PublisherPool pool = getPublisherPool(context); + if (pool == null) { + context.yield(); + return; + } - /** - * Will rendezvous with {@link KafkaPublisher} after building - * {@link PublishingContext} and will produce the resulting - * {@link FlowFile}. The resulting FlowFile contains all required - * information to determine if message publishing originated from the - * provided FlowFile has actually succeeded fully, partially or failed - * completely (see {@link #isFailedFlowFile(FlowFile)}. 
- */ - private FlowFile doRendezvousWithKafka(final FlowFile flowFile, final ProcessContext context, final ProcessSession session) { - final AtomicReference publishResultRef = new AtomicReference<>(); - session.read(flowFile, new InputStreamCallback() { - @Override - public void process(InputStream contentStream) throws IOException { - PublishingContext publishingContext = PublishKafka.this.buildPublishingContext(flowFile, context, contentStream); - KafkaPublisher.KafkaPublisherResult result = PublishKafka.this.kafkaPublisher.publish(publishingContext); - publishResultRef.set(result); + final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue(); + final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue(); + + final long startTime = System.nanoTime(); + try (final PublisherLease lease = pool.obtainPublisher()) { + // Send each FlowFile to Kafka asynchronously. + for (final FlowFile flowFile : flowFiles) { + if (!isScheduled()) { + // If stopped, re-queue FlowFile instead of sending it + session.transfer(flowFile); + continue; + } + + final byte[] messageKey = getMessageKey(flowFile, context); + final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); + final byte[] demarcatorBytes; + if (useDemarcator) { + demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8); + } else { + demarcatorBytes = null; + } + + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream rawIn) throws IOException { + try (final InputStream in = new BufferedInputStream(rawIn)) { + lease.publish(flowFile, in, messageKey, demarcatorBytes, topic); + } + } + }); } - }); - FlowFile resultFile = publishResultRef.get().isAllAcked() - ? 
this.cleanUpFlowFileIfNecessary(flowFile, session) - : session.putAllAttributes(flowFile, this.buildFailedFlowFileAttributes(publishResultRef.get().getLastMessageAcked(), flowFile, context)); + // Complete the send + final PublishResult publishResult = lease.complete(); - if (!this.isFailedFlowFile(resultFile)) { - resultFile = session.putAttribute(resultFile, MSG_COUNT, String.valueOf(publishResultRef.get().getMessagesSent())); - } - return resultFile; - } + // Transfer any successful FlowFiles. + final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime); + for (FlowFile success : publishResult.getSuccessfulFlowFiles()) { + final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue(); - /** - * Builds {@link PublishingContext} for message(s) to be sent to Kafka. - * {@link PublishingContext} contains all contextual information required by - * {@link KafkaPublisher} to publish to Kafka. Such information contains - * things like topic name, content stream, delimiter, key and last ACKed - * message for cases where provided FlowFile is being retried (failed in the - * past). - *
- * For the clean FlowFile (file that has been sent for the first time), - * PublishingContext will be built form {@link ProcessContext} associated - * with this invocation. - *
- * For the failed FlowFile, {@link PublishingContext} will be built from - * attributes of that FlowFile which by then will already contain required - * information (e.g., topic, key, delimiter etc.). This is required to - * ensure the affinity of the retry in the even where processor - * configuration has changed. However keep in mind that failed FlowFile is - * only considered a failed FlowFile if it is being re-processed by the same - * processor (determined via {@link #FAILED_PROC_ID_ATTR}, see - * {@link #isFailedFlowFile(FlowFile)}). If failed FlowFile is being sent to - * another PublishKafka0_10 processor it is treated as a fresh FlowFile - * regardless if it has #FAILED* attributes set. - */ - private PublishingContext buildPublishingContext(FlowFile flowFile, ProcessContext context, InputStream contentStream) { - final byte[] keyBytes = getMessageKey(flowFile, context); - - final String topicName; - final byte[] delimiterBytes; - int lastAckedMessageIndex = -1; - if (this.isFailedFlowFile(flowFile)) { - lastAckedMessageIndex = Integer.valueOf(flowFile.getAttribute(FAILED_LAST_ACK_IDX)); - topicName = flowFile.getAttribute(FAILED_TOPIC_ATTR); - delimiterBytes = flowFile.getAttribute(FAILED_DELIMITER_ATTR) != null - ? flowFile.getAttribute(FAILED_DELIMITER_ATTR).getBytes(StandardCharsets.UTF_8) : null; - } else { - topicName = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); - delimiterBytes = context.getProperty(MESSAGE_DEMARCATOR).isSet() ? 
context.getProperty(MESSAGE_DEMARCATOR) - .evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8) : null; - } + final int msgCount = publishResult.getSuccessfulMessageCount(success); + success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount)); + session.adjustCounter("Messages Sent", msgCount, true); + + final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic); + session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis); + session.transfer(success, REL_SUCCESS); + } + + // Transfer any failures. + for (final FlowFile failure : publishResult.getFailedFlowFiles()) { + final int successCount = publishResult.getSuccessfulMessageCount(failure); + if (successCount > 0) { + getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}", + new Object[] {failure, successCount, publishResult.getReasonForFailure(failure)}); + } else { + getLogger().error("Failed to send all message for {} to Kafka; routing to failure due to {}", + new Object[] {failure, publishResult.getReasonForFailure(failure)}); + } - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex, - context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue()); - publishingContext.setKeyBytes(keyBytes); - publishingContext.setDelimiterBytes(delimiterBytes); - return publishingContext; + session.transfer(failure, REL_FAILURE); + } + } } + private byte[] getMessageKey(final FlowFile flowFile, final ProcessContext context) { + if (context.getProperty(MESSAGE_DEMARCATOR).isSet()) { + return null; + } + final String uninterpretedKey; if (context.getProperty(KEY).isSet()) { uninterpretedKey = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue(); @@ -504,51 +394,4 @@ private byte[] getMessageKey(final FlowFile 
flowFile, final ProcessContext conte return DatatypeConverter.parseHexBinary(uninterpretedKey); } - - /** - * Will remove FAILED_* attributes if FlowFile is no longer considered a - * failed FlowFile - * - * @see #isFailedFlowFile(FlowFile) - */ - private FlowFile cleanUpFlowFileIfNecessary(FlowFile flowFile, ProcessSession session) { - if (this.isFailedFlowFile(flowFile)) { - Set keysToRemove = new HashSet<>(); - keysToRemove.add(FAILED_DELIMITER_ATTR); - keysToRemove.add(FAILED_KEY_ATTR); - keysToRemove.add(FAILED_TOPIC_ATTR); - keysToRemove.add(FAILED_PROC_ID_ATTR); - keysToRemove.add(FAILED_LAST_ACK_IDX); - flowFile = session.removeAllAttributes(flowFile, keysToRemove); - } - return flowFile; - } - - /** - * Builds a {@link Map} of FAILED_* attributes - * - * @see #FAILED_PROC_ID_ATTR - * @see #FAILED_LAST_ACK_IDX - * @see #FAILED_TOPIC_ATTR - * @see #FAILED_KEY_ATTR - * @see #FAILED_DELIMITER_ATTR - */ - private Map buildFailedFlowFileAttributes(int lastAckedMessageIndex, FlowFile sourceFlowFile, ProcessContext context) { - Map attributes = new HashMap<>(); - attributes.put(FAILED_PROC_ID_ATTR, this.getIdentifier()); - attributes.put(FAILED_LAST_ACK_IDX, String.valueOf(lastAckedMessageIndex)); - attributes.put(FAILED_TOPIC_ATTR, context.getProperty(TOPIC).evaluateAttributeExpressions(sourceFlowFile).getValue()); - attributes.put(FAILED_KEY_ATTR, context.getProperty(KEY).evaluateAttributeExpressions(sourceFlowFile).getValue()); - attributes.put(FAILED_DELIMITER_ATTR, context.getProperty(MESSAGE_DEMARCATOR).isSet() - ? context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(sourceFlowFile).getValue() : null); - return attributes; - } - - /** - * Returns 'true' if provided FlowFile is a failed FlowFile. A failed - * FlowFile contains {@link #FAILED_PROC_ID_ATTR}. 
- */ - private boolean isFailedFlowFile(FlowFile flowFile) { - return this.getIdentifier().equals(flowFile.getAttribute(FAILED_PROC_ID_ATTR)); - } } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java new file mode 100644 index 000000000000..b68526501f13 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishResult.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.Collection; +import java.util.Collections; + +import org.apache.nifi.flowfile.FlowFile; + +public interface PublishResult { + Collection getSuccessfulFlowFiles(); + + Collection getFailedFlowFiles(); + + int getSuccessfulMessageCount(FlowFile flowFile); + + Exception getReasonForFailure(FlowFile flowFile); + + + public static final PublishResult EMPTY = new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + return Collections.emptyList(); + } + + @Override + public Collection getFailedFlowFiles() { + return Collections.emptyList(); + } + + @Override + public int getSuccessfulMessageCount(FlowFile flowFile) { + return 0; + } + + @Override + public Exception getReasonForFailure(FlowFile flowFile) { + return null; + } + }; +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java new file mode 100644 index 000000000000..b67e8a8614d8 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.stream.io.exception.TokenTooLargeException; +import org.apache.nifi.stream.io.util.StreamDemarcator; + +public class PublisherLease implements Closeable { + private final ComponentLog logger; + private final Producer producer; + private final int maxMessageSize; + private final long maxAckWaitMillis; + private volatile boolean poisoned = false; + + private InFlightMessageTracker tracker; + + public PublisherLease(final Producer producer, final int maxMessageSize, final long maxAckWaitMillis, final ComponentLog logger) { + this.producer = producer; + this.maxMessageSize = maxMessageSize; + this.logger = logger; + this.maxAckWaitMillis = maxAckWaitMillis; + } + + protected void poison() { + this.poisoned = true; + } + + public boolean isPoisoned() { + return poisoned; + } + + void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic) throws IOException { + if (tracker == null) { + tracker = new 
InFlightMessageTracker(); + } + + try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) { + byte[] messageContent; + try { + while ((messageContent = demarcator.nextToken()) != null) { + // We do not want to use any key if we have a demarcator because that would result in + // the key being the same for multiple messages + final byte[] keyToUse = demarcatorBytes == null ? messageKey : null; + publish(flowFile, keyToUse, messageContent, topic, tracker); + + if (tracker.isFailed(flowFile)) { + // If we have a failure, don't try to send anything else. + return; + } + } + } catch (final TokenTooLargeException ttle) { + tracker.fail(flowFile, ttle); + } + } catch (final Exception e) { + tracker.fail(flowFile, e); + poison(); + throw e; + } + } + + private void publish(final FlowFile flowFile, final byte[] messageKey, final byte[] messageContent, final String topic, final InFlightMessageTracker tracker) { + final ProducerRecord record = new ProducerRecord<>(topic, null, messageKey, messageContent); + producer.send(record, new Callback() { + @Override + public void onCompletion(final RecordMetadata metadata, final Exception exception) { + if (exception == null) { + tracker.incrementAcknowledgedCount(flowFile); + } else { + tracker.fail(flowFile, exception); + poison(); + } + } + }); + + tracker.incrementSentCount(flowFile); + } + + public PublishResult complete() { + if (tracker == null) { + throw new IllegalStateException("Cannot complete publishing to Kafka because Publisher Lease was already closed"); + } + + producer.flush(); + + try { + tracker.awaitCompletion(maxAckWaitMillis); + return tracker.createPublishResult(); + } catch (final InterruptedException e) { + logger.warn("Interrupted while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka"); + Thread.currentThread().interrupt(); + return tracker.failOutstanding(e); + } catch 
(final TimeoutException e) { + logger.warn("Timed out while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka"); + return tracker.failOutstanding(e); + } finally { + tracker = null; + } + } + + @Override + public void close() { + producer.close(maxAckWaitMillis, TimeUnit.MILLISECONDS); + tracker = null; + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java new file mode 100644 index 000000000000..5902b038c61d --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherPool.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.io.Closeable; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.nifi.logging.ComponentLog; + +public class PublisherPool implements Closeable { + private final ComponentLog logger; + private final BlockingQueue publisherQueue; + private final Map kafkaProperties; + private final int maxMessageSize; + private final long maxAckWaitMillis; + + private volatile boolean closed = false; + + PublisherPool(final Map kafkaProperties, final ComponentLog logger, final int maxMessageSize, final long maxAckWaitMillis) { + this.logger = logger; + this.publisherQueue = new LinkedBlockingQueue<>(); + this.kafkaProperties = kafkaProperties; + this.maxMessageSize = maxMessageSize; + this.maxAckWaitMillis = maxAckWaitMillis; + } + + public PublisherLease obtainPublisher() { + if (isClosed()) { + throw new IllegalStateException("Connection Pool is closed"); + } + + PublisherLease lease = publisherQueue.poll(); + if (lease != null) { + return lease; + } + + lease = createLease(); + return lease; + } + + private PublisherLease createLease() { + final Producer producer = new KafkaProducer<>(kafkaProperties); + final PublisherLease lease = new PublisherLease(producer, maxMessageSize, maxAckWaitMillis, logger) { + @Override + public void close() { + if (isPoisoned() || isClosed()) { + super.close(); + } else { + publisherQueue.offer(this); + } + } + }; + + return lease; + } + + public synchronized boolean isClosed() { + return closed; + } + + @Override + public synchronized void close() { + closed = true; + + PublisherLease lease; + while ((lease = publisherQueue.poll()) != null) { + lease.close(); + } + } + + /** + * Returns the number of leases that are currently available + * + * @return the number of leases currently 
available + */ + protected int available() { + return publisherQueue.size(); + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java deleted file mode 100644 index 1513481df136..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishingContext.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -/** - * Holder of context information used by {@link KafkaPublisher} required to - * publish messages to Kafka. 
- */ -class PublishingContext { - - private final InputStream contentStream; - - private final String topic; - - private final int lastAckedMessageIndex; - - private final int maxRequestSize; - - private byte[] keyBytes; - - private byte[] delimiterBytes; - - PublishingContext(InputStream contentStream, String topic) { - this(contentStream, topic, -1); - } - - PublishingContext(InputStream contentStream, String topic, int lastAckedMessageIndex) { - this(contentStream, topic, lastAckedMessageIndex, 1048576); - } - - PublishingContext(InputStream contentStream, String topic, int lastAckedMessageIndex, int maxRequestSize) { - this.validateInput(contentStream, topic, lastAckedMessageIndex); - this.contentStream = contentStream; - this.topic = topic; - this.lastAckedMessageIndex = lastAckedMessageIndex; - this.maxRequestSize = maxRequestSize; - } - - @Override - public String toString() { - return "topic: '" + this.topic + "'; delimiter: '" + new String(this.delimiterBytes, StandardCharsets.UTF_8) + "'"; - } - - int getLastAckedMessageIndex() { - return this.lastAckedMessageIndex; - } - - int getMaxRequestSize() { - return this.maxRequestSize; - } - - byte[] getKeyBytes() { - return this.keyBytes; - } - - byte[] getDelimiterBytes() { - return this.delimiterBytes; - } - - InputStream getContentStream() { - return this.contentStream; - } - - String getTopic() { - return this.topic; - } - - void setKeyBytes(byte[] keyBytes) { - if (this.keyBytes == null) { - if (keyBytes != null) { - this.assertBytesValid(keyBytes); - this.keyBytes = keyBytes; - } - } else { - throw new IllegalArgumentException("'keyBytes' can only be set once per instance"); - } - } - - void setDelimiterBytes(byte[] delimiterBytes) { - if (this.delimiterBytes == null) { - if (delimiterBytes != null) { - this.assertBytesValid(delimiterBytes); - this.delimiterBytes = delimiterBytes; - } - } else { - throw new IllegalArgumentException("'delimiterBytes' can only be set once per instance"); - } - } - - private 
void assertBytesValid(byte[] bytes) { - if (bytes != null) { - if (bytes.length == 0) { - throw new IllegalArgumentException("'bytes' must not be empty"); - } - } - } - - private void validateInput(InputStream contentStream, String topic, int lastAckedMessageIndex) { - if (contentStream == null) { - throw new IllegalArgumentException("'contentStream' must not be null"); - } else if (topic == null || topic.trim().length() == 0) { - throw new IllegalArgumentException("'topic' must not be null or empty"); - } else if (lastAckedMessageIndex < -1) { - throw new IllegalArgumentException("'lastAckedMessageIndex' must be >= -1"); - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java index 7e4b12c17110..8e3fa3b9fb51 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java @@ -16,105 +16,36 @@ */ package org.apache.nifi.processors.kafka.pubsub; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.UUID; - +import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerConfig; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import 
org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; -import org.apache.kafka.common.KafkaException; -import org.apache.kafka.common.TopicPartition; + import org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Test; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import org.junit.Before; +import static org.mockito.Matchers.anyObject; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; public class ConsumeKafkaTest { - static class MockConsumerPool extends ConsumerPool { - - final int actualMaxLeases; - final List actualTopics; - final Map actualKafkaProperties; - boolean throwKafkaExceptionOnPoll = false; - boolean throwKafkaExceptionOnCommit = false; - Queue> nextPlannedRecordsQueue = new ArrayDeque<>(); - Map nextExpectedCommitOffsets = null; - Map actualCommitOffsets = null; - boolean wasConsumerLeasePoisoned = false; - boolean wasConsumerLeaseClosed = false; - boolean wasPoolClosed = false; - - public MockConsumerPool(int maxLeases, List topics, Map kafkaProperties, ComponentLog logger) { - super(maxLeases, topics, kafkaProperties, null); - actualMaxLeases = maxLeases; - actualTopics = topics; - actualKafkaProperties = kafkaProperties; - } - - @Override - public ConsumerLease obtainConsumer() { - return new ConsumerLease() { - @Override - public ConsumerRecords poll() { - if (throwKafkaExceptionOnPoll) { - throw new KafkaException("i planned to fail"); - } - final ConsumerRecords records = nextPlannedRecordsQueue.poll(); - return (records == null) ? 
ConsumerRecords.empty() : records; - } - - @Override - public void commitOffsets(Map offsets) { - if (throwKafkaExceptionOnCommit) { - throw new KafkaException("i planned to fail"); - } - actualCommitOffsets = offsets; - } - - @Override - public void poison() { - wasConsumerLeasePoisoned = true; - } - - @Override - public void close() { - wasConsumerLeaseClosed = true; - } - }; - } - - @Override - public void close() { - wasPoolClosed = true; - } - - void resetState() { - throwKafkaExceptionOnPoll = false; - throwKafkaExceptionOnCommit = false; - nextPlannedRecordsQueue = null; - nextExpectedCommitOffsets = null; - wasConsumerLeasePoisoned = false; - wasConsumerLeaseClosed = false; - wasPoolClosed = false; - } + Consumer mockConsumer = null; + ConsumerLease mockLease = null; + ConsumerPool mockConsumerPool = null; + @Before + public void setup() { + mockConsumer = mock(Consumer.class); + mockLease = mock(ConsumerLease.class); + mockConsumerPool = mock(ConsumerPool.class); } @Test @@ -175,365 +106,45 @@ public void validatePropertiesValidation() throws Exception { public void validateGetAllMessages() throws Exception { String groupName = "validateGetAllMessages"; - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords secondRecs = createConsumerRecords("bar", 1, 1L, secondPassValues); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - 
mockPool.nextPlannedRecordsQueue.add(firstRecs); - mockPool.nextPlannedRecordsQueue.add(secondRecs); - - ConsumeKafka proc = new ConsumeKafka() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - if (mockPool.nextPlannedRecordsQueue.isEmpty()) { - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-4")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-5")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-6")).count()); - assertEquals(2, mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("bar", 1)).offset()); - } else { - assertEquals(2, 
mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - } - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - } - - @Test - public void validateGetLotsOfMessages() throws Exception { - String groupName = "validateGetLotsOfMessages"; - - final byte[][] firstPassValues = new byte[10010][1]; - for (final byte[] value : firstPassValues) { - value[0] = 0x12; - } - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords secondRecs = createConsumerRecords("bar", 1, 1L, secondPassValues); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); - mockPool.nextPlannedRecordsQueue.add(secondRecs); - - ConsumeKafka proc = new ConsumeKafka() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka.GROUP_ID, groupName); - 
runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(10010, flowFiles.stream().map(ff -> ff.toByteArray()).filter(content -> content.length == 1 && content[0] == 0x12).count()); - assertEquals(1, mockPool.nextPlannedRecordsQueue.size()); - - assertEquals(1, mockPool.actualCommitOffsets.size()); - assertEquals(10011L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private ConsumerRecords createConsumerRecords(final String topic, final int partition, final long startingOffset, final byte[][] rawRecords) { - final Map>> map = new HashMap<>(); - final TopicPartition tPart = new TopicPartition(topic, partition); - final List> records = new ArrayList<>(); - long offset = startingOffset; - for (final byte[] rawRecord : rawRecords) { - final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord); - records.add(rec); - } - map.put(tPart, records); - return new ConsumerRecords(map); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private ConsumerRecords createConsumerRecords(final String topic, final int partition, final long startingOffset, final Map rawRecords) { - final Map>> map = new HashMap<>(); - final TopicPartition tPart = new TopicPartition(topic, partition); - final List> records = new ArrayList<>(); - long offset = startingOffset; - - for (final Map.Entry 
entry : rawRecords.entrySet()) { - final byte[] key = entry.getKey(); - final byte[] rawRecord = entry.getValue(); - final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, key, rawRecord); - records.add(rec); - } - map.put(tPart, records); - return new ConsumerRecords(map); - } - - private ConsumerRecords mergeRecords(final ConsumerRecords... records) { - final Map>> map = new HashMap<>(); - for (final ConsumerRecords rec : records) { - rec.partitions().stream().forEach((part) -> { - final List> conRecs = rec.records(part); - if (map.get(part) != null) { - throw new IllegalStateException("already have that topic/partition in the record map"); - } - map.put(part, conRecs); - }); - } - return new ConsumerRecords<>(map); - } - - @Test - public void validateGetAllMessagesWithProvidedDemarcator() throws Exception { - String groupName = "validateGetAllMessagesWithProvidedDemarcator"; - - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final byte[][] secondPassValues = new byte[][]{ - "Hello-4".getBytes(StandardCharsets.UTF_8), - "Hello-5".getBytes(StandardCharsets.UTF_8), - "Hello-6".getBytes(StandardCharsets.UTF_8) - }; - final ConsumerRecords consumerRecs = mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues), - createConsumerRecords("bar", 1, 1L, secondPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); + when(mockConsumerPool.obtainConsumer(anyObject())).thenReturn(mockLease); + when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE); + when(mockLease.commit()).thenReturn(Boolean.TRUE); ConsumeKafka proc = new ConsumeKafka() { 
@Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + return mockConsumerPool; } }; - final TestRunner runner = TestRunners.newTestRunner(proc); runner.setValidateExpressionUsage(false); runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); runner.setProperty(ConsumeKafka.TOPICS, "foo,bar"); runner.setProperty(ConsumeKafka.GROUP_ID, groupName); runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka.MESSAGE_DEMARCATOR, "blah"); - runner.run(1, false); - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(2, flowFiles.size()); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1blahHello-2blahHello-3")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-4blahHello-5blahHello-6")).count()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - assertEquals(2, mockPool.actualCommitOffsets.size()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("foo", 1)).offset()); - assertEquals(4L, mockPool.actualCommitOffsets.get(new TopicPartition("bar", 1)).offset()); - } - - @Test - public void validatePollException() throws Exception { - String groupName = "validatePollException"; - - final byte[][] firstPassValues = new byte[][]{ - 
"Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final ConsumerRecords consumerRecs = mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); - mockPool.throwKafkaExceptionOnPoll = true; - - ConsumeKafka proc = new ConsumeKafka() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka.TOPICS, "foo"); - runner.setProperty(ConsumeKafka.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka.MESSAGE_DEMARCATOR, "blah"); - - runner.run(1, true); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(0, flowFiles.size()); - assertNull(null, mockPool.actualCommitOffsets); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertTrue(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - } - - @Test - public void validateCommitOffsetException() throws Exception { - String groupName = "validateCommitOffsetException"; - - final byte[][] firstPassValues = new byte[][]{ - "Hello-1".getBytes(StandardCharsets.UTF_8), - "Hello-2".getBytes(StandardCharsets.UTF_8), - "Hello-3".getBytes(StandardCharsets.UTF_8) - }; - - final ConsumerRecords consumerRecs 
= mergeRecords( - createConsumerRecords("foo", 1, 1L, firstPassValues) - ); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.EMPTY_MAP, null); - mockPool.nextPlannedRecordsQueue.add(consumerRecs); - mockPool.throwKafkaExceptionOnCommit = true; - - ConsumeKafka proc = new ConsumeKafka() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - - final TestRunner runner = TestRunners.newTestRunner(proc); - runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka.TOPICS, "foo"); - runner.setProperty(ConsumeKafka.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka.MESSAGE_DEMARCATOR, "blah"); - - runner.run(1, true); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(1, flowFiles.size()); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1blahHello-2blahHello-3")).count()); - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertTrue(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - - assertNull(null, mockPool.actualCommitOffsets); + verify(mockConsumerPool, times(1)).obtainConsumer(anyObject()); + verify(mockLease, times(3)).continuePolling(); + verify(mockLease, times(2)).poll(); + verify(mockLease, times(1)).commit(); + verify(mockLease, times(1)).close(); + verifyNoMoreInteractions(mockConsumerPool); + verifyNoMoreInteractions(mockLease); } @Test - public void validateUtf8Key() { - String groupName = 
"validateGetAllMessages"; - - final Map rawRecords = new HashMap<>(); - rawRecords.put("key1".getBytes(), "Hello-1".getBytes()); - rawRecords.put(new byte[0], "Hello-2".getBytes()); - rawRecords.put(null, "Hello-3".getBytes()); + public void validateGetErrorMessages() throws Exception { + String groupName = "validateGetErrorMessages"; - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, rawRecords); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.emptyMap(), null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); + when(mockConsumerPool.obtainConsumer(anyObject())).thenReturn(mockLease); + when(mockLease.continuePolling()).thenReturn(true, false); + when(mockLease.commit()).thenReturn(Boolean.FALSE); ConsumeKafka proc = new ConsumeKafka() { @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; + protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) { + return mockConsumerPool; } }; final TestRunner runner = TestRunners.newTestRunner(proc); @@ -542,89 +153,15 @@ protected ConsumerPool createConsumerPool(final int maxLeases, final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - assertEquals(1, flowFiles.stream().map(ff -> 
ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "key1".equals(key)).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> key == null).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "".equals(key)).count()); - - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); + verify(mockConsumerPool, times(1)).obtainConsumer(anyObject()); + verify(mockLease, times(2)).continuePolling(); + verify(mockLease, times(1)).poll(); + verify(mockLease, times(1)).commit(); + verify(mockLease, times(1)).close(); + verifyNoMoreInteractions(mockConsumerPool); + verifyNoMoreInteractions(mockLease); } - @Test - public void validateHexKey() { - String groupName = "validateGetAllMessages"; - - final Map rawRecords = new HashMap<>(); - rawRecords.put("key1".getBytes(), "Hello-1".getBytes()); - rawRecords.put(new byte[0], "Hello-2".getBytes()); - rawRecords.put(null, "Hello-3".getBytes()); - - final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, rawRecords); - - final List expectedTopics = new ArrayList<>(); - expectedTopics.add("foo"); - expectedTopics.add("bar"); - final MockConsumerPool mockPool = new MockConsumerPool(1, expectedTopics, Collections.emptyMap(), null); - mockPool.nextPlannedRecordsQueue.add(firstRecs); - - ConsumeKafka proc = new ConsumeKafka() { - @Override - protected ConsumerPool createConsumerPool(final int maxLeases, final List topics, final Map props, final ComponentLog log) { - return mockPool; - } - }; - final TestRunner runner = TestRunners.newTestRunner(proc); 
- runner.setValidateExpressionUsage(false); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234"); - runner.setProperty(ConsumeKafka.TOPICS, "foo,bar"); - runner.setProperty(ConsumeKafka.GROUP_ID, groupName); - runner.setProperty(ConsumeKafka.AUTO_OFFSET_RESET, ConsumeKafka.OFFSET_EARLIEST); - runner.setProperty(ConsumeKafka.KEY_ATTRIBUTE_ENCODING, ConsumeKafka.HEX_ENCODING); - - runner.run(1, false); - - final List flowFiles = runner.getFlowFilesForRelationship(ConsumeKafka.REL_SUCCESS); - - assertEquals(expectedTopics, mockPool.actualTopics); - - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-1")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-2")).count()); - assertEquals(1, flowFiles.stream().map(ff -> new String(ff.toByteArray())).filter(content -> content.equals("Hello-3")).count()); - - final String expectedHex = (Integer.toHexString('k') + Integer.toHexString('e') + Integer.toHexString('y') + Integer.toHexString('1')).toUpperCase(); - - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> expectedHex.equals(key)).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> key == null).count()); - assertEquals(1, flowFiles.stream().map(ff -> ff.getAttribute(KafkaProcessorUtils.KAFKA_KEY)).filter(key -> "".equals(key)).count()); - - - //asert that all consumers were closed as expected - //assert that the consumer pool was properly closed - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertFalse(mockPool.wasPoolClosed); - runner.run(1, true); - assertFalse(mockPool.wasConsumerLeasePoisoned); - assertTrue(mockPool.wasConsumerLeaseClosed); - assertTrue(mockPool.wasPoolClosed); - } } diff --git 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java index 7f88ea2b2bb1..0ebf2b3ce337 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java @@ -16,109 +16,203 @@ */ package org.apache.nifi.processors.kafka.pubsub; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.TopicPartition; import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.provenance.ProvenanceReporter; import org.apache.nifi.processors.kafka.pubsub.ConsumerPool.PoolStats; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import org.junit.Before; import org.junit.Test; -import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyLong; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; public class ConsumerPoolTest { Consumer consumer = null; + ProcessSession mockSession = null; + ProvenanceReporter mockReporter = null; + ConsumerPool testPool = null; + ConsumerPool testDemarcatedPool = null; ComponentLog logger = null; @Before public 
void setup() { consumer = mock(Consumer.class); logger = mock(ComponentLog.class); - } - - @Test - public void validatePoolSimpleCreateClose() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(1, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { + mockSession = mock(ProcessSession.class); + mockReporter = mock(ProvenanceReporter.class); + when(mockSession.getProvenanceReporter()).thenReturn(mockReporter); + testPool = new ConsumerPool( + 1, + null, + Collections.emptyMap(), + Collections.singletonList("nifi"), + 100L, + "utf-8", + "ssl", + "localhost", + logger) { @Override protected Consumer createKafkaConsumer() { return consumer; } }; + testDemarcatedPool = new ConsumerPool( + 1, + "--demarcator--".getBytes(StandardCharsets.UTF_8), + Collections.emptyMap(), + Collections.singletonList("nifi"), + 100L, + "utf-8", + "ssl", + "localhost", + logger) { + @Override + protected Consumer createKafkaConsumer() { + return consumer; + } + }; + } - when(consumer.poll(anyInt())).thenReturn(ConsumerRecords.empty()); + @Test + public void validatePoolSimpleCreateClose() throws Exception { - try (final ConsumerLease lease = testPool.obtainConsumer()) { + when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + } + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { lease.poll(); - lease.commitOffsets(Collections.emptyMap()); } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats stats = testPool.getPoolStats(); assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); - assertEquals(1, 
stats.leasesObtainedCount); - assertEquals(1, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); + assertEquals(4, stats.leasesObtainedCount); } @Test - public void validatePoolSimpleBatchCreateClose() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(5, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { - @Override - protected Consumer createKafkaConsumer() { - return consumer; - } + public void validatePoolSimpleCreatePollClose() throws Exception { + final byte[][] firstPassValues = new byte[][]{ + "Hello-1".getBytes(StandardCharsets.UTF_8), + "Hello-2".getBytes(StandardCharsets.UTF_8), + "Hello-3".getBytes(StandardCharsets.UTF_8) }; + final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - when(consumer.poll(anyInt())).thenReturn(ConsumerRecords.empty()); + when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + lease.poll(); + lease.commit(); + } + testPool.close(); + verify(mockSession, times(3)).create(); + verify(mockSession, times(1)).commit(); + final PoolStats stats = testPool.getPoolStats(); + assertEquals(1, stats.consumerCreatedCount); + assertEquals(1, stats.consumerClosedCount); + assertEquals(1, stats.leasesObtainedCount); + } + @Test + public void validatePoolSimpleBatchCreateClose() throws Exception { + when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{})); for (int i = 0; i < 100; i++) { - try (final ConsumerLease lease = testPool.obtainConsumer()) { + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { for (int j = 0; j < 100; j++) { lease.poll(); } - lease.commitOffsets(Collections.emptyMap()); } } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats stats = testPool.getPoolStats(); 
assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); assertEquals(100, stats.leasesObtainedCount); - assertEquals(10000, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); } @Test - public void validatePoolConsumerFails() throws Exception { - - final ConsumerPool testPool = new ConsumerPool(1, Collections.singletonList("nifi"), Collections.emptyMap(), logger) { - @Override - protected Consumer createKafkaConsumer() { - return consumer; - } + public void validatePoolBatchCreatePollClose() throws Exception { + final byte[][] firstPassValues = new byte[][]{ + "Hello-1".getBytes(StandardCharsets.UTF_8), + "Hello-2".getBytes(StandardCharsets.UTF_8), + "Hello-3".getBytes(StandardCharsets.UTF_8) }; + final ConsumerRecords firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues); - when(consumer.poll(anyInt())).thenThrow(new KafkaException()); - - try (final ConsumerLease lease = testPool.obtainConsumer()) { + when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{})); + try (final ConsumerLease lease = testDemarcatedPool.obtainConsumer(mockSession)) { lease.poll(); - fail(); - } catch (final KafkaException ke) { + lease.commit(); + } + testDemarcatedPool.close(); + verify(mockSession, times(1)).create(); + verify(mockSession, times(1)).commit(); + final PoolStats stats = testDemarcatedPool.getPoolStats(); + assertEquals(1, stats.consumerCreatedCount); + assertEquals(1, stats.consumerClosedCount); + assertEquals(1, stats.leasesObtainedCount); + } + + @Test + public void validatePoolConsumerFails() throws Exception { + + when(consumer.poll(anyLong())).thenThrow(new KafkaException("oops")); + try (final ConsumerLease lease = testPool.obtainConsumer(mockSession)) { + try { + lease.poll(); + fail(); + } catch (final KafkaException ke) { + } } testPool.close(); + verify(mockSession, times(0)).create(); + verify(mockSession, times(0)).commit(); final PoolStats 
stats = testPool.getPoolStats(); assertEquals(1, stats.consumerCreatedCount); assertEquals(1, stats.consumerClosedCount); assertEquals(1, stats.leasesObtainedCount); - assertEquals(0, stats.unproductivePollCount); - assertEquals(0, stats.productivePollCount); } + + @SuppressWarnings({"rawtypes", "unchecked"}) + static ConsumerRecords createConsumerRecords(final String topic, final int partition, final long startingOffset, final byte[][] rawRecords) { + final Map>> map = new HashMap<>(); + final TopicPartition tPart = new TopicPartition(topic, partition); + final List> records = new ArrayList<>(); + long offset = startingOffset; + for (final byte[] rawRecord : rawRecords) { + final ConsumerRecord rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord); + records.add(rec); + } + map.put(tPart, records); + return new ConsumerRecords(map); + } + } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java deleted file mode 100644 index 19c64af1af48..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/KafkaPublisherTest.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.kafka.clients.producer.Partitioner; -import org.apache.kafka.common.Cluster; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processors.kafka.pubsub.KafkaPublisher.KafkaPublisherResult; -import org.apache.nifi.processors.kafka.test.EmbeddedKafka; -import org.apache.nifi.processors.kafka.test.EmbeddedKafkaProducerHelper; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -import kafka.consumer.Consumer; -import kafka.consumer.ConsumerConfig; -import kafka.consumer.ConsumerIterator; -import kafka.consumer.ConsumerTimeoutException; -import kafka.consumer.KafkaStream; -import kafka.javaapi.consumer.ConsumerConnector; -import org.apache.kafka.clients.producer.ProducerConfig; - -public class KafkaPublisherTest { - - private static EmbeddedKafka kafkaLocal; - - private static EmbeddedKafkaProducerHelper producerHelper; - - @BeforeClass - public static void beforeClass() { - kafkaLocal = new EmbeddedKafka(); - kafkaLocal.start(); - 
producerHelper = new EmbeddedKafkaProducerHelper(kafkaLocal); - } - - @AfterClass - public static void afterClass() throws Exception { - producerHelper.close(); - kafkaLocal.stop(); - } - - @Test - public void validateSuccessfulSendAsWhole() throws Exception { - InputStream contentStream = new ByteArrayInputStream("Hello Kafka".getBytes(StandardCharsets.UTF_8)); - String topicName = "validateSuccessfulSendAsWhole"; - - Properties kafkaProperties = this.buildProducerProperties(); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - KafkaPublisherResult result = publisher.publish(publishingContext); - - assertEquals(0, result.getLastMessageAcked()); - assertEquals(1, result.getMessagesSent()); - contentStream.close(); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - assertNotNull(iter.next()); - try { - iter.next(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - } - - @Test - public void validateSuccessfulSendAsDelimited() throws Exception { - InputStream contentStream = new ByteArrayInputStream( - "Hello Kafka\nHello Kafka\nHello Kafka\nHello Kafka\n".getBytes(StandardCharsets.UTF_8)); - String topicName = "validateSuccessfulSendAsDelimited"; - - Properties kafkaProperties = this.buildProducerProperties(); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - KafkaPublisherResult result = publisher.publish(publishingContext); - - assertEquals(3, result.getLastMessageAcked()); - assertEquals(4, result.getMessagesSent()); - contentStream.close(); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - 
assertNotNull(iter.next()); - assertNotNull(iter.next()); - assertNotNull(iter.next()); - assertNotNull(iter.next()); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - } - - /* - * This test simulates the condition where not all messages were ACKed by - * Kafka - */ - @Test - public void validateRetries() throws Exception { - byte[] testValue = "Hello Kafka1\nHello Kafka2\nHello Kafka3\nHello Kafka4\n".getBytes(StandardCharsets.UTF_8); - InputStream contentStream = new ByteArrayInputStream(testValue); - String topicName = "validateSuccessfulReSendOfFailedSegments"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - // simulates the first re-try - int lastAckedMessageIndex = 1; - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - - publisher.publish(publishingContext); - - ConsumerIterator iter = this.buildConsumer(topicName); - String m1 = new String(iter.next().message()); - String m2 = new String(iter.next().message()); - assertEquals("Hello Kafka3", m1); - assertEquals("Hello Kafka4", m2); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - // simulates the second re-try - lastAckedMessageIndex = 2; - contentStream = new ByteArrayInputStream(testValue); - publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - publisher.publish(publishingContext); - - m1 = new String(iter.next().message()); - assertEquals("Hello Kafka4", m1); - - publisher.close(); - } - - /* - * Similar to the above test, but it sets the first retry index to 
the last - * possible message index and second index to an out of bound index. The - * expectation is that no messages will be sent to Kafka - */ - @Test - public void validateRetriesWithWrongIndex() throws Exception { - byte[] testValue = "Hello Kafka1\nHello Kafka2\nHello Kafka3\nHello Kafka4\n".getBytes(StandardCharsets.UTF_8); - InputStream contentStream = new ByteArrayInputStream(testValue); - String topicName = "validateRetriesWithWrongIndex"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - - // simulates the first re-try - int lastAckedMessageIndex = 3; - PublishingContext publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - - publisher.publish(publishingContext); - - ConsumerIterator iter = this.buildConsumer(topicName); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - // simulates the second re-try - lastAckedMessageIndex = 6; - contentStream = new ByteArrayInputStream(testValue); - publishingContext = new PublishingContext(contentStream, topicName, lastAckedMessageIndex); - publishingContext.setDelimiterBytes("\n".getBytes(StandardCharsets.UTF_8)); - publisher.publish(publishingContext); - try { - iter.next(); - fail(); - } catch (ConsumerTimeoutException e) { - // that's OK since this is the Kafka mechanism to unblock - } - - publisher.close(); - } - - @Test - public void validateWithMultiByteCharactersNoDelimiter() throws Exception { - String data = "僠THIS IS MY NEW TEXT.僠IT HAS A NEWLINE."; - InputStream contentStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); - String topicName = "validateWithMultiByteCharacters"; - - Properties kafkaProperties = this.buildProducerProperties(); - - KafkaPublisher publisher = 
new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - - publisher.publish(publishingContext); - publisher.close(); - - ConsumerIterator iter = this.buildConsumer(topicName); - String r = new String(iter.next().message(), StandardCharsets.UTF_8); - assertEquals(data, r); - } - - @Test - public void validateWithNonDefaultPartitioner() throws Exception { - String data = "fooandbarandbaz"; - InputStream contentStream = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)); - String topicName = "validateWithNonDefaultPartitioner"; - - Properties kafkaProperties = this.buildProducerProperties(); - kafkaProperties.setProperty("partitioner.class", TestPartitioner.class.getName()); - KafkaPublisher publisher = new KafkaPublisher(kafkaProperties, mock(ComponentLog.class)); - PublishingContext publishingContext = new PublishingContext(contentStream, topicName); - publishingContext.setDelimiterBytes("and".getBytes(StandardCharsets.UTF_8)); - - try { - publisher.publish(publishingContext); - // partitioner should be invoked 3 times - assertTrue(TestPartitioner.counter == 3); - publisher.close(); - } finally { - TestPartitioner.counter = 0; - } - } - - private Properties buildProducerProperties() { - Properties kafkaProperties = new Properties(); - kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + kafkaLocal.getKafkaPort()); - kafkaProperties.put("auto.create.topics.enable", "true"); - return kafkaProperties; - } - - private ConsumerIterator buildConsumer(String topic) { - Properties props = new Properties(); - props.put("zookeeper.connect", "localhost:" + kafkaLocal.getZookeeperPort()); - props.put("group.id", "test"); - 
props.put("consumer.timeout.ms", "500"); - props.put("auto.offset.reset", "smallest"); - ConsumerConfig consumerConfig = new ConsumerConfig(props); - ConsumerConnector consumer = Consumer.createJavaConsumerConnector(consumerConfig); - Map topicCountMap = new HashMap<>(1); - topicCountMap.put(topic, 1); - Map>> consumerMap = consumer.createMessageStreams(topicCountMap); - List> streams = consumerMap.get(topic); - ConsumerIterator iter = streams.get(0).iterator(); - return iter; - } - - public static class TestPartitioner implements Partitioner { - - static int counter; - - @Override - public void configure(Map configs) { - // nothing to do, test - } - - @Override - public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, - Cluster cluster) { - counter++; - return 0; - } - - @Override - public void close() { - counter = 0; - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java deleted file mode 100644 index d81f0c177f82..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaTest.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.Map; - -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.Test; -import org.mockito.Mockito; -import static org.mockito.Mockito.times; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.verify; - -public class PublishKafkaTest { - - @Test - public void validateCustomSerilaizerDeserializerSettings() throws Exception { - PublishKafka publishKafka = new PublishKafka(); - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234"); - runner.setProperty(PublishKafka.TOPIC, "foo"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "3 sec"); - runner.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - runner.assertValid(); - runner.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "Foo"); - runner.assertNotValid(); - } - - @Test - public void validatePropertiesValidation() throws 
Exception { - PublishKafka publishKafka = new PublishKafka(); - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234"); - runner.setProperty(PublishKafka.TOPIC, "foo"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "foo"); - - try { - runner.assertValid(); - fail(); - } catch (AssertionError e) { - assertTrue(e.getMessage().contains("'max.block.ms' validated against 'foo' is invalid")); - } - } - - @Test - public void validateCustomValidation() { - String topicName = "validateCustomValidation"; - PublishKafka publishKafka = new PublishKafka(); - - /* - * Validates that Kerberos principle is required if one of SASL set for - * secirity protocol - */ - TestRunner runner = TestRunners.newTestRunner(publishKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(KafkaProcessorUtils.SECURITY_PROTOCOL, KafkaProcessorUtils.SEC_SASL_PLAINTEXT); - try { - runner.run(); - fail(); - } catch (Throwable e) { - assertTrue(e.getMessage().contains("'Kerberos Service Name' is invalid because")); - } - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateSingleCharacterDemarcatedMessages() { - String topicName = "validateSingleCharacterDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - - runner.enqueue("Hello World\nGoodbye\n1\n2\n3\n4\n5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, 
times(7)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateMultiCharacterDemarcatedMessagesAndCustomPartitionerA() { - String topicName = "validateMultiCharacterDemarcatedMessagesAndCustomPartitioner"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.PARTITION_CLASS, Partitioners.RoundRobinPartitioner.class.getName()); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "foo"); - - runner.enqueue("Hello WorldfooGoodbyefoo1foo2foo3foo4foo5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(7)).send(Mockito.any(ProducerRecord.class)); - - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateMultiCharacterDemarcatedMessagesAndCustomPartitionerB() { - String topicName = "validateMultiCharacterDemarcatedMessagesAndCustomPartitioner"; - StubPublishKafka putKafka = new StubPublishKafka(1); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.PARTITION_CLASS, Partitioners.RoundRobinPartitioner.class.getName()); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "foo"); - - runner.enqueue("Hello WorldfooGoodbyefoo1foo2foo3foo4foo5".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, 
times(7)).send(Mockito.any(ProducerRecord.class)); - - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnSendFailureAndThenResendSuccessA() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "3000 millis"); - - final String text = "Hello World\nGoodbye\nfail\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(1, runner.getQueueSize().getObjectCount()); // due to failure - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - putKafka.destroy(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnSendFailureAndThenResendSuccessB() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(1); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "500 millis"); - - final String text = "Hello World\nGoodbye\nfail\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - assertEquals(1, runner.getQueueSize().getObjectCount()); // due to failure - runner.run(1, false); - 
assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnFutureGetFailureAndThenResendSuccessFirstMessageFail() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "500 millis"); - - final String text = "futurefail\nHello World\nGoodbye\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - MockFlowFile ff = runner.getFlowFilesForRelationship(PublishKafka.REL_FAILURE).get(0); - assertNotNull(ff); - runner.enqueue(ff); - - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - // 6 sends due to duplication - verify(producer, times(5)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateOnFutureGetFailureAndThenResendSuccess() throws Exception { - String topicName = "validateSendFailureAndThenResendSuccess"; - StubPublishKafka putKafka = new StubPublishKafka(100); - - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - runner.setProperty(PublishKafka.META_WAIT_TIME, "500 millis"); - - final String text = 
"Hello World\nGoodbye\nfuturefail\n2"; - runner.enqueue(text.getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - MockFlowFile ff = runner.getFlowFilesForRelationship(PublishKafka.REL_FAILURE).get(0); - assertNotNull(ff); - runner.enqueue(ff); - - runner.run(1, false); - assertEquals(0, runner.getQueueSize().getObjectCount()); - Producer producer = putKafka.getProducer(); - // 6 sends due to duplication - verify(producer, times(6)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateDemarcationIntoEmptyMessages() { - String topicName = "validateDemarcationIntoEmptyMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - final TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(PublishKafka.KEY, "key1"); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "\n"); - - final byte[] bytes = "\n\n\n1\n2\n\n\n\n3\n4\n\n\n".getBytes(StandardCharsets.UTF_8); - runner.enqueue(bytes); - runner.run(1); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexRightPartialDemarcatedMessages() { - String topicName = "validateComplexRightPartialDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDSTUFF僠>僠I Mean IT!僠<僠WILDSTUFF僠>".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - Producer producer = 
putKafka.getProducer(); - verify(producer, times(3)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexLeftPartialDemarcatedMessages() { - String topicName = "validateComplexLeftPartialDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDSTUFF僠>僠I Mean IT!僠<僠WILDSTUFF僠>僠<僠WILDSTUFF僠>僠".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 1); - Producer producer = putKafka.getProducer(); - verify(producer, times(4)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @SuppressWarnings("unchecked") - @Test - public void validateComplexPartialMatchDemarcatedMessages() { - String topicName = "validateComplexPartialMatchDemarcatedMessages"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.MESSAGE_DEMARCATOR, "僠<僠WILDSTUFF僠>僠"); - - runner.enqueue("Hello World僠<僠WILDSTUFF僠>僠Goodbye僠<僠WILDBOOMSTUFF僠>僠".getBytes(StandardCharsets.UTF_8)); - runner.run(1, false); - - runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 1); - Producer producer = putKafka.getProducer(); - verify(producer, times(2)).send(Mockito.any(ProducerRecord.class)); - runner.shutdown(); - } - - @Test - public void validateUtf8Key() { - String topicName = "validateUtf8Key"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner 
runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.KEY, "${myKey}"); - - final Map attributes = Collections.singletonMap("myKey", "key1"); - runner.enqueue("Hello World".getBytes(StandardCharsets.UTF_8), attributes); - runner.run(1); - - runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 1); - final Map msgs = putKafka.getMessagesSent(); - assertEquals(1, msgs.size()); - final byte[] msgKey = (byte[]) msgs.keySet().iterator().next(); - assertTrue(Arrays.equals("key1".getBytes(StandardCharsets.UTF_8), msgKey)); - } - - @Test - public void validateHexKey() { - String topicName = "validateUtf8Key"; - StubPublishKafka putKafka = new StubPublishKafka(100); - TestRunner runner = TestRunners.newTestRunner(putKafka); - runner.setProperty(PublishKafka.TOPIC, topicName); - runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "localhost:1234"); - runner.setProperty(PublishKafka.KEY_ATTRIBUTE_ENCODING, PublishKafka.HEX_ENCODING); - runner.setProperty(PublishKafka.KEY, "${myKey}"); - - final Map attributes = Collections.singletonMap("myKey", "6B657931"); - runner.enqueue("Hello World".getBytes(StandardCharsets.UTF_8), attributes); - runner.run(1); - - runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 1); - final Map msgs = putKafka.getMessagesSent(); - assertEquals(1, msgs.size()); - final byte[] msgKey = (byte[]) msgs.keySet().iterator().next(); - - assertTrue(Arrays.equals(new byte[] {0x6B, 0x65, 0x79, 0x31}, msgKey)); - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java deleted file mode 100644 index 76c29cdd97f9..000000000000 --- 
a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/PublishingContextTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; - -import java.io.InputStream; -import java.nio.charset.StandardCharsets; - -import org.junit.Test; - -public class PublishingContextTest { - - @Test - public void failInvalidConstructorArgs() { - try { - new PublishingContext(null, null); - fail(); - } catch (IllegalArgumentException e) { - // success - } - try { - new PublishingContext(mock(InputStream.class), null); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - try { - new PublishingContext(mock(InputStream.class), ""); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - try { - new PublishingContext(mock(InputStream.class), "mytopic", -3); - fail(); - } catch (IllegalArgumentException e) { - // success - } - } - - @Test - public void validateFullSetting() { - PublishingContext publishingContext = new 
PublishingContext(mock(InputStream.class), "topic", 3); - publishingContext.setDelimiterBytes("delimiter".getBytes(StandardCharsets.UTF_8)); - publishingContext.setKeyBytes("key".getBytes(StandardCharsets.UTF_8)); - - assertEquals("delimiter", new String(publishingContext.getDelimiterBytes(), StandardCharsets.UTF_8)); - assertEquals("key", new String(publishingContext.getKeyBytes(), StandardCharsets.UTF_8)); - assertEquals("topic", publishingContext.getTopic()); - assertEquals("topic: 'topic'; delimiter: 'delimiter'", publishingContext.toString()); - } - - @Test - public void validateOnlyOnceSetPerInstance() { - PublishingContext publishingContext = new PublishingContext(mock(InputStream.class), "topic"); - publishingContext.setKeyBytes(new byte[]{0}); - try { - publishingContext.setKeyBytes(new byte[]{0}); - fail(); - } catch (IllegalArgumentException e) { - // success - } - - publishingContext.setDelimiterBytes(new byte[]{0}); - try { - publishingContext.setDelimiterBytes(new byte[]{0}); - fail(); - } catch (IllegalArgumentException e) { - // success - } - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java deleted file mode 100644 index 533655e464d0..000000000000 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/StubPublishKafka.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.kafka.pubsub; - -import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.BOOTSTRAP_SERVERS; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.lang.reflect.Field; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerConfig; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.TopicPartition; -import org.apache.kafka.common.errors.TopicAuthorizationException; -import org.apache.kafka.common.serialization.ByteArraySerializer; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.exception.ProcessException; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -public class StubPublishKafka extends PublishKafka { - - private volatile Producer producer; - - private volatile boolean failed; - - private final int ackCheckSize; - - private final ExecutorService executor = Executors.newCachedThreadPool(); - private final Map 
msgsSent = new ConcurrentHashMap<>(); - - StubPublishKafka(int ackCheckSize) { - this.ackCheckSize = ackCheckSize; - } - - public Producer getProducer() { - return producer; - } - - public void destroy() { - this.executor.shutdownNow(); - } - - public Map getMessagesSent() { - return new HashMap<>(msgsSent); - } - - @SuppressWarnings("unchecked") - @Override - protected KafkaPublisher buildKafkaResource(ProcessContext context, ProcessSession session) - throws ProcessException { - final Map kafkaProperties = new HashMap<>(); - KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProperties); - kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); - KafkaPublisher publisher; - try { - Field f = PublishKafka.class.getDeclaredField("brokers"); - f.setAccessible(true); - f.set(this, context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue()); - publisher = (KafkaPublisher) TestUtils.getUnsafe().allocateInstance(KafkaPublisher.class); - publisher.setAckWaitTime(15000); - producer = mock(Producer.class); - - this.instrumentProducer(producer, false); - Field kf = KafkaPublisher.class.getDeclaredField("kafkaProducer"); - kf.setAccessible(true); - kf.set(publisher, producer); - - Field componentLogF = KafkaPublisher.class.getDeclaredField("componentLog"); - componentLogF.setAccessible(true); - componentLogF.set(publisher, mock(ComponentLog.class)); - - Field ackCheckSizeField = KafkaPublisher.class.getDeclaredField("ackCheckSize"); - ackCheckSizeField.setAccessible(true); - ackCheckSizeField.set(publisher, this.ackCheckSize); - } catch (Exception e) { - e.printStackTrace(); - throw new IllegalStateException(e); - } - return publisher; - } - - @SuppressWarnings("unchecked") - private void instrumentProducer(Producer producer, boolean failRandomly) { - - 
when(producer.send(Mockito.any(ProducerRecord.class))).then(new Answer>() { - @Override - public Future answer(InvocationOnMock invocation) throws Throwable { - final ProducerRecord record = invocation.getArgumentAt(0, ProducerRecord.class); - if (record != null && record.key() != null) { - msgsSent.put(record.key(), record.value()); - } - - String value = new String(record.value(), StandardCharsets.UTF_8); - if ("fail".equals(value) && !StubPublishKafka.this.failed) { - StubPublishKafka.this.failed = true; - throw new RuntimeException("intentional"); - } - Future future = executor.submit(new Callable() { - @Override - public RecordMetadata call() throws Exception { - if ("futurefail".equals(value) && !StubPublishKafka.this.failed) { - StubPublishKafka.this.failed = true; - throw new TopicAuthorizationException("Unauthorized"); - } else { - TopicPartition partition = new TopicPartition("foo", 0); - RecordMetadata meta = new RecordMetadata(partition, 0, 0); - return meta; - } - } - }); - return future; - } - }); - } -} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java new file mode 100644 index 000000000000..e54a10c85e77 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.processors.kafka.pubsub; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeoutException; + +import org.apache.nifi.util.MockFlowFile; +import org.junit.Assert; +import org.junit.Test; + +public class TestInFlightMessageTracker { + + @Test(timeout = 5000L) + public void testAwaitCompletionWhenComplete() throws InterruptedException, TimeoutException { + final MockFlowFile flowFile = new MockFlowFile(1L); + + final InFlightMessageTracker tracker = new InFlightMessageTracker(); + tracker.incrementSentCount(flowFile); + + verifyNotComplete(tracker); + + tracker.incrementSentCount(flowFile); + verifyNotComplete(tracker); + + tracker.incrementAcknowledgedCount(flowFile); + verifyNotComplete(tracker); + + tracker.incrementAcknowledgedCount(flowFile); + tracker.awaitCompletion(1L); + } + + @Test(timeout = 5000L) + public void testAwaitCompletionWhileWaiting() throws InterruptedException, ExecutionException { + final MockFlowFile flowFile = new MockFlowFile(1L); + + final InFlightMessageTracker tracker = new InFlightMessageTracker(); + tracker.incrementSentCount(flowFile); + + verifyNotComplete(tracker); + + tracker.incrementSentCount(flowFile); + verifyNotComplete(tracker); + + final ExecutorService exec = Executors.newFixedThreadPool(1); + final Future future = exec.submit(() -> { + try { + tracker.awaitCompletion(10000L); + } catch (Exception e) { + throw new 
RuntimeException(e); + } + }); + + tracker.incrementAcknowledgedCount(flowFile); + tracker.incrementAcknowledgedCount(flowFile); + + future.get(); + } + + private void verifyNotComplete(final InFlightMessageTracker tracker) throws InterruptedException { + try { + tracker.awaitCompletion(10L); + Assert.fail("Expected timeout"); + } catch (final TimeoutException te) { + // expected + } + } + +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java new file mode 100644 index 000000000000..f9f2485b48c7 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.Before; +import org.junit.Test; + +public class TestPublishKafka { + private static final String TOPIC_NAME = "unit-test"; + + private PublisherPool mockPool; + private PublisherLease mockLease; + private TestRunner runner; + + @Before + public void setup() { + mockPool = mock(PublisherPool.class); + mockLease = mock(PublisherLease.class); + + when(mockPool.obtainPublisher()).thenReturn(mockLease); + + runner = TestRunners.newTestRunner(new PublishKafka() { + @Override + protected PublisherPool createPublisherPool(final ProcessContext context) { + return mockPool; + } + }); + + runner.setProperty(PublishKafka.TOPIC, TOPIC_NAME); + } + + @Test + public void testSingleSuccess() throws IOException { + final MockFlowFile flowFile = runner.enqueue("hello world"); + + when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 1); + + verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), 
eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testMultipleSuccess() throws IOException { + final Set flowFiles = new HashSet<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + + when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 3); + + verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testSingleFailure() throws IOException { + final MockFlowFile flowFile = runner.enqueue("hello world"); + + when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka.REL_FAILURE, 1); + + verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + } + + @Test + public void testMultipleFailures() throws IOException { + final Set flowFiles = new HashSet<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles)); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka.REL_FAILURE, 3); + + verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + } + + @Test + public void 
testMultipleMessagesPerFlowFile() throws IOException { + final List flowFiles = new ArrayList<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + final Map msgCounts = new HashMap<>(); + msgCounts.put(flowFiles.get(0), 10); + msgCounts.put(flowFiles.get(1), 20); + + final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap()); + + when(mockLease.complete()).thenReturn(result); + + runner.run(); + runner.assertAllFlowFilesTransferred(PublishKafka.REL_SUCCESS, 2); + + verify(mockLease, times(2)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(0)).poison(); + verify(mockLease, times(1)).close(); + + runner.assertAllFlowFilesContainAttribute("msg.count"); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka.REL_SUCCESS).stream() + .filter(ff -> ff.getAttribute("msg.count").equals("10")) + .count()); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka.REL_SUCCESS).stream() + .filter(ff -> ff.getAttribute("msg.count").equals("20")) + .count()); + } + + + @Test + public void testSomeSuccessSomeFailure() throws IOException { + final List flowFiles = new ArrayList<>(); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + flowFiles.add(runner.enqueue("hello world")); + + final Map msgCounts = new HashMap<>(); + msgCounts.put(flowFiles.get(0), 10); + msgCounts.put(flowFiles.get(1), 20); + + final Map failureMap = new HashMap<>(); + failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception")); + failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception")); + + final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap); + + 
when(mockLease.complete()).thenReturn(result); + + runner.run(); + runner.assertTransferCount(PublishKafka.REL_SUCCESS, 2); + runner.assertTransferCount(PublishKafka.REL_FAILURE, 2); + + verify(mockLease, times(4)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME)); + verify(mockLease, times(1)).complete(); + verify(mockLease, times(1)).close(); + + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka.REL_SUCCESS).stream() + .filter(ff -> "10".equals(ff.getAttribute("msg.count"))) + .count()); + assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka.REL_SUCCESS).stream() + .filter(ff -> "20".equals(ff.getAttribute("msg.count"))) + .count()); + + assertTrue(runner.getFlowFilesForRelationship(PublishKafka.REL_FAILURE).stream() + .noneMatch(ff -> ff.getAttribute("msg.count") != null)); + } + + + private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) { + return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount); + } + + private PublishResult createAllSuccessPublishResult(final Set successfulFlowFiles, final int msgCountPerFlowFile) { + final Map msgCounts = new HashMap<>(); + for (final FlowFile ff : successfulFlowFiles) { + msgCounts.put(ff, msgCountPerFlowFile); + } + return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap()); + } + + private PublishResult createFailurePublishResult(final FlowFile failure) { + return createFailurePublishResult(Collections.singleton(failure)); + } + + private PublishResult createFailurePublishResult(final Set failures) { + final Map failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception"))); + return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap); + } + + private PublishResult createPublishResult(final Map msgCounts, final Set successFlowFiles, final Map failures) { + 
// sanity check. + for (final FlowFile success : successFlowFiles) { + if (failures.containsKey(success)) { + throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success); + } + } + + return new PublishResult() { + @Override + public Collection getSuccessfulFlowFiles() { + return successFlowFiles; + } + + @Override + public Collection getFailedFlowFiles() { + return failures.keySet(); + } + + @Override + public int getSuccessfulMessageCount(FlowFile flowFile) { + Integer count = msgCounts.get(flowFile); + return count == null ? 0 : count.intValue(); + } + + @Override + public Exception getReasonForFailure(FlowFile flowFile) { + return failures.get(flowFile); + } + }; + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java new file mode 100644 index 000000000000..c2d143cf3155 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.kafka.clients.producer.Callback; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.util.MockFlowFile; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + + +public class TestPublisherLease { + private ComponentLog logger; + private Producer producer; + + @Before + @SuppressWarnings("unchecked") + public void setup() { + logger = Mockito.mock(ComponentLog.class); + producer = Mockito.mock(Producer.class); + } + + @Test + public void testPoisonOnException() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger) { + @Override + public void poison() { + poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = null; + + final InputStream failureInputStream = new InputStream() { + @Override + public int read() throws 
IOException { + throw new IOException("Intentional Unit Test Exception"); + } + }; + + try { + lease.publish(flowFile, failureInputStream, messageKey, demarcatorBytes, topic); + Assert.fail("Expected IOException"); + } catch (final IOException ioe) { + // expected + } + + assertEquals(1, poisonCount.get()); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + } + + @Test + @SuppressWarnings("unchecked") + public void testPoisonOnFailure() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger) { + @Override + public void poison() { + poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = null; + + doAnswer(new Answer() { + @Override + public Object answer(final InvocationOnMock invocation) throws Throwable { + final Callback callback = invocation.getArgumentAt(1, Callback.class); + callback.onCompletion(null, new RuntimeException("Unit Test Intentional Exception")); + return null; + } + }).when(producer).send(any(ProducerRecord.class), any(Callback.class)); + + lease.publish(flowFile, new ByteArrayInputStream(new byte[1]), messageKey, demarcatorBytes, topic); + + assertEquals(1, poisonCount.get()); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + } + + @Test + @SuppressWarnings("unchecked") + public void testAllDelimitedMessagesSent() throws IOException { + final AtomicInteger poisonCount = new AtomicInteger(0); + + final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 10L, logger) { + @Override + protected void poison() { + 
poisonCount.incrementAndGet(); + super.poison(); + } + }; + + final AtomicInteger correctMessages = new AtomicInteger(0); + final AtomicInteger incorrectMessages = new AtomicInteger(0); + doAnswer(new Answer() { + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + final ProducerRecord record = invocation.getArgumentAt(0, ProducerRecord.class); + final byte[] value = record.value(); + final String valueString = new String(value, StandardCharsets.UTF_8); + if ("1234567890".equals(valueString)) { + correctMessages.incrementAndGet(); + } else { + incorrectMessages.incrementAndGet(); + } + + return null; + } + }).when(producer).send(any(ProducerRecord.class), any(Callback.class)); + + final FlowFile flowFile = new MockFlowFile(1L); + final String topic = "unit-test"; + final byte[] messageKey = null; + final byte[] demarcatorBytes = "\n".getBytes(StandardCharsets.UTF_8); + + final byte[] flowFileContent = "1234567890\n1234567890\n1234567890\n\n\n\n1234567890\n\n\n1234567890\n\n\n\n".getBytes(StandardCharsets.UTF_8); + lease.publish(flowFile, new ByteArrayInputStream(flowFileContent), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent2 = new byte[0]; + lease.publish(new MockFlowFile(2L), new ByteArrayInputStream(flowFileContent2), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent3 = "1234567890\n1234567890".getBytes(StandardCharsets.UTF_8); // no trailing new line + lease.publish(new MockFlowFile(3L), new ByteArrayInputStream(flowFileContent3), messageKey, demarcatorBytes, topic); + + final byte[] flowFileContent4 = "\n\n\n".getBytes(StandardCharsets.UTF_8); + lease.publish(new MockFlowFile(4L), new ByteArrayInputStream(flowFileContent4), messageKey, demarcatorBytes, topic); + + assertEquals(0, poisonCount.get()); + + verify(producer, times(0)).flush(); + + final PublishResult result = lease.complete(); + assertTrue(result.getFailedFlowFiles().contains(flowFile)); + 
assertFalse(result.getSuccessfulFlowFiles().contains(flowFile)); + + assertEquals(7, correctMessages.get()); + assertEquals(0, incorrectMessages.get()); + + verify(producer, times(1)).flush(); + } +} diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java new file mode 100644 index 000000000000..7c701944c8e6 --- /dev/null +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-9-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherPool.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.processors.kafka.pubsub; + +import static org.junit.Assert.assertEquals; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.nifi.logging.ComponentLog; +import org.junit.Test; +import org.mockito.Mockito; + + +public class TestPublisherPool { + + @Test + public void testLeaseCloseReturnsToPool() { + final Map kafkaProperties = new HashMap<>(); + kafkaProperties.put("bootstrap.servers", "localhost:1111"); + kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName()); + kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName()); + + final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L); + assertEquals(0, pool.available()); + + final PublisherLease lease = pool.obtainPublisher(); + assertEquals(0, pool.available()); + + lease.close(); + assertEquals(1, pool.available()); + } + + @Test + public void testPoisonedLeaseNotReturnedToPool() { + final Map kafkaProperties = new HashMap<>(); + kafkaProperties.put("bootstrap.servers", "localhost:1111"); + kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName()); + kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName()); + + final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L); + assertEquals(0, pool.available()); + + final PublisherLease lease = pool.obtainPublisher(); + assertEquals(0, pool.available()); + + lease.poison(); + lease.close(); + assertEquals(0, pool.available()); + } + +}