Kinesis adaptive memory management #15360

Merged · 31 commits · Jan 19, 2024
Changes from 15 commits
85f25ed
* do stuff
zachjsh Oct 31, 2023
013b9d7
* move existing MemoryBoundLinkedBlockingQueue to druid core and exte…
zachjsh Nov 10, 2023
39545aa
* fix configs, tests
zachjsh Nov 14, 2023
ff77302
* add tests
zachjsh Nov 14, 2023
97a7ae3
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Nov 14, 2023
7b155f6
* fix serde
zachjsh Nov 14, 2023
74e0ad2
* minor fix to logging
zachjsh Nov 14, 2023
9547251
* update comment
zachjsh Nov 14, 2023
d7f9c26
* remove references to removed config properties from documentation a…
zachjsh Nov 15, 2023
fd57dfb
* fix spellcheck
zachjsh Nov 16, 2023
6666bc8
* dont throw away the rest of the GetRecords result after recordBuff…
zachjsh Nov 17, 2023
8bd7c69
* address review comments
zachjsh Nov 27, 2023
f3bac06
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Nov 27, 2023
b6349fb
* fix integration test compilation failure
zachjsh Nov 27, 2023
0300074
* fix code scan failure
zachjsh Nov 28, 2023
f4c0665
* review comments
zachjsh Nov 29, 2023
a4b3b35
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Nov 29, 2023
bca26e9
* fix ingestion-spec.tsx
zachjsh Nov 29, 2023
127bf0d
* change back to ArrayList
zachjsh Nov 29, 2023
07840a7
* fix ingestion-spec.tsx again
zachjsh Nov 29, 2023
0287eb1
* more review comments
zachjsh Dec 1, 2023
329a4d6
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Dec 1, 2023
1a6d83a
* make blocking queue actually block for time specified
zachjsh Dec 15, 2023
bce0530
* fix checkstyle
zachjsh Dec 15, 2023
047c266
* fix failing test
zachjsh Dec 15, 2023
765efc4
* signal not empty
zachjsh Dec 15, 2023
30e0148
* fix test
zachjsh Dec 18, 2023
5e5a84e
* fix test finally
zachjsh Dec 18, 2023
48425e4
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Jan 11, 2024
40f4f9e
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Jan 17, 2024
cb1fe85
Merge remote-tracking branch 'apache/master' into kinesis-adaptive-me…
zachjsh Jan 19, 2024
26 changes: 10 additions & 16 deletions docs/development/extensions-core/kinesis-ingestion.md
@@ -241,11 +241,9 @@ The following table outlines the configuration options for `ioConfig`:
|`completionTimeout`|ISO 8601 period|The length of time to wait before Druid declares a publishing task has failed and terminates it. If this is set too low, your tasks may never publish. The publishing clock for a task begins roughly after `taskDuration` elapses.|No|PT6H|
|`lateMessageRejectionPeriod`|ISO 8601 period|Configure tasks to reject messages with timestamps earlier than this period before the task is created. For example, if `lateMessageRejectionPeriod` is set to `PT1H` and the supervisor creates a task at `2016-01-01T12:00Z`, messages with timestamps earlier than `2016-01-01T11:00Z` are dropped. This may help prevent concurrency issues if your data stream has late messages and you have multiple pipelines that need to operate on the same segments, such as a streaming and a nightly batch ingestion pipeline.|No||
|`earlyMessageRejectionPeriod`|ISO 8601 period|Configure tasks to reject messages with timestamps later than this period after the task reached its `taskDuration`. For example, if `earlyMessageRejectionPeriod` is set to `PT1H`, the `taskDuration` is set to `PT1H` and the supervisor creates a task at `2016-01-01T12:00Z`. Messages with timestamps later than `2016-01-01T14:00Z` are dropped. **Note:** Tasks sometimes run past their task duration, for example, in cases of supervisor failover. Setting `earlyMessageRejectionPeriod` too low may cause messages to be dropped unexpectedly whenever a task runs past its originally configured task duration.|No||
|`recordsPerFetch`|Integer|The number of records to request per call to fetch records from Kinesis.|No| See [Determine fetch settings](#determine-fetch-settings) for defaults.|
|`fetchDelayMillis`|Integer|Time in milliseconds to wait between subsequent calls to fetch records from Kinesis. See [Determine fetch settings](#determine-fetch-settings).|No|0|
|`awsAssumedRoleArn`|String|The AWS assumed role to use for additional permissions.|No||
|`awsExternalId`|String|The AWS external ID to use for additional permissions.|No||
|`deaggregate`|Boolean|Whether to use the deaggregate function of the Kinesis Client Library (KCL).|No||
|`autoScalerConfig`|Object|Defines autoscaling behavior for Kinesis ingest tasks. See [Task autoscaler properties](#task-autoscaler-properties) for more information.|No|null|

### Task autoscaler properties
@@ -406,7 +404,7 @@ The following table outlines the configuration options for `tuningConfig`:
|`chatRetries`|Integer|The number of times Druid retries HTTP requests to indexing tasks before considering tasks unresponsive.|No|8|
|`httpTimeout`|ISO 8601 period|The period of time to wait for a HTTP response from an indexing task.|No|PT10S|
|`shutdownTimeout`|ISO 8601 period|The period of time to wait for the supervisor to attempt a graceful shutdown of tasks before exiting.|No|PT80S|
|`recordBufferSize`|Integer|The size of the buffer (number of events) Druid uses between the Kinesis fetch threads and the main ingestion thread.|No|See [Determine fetch settings](#determine-fetch-settings) for defaults.|
|`recordBufferSizeBytes`|Integer|The size of the buffer (heap memory bytes) Druid uses between the Kinesis fetch threads and the main ingestion thread.|No|See [Determine fetch settings](#determine-fetch-settings) for defaults.|
|`recordBufferOfferTimeout`|Integer|The number of milliseconds to wait for space to become available in the buffer before timing out.|No|5000|
|`recordBufferFullWait`|Integer|The number of milliseconds to wait for the buffer to drain before Druid attempts to fetch records from Kinesis again.|No|5000|
|`fetchThreads`|Integer|The size of the pool of threads fetching data from Kinesis. There is no benefit in having more threads than Kinesis shards.|No| `procs * 2`, where `procs` is the number of processors available to the task.|
@@ -419,6 +417,7 @@ The following table outlines the configuration options for `tuningConfig`:
|`offsetFetchPeriod`|ISO 8601 period|Determines how often the supervisor queries Kinesis and the indexing tasks to fetch current offsets and calculate lag. If the user-specified value is below the minimum value of PT5S, the supervisor ignores the value and uses the minimum value instead.|No|PT30S|
|`useListShards`|Boolean|Indicates if `listShards` API of AWS Kinesis SDK can be used to prevent `LimitExceededException` during ingestion. You must set the necessary `IAM` permissions.|No|`false`|
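For illustration, a `tuningConfig` using the renamed byte-based buffer property might look like the following (a hypothetical fragment; all values are made up, not recommendations):

```json
{
  "type": "kinesis",
  "recordBufferSizeBytes": 100000000,
  "recordBufferOfferTimeout": 5000,
  "recordBufferFullWait": 5000,
  "fetchThreads": 4,
  "useListShards": true
}
```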


### IndexSpec

The following table outlines the configuration options for `indexSpec`:
@@ -656,25 +655,22 @@ For more detail, see [Segment size optimization](../../operations/segment-optimi

Kinesis indexing tasks fetch records using `fetchThreads` threads.
If `fetchThreads` is higher than the number of Kinesis shards, the excess threads are unused.
Each fetch thread fetches up to `recordsPerFetch` records at once from a Kinesis shard, with a delay between fetches
Each fetch thread fetches up to 10 MB of records at once from a Kinesis shard, with a delay between fetches
of `fetchDelayMillis`.
The records fetched by each thread are pushed into a shared queue of size `recordBufferSize`.
The records fetched by each thread are pushed into a shared queue of size `recordBufferSizeBytes`.
The main runner thread for each task polls up to `maxRecordsPerPoll` records from the queue at once.
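The shared queue is bounded by total byte size rather than element count. A minimal sketch of that idea (a hypothetical illustration, not Druid's actual `MemoryBoundLinkedBlockingQueue`) might look like:

```java
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

// Illustrative sketch of a blocking queue bounded by total bytes rather than
// element count. Not Druid's actual MemoryBoundLinkedBlockingQueue.
class ByteBoundedQueue<T> {
    // Each element carries its own estimated size in bytes.
    static class Item<T> {
        final T value;
        final long sizeBytes;
        Item(T value, long sizeBytes) {
            this.value = value;
            this.sizeBytes = sizeBytes;
        }
    }

    private final LinkedBlockingQueue<Item<T>> delegate = new LinkedBlockingQueue<>();
    private final AtomicLong usedBytes = new AtomicLong(0);
    private final long maxBytes;

    ByteBoundedQueue(long maxBytes) {
        this.maxBytes = maxBytes;
    }

    // Reject the element if accepting it would exceed the byte budget.
    boolean offer(Item<T> item) {
        if (usedBytes.addAndGet(item.sizeBytes) > maxBytes) {
            usedBytes.addAndGet(-item.sizeBytes); // roll back and refuse
            return false;
        }
        return delegate.offer(item);
    }

    // Taking an element frees its bytes for producers.
    Item<T> take() throws InterruptedException {
        Item<T> item = delegate.take();
        usedBytes.addAndGet(-item.sizeBytes);
        return item;
    }

    long usedBytes() {
        return usedBytes.get();
    }
}
```

The fetch threads would `offer` records sized by their payload, while the main runner thread `take`s them; a full byte budget pushes back on producers regardless of how many records are queued.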

When using Kinesis Producer Library's aggregation feature, that is when [`deaggregate`](#deaggregation) is set,
each of these parameters refers to aggregated records rather than individual records.

The default values for these parameters are:

- `fetchThreads`: Twice the number of processors available to the task. The number of processors available to the task
is the total number of processors on the server, divided by `druid.worker.capacity` (the number of task slots on that
particular server).
particular server). This value is further limited so that the total record data fetched at a given time does not
exceed 5% of the configured max heap, assuming that each thread fetches 10 MB of records at once. If the configured
value is higher than this limit, no failure occurs, but a warning is logged and the value is
implicitly lowered to the maximum allowed by this constraint.
- `fetchDelayMillis`: 0 (no delay between fetches).
- `recordsPerFetch`: 100 MB or an estimated 5% of available heap, whichever is smaller, divided by `fetchThreads`.
For estimation purposes, Druid uses a figure of 10 KB for regular records and 1 MB for [aggregated records](#deaggregation).
- `recordBufferSize`: 100 MB or an estimated 10% of available heap, whichever is smaller.
For estimation purposes, Druid uses a figure of 10 KB for regular records and 1 MB for [aggregated records](#deaggregation).
- `maxRecordsPerPoll`: 100 for regular records, 1 for [aggregated records](#deaggregation).
- `recordBufferSizeBytes`: 100 MB or an estimated 10% of available heap, whichever is smaller.
- `maxRecordsPerPoll`: 1.
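A hypothetical sketch of how these defaults combine (constants taken from the text above: a 10 MB GetRecords cap, a fetch budget of 100 MB or 5% of heap, and a buffer of 100 MB or 10% of heap; method and class names here are illustrative, not Druid's actual API):

```java
// Sketch of the default sizing arithmetic described above.
// Constants mirror the documented limits; this is not Druid's exact code.
class KinesisDefaultsSketch {
    static final long GET_RECORDS_MAX_BYTES_PER_CALL = 10_000_000L; // 10 MB per GetRecords call
    static final long MAX_RECORD_FETCH_MEMORY = 100_000_000L;       // 100 MB fetch budget cap
    static final double FETCH_HEAP_FRACTION = 0.05;                 // ...or 5% of heap, whichever is smaller
    static final long MAX_RECORD_BUFFER_MEMORY = 100_000_000L;      // 100 MB buffer cap
    static final double BUFFER_HEAP_FRACTION = 0.10;                // ...or 10% of heap

    // Most fetch threads allowed before in-flight fetches could exceed the budget.
    static int maxFetchThreads(long maxHeapBytes) {
        long memoryToUse = Math.min(
            MAX_RECORD_FETCH_MEMORY,
            (long) (maxHeapBytes * FETCH_HEAP_FRACTION)
        );
        return (int) Math.max(1, memoryToUse / GET_RECORDS_MAX_BYTES_PER_CALL);
    }

    // Configured (or processors * 2) thread count, implicitly lowered to the cap.
    static int effectiveFetchThreads(long maxHeapBytes, int availableProcessors, Integer configured) {
        int threads = configured != null ? configured : availableProcessors * 2;
        return Math.min(threads, maxFetchThreads(maxHeapBytes));
    }

    static long defaultRecordBufferSizeBytes(long maxHeapBytes) {
        return Math.min(MAX_RECORD_BUFFER_MEMORY, (long) (maxHeapBytes * BUFFER_HEAP_FRACTION));
    }
}
```

For example, under these assumptions a task with a 1 GB heap gets a 50 MB fetch budget, so at most 5 fetch threads, and a 100 MB record buffer.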
**Review comment (Contributor):** Should this be higher? I wonder if this is too low in the case of non-aggregated records.

**zachjsh (author):** I wondered the same, actually. To be honest, I'm not sure. I think validating this requires extensive performance testing.

**zachjsh (author, Nov 27, 2023):** Changed it so that it polls for at least one record and at most 1_000_000 bytes if more than 1 record, which is what we were targeting before.

**Review comment (Contributor):** So does that mean we should update the `maxRecordsPerPoll: 1` here?

**zachjsh (author):** Updated.


Kinesis places the following restrictions on calls to fetch records:

@@ -697,8 +693,6 @@ Kinesis stream.
The Kinesis indexing service supports de-aggregation of multiple rows packed into a single record by the Kinesis
Producer Library's aggregate method for more efficient data transfer.

To enable this feature, set `deaggregate` to true in your `ioConfig` when submitting a supervisor spec.

## Resharding

[Resharding](https://docs.aws.amazon.com/streams/latest/dev/kinesis-using-sdk-java-resharding.html) is an advanced operation that lets you adjust the number of shards in a stream to adapt to changes in the rate of data flowing through a stream.
@@ -23,7 +23,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import org.apache.druid.emitter.kafka.KafkaEmitterConfig.EventType;
import org.apache.druid.emitter.kafka.MemoryBoundLinkedBlockingQueue.ObjectContainer;
import org.apache.druid.java.util.common.MemoryBoundLinkedBlockingQueue;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.common.logger.Logger;
@@ -173,7 +173,7 @@ private void sendSegmentMetadataToKafka()

private void sendToKafka(final String topic, MemoryBoundLinkedBlockingQueue<String> recordQueue, Callback callback)
{
ObjectContainer<String> objectToSend;
MemoryBoundLinkedBlockingQueue.ObjectContainer<String> objectToSend;
try {
while (true) {
objectToSend = recordQueue.take();
@@ -199,7 +199,7 @@ public void emit(final Event event)

String resultJson = jsonMapper.writeValueAsString(map);

ObjectContainer<String> objectContainer = new ObjectContainer<>(
MemoryBoundLinkedBlockingQueue.ObjectContainer<String> objectContainer = new MemoryBoundLinkedBlockingQueue.ObjectContainer<>(
resultJson,
StringUtils.toUtf8(resultJson).length
);
@@ -49,6 +49,10 @@
public class KinesisIndexTask extends SeekableStreamIndexTask<String, String, ByteEntity>
{
private static final String TYPE = "index_kinesis";

// GetRecords returns maximum 10MB per call
// (https://docs.aws.amazon.com/streams/latest/dev/service-sizes-and-limits.html)
private static final long GET_RECORDS_MAX_BYTES_PER_CALL = 10_000_000L;
private static final Logger log = new Logger(KinesisIndexTask.class);

private final boolean useListShards;
@@ -78,6 +82,10 @@ public KinesisIndexTask(
);
this.useListShards = useListShards;
this.awsCredentialsConfig = awsCredentialsConfig;
if (tuningConfig.getRecordBufferSizeConfigured() != null) {
**Review comment (Contributor):** Please move these two checks to `run` rather than the constructor, because we don't need to log this stuff every time a task object is constructed. (That happens at various points on the Overlord due to various API calls and internal machinations, and would create a lot of log spam.)

**zachjsh (author, Dec 1, 2023):** Good catch. Moved.

log.warn("The 'recordBufferSize' config property of the kinesis tuning config has been deprecated. "
+ "Please use 'recordBufferSizeBytes'.");
}
}

@Override
@@ -105,21 +113,18 @@ protected KinesisRecordSupplier newTaskRecordSupplier(final TaskToolbox toolbox)
{
KinesisIndexTaskIOConfig ioConfig = ((KinesisIndexTaskIOConfig) super.ioConfig);
KinesisIndexTaskTuningConfig tuningConfig = ((KinesisIndexTaskTuningConfig) super.tuningConfig);
final int recordBufferSizeBytes =
tuningConfig.getRecordBufferSizeBytesOrDefault(runtimeInfo.getMaxHeapSizeBytes());
final int fetchThreads = computeFetchThreads(runtimeInfo, tuningConfig.getFetchThreads());
final int recordsPerFetch = ioConfig.getRecordsPerFetchOrDefault(runtimeInfo.getMaxHeapSizeBytes(), fetchThreads);
final int recordBufferSize =
tuningConfig.getRecordBufferSizeOrDefault(runtimeInfo.getMaxHeapSizeBytes(), ioConfig.isDeaggregate());
final int maxRecordsPerPoll = tuningConfig.getMaxRecordsPerPollOrDefault(ioConfig.isDeaggregate());
final int maxRecordsPerPoll = tuningConfig.getMaxRecordsPerPollOrDefault();

log.info(
"Starting record supplier with fetchThreads [%d], fetchDelayMillis [%d], recordsPerFetch [%d], "
+ "recordBufferSize [%d], maxRecordsPerPoll [%d], deaggregate [%s].",
"Starting record supplier with fetchThreads [%d], fetchDelayMillis [%d], "
+ "recordBufferSizeBytes [%d], maxRecordsPerPoll [%d]",
fetchThreads,
ioConfig.getFetchDelayMillis(),
recordsPerFetch,
recordBufferSize,
maxRecordsPerPoll,
ioConfig.isDeaggregate()
recordBufferSizeBytes,
maxRecordsPerPoll
);

return new KinesisRecordSupplier(
@@ -129,11 +134,9 @@ protected KinesisRecordSupplier newTaskRecordSupplier(final TaskToolbox toolbox)
ioConfig.getAwsAssumedRoleArn(),
ioConfig.getAwsExternalId()
),
recordsPerFetch,
ioConfig.getFetchDelayMillis(),
fetchThreads,
ioConfig.isDeaggregate(),
recordBufferSize,
recordBufferSizeBytes,
tuningConfig.getRecordBufferOfferTimeout(),
tuningConfig.getRecordBufferFullWait(),
maxRecordsPerPoll,
@@ -179,15 +182,36 @@ AWSCredentialsConfig getAwsCredentialsConfig()
}

@VisibleForTesting
static int computeFetchThreads(final RuntimeInfo runtimeInfo, final Integer configuredFetchThreads)
static int computeFetchThreads(
final RuntimeInfo runtimeInfo,
final Integer configuredFetchThreads
)
{
final int fetchThreads;
int fetchThreads;
if (configuredFetchThreads != null) {
fetchThreads = configuredFetchThreads;
} else {
fetchThreads = runtimeInfo.getAvailableProcessors() * 2;
}

// Each fetchThread can return up to 10MB at a time
// (https://docs.aws.amazon.com/streams/latest/dev/service-sizes-and-limits.html), so cap fetchThreads so that
// we don't exceed the lesser of 100MB or 5% of heap at a time. Don't fail if the specified fetchThreads
// is greater than this, so as not to break older configurations; log a warning in this case and lower
// fetchThreads implicitly.
final long memoryToUse = Math.min(
KinesisIndexTaskIOConfig.MAX_RECORD_FETCH_MEMORY,
(long) (runtimeInfo.getMaxHeapSizeBytes() * KinesisIndexTaskIOConfig.RECORD_FETCH_MEMORY_MAX_HEAP_FRACTION)
);
int maxFetchThreads = Math.max(
1,
(int) (memoryToUse / GET_RECORDS_MAX_BYTES_PER_CALL)
);
if (fetchThreads > maxFetchThreads) {
log.warn("fetchThreads [%d] being lowered to [%d]", fetchThreads, maxFetchThreads);
**Review comment (Contributor):** This warning should only get logged if `configuredFetchThreads != null`. There's no reason to log it if `runtimeInfo.getAvailableProcessors() * 2` is lower than `maxFetchThreads`.

**zachjsh (author):** Good catch, updated.

fetchThreads = maxFetchThreads;
}

Preconditions.checkArgument(
fetchThreads > 0,
"Must have at least one background fetch thread for the record supplier"
@@ -23,7 +23,6 @@
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers;
import org.apache.druid.indexing.seekablestream.SeekableStreamIndexTaskIOConfig;
@@ -41,21 +40,19 @@ public class KinesisIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig<St
* Together with {@link KinesisIndexTaskTuningConfig#MAX_RECORD_BUFFER_MEMORY}, don't take up more than 200MB
* per task.
*/
private static final int MAX_RECORD_FETCH_MEMORY = 100_000_000;
public static final int MAX_RECORD_FETCH_MEMORY = 100_000_000;

/**
* Together with {@link KinesisIndexTaskTuningConfig#RECORD_BUFFER_MEMORY_MAX_HEAP_FRACTION}, don't take up more
* than 15% of the heap.
*/
private static final double RECORD_FETCH_MEMORY_MAX_HEAP_FRACTION = 0.05;
public static final double RECORD_FETCH_MEMORY_MAX_HEAP_FRACTION = 0.05;

private final String endpoint;
private final Integer recordsPerFetch;
private final int fetchDelayMillis;

private final String awsAssumedRoleArn;
private final String awsExternalId;
private final boolean deaggregate;

@JsonCreator
public KinesisIndexTaskIOConfig(
@@ -79,11 +76,9 @@ public KinesisIndexTaskIOConfig(
@JsonProperty("maximumMessageTime") DateTime maximumMessageTime,
@JsonProperty("inputFormat") @Nullable InputFormat inputFormat,
@JsonProperty("endpoint") String endpoint,
@JsonProperty("recordsPerFetch") Integer recordsPerFetch,
@JsonProperty("fetchDelayMillis") Integer fetchDelayMillis,
@JsonProperty("awsAssumedRoleArn") String awsAssumedRoleArn,
@JsonProperty("awsExternalId") String awsExternalId,
@JsonProperty("deaggregate") boolean deaggregate
@JsonProperty("awsExternalId") String awsExternalId
)
{
super(
@@ -105,11 +100,9 @@ public KinesisIndexTaskIOConfig(
);

this.endpoint = Preconditions.checkNotNull(endpoint, "endpoint");
this.recordsPerFetch = recordsPerFetch;
this.fetchDelayMillis = fetchDelayMillis != null ? fetchDelayMillis : DEFAULT_FETCH_DELAY_MILLIS;
this.awsAssumedRoleArn = awsAssumedRoleArn;
this.awsExternalId = awsExternalId;
this.deaggregate = deaggregate;
}

public KinesisIndexTaskIOConfig(
@@ -122,11 +115,9 @@ public KinesisIndexTaskIOConfig(
DateTime maximumMessageTime,
InputFormat inputFormat,
String endpoint,
Integer recordsPerFetch,
Integer fetchDelayMillis,
String awsAssumedRoleArn,
String awsExternalId,
boolean deaggregate
String awsExternalId
)
{
this(
@@ -142,11 +133,9 @@ public KinesisIndexTaskIOConfig(
maximumMessageTime,
inputFormat,
endpoint,
recordsPerFetch,
fetchDelayMillis,
awsAssumedRoleArn,
awsExternalId,
deaggregate
awsExternalId
);
}

@@ -215,32 +204,6 @@ public String getEndpoint()
return endpoint;
}

@Nullable
@JsonProperty("recordsPerFetch")
@JsonInclude(JsonInclude.Include.NON_NULL)
public Integer getRecordsPerFetchConfigured()
{
return recordsPerFetch;
}

public int getRecordsPerFetchOrDefault(final long maxHeapSize, final int fetchThreads)
{
if (recordsPerFetch != null) {
return recordsPerFetch;
} else {
final long memoryToUse = Math.min(
MAX_RECORD_FETCH_MEMORY,
(long) (maxHeapSize * RECORD_FETCH_MEMORY_MAX_HEAP_FRACTION)
);

final int assumedRecordSize = deaggregate
? KinesisIndexTaskTuningConfig.ASSUMED_RECORD_SIZE_AGGREGATE
: KinesisIndexTaskTuningConfig.ASSUMED_RECORD_SIZE;

return Ints.checkedCast(Math.max(1, memoryToUse / assumedRecordSize / fetchThreads));
}
}

@JsonProperty
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public int getFetchDelayMillis()
@@ -262,13 +225,6 @@ public String getAwsExternalId()
return awsExternalId;
}

@JsonProperty
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public boolean isDeaggregate()
{
return deaggregate;
}

@Override
public String toString()
{
@@ -280,11 +236,9 @@ public String toString()
", minimumMessageTime=" + getMinimumMessageTime() +
", maximumMessageTime=" + getMaximumMessageTime() +
", endpoint='" + endpoint + '\'' +
", recordsPerFetch=" + recordsPerFetch +
", fetchDelayMillis=" + fetchDelayMillis +
", awsAssumedRoleArn='" + awsAssumedRoleArn + '\'' +
", awsExternalId='" + awsExternalId + '\'' +
", deaggregate=" + deaggregate +
'}';
}
}