Skip to content
Permalink
Browse files
Increase default DatasourceCompactionConfig.inputSegmentSizeBytes to Long.MAX_VALUE (#12381)

The current default value of inputSegmentSizeBytes is 400MB, which is pretty
low for most compaction use cases. Thus most users are forced to override the
default.

The default value is now increased to Long.MAX_VALUE.
  • Loading branch information
tejaswini-imply committed Apr 4, 2022
1 parent c5531be commit 984904779bea348905d96c95e1c3f05f44883af8
Show file tree
Hide file tree
Showing 6 changed files with 3 additions and 51 deletions.
@@ -962,7 +962,7 @@ A description of the compaction config is:
|--------|-----------|--------|
|`dataSource`|dataSource name to be compacted.|yes|
|`taskPriority`|[Priority](../ingestion/tasks.md#priority) of compaction task.|no (default = 25)|
|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = 419430400)|
|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = Long.MAX_VALUE)|
|`maxRowsPerSegment`|Max number of rows per segment after compaction.|no|
|`skipOffsetFromLatest`|The offset for searching segments to be compacted in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Strongly recommended to set for realtime dataSources. See [Data handling with compaction](../ingestion/compaction.md#data-handling-with-compaction)|no (default = "P1D")|
|`tuningConfig`|Tuning config for compaction tasks. See below [Compaction Task TuningConfig](#automatic-compaction-tuningconfig).|no|
@@ -13,4 +13,4 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- Seeds the coordinator's auto-compaction config for the 'upgradeTest' datasource.
-- NOTE(review): these two INSERTs are the before/after lines of a diff hunk, not a
-- script meant to run both statements — applying both would write the same config
-- name twice. Only the second reflects the post-change default.
-- Before: inputSegmentSizeBytes = 419430400 (the old 400 MiB default).
INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":419430400,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}');
-- After: inputSegmentSizeBytes = 9223372036854775807 (Long.MAX_VALUE), the new
-- default introduced by this commit so compaction is no longer size-capped by default.
INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":9223372036854775807,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}');
@@ -34,7 +34,7 @@
{
/** Must be synced with Tasks.DEFAULT_MERGE_TASK_PRIORITY */
public static final int DEFAULT_COMPACTION_TASK_PRIORITY = 25;
private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = 400 * 1024 * 1024;
private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = Long.MAX_VALUE;
private static final Period DEFAULT_SKIP_OFFSET_FROM_LATEST = new Period("P1D");

private final String dataSource;
@@ -75,7 +75,6 @@ describe('AutoForm', () => {
{
dataSource: 'ds',
taskPriority: 25,
inputSegmentSizeBytes: 419430400,
maxRowsPerSegment: null,
skipOffsetFromLatest: 'P4D',
tuningConfig: {
@@ -121,7 +120,6 @@ describe('AutoForm', () => {
{
dataSource: 'ds',
taskPriority: 25,
inputSegmentSizeBytes: 419430400,
skipOffsetFromLatest: 'P4D',
tuningConfig: {
partitionsSpec: {
@@ -271,14 +271,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit
"name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean",
},
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object {
"defaultValue": 1,
"info": <React.Fragment>
@@ -641,14 +633,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean",
},
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object {
"defaultValue": 1,
"info": <React.Fragment>
@@ -1011,14 +995,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (range partitio
"name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean",
},
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object {
"defaultValue": 1,
"info": <React.Fragment>
@@ -1381,14 +1357,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
"name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean",
},
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object {
"defaultValue": 1,
"info": <React.Fragment>
@@ -230,20 +230,6 @@ export const COMPACTION_CONFIG_FIELDS: Field<CompactionConfig>[] = [
</p>
),
},
{
name: 'inputSegmentSizeBytes',
type: 'number',
defaultValue: 419430400,
info: (
<p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must
be processed in its entirety, if the segments for a particular time chunk have a total size
in bytes greater than this parameter, compaction will not run for that time chunk. Because
each compaction task runs with a single thread, setting this value too far above 1–2GB will
result in compaction tasks taking an excessive amount of time.
</p>
),
},
{
name: 'tuningConfig.maxNumConcurrentSubTasks',
type: 'number',

0 comments on commit 9849047

Please sign in to comment.