From 274ccbfd85c86a1333344c4ac2aedce241027728 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 24 Apr 2024 02:39:24 -0700 Subject: [PATCH 01/31] Reset buffer aggregators when resetting Groupers. (#16296) Buffer aggregators can contain some cached objects within them, such as Memory references or HLL Unions. Prior to this patch, various Grouper implementations were not releasing this state when resetting their own internal state, which could lead to excessive memory use. This patch renames AggregatorAdapater#close to "reset", and updates Grouper implementations to call this reset method whenever they reset their internal state. The base method on BufferAggregator and VectorAggregator remains named "close", for compatibility with existing extensions, but the contract is adjusted to say that the aggregator may be reused after the method is called. All existing implementations in core already adhere to this new contract, except for the ArrayOfDoubles build flavors, which are updated in this patch to adhere. Additionally, this patch harmonizes buffer sketch helpers to call their clear method "clear" rather than a mix of "clear" and "close". (Others were already using "clear".) --- .../hll/HllSketchMergeBufferAggregator.java | 2 +- .../HllSketchMergeBufferAggregatorHelper.java | 2 +- .../hll/HllSketchMergeVectorAggregator.java | 2 +- .../theta/SketchBufferAggregator.java | 2 +- .../theta/SketchBufferAggregatorHelper.java | 4 ++-- .../theta/SketchVectorAggregator.java | 2 +- .../ArrayOfDoublesSketchBuildAggregator.java | 16 ++++++++++----- ...yOfDoublesSketchBuildBufferAggregator.java | 6 ++++-- .../query/aggregation/AggregatorAdapters.java | 20 +++++++++---------- .../query/aggregation/BufferAggregator.java | 6 +++++- .../query/aggregation/VectorAggregator.java | 6 +++++- .../AbstractBufferHashGrouper.java | 2 +- .../epinephelinae/BufferArrayGrouper.java | 3 ++- .../epinephelinae/BufferHashGrouper.java | 1 + .../epinephelinae/HashVectorGrouper.java | 3 ++- .../LimitedBufferHashGrouper.java | 1 + .../timeseries/TimeseriesQueryEngine.java | 6 +++--- .../druid/query/topn/BaseTopNAlgorithm.java | 6 +++--- .../query/topn/HeapBasedTopNAlgorithm.java | 2 +- .../druid/query/topn/PooledTopNAlgorithm.java | 2 +- .../topn/TimeExtractionTopNAlgorithm.java | 2 +- .../epinephelinae/HashVectorGrouperTest.java | 2 +- 22 files changed, 58 insertions(+), 40 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregator.java index 8e3bfffa0737..0458b5084c43 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregator.java @@ -75,7 +75,7 @@ public Object get(final ByteBuffer buf, final int position) @Override public void close() { - helper.close(); + helper.clear(); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregatorHelper.java index 226530197725..1fa9ee4c9a3f 100644 --- 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeBufferAggregatorHelper.java @@ -142,7 +142,7 @@ private void initializeEmptyUnion(ByteBuffer buf, int position) } } - public void close() + public void clear() { unions.clear(); memCache.clear(); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java index 5fec9b94ba2d..31ad26cb5d7d 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeVectorAggregator.java @@ -102,7 +102,7 @@ public Object get(final ByteBuffer buf, final int position) @Override public void close() { - helper.close(); + helper.clear(); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java index 34aae3f36e18..60d83f4e0a71 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregator.java @@ -85,7 +85,7 @@ public double getDouble(ByteBuffer buf, int position) @Override public void close() { - helper.close(); + helper.clear(); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregatorHelper.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregatorHelper.java index 49856c9e80df..e2f699012a19 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregatorHelper.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchBufferAggregatorHelper.java @@ -95,7 +95,7 @@ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, Byt /** * Returns a {@link Union} associated with a particular buffer location. * - * The Union object will be cached in this helper until {@link #close()} is called. + * The Union object will be cached in this helper until {@link #clear()} is called. 
*/ public Union getOrCreateUnion(ByteBuffer buf, int position) { @@ -122,7 +122,7 @@ private Union createNewUnion(ByteBuffer buf, int position, boolean isWrapped) return union; } - public void close() + public void clear() { unions.clear(); memCache.clear(); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java index a862265d561c..7d10bc30fb5e 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchVectorAggregator.java @@ -107,6 +107,6 @@ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, Byt @Override public void close() { - helper.close(); + helper.clear(); } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java index 7ca1061889de..b093e730f0b3 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java @@ -27,7 +27,6 @@ import org.apache.druid.segment.data.IndexedInts; import javax.annotation.Nullable; - import java.nio.ByteBuffer; import java.util.LinkedHashMap; import java.util.List; @@ -48,6 +47,7 @@ public class ArrayOfDoublesSketchBuildAggregator implements Aggregator @Nullable private ArrayOfDoublesUpdatableSketch sketch; + private final int nominalEntries; private final boolean canLookupUtf8; private final boolean canCacheById; private final LinkedHashMap stringCache = new LinkedHashMap() @@ -67,10 +67,7 @@ public ArrayOfDoublesSketchBuildAggregator( { this.keySelector = keySelector; this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); - values = new double[valueSelectors.size()]; - sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) - .setNumberOfValues(valueSelectors.size()).build(); - + this.nominalEntries = nominalEntries; this.canCacheById = this.keySelector.nameLookupPossibleInAdvance(); this.canLookupUtf8 = this.keySelector.supportsLookupNameUtf8(); } @@ -83,6 +80,15 @@ public ArrayOfDoublesSketchBuildAggregator( @Override public void aggregate() { + if (values == null) { + values = new double[valueSelectors.length]; + } + + if (sketch == null) { + sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) + .setNumberOfValues(valueSelectors.length).build(); + } + final IndexedInts keys = keySelector.getRow(); for (int i = 0; i < valueSelectors.length; i++) { if (valueSelectors[i].isNull()) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java index 18906d129360..b925220c89fd 100644 --- 
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java @@ -73,8 +73,6 @@ public ArrayOfDoublesSketchBuildBufferAggregator( this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); this.nominalEntries = nominalEntries; this.maxIntermediateSize = maxIntermediateSize; - values = new double[valueSelectors.size()]; - this.canCacheById = this.keySelector.nameLookupPossibleInAdvance(); this.canLookupUtf8 = this.keySelector.supportsLookupNameUtf8(); } @@ -92,6 +90,10 @@ public void init(final ByteBuffer buf, final int position) @Override public void aggregate(final ByteBuffer buf, final int position) { + if (values == null) { + values = new double[valueSelectors.length]; + } + for (int i = 0; i < valueSelectors.length; i++) { if (valueSelectors[i].isNull()) { return; diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java index 8ae7a33b08d6..25c9102bcf72 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorAdapters.java @@ -26,7 +26,6 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; -import java.io.Closeable; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; @@ -42,7 +41,7 @@ * (2) Query engines are freed from the need to manage how much space each individual aggregator needs. They only * need to allocate a block of size "spaceNeeded". */ -public class AggregatorAdapters implements Closeable +public class AggregatorAdapters { private static final Logger log = new Logger(AggregatorAdapters.class); @@ -230,14 +229,14 @@ public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, Byt } /** - * Close all of our aggregators. + * Reset all of our aggregators, releasing resources held by them. After this, this instance may be reused or + * it may be discarded. */ - @Override - public void close() + public void reset() { for (Adapter adapter : adapters) { try { - adapter.close(); + adapter.reset(); } catch (Exception e) { log.warn(e, "Could not close aggregator [%s], skipping.", adapter.getFactory().getName()); @@ -250,7 +249,7 @@ public void close() * BufferAggregator and VectorAggregator. Private, since it doesn't escape this class and the * only two implementations are private static classes below. 
*/ - private interface Adapter extends Closeable + private interface Adapter { void init(ByteBuffer buf, int position); @@ -259,8 +258,7 @@ private interface Adapter extends Closeable void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer); - @Override - void close(); + void reset(); AggregatorFactory getFactory(); @@ -293,7 +291,7 @@ public Object get(final ByteBuffer buf, final int position) } @Override - public void close() + public void reset() { aggregator.close(); } @@ -352,7 +350,7 @@ public Object get(final ByteBuffer buf, final int position) } @Override - public void close() + public void reset() { aggregator.close(); } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java index e9fdbeaa061b..20d13491b0f6 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/BufferAggregator.java @@ -158,7 +158,11 @@ default double getDouble(ByteBuffer buf, int position) } /** - * Release any resources used by the aggregator + * Release any resources used by the aggregator. The aggregator may be reused after this call, by calling + * {@link #init(ByteBuffer, int)} followed by other methods as normal. + * + * This call would be more properly named "reset", but we use the name "close" to improve compatibility with + * existing aggregator implementations in extensions. */ void close(); diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java index befff12ba6e0..a3e506e59c87 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/VectorAggregator.java @@ -83,7 +83,11 @@ default void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, By } /** - * Release any resources used by the aggregator. + * Release any resources used by the aggregator. The aggregator may be reused after this call, by calling + * {@link #init(ByteBuffer, int)} followed by other methods as normal. + * + * This call would be more properly named "reset", but we use the name "close" to improve compatibility with + * existing aggregator implementations in extensions. 
*/ void close(); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java index f3bc195dcbdf..70cf5832cf33 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/AbstractBufferHashGrouper.java @@ -170,7 +170,7 @@ public AggregateResult aggregate(KeyType key, int keyHash) public void close() { keySerde.reset(); - aggregators.close(); + aggregators.reset(); } /** diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java index 0fcb4ddeb2e0..616ac190dd83 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferArrayGrouper.java @@ -269,6 +269,7 @@ public void reset() { // Clear the entire usedFlagBuffer usedFlagMemory.clear(); + aggregators.reset(); } @Override @@ -280,7 +281,7 @@ public IntGrouperHashFunction hashFunction() @Override public void close() { - aggregators.close(); + aggregators.reset(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java index 167b322b9d45..c4d046977168 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/BufferHashGrouper.java @@ -158,6 +158,7 @@ public void reset() offsetList.reset(); hashTable.reset(); keySerde.reset(); + aggregators.reset(); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouper.java index ad12d0503f8f..e5c2801c3b74 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouper.java @@ -205,6 +205,7 @@ public void reset() } this.hashTable = createTable(buffer, tableStart, numBuckets); + this.aggregators.reset(); } @Override @@ -256,7 +257,7 @@ public void close() @Override public void close() { - aggregators.close(); + aggregators.reset(); } @VisibleForTesting diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java index 756a8227f5e9..90a0e1e250d6 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/LimitedBufferHashGrouper.java @@ -185,6 +185,7 @@ public void reset() hashTable.reset(); keySerde.reset(); offsetHeap.reset(); + aggregators.reset(); heapIndexUpdater.setHashTableBuffer(hashTable.getTableBuffer()); hasIterated = false; offsetHeapIterableSize = 0; diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java 
b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index 7ae290dd7d48..c5e83b84e87c 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -164,9 +164,9 @@ private Sequence> processVectorized( } final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - final AggregatorAdapters aggregators = closer.register( - AggregatorAdapters.factorizeVector(columnSelectorFactory, query.getAggregatorSpecs()) - ); + final AggregatorAdapters aggregators = + AggregatorAdapters.factorizeVector(columnSelectorFactory, query.getAggregatorSpecs()); + closer.register(aggregators::reset); final ResourceHolder bufferHolder = closer.register(bufferPool.take()); diff --git a/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java index 843d248221e7..f34464a49d0e 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java +++ b/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java @@ -120,7 +120,7 @@ private void runWithCardinalityKnown( updateResults(params, theDimValSelector, aggregatesStore, resultBuilder); - closeAggregators(aggregatesStore); + resetAggregators(aggregatesStore); numProcessed += numToProcess; params.getCursor().reset(); @@ -151,7 +151,7 @@ private void runWithCardinalityUnknown( } long processedRows = scanAndAggregate(params, null, aggregatesStore); updateResults(params, null, aggregatesStore, resultBuilder); - closeAggregators(aggregatesStore); + resetAggregators(aggregatesStore); params.getCursor().reset(); if (queryMetrics != null) { queryMetrics.addProcessedRows(processedRows); @@ -199,7 +199,7 @@ protected abstract void updateResults( TopNResultBuilder resultBuilder ); - protected abstract void closeAggregators( + protected abstract void resetAggregators( DimValAggregateStore dimValAggregateStore ); diff --git a/processing/src/main/java/org/apache/druid/query/topn/HeapBasedTopNAlgorithm.java b/processing/src/main/java/org/apache/druid/query/topn/HeapBasedTopNAlgorithm.java index 14f3b729e1e5..ba5fbf251084 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/HeapBasedTopNAlgorithm.java +++ b/processing/src/main/java/org/apache/druid/query/topn/HeapBasedTopNAlgorithm.java @@ -112,7 +112,7 @@ protected void updateResults( } @Override - protected void closeAggregators(TopNColumnAggregatesProcessor processor) + protected void resetAggregators(TopNColumnAggregatesProcessor processor) { processor.closeAggregators(); } diff --git a/processing/src/main/java/org/apache/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/org/apache/druid/query/topn/PooledTopNAlgorithm.java index 6ddda5eb1be8..d0c0fb064e03 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/org/apache/druid/query/topn/PooledTopNAlgorithm.java @@ -768,7 +768,7 @@ protected void updateResults( } @Override - protected void closeAggregators(BufferAggregator[] bufferAggregators) + protected void resetAggregators(BufferAggregator[] bufferAggregators) { for (BufferAggregator agg : bufferAggregators) { agg.close(); diff --git a/processing/src/main/java/org/apache/druid/query/topn/TimeExtractionTopNAlgorithm.java 
b/processing/src/main/java/org/apache/druid/query/topn/TimeExtractionTopNAlgorithm.java index 70e01e49aed0..3b60bb65ee17 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TimeExtractionTopNAlgorithm.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TimeExtractionTopNAlgorithm.java @@ -135,7 +135,7 @@ protected void updateResults( } @Override - protected void closeAggregators(Map stringMap) + protected void resetAggregators(Map stringMap) { for (Aggregator[] aggregators : stringMap.values()) { for (Aggregator agg : aggregators) { diff --git a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouperTest.java b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouperTest.java index d5a863a7542a..fd0314a607c4 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouperTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/epinephelinae/HashVectorGrouperTest.java @@ -45,7 +45,7 @@ public void testCloseAggregatorAdaptorsShouldBeClosed() ); grouper.initVectorized(512); grouper.close(); - Mockito.verify(aggregatorAdapters, Mockito.times(1)).close(); + Mockito.verify(aggregatorAdapters, Mockito.times(2)).reset(); } @Test From 080476f9ea3b4fba23408efbda4cdb5f14190897 Mon Sep 17 00:00:00 2001 From: Sree Charan Manamala <155449160+sreemanamala@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:45:02 +0530 Subject: [PATCH 02/31] WINDOWING - Fix 2 nodes with same digest causing mapping issue (#16301) Fixes the mapping issue in window fucntions where 2 nodes get the same reference. --- .../apache/druid/msq/exec/MSQWindowTest.java | 32 +++++++++++++++++++ .../druid/sql/calcite/rel/Windowing.java | 13 +++++--- .../sql/calcite/CalciteWindowQueryTest.java | 21 ++++++++++++ .../sql/calcite/DrillWindowQueryTest.java | 3 -- .../druid/sql/calcite/NotYetSupported.java | 1 - 5 files changed, 62 insertions(+), 8 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQWindowTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQWindowTest.java index 74b04138a741..1ffa89ab2471 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQWindowTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQWindowTest.java @@ -1724,6 +1724,38 @@ public void testSimpleWindowWithEmptyOverNoGroupBy(String contextName, Map context) + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("m1", ColumnType.FLOAT) + .add("cc", ColumnType.DOUBLE) + .add("cc_dup", ColumnType.DOUBLE) + .build(); + + testIngestQuery().setSql(" REPLACE INTO foo OVERWRITE ALL\n" + + "select __time, m1,SUM(m1) OVER() cc,SUM(m1) OVER() cc_dup from foo\n" + + "PARTITIONED BY ALL CLUSTERED BY m1") + .setExpectedDataSource("foo") + .setExpectedRowSignature(rowSignature) + .setQueryContext(context) + .setExpectedDestinationIntervals(Intervals.ONLY_ETERNITY) + .setExpectedResultRows( + ImmutableList.of( + new Object[]{946684800000L, 1.0f, 21.0, 21.0}, + new Object[]{946771200000L, 2.0f, 21.0, 21.0}, + new Object[]{946857600000L, 3.0f, 21.0, 21.0}, + new Object[]{978307200000L, 4.0f, 21.0, 21.0}, + new Object[]{978393600000L, 5.0f, 21.0, 21.0}, + new Object[]{978480000000L, 6.0f, 21.0, 21.0} + ) + ) + .setExpectedSegment(ImmutableSet.of(SegmentId.of("foo", Intervals.ETERNITY, "test", 0))) + .verifyResults(); + } + 
@MethodSource("data") @ParameterizedTest(name = "{index}:with context {0}") public void testSimpleWindowWithJoins(String contextName, Map context) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java index 0a4f3226d7e3..20c672ce924b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Windowing.java @@ -240,11 +240,16 @@ public static Windowing fromCalciteStuff( // Apply windowProject, if present. if (partialQuery.getWindowProject() != null) { - // We know windowProject is a mapping due to the isMapping() check in DruidRules. Check for null anyway, - // as defensive programming. + // We know windowProject is a mapping due to the isMapping() check in DruidRules. + // check anyway as defensive programming. + Preconditions.checkArgument(partialQuery.getWindowProject().isMapping()); final Mappings.TargetMapping mapping = Preconditions.checkNotNull( - partialQuery.getWindowProject().getMapping(), - "mapping for windowProject[%s]", partialQuery.getWindowProject() + Project.getPartialMapping( + partialQuery.getWindowProject().getInput().getRowType().getFieldCount(), + partialQuery.getWindowProject().getProjects() + ), + "mapping for windowProject[%s]", + partialQuery.getWindowProject() ); final List windowProjectOutputColumns = new ArrayList<>(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java index c869cb8e44fa..16706335515b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.ISE; @@ -37,6 +38,7 @@ import org.apache.druid.sql.calcite.QueryVerification.QueryResultsVerifier; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.junit.Assert; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -231,6 +233,25 @@ public void windowQueryTestWithCustomContextMaxSubqueryBytes(String filename) th } } + @Test + public void testWindow() + { + testBuilder() + .sql("SELECT\n" + + "(rank() over (order by count(*) desc)),\n" + + "(rank() over (order by count(*) desc))\n" + + "FROM \"wikipedia\"") + .queryContext(ImmutableMap.of( + PlannerContext.CTX_ENABLE_WINDOW_FNS, true, + QueryContexts.ENABLE_DEBUG, true, + QueryContexts.WINDOWING_STRICT_VALIDATION, false + )) + .expectedResults(ImmutableList.of( + new Object[]{1L, 1L} + )) + .run(); + } + private WindowOperatorQuery getWindowOperatorQuery(List> queries) { assertEquals(1, queries.size()); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java index 2236a7d71a80..59f7de2ad177 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java @@ -4698,7 +4698,6 @@ public void 
test_aggregates_winFnQry_21() windowQueryTest(); } - @NotYetSupported(Modes.NPE) @DrillTest("first_val/firstValFn_5") @Test public void test_first_val_firstValFn_5() @@ -4922,7 +4921,6 @@ public void test_frameclause_subQueries_frmInSubQry_46() windowQueryTest(); } - @NotYetSupported(Modes.NPE) @DrillTest("lag_func/lag_Fn_82") @Test public void test_lag_func_lag_Fn_82() @@ -4930,7 +4928,6 @@ public void test_lag_func_lag_Fn_82() windowQueryTest(); } - @NotYetSupported(Modes.NPE) @DrillTest("last_val/lastValFn_5") @Test public void test_last_val_lastValFn_5() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java index 43f2faa3f0ce..de94a2649766 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java @@ -83,7 +83,6 @@ enum Modes COLUMN_NOT_FOUND(DruidException.class, "CalciteContextException.*Column.*not found in any table"), NULLS_FIRST_LAST(DruidException.class, "NULLS (FIRST|LAST)"), BIGINT_TO_DATE(DruidException.class, "BIGINT to type (DATE|TIME)"), - NPE(DruidException.class, "java.lang.NullPointerException"), AGGREGATION_NOT_SUPPORT_TYPE(DruidException.class, "Aggregation \\[(MIN|MAX)\\] does not support type \\[STRING\\]"), ALLDATA_CSV(DruidException.class, "allData.csv"), BIGINT_TIME_COMPARE(DruidException.class, "Cannot apply '.' to arguments of type"), From e30790e013912a46fb7000e8124e9e97fcfcb848 Mon Sep 17 00:00:00 2001 From: Rishabh Singh <6513075+findingrish@users.noreply.github.com> Date: Wed, 24 Apr 2024 22:22:53 +0530 Subject: [PATCH 03/31] Introduce Segment Schema Publishing and Polling for Efficient Datasource Schema Building (#15817) Issue: #14989 The initial step in optimizing segment metadata was to centralize the construction of datasource schema in the Coordinator (#14985). Thereafter, we addressed the problem of publishing schema for realtime segments (#15475). Subsequently, our goal is to eliminate the requirement for regularly executing queries to obtain segment schema information. This is the final change which involves publishing segment schema for finalized segments from task and periodically polling them in the Coordinator. 
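As the new classes in the diffstat below suggest (FingerprintGenerator, SchemaPayload, SegmentSchemaManager, SegmentSchemaCache), the approach deduplicates schemas by fingerprint: a task hashes a finalized segment's column signature, the metadata store keeps one payload row per distinct schema, and the Coordinator polls those rows instead of issuing segment metadata queries. The sketch below only illustrates that deduplication pattern; the class and method names (SchemaFingerprinter, publish, poll) and the in-memory maps are assumptions made for this example and are not the classes added by this patch.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Illustrative sketch only: fingerprint a segment's column signature so that
 * many segments sharing one schema store a single payload, which a
 * Coordinator-side cache can then look up without a segment metadata query.
 */
public class SchemaFingerprinter
{
  // fingerprint -> serialized schema payload (stands in for the schema table)
  private final Map<String, String> schemaTable = new ConcurrentHashMap<>();
  // segmentId -> fingerprint (stands in for the segment-to-schema mapping)
  private final Map<String, String> segmentToFingerprint = new ConcurrentHashMap<>();

  /** Task side: publish a finalized segment's schema, deduplicated by fingerprint. */
  public void publish(String segmentId, LinkedHashMap<String, String> columnTypes)
  {
    final String payload = serialize(columnTypes);
    final String fingerprint = sha256(payload);
    schemaTable.putIfAbsent(fingerprint, payload);     // one row per distinct schema
    segmentToFingerprint.put(segmentId, fingerprint);  // many segments may share it
  }

  /** Coordinator side: resolve a segment's schema from the published rows. */
  public String poll(String segmentId)
  {
    final String fingerprint = segmentToFingerprint.get(segmentId);
    return fingerprint == null ? null : schemaTable.get(fingerprint);
  }

  private static String serialize(LinkedHashMap<String, String> columnTypes)
  {
    // Column order matters for the fingerprint, hence the LinkedHashMap.
    final StringBuilder sb = new StringBuilder();
    columnTypes.forEach((column, type) -> sb.append(column).append(':').append(type).append(';'));
    return sb.toString();
  }

  private static String sha256(String payload)
  {
    try {
      final MessageDigest digest = MessageDigest.getInstance("SHA-256");
      final byte[] hash = digest.digest(payload.getBytes(StandardCharsets.UTF_8));
      final StringBuilder hex = new StringBuilder(hash.length * 2);
      for (byte b : hash) {
        hex.append(String.format("%02x", b));
      }
      return hex.toString();
    }
    catch (NoSuchAlgorithmException e) {
      throw new IllegalStateException(e);
    }
  }
}

In the real patch the two maps correspond to metadata store tables and the Coordinator-side SegmentSchemaCache, and the SegmentSchemaBackFillQueue covers segments whose schema has not yet been published (tracked by the new metadatacache/backfill/count metric).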
--- .github/workflows/standard-its.yml | 4 +- ...ruidSchemaInternRowSignatureBenchmark.java | 4 +- docs/configuration/index.md | 3 +- docs/operations/metrics.md | 6 + .../MaterializedViewSupervisorTest.java | 27 +- .../DatasourceOptimizerTest.java | 18 +- .../storage/sqlserver/SQLServerConnector.java | 9 +- .../sqlserver/SQLServerConnectorTest.java | 7 +- .../apache/druid/msq/exec/ControllerImpl.java | 8 +- ...SegmentGeneratorFrameProcessorFactory.java | 5 +- .../druid/msq/exec/ControllerImplTest.java | 8 +- .../storage/mysql/MySQLConnector.java | 6 +- .../storage/mysql/MySQLConnectorTest.java | 36 +- .../postgresql/PostgreSQLConnector.java | 6 +- .../postgresql/PostgreSQLConnectorTest.java | 30 +- .../MetadataStorageUpdaterJobSpec.java | 1 + .../druid/indexing/common/TaskToolbox.java | 2 +- .../common/actions/SegmentInsertAction.java | 18 +- .../SegmentTransactionalAppendAction.java | 29 +- .../SegmentTransactionalInsertAction.java | 31 +- .../SegmentTransactionalReplaceAction.java | 23 +- .../common/task/AbstractBatchIndexTask.java | 10 +- .../AppenderatorDriverRealtimeIndexTask.java | 3 +- .../common/task/BatchAppenderators.java | 10 +- .../druid/indexing/common/task/IndexTask.java | 14 +- .../common/task/InputSourceProcessor.java | 10 +- .../parallel/ParallelIndexSupervisorTask.java | 24 +- ...icSegmentMergeParallelIndexTaskRunner.java | 14 +- .../PartialGenericSegmentMergeTask.java | 20 +- .../parallel/PartialSegmentGenerateTask.java | 6 +- .../parallel/PartialSegmentMergeTask.java | 52 +- .../batch/parallel/PushedSegmentsReport.java | 17 +- .../SinglePhaseParallelIndexTaskRunner.java | 11 +- .../batch/parallel/SinglePhaseSubTask.java | 22 +- .../seekablestream/SequenceMetadata.java | 16 +- .../druid/indexing/common/TestIndexTask.java | 15 + .../actions/LocalTaskActionClientTest.java | 2 +- .../actions/RetrieveSegmentsActionsTest.java | 4 +- .../actions/SegmentAllocateActionTest.java | 22 +- .../actions/SegmentInsertActionTest.java | 4 +- .../SegmentTransactionalInsertActionTest.java | 12 +- .../common/actions/TaskActionTestKit.java | 16 +- ...penderatorDriverRealtimeIndexTaskTest.java | 24 +- .../common/task/AppenderatorsTest.java | 10 +- .../common/task/BatchAppenderatorsTest.java | 2 + .../task/CompactionTaskParallelRunTest.java | 67 +- .../common/task/CompactionTaskRunTest.java | 268 ++++-- .../common/task/CompactionTaskTest.java | 2 + .../indexing/common/task/IndexTaskTest.java | 203 +++- .../common/task/IngestionTestBase.java | 60 +- .../task/KillUnusedSegmentsTaskTest.java | 27 +- .../common/task/TestAppenderatorsManager.java | 18 +- ...bstractMultiPhaseParallelIndexingTest.java | 10 +- ...stractParallelIndexSupervisorTaskTest.java | 13 +- ...rtitionAdjustingCorePartitionSizeTest.java | 4 +- ...rtitionMultiPhaseParallelIndexingTest.java | 12 +- ...aseParallelIndexingWithNullColumnTest.java | 8 +- .../ParallelIndexSupervisorTaskKillTest.java | 4 +- ...rallelIndexSupervisorTaskResourceTest.java | 7 +- .../batch/parallel/PartialCompactionTest.java | 45 +- .../PartialGenericSegmentMergeTaskTest.java | 9 +- ...rtitionAdjustingCorePartitionSizeTest.java | 4 +- ...rtitionMultiPhaseParallelIndexingTest.java | 44 +- .../SinglePhaseParallelIndexingTest.java | 26 +- .../task/concurrent/ActionsTestTask.java | 4 +- .../ConcurrentReplaceAndAppendTest.java | 7 +- .../indexing/overlord/RealtimeishTask.java | 6 +- .../overlord/RemoteTaskRunnerTest.java | 47 + .../indexing/overlord/TaskLifecycleTest.java | 12 +- .../overlord/TaskLockBoxConcurrencyTest.java | 12 +- 
.../indexing/overlord/TaskLockboxTest.java | 21 +- .../indexing/overlord/TaskQueueScaleTest.java | 9 +- .../overlord/TestTaskToolboxFactory.java | 3 +- .../SeekableStreamIndexTaskTestBase.java | 8 +- .../seekablestream/SequenceMetadataTest.java | 79 +- ...TestIndexerMetadataStorageCoordinator.java | 26 +- ...r-compose.cds-coordinator-smq-disabled.yml | 110 +++ ...mpose.cds-task-schema-publish-disabled.yml | 111 +++ ...-compose.centralized-datasource-schema.yml | 6 +- integration-tests/docker/druid.sh | 2 +- ...s-coordinator-smq-disabled-sample-data.sql | 20 + ...sk-schema-publish-disabled-sample-data.sql | 20 + .../script/docker_compose_args.sh | 8 + .../src/main/resources/log4j2.xml | 9 + .../org/apache/druid/tests/TestNGGroup.java | 4 + .../tests/indexer/ITAppendBatchIndexTest.java | 2 +- ...penderatorDriverRealtimeIndexTaskTest.java | 2 +- .../ITBestEffortRollupParallelIndexTest.java | 2 +- ...CombiningInputSourceParallelIndexTest.java | 2 +- .../indexer/ITCompactionSparseColumnTest.java | 2 +- .../tests/indexer/ITCompactionTaskTest.java | 2 +- .../tests/indexer/ITHttpInputSourceTest.java | 2 +- .../druid/tests/indexer/ITIndexerTest.java | 2 +- ...ServiceNonTransactionalSerializedTest.java | 2 +- ...ingServiceTransactionalSerializedTest.java | 2 +- .../ITLocalInputSourceAllInputFormatTest.java | 2 +- .../indexer/ITOverwriteBatchIndexTest.java | 2 +- .../ITPerfectRollupParallelIndexTest.java | 2 +- .../indexer/ITRealtimeIndexTaskTest.java | 2 +- .../ITSystemTableBatchIndexTaskTest.java | 2 +- .../metadata/MetadataStorageConnector.java | 5 + .../metadata/MetadataStorageTablesConfig.java | 14 +- .../druid/segment/DataSegmentWithSchema.java | 57 ++ .../segment/DataSegmentsWithSchemas.java | 91 ++ .../apache/druid/segment/SchemaPayload.java | 97 ++ .../druid/segment/SchemaPayloadPlus.java | 85 ++ .../apache/druid/segment/SegmentMetadata.java | 87 ++ .../druid/segment/SegmentSchemaMapping.java | 141 +++ .../MetadataStorageTablesConfigTest.java | 15 + .../TestMetadataStorageConnector.java | 6 + .../TestMetadataStorageTablesConfig.java | 1 + .../column/DataSegmentsWithSchemasTest.java | 110 +++ .../segment/column/SchemaPayloadPlusTest.java | 58 ++ .../segment/column/SchemaPayloadTest.java | 62 ++ .../column/SegmentSchemaMappingTest.java | 110 +++ .../test/resources/test.runtime.properties | 1 + .../indexing/overlord/DataSourceMetadata.java | 3 +- .../IndexerMetadataStorageCoordinator.java | 37 +- .../IndexerSQLMetadataStorageCoordinator.java | 326 +++++-- .../druid/metadata/SQLMetadataConnector.java | 153 ++- .../metadata/SQLMetadataSegmentPublisher.java | 2 +- .../metadata/SqlSegmentsMetadataManager.java | 165 +++- .../SqlSegmentsMetadataManagerProvider.java | 15 +- .../metadata/SqlSegmentsMetadataQuery.java | 101 +- .../storage/derby/DerbyConnector.java | 11 +- .../AbstractSegmentMetadataCache.java | 317 ++++--- .../CentralizedDatasourceSchemaConfig.java | 48 +- .../CoordinatorSegmentMetadataCache.java | 380 +++++++- .../metadata/FingerprintGenerator.java | 76 ++ .../metadata/SegmentMetadataCacheConfig.java | 14 + .../metadata/SegmentSchemaBackFillQueue.java | 195 ++++ .../segment/metadata/SegmentSchemaCache.java | 341 +++++++ .../metadata/SegmentSchemaManager.java | 432 +++++++++ .../appenderator/AppenderatorImpl.java | 58 +- .../realtime/appenderator/Appenderators.java | 18 +- .../appenderator/AppenderatorsManager.java | 9 +- .../appenderator/BaseAppenderatorDriver.java | 10 +- .../appenderator/BatchAppenderator.java | 59 +- .../appenderator/BatchAppenderatorDriver.java | 7 +- 
.../DefaultOfflineAppenderatorFactory.java | 18 +- ...DummyForInjectionAppenderatorsManager.java | 9 +- .../PeonAppenderatorsManager.java | 18 +- .../SegmentsAndCommitMetadata.java | 13 +- .../appenderator/StreamAppenderator.java | 50 +- .../StreamAppenderatorDriver.java | 6 +- .../appenderator/TaskSegmentSchemaUtil.java | 56 ++ .../TransactionalSegmentPublisher.java | 10 +- .../UnifiedIndexerAppenderatorsManager.java | 18 +- .../server/coordinator/DruidCoordinator.java | 40 +- .../coordinator/DruidCoordinatorConfig.java | 11 + .../server/coordinator/MetadataManager.java | 11 +- .../KillUnreferencedSegmentSchemaDuty.java | 93 ++ .../coordinator/duty/MetadataCleanupDuty.java | 7 +- .../druid/server/http/DataSegmentPlus.java | 33 +- .../druid/server/http/MetadataResource.java | 61 +- ...exerSQLMetadataStorageCoordinatorTest.java | 894 +++++------------- ...orageCoordinatorSchemaPersistenceTest.java | 476 ++++++++++ ...SqlMetadataStorageCoordinatorTestBase.java | 563 +++++++++++ ...etadataConnectorSchemaPersistenceTest.java | 145 +++ .../metadata/SQLMetadataConnectorTest.java | 129 +-- .../metadata/SQLMetadataRuleManagerTest.java | 1 + ...qlSegmentsMetadataManagerProviderTest.java | 9 +- ...SegmentsMetadataManagerSchemaPollTest.java | 270 ++++++ .../SqlSegmentsMetadataManagerTest.java | 28 +- .../SqlSegmentsMetadataManagerTestBase.java | 113 +++ .../druid/metadata/TestDerbyConnector.java | 31 +- ...inatorSegmentDataCacheConcurrencyTest.java | 74 +- .../CoordinatorSegmentMetadataCacheTest.java | 489 +++++++--- ...rdinatorSegmentMetadataCacheTestBase.java} | 45 +- .../metadata/FingerprintGeneratorTest.java | 128 +++ ...java => SegmentMetadataCacheTestBase.java} | 2 +- .../SegmentSchemaBackFillQueueTest.java | 129 +++ .../metadata/SegmentSchemaCacheTest.java | 112 +++ .../metadata/SegmentSchemaManagerTest.java | 243 +++++ .../metadata/SegmentSchemaTestUtils.java | 272 ++++++ .../TestSegmentMetadataQueryWalker.java | 4 +- ...edSegmensSinksBatchAppenderatorTester.java | 4 +- ...mentsSinksBatchAppenderatorDriverTest.java | 6 +- ...enAndClosedSegmentsAppenderatorTester.java | 7 +- ...edSegmentsBatchAppenderatorDriverTest.java | 6 +- ...ndClosedSegmentsBatchAppenderatorTest.java | 1 - .../StreamAppenderatorDriverFailTest.java | 2 +- .../StreamAppenderatorDriverTest.java | 4 +- ...nifiedIndexerAppenderatorsManagerTest.java | 4 +- .../coordinator/DruidCoordinatorTest.java | 22 +- .../TestDruidCoordinatorConfig.java | 48 +- ...KillUnreferencedSegmentSchemaDutyTest.java | 420 ++++++++ .../duty/KillUnusedSegmentsTest.java | 5 +- .../CoordinatorSimulationBuilder.java | 6 +- .../server/http/DataSegmentPlusTest.java | 4 +- .../server/http/MetadataResourceTest.java | 2 +- .../java/org/apache/druid/cli/CliBroker.java | 2 + .../org/apache/druid/cli/CliCoordinator.java | 39 +- .../java/org/apache/druid/cli/CliIndexer.java | 6 + .../apache/druid/cli/CliMiddleManager.java | 2 + .../org/apache/druid/cli/CliOverlord.java | 13 + .../java/org/apache/druid/cli/CliPeon.java | 28 +- .../org/apache/druid/cli/CreateTables.java | 1 + .../org/apache/druid/cli/ServerRunnable.java | 34 + ...ibleCentralizedDatasourceSchemaConfig.java | 71 ++ .../calcite/planner/CalcitePlannerModule.java | 2 + .../schema/BrokerSegmentMetadataCache.java | 55 +- .../BrokerSegmentMetadataCacheConfig.java | 10 - ...erSegmentMetadataCacheConcurrencyTest.java | 9 +- .../BrokerSegmentMetadataCacheTest.java | 31 +- ...> BrokerSegmentMetadataCacheTestBase.java} | 4 +- .../schema/DruidSchemaNoDataInitTest.java | 4 +- 
.../sql/calcite/schema/SystemSchemaTest.java | 4 +- .../sql/calcite/util/QueryFrameworkUtils.java | 4 +- website/.spelling | 1 + 210 files changed, 9306 insertions(+), 1889 deletions(-) create mode 100644 integration-tests/docker/docker-compose.cds-coordinator-smq-disabled.yml create mode 100644 integration-tests/docker/docker-compose.cds-task-schema-publish-disabled.yml create mode 100644 integration-tests/docker/test-data/cds-coordinator-smq-disabled-sample-data.sql create mode 100644 integration-tests/docker/test-data/cds-task-schema-publish-disabled-sample-data.sql create mode 100644 processing/src/main/java/org/apache/druid/segment/DataSegmentWithSchema.java create mode 100644 processing/src/main/java/org/apache/druid/segment/DataSegmentsWithSchemas.java create mode 100644 processing/src/main/java/org/apache/druid/segment/SchemaPayload.java create mode 100644 processing/src/main/java/org/apache/druid/segment/SchemaPayloadPlus.java create mode 100644 processing/src/main/java/org/apache/druid/segment/SegmentMetadata.java create mode 100644 processing/src/main/java/org/apache/druid/segment/SegmentSchemaMapping.java create mode 100644 processing/src/test/java/org/apache/druid/segment/column/DataSegmentsWithSchemasTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadPlusTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadTest.java create mode 100644 processing/src/test/java/org/apache/druid/segment/column/SegmentSchemaMappingTest.java create mode 100644 server/src/main/java/org/apache/druid/segment/metadata/FingerprintGenerator.java create mode 100644 server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueue.java create mode 100644 server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaCache.java create mode 100644 server/src/main/java/org/apache/druid/segment/metadata/SegmentSchemaManager.java create mode 100644 server/src/main/java/org/apache/druid/segment/realtime/appenderator/TaskSegmentSchemaUtil.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchemaDuty.java create mode 100644 server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorSchemaPersistenceTest.java create mode 100644 server/src/test/java/org/apache/druid/metadata/IndexerSqlMetadataStorageCoordinatorTestBase.java create mode 100644 server/src/test/java/org/apache/druid/metadata/SQLMetadataConnectorSchemaPersistenceTest.java create mode 100644 server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java create mode 100644 server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTestBase.java rename server/src/test/java/org/apache/druid/segment/metadata/{CoordinatorSegmentMetadataCacheCommon.java => CoordinatorSegmentMetadataCacheTestBase.java} (61%) create mode 100644 server/src/test/java/org/apache/druid/segment/metadata/FingerprintGeneratorTest.java rename server/src/test/java/org/apache/druid/segment/metadata/{SegmentMetadataCacheCommon.java => SegmentMetadataCacheTestBase.java} (99%) create mode 100644 server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaBackFillQueueTest.java create mode 100644 server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaCacheTest.java create mode 100644 server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaManagerTest.java create mode 100644 
server/src/test/java/org/apache/druid/segment/metadata/SegmentSchemaTestUtils.java create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchemaDutyTest.java create mode 100644 services/src/test/java/org/apache/druid/cli/TestValidateIncompatibleCentralizedDatasourceSchemaConfig.java rename sql/src/test/java/org/apache/druid/sql/calcite/schema/{BrokerSegmentMetadataCacheCommon.java => BrokerSegmentMetadataCacheTestBase.java} (95%) diff --git a/.github/workflows/standard-its.yml b/.github/workflows/standard-its.yml index bc15a6ee16d9..69c9e6158693 100644 --- a/.github/workflows/standard-its.yml +++ b/.github/workflows/standard-its.yml @@ -47,7 +47,7 @@ jobs: strategy: fail-fast: false matrix: - testing_group: [batch-index, input-format, input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-index-slow, kafka-transactional-index, kafka-transactional-index-slow, kafka-data-format, ldap-security, realtime-index, append-ingestion, compaction] + testing_group: [batch-index, input-format, input-source, perfect-rollup-parallel-batch-index, kafka-index, kafka-index-slow, kafka-transactional-index, kafka-transactional-index-slow, kafka-data-format, ldap-security, realtime-index, append-ingestion, compaction, cds-task-schema-publish-disabled, cds-coordinator-smq-disabled] uses: ./.github/workflows/reusable-standard-its.yml if: ${{ needs.changes.outputs.core == 'true' || needs.changes.outputs.common-extensions == 'true' }} with: @@ -196,6 +196,6 @@ jobs: with: build_jdk: 8 runtime_jdk: 8 - testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-datasource-schema + testing_groups: -DexcludedGroups=batch-index,input-format,input-source,perfect-rollup-parallel-batch-index,kafka-index,query,query-retry,query-error,realtime-index,security,ldap-security,s3-deep-storage,gcs-deep-storage,azure-deep-storage,hdfs-deep-storage,s3-ingestion,kinesis-index,kinesis-data-format,kafka-transactional-index,kafka-index-slow,kafka-transactional-index-slow,kafka-data-format,hadoop-s3-to-s3-deep-storage,hadoop-s3-to-hdfs-deep-storage,hadoop-azure-to-azure-deep-storage,hadoop-azure-to-hdfs-deep-storage,hadoop-gcs-to-gcs-deep-storage,hadoop-gcs-to-hdfs-deep-storage,aliyun-oss-deep-storage,append-ingestion,compaction,high-availability,upgrade,shuffle-deep-store,custom-coordinator-duties,centralized-datasource-schema,cds-task-schema-publish-disabled,cds-coordinator-smq-disabled use_indexer: ${{ matrix.indexer }} group: other diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DruidSchemaInternRowSignatureBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DruidSchemaInternRowSignatureBenchmark.java index dbf9d39c2a17..42017bcced1e 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DruidSchemaInternRowSignatureBenchmark.java +++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/DruidSchemaInternRowSignatureBenchmark.java @@ -32,6 +32,7 @@ import org.apache.druid.query.metadata.metadata.SegmentAnalysis; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.join.JoinableFactory; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.server.SegmentManager; import org.apache.druid.server.coordination.DruidServerMetadata; @@ -91,7 +92,8 @@ public SegmentMetadataCacheForBenchmark( brokerInternalQueryConfig, new NoopServiceEmitter(), new PhysicalDatasourceMetadataFactory(joinableFactory, segmentManager), - new NoopCoordinatorClient() + new NoopCoordinatorClient(), + CentralizedDatasourceSchemaConfig.create() ); } diff --git a/docs/configuration/index.md b/docs/configuration/index.md index dd02eda80695..5f4c9902360c 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -878,7 +878,7 @@ These Coordinator static configurations can be defined in the `coordinator/runti |`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for the `loadqueuepeon`, which manages the load and drop of segments.|`PT0.050S` (50 ms)| |`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator service should act like an Overlord as well. This configuration allows users to simplify a Druid cluster by not having to deploy any standalone Overlord services. If set to true, then Overlord console is available at `http://coordinator-host:port/console.html` and be sure to set `druid.coordinator.asOverlord.overlordService` also.|false| |`druid.coordinator.asOverlord.overlordService`| Required, if `druid.coordinator.asOverlord.enabled` is `true`. This must be same value as `druid.service` on standalone Overlord services and `druid.selectors.indexing.serviceName` on Middle Managers.|NULL| -|`druid.centralizedDatasourceSchema.enabled`|Boolean flag for enabling datasource schema building on the Coordinator.|false| +|`druid.centralizedDatasourceSchema.enabled`|Boolean flag for enabling datasource schema building on the Coordinator. Note, when using MiddleManager to launch task, set `druid.indexer.fork.property.druid.centralizedDatasourceSchema.enabled` in MiddleManager runtime config. |false| ##### Metadata management @@ -1435,6 +1435,7 @@ MiddleManagers pass their configurations down to their child peons. The MiddleMa |`druid.worker.baseTaskDirs`|List of base temporary working directories, one of which is assigned per task in a round-robin fashion. This property can be used to allow usage of multiple disks for indexing. This property is recommended in place of and takes precedence over `${druid.indexer.task.baseTaskDir}`. If this configuration is not set, `${druid.indexer.task.baseTaskDir}` is used. For example, `druid.worker.baseTaskDirs=[\"PATH1\",\"PATH2\",...]`.|null| |`druid.worker.baseTaskDirSize`|The total amount of bytes that can be used by tasks on any single task dir. This value is treated symmetrically across all directories, that is, if this is 500 GB and there are 3 `baseTaskDirs`, then each of those task directories is assumed to allow for 500 GB to be used and a total of 1.5 TB will potentially be available across all tasks. 
The actual amount of memory assigned to each task is discussed in [Configuring task storage sizes](../ingestion/tasks.md#configuring-task-storage-sizes)|`Long.MAX_VALUE`| |`druid.worker.category`|A string to name the category that the MiddleManager node belongs to.|`_default_worker_category`| +|`druid.indexer.fork.property.druid.centralizedDatasourceSchema.enabled`| This config should be set when CentralizedDatasourceSchema feature is enabled. |false| #### Peon processing diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index dde1c7f64f89..a877d8b8522d 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -75,6 +75,12 @@ Most metric values reset each emission period, as specified in `druid.monitoring |`metadatacache/schemaPoll/count`|Number of coordinator polls to fetch datasource schema.|| |`metadatacache/schemaPoll/failed`|Number of failed coordinator polls to fetch datasource schema.|| |`metadatacache/schemaPoll/time`|Time taken for coordinator polls to fetch datasource schema.|| +|`metadatacache/backfill/count`|Number of segments for which schema was back filled in the database.|`dataSource`| +|`schemacache/realtime/count`|Number of realtime segments for which schema is cached.||Depends on the number of realtime segments.| +|`schemacache/finalizedSegmentMetadata/count`|Number of finalized segments for which schema metadata is cached.||Depends on the number of segments in the cluster.| +|`schemacache/finalizedSchemaPayload/count`|Number of finalized segment schema cached.||Depends on the number of distinct schema in the cluster.| +|`schemacache/inTransitSMQResults/count`|Number of segments for which schema was fetched by executing segment metadata query.||Eventually it should be 0.| +|`schemacache/inTransitSMQPublishedResults/count`|Number of segments for which schema is cached after back filling in the database.||Eventually it should be 0.| |`serverview/sync/healthy`|Sync status of the Broker with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. This metric can be used in conjunction with `serverview/sync/unstableTime` to debug slow startup of Brokers.|`server`, `tier`|1 for fully synced servers, 0 otherwise| |`serverview/sync/unstableTime`|Time in milliseconds for which the Broker has been failing to sync with a segment-loading server. 
Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.| |`subquery/rowLimit/count`|Number of subqueries whose results are materialized as rows (Java objects on heap).|This metric is only available if the `SubqueryCountStatsMonitor` module is included.| | diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java index 64070b11dc87..80a5408cfe3f 100644 --- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java +++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java @@ -50,6 +50,8 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.security.AuthorizerMapper; @@ -88,19 +90,28 @@ public class MaterializedViewSupervisorTest private String derivativeDatasourceName; private MaterializedViewSupervisorSpec spec; private final ObjectMapper objectMapper = TestHelper.makeJsonMapper(); + private SegmentSchemaManager segmentSchemaManager; @Before public void setUp() { TestDerbyConnector derbyConnector = derbyConnectorRule.getConnector(); derbyConnector.createDataSourceTable(); + derbyConnector.createSegmentSchemasTable(); derbyConnector.createSegmentTable(); taskStorage = EasyMock.createMock(TaskStorage.class); taskMaster = EasyMock.createMock(TaskMaster.class); + segmentSchemaManager = new SegmentSchemaManager( + derbyConnectorRule.metadataTablesConfigSupplier().get(), + objectMapper, + derbyConnector + ); indexerMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnector + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); metadataSupervisorManager = EasyMock.createMock(MetadataSupervisorManager.class); sqlSegmentsMetadataManager = EasyMock.createMock(SqlSegmentsMetadataManager.class); @@ -142,8 +153,8 @@ public void testCheckSegments() throws IOException final Interval day1 = baseSegments.get(0).getInterval(); final Interval day2 = new Interval(day1.getStart().plusDays(1), day1.getEnd().plusDays(1)); - indexerMetadataStorageCoordinator.commitSegments(new HashSet<>(baseSegments)); - indexerMetadataStorageCoordinator.commitSegments(derivativeSegments); + indexerMetadataStorageCoordinator.commitSegments(new HashSet<>(baseSegments), null); + indexerMetadataStorageCoordinator.commitSegments(derivativeSegments, null); EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes(); @@ -165,8 +176,8 @@ public void testSubmitTasksDoesNotFailIfTaskAlreadyExists() 
throws IOException Set baseSegments = Sets.newHashSet(createBaseSegments()); Set derivativeSegments = Sets.newHashSet(createDerivativeSegments()); - indexerMetadataStorageCoordinator.commitSegments(baseSegments); - indexerMetadataStorageCoordinator.commitSegments(derivativeSegments); + indexerMetadataStorageCoordinator.commitSegments(baseSegments, null); + indexerMetadataStorageCoordinator.commitSegments(derivativeSegments, null); EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes(); @@ -187,8 +198,8 @@ public void testSubmitTasksFailsIfTaskCannotBeAdded() throws IOException Set baseSegments = Sets.newHashSet(createBaseSegments()); Set derivativeSegments = Sets.newHashSet(createDerivativeSegments()); - indexerMetadataStorageCoordinator.commitSegments(baseSegments); - indexerMetadataStorageCoordinator.commitSegments(derivativeSegments); + indexerMetadataStorageCoordinator.commitSegments(baseSegments, null); + indexerMetadataStorageCoordinator.commitSegments(derivativeSegments, null); EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes(); @@ -211,7 +222,7 @@ public void testSubmitTasksFailsIfTaskCannotBeAdded() throws IOException public void testCheckSegmentsAndSubmitTasks() throws IOException { Set baseSegments = Collections.singleton(createBaseSegments().get(0)); - indexerMetadataStorageCoordinator.commitSegments(baseSegments); + indexerMetadataStorageCoordinator.commitSegments(baseSegments, null); EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes(); diff --git a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java index b7874eae6d20..bb6d649f70eb 100644 --- a/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java +++ b/extensions-contrib/materialized-view-selection/src/test/java/org/apache/druid/query/materializedview/DatasourceOptimizerTest.java @@ -52,6 +52,8 @@ import org.apache.druid.query.topn.TopNQuery; import org.apache.druid.query.topn.TopNQueryBuilder; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.appenderator.SegmentSchemas; import org.apache.druid.server.coordination.DruidServerMetadata; import org.apache.druid.server.coordination.ServerType; @@ -89,12 +91,14 @@ public class DatasourceOptimizerTest extends CuratorTestBase private IndexerSQLMetadataStorageCoordinator metadataStorageCoordinator; private BatchServerInventoryView baseView; private BrokerServerView brokerServerView; + private SegmentSchemaManager segmentSchemaManager; @Before public void setUp() throws Exception { TestDerbyConnector derbyConnector = 
derbyConnectorRule.getConnector(); derbyConnector.createDataSourceTable(); + derbyConnector.createSegmentSchemasTable(); derbyConnector.createSegmentTable(); MaterializedViewConfig viewConfig = new MaterializedViewConfig(); jsonMapper = TestHelper.makeJsonMapper(); @@ -106,10 +110,18 @@ public void setUp() throws Exception jsonMapper, derbyConnector ); + segmentSchemaManager = new SegmentSchemaManager( + derbyConnectorRule.metadataTablesConfigSupplier().get(), + jsonMapper, + derbyConnector + ); + metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnector + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); setupServerAndCurator(); @@ -167,7 +179,7 @@ public void testOptimize() throws InterruptedException 1024 * 1024 ); try { - metadataStorageCoordinator.commitSegments(Sets.newHashSet(segment)); + metadataStorageCoordinator.commitSegments(Sets.newHashSet(segment), null); announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper); } catch (IOException e) { @@ -192,7 +204,7 @@ public void testOptimize() throws InterruptedException 1024 ); try { - metadataStorageCoordinator.commitSegments(Sets.newHashSet(segment)); + metadataStorageCoordinator.commitSegments(Sets.newHashSet(segment), null); announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper); } catch (IOException e) { diff --git a/extensions-contrib/sqlserver-metadata-storage/src/main/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnector.java b/extensions-contrib/sqlserver-metadata-storage/src/main/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnector.java index 523214502e33..42787a0733cd 100644 --- a/extensions-contrib/sqlserver-metadata-storage/src/main/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnector.java +++ b/extensions-contrib/sqlserver-metadata-storage/src/main/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnector.java @@ -27,6 +27,7 @@ import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.skife.jdbi.v2.Binding; import org.skife.jdbi.v2.ColonPrefixNamedParamStatementRewriter; import org.skife.jdbi.v2.DBI; @@ -133,9 +134,13 @@ public class SQLServerConnector extends SQLMetadataConnector )); @Inject - public SQLServerConnector(Supplier config, Supplier dbTables) + public SQLServerConnector( + Supplier config, + Supplier dbTables, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig + ) { - super(config, dbTables); + super(config, dbTables, centralizedDatasourceSchemaConfig); final BasicDataSource datasource = getDatasource(); datasource.setDriverClassLoader(getClass().getClassLoader()); diff --git a/extensions-contrib/sqlserver-metadata-storage/src/test/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnectorTest.java b/extensions-contrib/sqlserver-metadata-storage/src/test/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnectorTest.java index ab3d9c37fe23..1c44edd18fe5 100644 --- a/extensions-contrib/sqlserver-metadata-storage/src/test/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnectorTest.java +++ b/extensions-contrib/sqlserver-metadata-storage/src/test/java/org/apache/druid/metadata/storage/sqlserver/SQLServerConnectorTest.java @@ -22,6 
+22,7 @@ import com.google.common.base.Suppliers; import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.junit.Assert; import org.junit.Test; @@ -38,7 +39,8 @@ public void testIsTransientException() Suppliers.ofInstance(new MetadataStorageConnectorConfig()), Suppliers.ofInstance( MetadataStorageTablesConfig.fromBase(null) - ) + ), + CentralizedDatasourceSchemaConfig.create() ); Assert.assertTrue(connector.isTransientException(new SQLException("Resource Failure!", "08DIE"))); @@ -59,7 +61,8 @@ public void testLimitClause() Suppliers.ofInstance(new MetadataStorageConnectorConfig()), Suppliers.ofInstance( MetadataStorageTablesConfig.fromBase(null) - ) + ), + CentralizedDatasourceSchemaConfig.create() ); Assert.assertEquals("FETCH NEXT 100 ROWS ONLY", connector.limitClause(100)); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 81e6ddd88caa..682e2b484e4e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1584,9 +1584,9 @@ private static TaskAction createAppendAction( ) { if (taskLockType.equals(TaskLockType.APPEND)) { - return SegmentTransactionalAppendAction.forSegments(segments); + return SegmentTransactionalAppendAction.forSegments(segments, null); } else if (taskLockType.equals(TaskLockType.SHARED)) { - return SegmentTransactionalInsertAction.appendAction(segments, null, null); + return SegmentTransactionalInsertAction.appendAction(segments, null, null, null); } else { throw DruidException.defensive("Invalid lock type [%s] received for append action", taskLockType); } @@ -1598,9 +1598,9 @@ private TaskAction createOverwriteAction( ) { if (taskLockType.equals(TaskLockType.REPLACE)) { - return SegmentTransactionalReplaceAction.create(segmentsWithTombstones); + return SegmentTransactionalReplaceAction.create(segmentsWithTombstones, null); } else if (taskLockType.equals(TaskLockType.EXCLUSIVE)) { - return SegmentTransactionalInsertAction.overwriteAction(null, segmentsWithTombstones); + return SegmentTransactionalInsertAction.overwriteAction(null, segmentsWithTombstones, null); } else { throw DruidException.defensive("Invalid lock type [%s] received for overwrite action", taskLockType); } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/processor/SegmentGeneratorFrameProcessorFactory.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/processor/SegmentGeneratorFrameProcessorFactory.java index 35176fbb1fb3..e925e1a1c028 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/processor/SegmentGeneratorFrameProcessorFactory.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/processor/SegmentGeneratorFrameProcessorFactory.java @@ -57,6 +57,7 @@ import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.TuningConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.Appenderator; import 
org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; import org.apache.druid.segment.realtime.appenderator.Appenderators; @@ -192,7 +193,9 @@ public Pair apply(ReadableInput readableInput) frameContext.indexMerger(), meters, parseExceptionHandler, - true + true, + // MSQ doesn't support CentralizedDatasourceSchema feature as of now. + CentralizedDatasourceSchemaConfig.create(false) ); return new SegmentGeneratorFrameProcessor( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerImplTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerImplTest.java index dc399e6623dd..41c3cff66a50 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerImplTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/ControllerImplTest.java @@ -64,7 +64,7 @@ public void setUp() public void test_performSegmentPublish_ok() throws IOException { final SegmentTransactionalInsertAction action = - SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null); + SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null, null); final TaskActionClient taskActionClient = EasyMock.mock(TaskActionClient.class); EasyMock.expect(taskActionClient.submit(action)).andReturn(SegmentPublishResult.ok(Collections.emptySet())); @@ -78,7 +78,7 @@ public void test_performSegmentPublish_ok() throws IOException public void test_performSegmentPublish_publishFail() throws IOException { final SegmentTransactionalInsertAction action = - SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null); + SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null, null); final TaskActionClient taskActionClient = EasyMock.mock(TaskActionClient.class); EasyMock.expect(taskActionClient.submit(action)).andReturn(SegmentPublishResult.fail("oops")); @@ -96,7 +96,7 @@ public void test_performSegmentPublish_publishFail() throws IOException public void test_performSegmentPublish_publishException() throws IOException { final SegmentTransactionalInsertAction action = - SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null); + SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null, null); final TaskActionClient taskActionClient = EasyMock.mock(TaskActionClient.class); EasyMock.expect(taskActionClient.submit(action)).andThrow(new ISE("oops")); @@ -114,7 +114,7 @@ public void test_performSegmentPublish_publishException() throws IOException public void test_performSegmentPublish_publishLockPreemptedException() throws IOException { final SegmentTransactionalInsertAction action = - SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null); + SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null, null); final TaskActionClient taskActionClient = EasyMock.mock(TaskActionClient.class); EasyMock.expect(taskActionClient.submit(action)).andThrow(new ISE("are not covered by locks")); diff --git a/extensions-core/mysql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/mysql/MySQLConnector.java b/extensions-core/mysql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/mysql/MySQLConnector.java index 91abb44380fa..5c4be5b084c3 100644 --- a/extensions-core/mysql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/mysql/MySQLConnector.java +++ 
b/extensions-core/mysql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/mysql/MySQLConnector.java @@ -30,6 +30,7 @@ import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.skife.jdbi.v2.DBI; import org.skife.jdbi.v2.Handle; import org.skife.jdbi.v2.util.StringMapper; @@ -62,10 +63,11 @@ public MySQLConnector( Supplier config, Supplier dbTables, MySQLConnectorSslConfig connectorSslConfig, - MySQLConnectorDriverConfig driverConfig + MySQLConnectorDriverConfig driverConfig, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { - super(config, dbTables); + super(config, dbTables, centralizedDatasourceSchemaConfig); log.info("Loading MySQL metadata connector driver %s", driverConfig.getDriverClassName()); tryLoadDriverClass(driverConfig.getDriverClassName(), true); diff --git a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLConnectorTest.java b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLConnectorTest.java index b60168d5f429..2fa0dbd6ffa7 100644 --- a/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLConnectorTest.java +++ b/extensions-core/mysql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/mysql/MySQLConnectorTest.java @@ -24,13 +24,20 @@ import com.mysql.jdbc.exceptions.MySQLTransientException; import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.junit.Assert; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.sql.SQLException; import java.sql.SQLTransientConnectionException; import java.sql.SQLTransientException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +@RunWith(Parameterized.class) public class MySQLConnectorTest { private static final MySQLConnectorDriverConfig MYSQL_DRIVER_CONFIG = new MySQLConnectorDriverConfig(); @@ -47,6 +54,23 @@ public String getDriverClassName() private static final Supplier TABLES_CONFIG_SUPPLIER = () -> MetadataStorageTablesConfig.fromBase(null); + private CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; + + public MySQLConnectorTest(CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig) + { + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + constructors.add(new Object[]{CentralizedDatasourceSchemaConfig.create()}); + CentralizedDatasourceSchemaConfig config = new CentralizedDatasourceSchemaConfig(); + config.setEnabled(true); + constructors.add(new Object[]{config}); + return constructors; + } @Test public void testIsExceptionTransientMySql() @@ -55,7 +79,8 @@ public void testIsExceptionTransientMySql() CONNECTOR_CONFIG_SUPPLIER, TABLES_CONFIG_SUPPLIER, new MySQLConnectorSslConfig(), - MYSQL_DRIVER_CONFIG + MYSQL_DRIVER_CONFIG, + centralizedDatasourceSchemaConfig ); Assert.assertTrue(connector.connectorIsTransientException(new MySQLTransientException())); 
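For readers following the connector changes above: every SQLMetadataConnector subclass now accepts a CentralizedDatasourceSchemaConfig and forwards it to the base constructor, and the parameterized connector tests run once with the feature off and once with it on. A minimal sketch of how the two variants are obtained, mirroring the test constructorFeeder() methods in this patch (the surrounding branch is illustrative only):

// Default variant; the parameterized tests treat this as the feature-off case.
CentralizedDatasourceSchemaConfig disabledConfig = CentralizedDatasourceSchemaConfig.create();

// Explicitly enabled variant, as built in the test feeders.
CentralizedDatasourceSchemaConfig enabledConfig = new CentralizedDatasourceSchemaConfig();
enabledConfig.setEnabled(true);

// Call sites in this patch branch on the flag before building schema metadata,
// e.g. PartialSegmentMergeTask only populates a SegmentSchemaMapping when enabled.
if (enabledConfig.isEnabled()) {
  // build a SegmentSchemaMapping for the pushed segments
}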
Assert.assertTrue(connector.connectorIsTransientException(new MySQLTransactionRollbackException())); @@ -78,7 +103,8 @@ public void testIsExceptionTransientNoMySqlClazz() CONNECTOR_CONFIG_SUPPLIER, TABLES_CONFIG_SUPPLIER, new MySQLConnectorSslConfig(), - MARIADB_DRIVER_CONFIG + MARIADB_DRIVER_CONFIG, + centralizedDatasourceSchemaConfig ); // no vendor specific for MariaDb, so should always be false Assert.assertFalse(connector.connectorIsTransientException(new MySQLTransientException())); @@ -100,7 +126,8 @@ public void testIsRootCausePacketTooBigException() CONNECTOR_CONFIG_SUPPLIER, TABLES_CONFIG_SUPPLIER, new MySQLConnectorSslConfig(), - MYSQL_DRIVER_CONFIG + MYSQL_DRIVER_CONFIG, + centralizedDatasourceSchemaConfig ); // The test method should return true only for @@ -127,7 +154,8 @@ public void testLimitClause() CONNECTOR_CONFIG_SUPPLIER, TABLES_CONFIG_SUPPLIER, new MySQLConnectorSslConfig(), - MYSQL_DRIVER_CONFIG + MYSQL_DRIVER_CONFIG, + centralizedDatasourceSchemaConfig ); Assert.assertEquals("LIMIT 100", connector.limitClause(100)); } diff --git a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java index bdbf71bddc5a..b44d57115cf7 100644 --- a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java +++ b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java @@ -29,6 +29,7 @@ import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.SQLMetadataConnector; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.postgresql.PGProperty; import org.postgresql.util.PSQLException; import org.skife.jdbi.v2.DBI; @@ -64,10 +65,11 @@ public PostgreSQLConnector( Supplier config, Supplier dbTables, PostgreSQLConnectorConfig connectorConfig, - PostgreSQLTablesConfig tablesConfig + PostgreSQLTablesConfig tablesConfig, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { - super(config, dbTables); + super(config, dbTables, centralizedDatasourceSchemaConfig); final BasicDataSource datasource = getDatasource(); // PostgreSQL driver is classloader isolated as part of the extension diff --git a/extensions-core/postgresql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnectorTest.java b/extensions-core/postgresql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnectorTest.java index 3b6c9aace521..304de62b515d 100644 --- a/extensions-core/postgresql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnectorTest.java +++ b/extensions-core/postgresql-metadata-storage/src/test/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnectorTest.java @@ -22,13 +22,37 @@ import com.google.common.base.Suppliers; import org.apache.druid.metadata.MetadataStorageConnectorConfig; import org.apache.druid.metadata.MetadataStorageTablesConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.junit.Assert; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.sql.SQLException; +import java.util.ArrayList; 
+import java.util.Collection; +import java.util.List; +@RunWith(Parameterized.class) public class PostgreSQLConnectorTest { + private CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; + + public PostgreSQLConnectorTest(CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig) + { + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + constructors.add(new Object[]{CentralizedDatasourceSchemaConfig.create()}); + CentralizedDatasourceSchemaConfig config = new CentralizedDatasourceSchemaConfig(); + config.setEnabled(true); + constructors.add(new Object[]{config}); + return constructors; + } @Test public void testIsTransientException() @@ -37,7 +61,8 @@ public void testIsTransientException() Suppliers.ofInstance(new MetadataStorageConnectorConfig()), Suppliers.ofInstance(MetadataStorageTablesConfig.fromBase(null)), new PostgreSQLConnectorConfig(), - new PostgreSQLTablesConfig() + new PostgreSQLTablesConfig(), + centralizedDatasourceSchemaConfig ); Assert.assertTrue(connector.isTransientException(new SQLException("bummer, connection problem", "08DIE"))); @@ -56,7 +81,8 @@ public void testLimitClause() Suppliers.ofInstance(new MetadataStorageConnectorConfig()), Suppliers.ofInstance(MetadataStorageTablesConfig.fromBase(null)), new PostgreSQLConnectorConfig(), - new PostgreSQLTablesConfig() + new PostgreSQLTablesConfig(), + centralizedDatasourceSchemaConfig ); Assert.assertEquals("LIMIT 100", connector.limitClause(100)); } diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/updater/MetadataStorageUpdaterJobSpec.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/updater/MetadataStorageUpdaterJobSpec.java index b262e9b4def6..dfbdc3e4e85f 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/updater/MetadataStorageUpdaterJobSpec.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/updater/MetadataStorageUpdaterJobSpec.java @@ -98,6 +98,7 @@ public MetadataStorageTablesConfig getMetadataStorageTablesConfig() null, null, null, + null, null ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java index 62d649894f87..46de3064f033 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/TaskToolbox.java @@ -349,7 +349,7 @@ public void publishSegments(Iterable segments) throws IOException for (final Collection segmentCollection : segmentMultimap.asMap().values()) { getTaskActionClient().submit( SegmentTransactionalInsertAction.appendAction( - ImmutableSet.copyOf(segmentCollection), null, null + ImmutableSet.copyOf(segmentCollection), null, null, null ) ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentInsertAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentInsertAction.java index 9b7f964da4ff..478e0b89d3d9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentInsertAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentInsertAction.java @@ -24,9 +24,11 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.collect.ImmutableSet; import 
org.apache.druid.indexing.common.task.Task; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.Set; /** @@ -38,12 +40,17 @@ public class SegmentInsertAction implements TaskAction> { private final Set segments; + @Nullable + private final SegmentSchemaMapping segmentSchemaMapping; + @JsonCreator public SegmentInsertAction( - @JsonProperty("segments") Set segments + @JsonProperty("segments") Set segments, + @JsonProperty("segmentSchemaMapping") @Nullable SegmentSchemaMapping segmentSchemaMapping ) { this.segments = ImmutableSet.copyOf(segments); + this.segmentSchemaMapping = segmentSchemaMapping; } @JsonProperty @@ -52,6 +59,13 @@ public Set getSegments() return segments; } + @JsonProperty + @Nullable + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + @Override public TypeReference> getReturnTypeReference() { @@ -68,7 +82,7 @@ public TypeReference> getReturnTypeReference() @Override public Set perform(Task task, TaskActionToolbox toolbox) { - return SegmentTransactionalInsertAction.appendAction(segments, null, null).perform(task, toolbox).getSegments(); + return SegmentTransactionalInsertAction.appendAction(segments, null, null, segmentSchemaMapping).perform(task, toolbox).getSegments(); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java index 1a1e6c793776..4871e65e162c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalAppendAction.java @@ -33,6 +33,7 @@ import org.apache.druid.indexing.overlord.DataSourceMetadata; import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.metadata.ReplaceTaskLock; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; @@ -65,26 +66,30 @@ public class SegmentTransactionalAppendAction implements TaskAction segments) + public static SegmentTransactionalAppendAction forSegments(Set segments, SegmentSchemaMapping segmentSchemaMapping) { - return new SegmentTransactionalAppendAction(segments, null, null); + return new SegmentTransactionalAppendAction(segments, null, null, segmentSchemaMapping); } public static SegmentTransactionalAppendAction forSegmentsAndMetadata( Set segments, DataSourceMetadata startMetadata, - DataSourceMetadata endMetadata + DataSourceMetadata endMetadata, + SegmentSchemaMapping segmentSchemaMapping ) { - return new SegmentTransactionalAppendAction(segments, startMetadata, endMetadata); + return new SegmentTransactionalAppendAction(segments, startMetadata, endMetadata, segmentSchemaMapping); } @JsonCreator private SegmentTransactionalAppendAction( @JsonProperty("segments") Set segments, @JsonProperty("startMetadata") @Nullable DataSourceMetadata startMetadata, - @JsonProperty("endMetadata") @Nullable DataSourceMetadata endMetadata + @JsonProperty("endMetadata") @Nullable DataSourceMetadata endMetadata, + @JsonProperty("segmentSchemaMapping") @Nullable SegmentSchemaMapping segmentSchemaMapping ) { this.segments = segments; @@ -95,6 +100,7 @@ private 
SegmentTransactionalAppendAction( || (startMetadata != null && endMetadata == null)) { throw InvalidInput.exception("startMetadata and endMetadata must either be both null or both non-null."); } + this.segmentSchemaMapping = segmentSchemaMapping; } @JsonProperty @@ -117,6 +123,13 @@ public DataSourceMetadata getEndMetadata() return endMetadata; } + @JsonProperty + @Nullable + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + @Override public TypeReference getReturnTypeReference() { @@ -158,7 +171,8 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegments( segments, segmentToReplaceLock, - taskAllocatorId + taskAllocatorId, + segmentSchemaMapping ); } else { publishAction = () -> toolbox.getIndexerMetadataStorageCoordinator().commitAppendSegmentsAndMetadata( @@ -166,7 +180,8 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) segmentToReplaceLock, startMetadata, endMetadata, - taskAllocatorId + taskAllocatorId, + segmentSchemaMapping ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java index 5a9ca0cacdfe..4bcc8c5d39f3 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java @@ -33,6 +33,7 @@ import org.apache.druid.indexing.overlord.DataSourceMetadata; import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; @@ -68,22 +69,28 @@ public class SegmentTransactionalInsertAction implements TaskAction segmentsToBeOverwritten, - Set segmentsToPublish + Set segmentsToPublish, + @Nullable SegmentSchemaMapping segmentSchemaMapping ) { - return new SegmentTransactionalInsertAction(segmentsToBeOverwritten, segmentsToPublish, null, null, null); + return new SegmentTransactionalInsertAction(segmentsToBeOverwritten, segmentsToPublish, null, null, null, + segmentSchemaMapping + ); } public static SegmentTransactionalInsertAction appendAction( Set segments, @Nullable DataSourceMetadata startMetadata, - @Nullable DataSourceMetadata endMetadata + @Nullable DataSourceMetadata endMetadata, + @Nullable SegmentSchemaMapping segmentSchemaMapping ) { - return new SegmentTransactionalInsertAction(null, segments, startMetadata, endMetadata, null); + return new SegmentTransactionalInsertAction(null, segments, startMetadata, endMetadata, null, segmentSchemaMapping); } public static SegmentTransactionalInsertAction commitMetadataOnlyAction( @@ -92,7 +99,7 @@ public static SegmentTransactionalInsertAction commitMetadataOnlyAction( DataSourceMetadata endMetadata ) { - return new SegmentTransactionalInsertAction(null, null, startMetadata, endMetadata, dataSource); + return new SegmentTransactionalInsertAction(null, null, startMetadata, endMetadata, dataSource, null); } @JsonCreator @@ -101,7 +108,8 @@ private SegmentTransactionalInsertAction( @JsonProperty("segments") @Nullable Set segments, @JsonProperty("startMetadata") @Nullable DataSourceMetadata 
startMetadata, @JsonProperty("endMetadata") @Nullable DataSourceMetadata endMetadata, - @JsonProperty("dataSource") @Nullable String dataSource + @JsonProperty("dataSource") @Nullable String dataSource, + @JsonProperty("segmentSchemaMapping") @Nullable SegmentSchemaMapping segmentSchemaMapping ) { this.segmentsToBeOverwritten = segmentsToBeOverwritten; @@ -109,6 +117,7 @@ private SegmentTransactionalInsertAction( this.startMetadata = startMetadata; this.endMetadata = endMetadata; this.dataSource = dataSource; + this.segmentSchemaMapping = segmentSchemaMapping; } @JsonProperty @@ -145,6 +154,13 @@ public String getDataSource() return dataSource; } + @JsonProperty + @Nullable + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + @Override public TypeReference getReturnTypeReference() { @@ -201,7 +217,8 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) () -> toolbox.getIndexerMetadataStorageCoordinator().commitSegmentsAndMetadata( segments, startMetadata, - endMetadata + endMetadata, + segmentSchemaMapping ) ) .onInvalidLocks( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java index 2f4a580e0464..f2b080cff6ef 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalReplaceAction.java @@ -32,10 +32,12 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -69,19 +71,25 @@ public class SegmentTransactionalReplaceAction implements TaskAction segments; + @Nullable + private final SegmentSchemaMapping segmentSchemaMapping; + public static SegmentTransactionalReplaceAction create( - Set segmentsToPublish + Set segmentsToPublish, + SegmentSchemaMapping segmentSchemaMapping ) { - return new SegmentTransactionalReplaceAction(segmentsToPublish); + return new SegmentTransactionalReplaceAction(segmentsToPublish, segmentSchemaMapping); } @JsonCreator private SegmentTransactionalReplaceAction( - @JsonProperty("segments") Set segments + @JsonProperty("segments") Set segments, + @JsonProperty("segmentSchemaMapping") @Nullable SegmentSchemaMapping segmentSchemaMapping ) { this.segments = ImmutableSet.copyOf(segments); + this.segmentSchemaMapping = segmentSchemaMapping; } @JsonProperty @@ -90,6 +98,13 @@ public Set getSegments() return segments; } + @JsonProperty + @Nullable + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + @Override public TypeReference getReturnTypeReference() { @@ -118,7 +133,7 @@ public SegmentPublishResult perform(Task task, TaskActionToolbox toolbox) CriticalAction.builder() .onValidLocks( () -> toolbox.getIndexerMetadataStorageCoordinator() - .commitReplaceSegments(segments, replaceLocksForTask) + .commitReplaceSegments(segments, replaceLocksForTask, segmentSchemaMapping) ) .onInvalidLocks( 
() -> SegmentPublishResult.fail( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index bfe110a33e0c..53daa6cc5e98 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -69,6 +69,7 @@ import org.apache.druid.query.DruidMetrics; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.SegmentDescriptor; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -433,16 +434,19 @@ private boolean tryLockWithDetermineResult(TaskActionClient client, LockGranular protected TaskAction buildPublishAction( Set segmentsToBeOverwritten, Set segmentsToPublish, + SegmentSchemaMapping segmentSchemaMapping, TaskLockType lockType ) { switch (lockType) { case REPLACE: - return SegmentTransactionalReplaceAction.create(segmentsToPublish); + return SegmentTransactionalReplaceAction.create(segmentsToPublish, segmentSchemaMapping); case APPEND: - return SegmentTransactionalAppendAction.forSegments(segmentsToPublish); + return SegmentTransactionalAppendAction.forSegments(segmentsToPublish, segmentSchemaMapping); default: - return SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToPublish); + return SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToPublish, + segmentSchemaMapping + ); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 42759262fab5..81abc86e9543 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -359,7 +359,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) int sequenceNumber = 0; String sequenceName = makeSequenceName(getId(), sequenceNumber); - final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, segments, commitMetadata) -> { + final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, segments, commitMetadata, map) -> { if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) { throw new ISE( "Stream ingestion task unexpectedly attempted to overwrite segments: %s", @@ -369,6 +369,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction( segments, null, + null, null ); return toolbox.getTaskActionClient().submit(action); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/BatchAppenderators.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/BatchAppenderators.java index f16673e4efa3..e47bc0bc1c8d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/BatchAppenderators.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/BatchAppenderators.java @@ -87,7 +87,8 @@ public static Appenderator newAppenderator( toolbox.getIndexMergerV9(), rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + toolbox.getCentralizedTableSchemaConfig() ); } else if (toolbox.getConfig().getBatchProcessingMode() == TaskConfig.BatchProcessingMode.CLOSED_SEGMENTS) { return appenderatorsManager.createClosedSegmentsOfflineAppenderatorForTask( @@ -101,7 +102,8 @@ public static Appenderator newAppenderator( toolbox.getIndexMergerV9(), rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + toolbox.getCentralizedTableSchemaConfig() ); } else if (toolbox.getConfig().getBatchProcessingMode() == TaskConfig.BatchProcessingMode.CLOSED_SEGMENTS_SINKS) { return appenderatorsManager.createOfflineAppenderatorForTask( @@ -115,14 +117,14 @@ public static Appenderator newAppenderator( toolbox.getIndexMergerV9(), rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + toolbox.getCentralizedTableSchemaConfig() ); } else { throw new IAE("Invalid batchProcesingMode[%s]", toolbox.getConfig().getBatchProcessingMode()); } } - public static BatchAppenderatorDriver newDriver( final Appenderator appenderator, final TaskToolbox toolbox, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 1796f6ea2a64..7603fe9542ba 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -67,6 +67,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.JodaUtils; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.granularity.Granularity; @@ -75,6 +76,7 @@ import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.ParseExceptionReport; @@ -881,8 +883,8 @@ private TaskStatus generateAndPublishSegments( final TaskLockType taskLockType = getTaskLockHelper().getLockTypeToUse(); final TransactionalSegmentPublisher publisher = - (segmentsToBeOverwritten, segmentsToPublish, commitMetadata) -> toolbox.getTaskActionClient().submit( - buildPublishAction(segmentsToBeOverwritten, segmentsToPublish, taskLockType) + (segmentsToBeOverwritten, segmentsToPublish, commitMetadata, map) -> toolbox.getTaskActionClient().submit( + buildPublishAction(segmentsToBeOverwritten, segmentsToPublish, map, taskLockType) ); String effectiveId = getContextValue(CompactionTask.CTX_KEY_APPENDERATOR_TRACKING_TASK_ID, null); @@ -905,7 +907,7 @@ private TaskStatus generateAndPublishSegments( try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) { driver.startJob(); - SegmentsAndCommitMetadata pushed = InputSourceProcessor.process( + Pair commitMetadataAndSchema = 
InputSourceProcessor.process( dataSchema, driver, partitionsSpec, @@ -919,6 +921,7 @@ private TaskStatus generateAndPublishSegments( pushTimeout ); + SegmentsAndCommitMetadata pushed = commitMetadataAndSchema.lhs; // If we use timeChunk lock, then we don't have to specify what segments will be overwritten because // it will just overwrite all segments overlapped with the new segments. final Set inputSegments = getTaskLockHelper().isUseSegmentLock() @@ -956,7 +959,6 @@ private TaskStatus generateAndPublishSegments( tombStones = tombstoneHelper.computeTombstones(ingestionSchema.getDataSchema(), tombstonesAndVersions); - log.debugSegments(tombStones, "To publish tombstones"); } @@ -966,7 +968,8 @@ private TaskStatus generateAndPublishSegments( inputSegments, tombStones, publisher, - annotateFunction + annotateFunction, + commitMetadataAndSchema.rhs ), pushTimeout); appenderator.close(); @@ -1776,5 +1779,4 @@ public String toString() '}'; } } - } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java index b9c3b589f47d..9115818dcb36 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/InputSourceProcessor.java @@ -29,12 +29,15 @@ import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult; import org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver; import org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata; @@ -58,7 +61,7 @@ public class InputSourceProcessor * * @return {@link SegmentsAndCommitMetadata} for the pushed segments. */ - public static SegmentsAndCommitMetadata process( + public static Pair process( DataSchema dataSchema, BatchAppenderatorDriver driver, PartitionsSpec partitionsSpec, @@ -77,6 +80,7 @@ public static SegmentsAndCommitMetadata process( ? (DynamicPartitionsSpec) partitionsSpec : null; final GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + final SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); try ( final CloseableIterator inputRowIterator = AbstractBatchIndexTask.inputSourceReader( @@ -120,6 +124,7 @@ public static SegmentsAndCommitMetadata process( // If those segments are not pushed here, the remaining available space in appenderator will be kept // small which could lead to smaller segments. 
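Note on the InputSourceProcessor change above: process(...) now returns a Pair so that callers receive the accumulated per-segment schemas along with the pushed segments. A hedged sketch of how a caller unpacks the new return type, following the IndexTask change in this patch (the method name and surrounding code are illustrative, not part of the patch):

// Sketch only: unpacking the Pair returned by InputSourceProcessor.process after this patch.
void handleProcessed(Pair<SegmentsAndCommitMetadata, SegmentSchemaMapping> processed)
{
  final SegmentsAndCommitMetadata pushed = processed.lhs;          // segments + commit metadata, as before
  final SegmentSchemaMapping segmentSchemaMapping = processed.rhs; // per-segment schemas, new in this patch
  // Both are then handed to the transactional publisher, e.g.:
  // publisher.publishSegments(oldSegments, pushed.getSegments(), annotateFunction, commitMetadata, segmentSchemaMapping);
}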
final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout); + segmentSchemaMapping.merge(pushed.getSegmentSchemaMapping()); LOG.debugSegments(pushed.getSegments(), "Pushed segments"); } } @@ -129,9 +134,10 @@ public static SegmentsAndCommitMetadata process( } final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout); + segmentSchemaMapping.merge(pushed.getSegmentSchemaMapping()); LOG.debugSegments(pushed.getSegments(), "Pushed segments"); - return pushed; + return Pair.of(pushed, segmentSchemaMapping); } } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 935adb3cde0f..e2c0681d001b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -66,6 +66,7 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.rpc.HttpResponseException; import org.apache.druid.rpc.indexing.OverlordClient; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.incremental.ParseExceptionReport; import org.apache.druid.segment.incremental.RowIngestionMeters; import org.apache.druid.segment.incremental.RowIngestionMetersTotals; @@ -73,6 +74,7 @@ import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; import org.apache.druid.segment.realtime.firehose.ChatHandler; @@ -209,7 +211,6 @@ public class ParallelIndexSupervisorTask extends AbstractBatchIndexTask private Long segmentsPublished; private final boolean isCompactionTask; - @JsonCreator public ParallelIndexSupervisorTask( @JsonProperty("id") String id, @@ -358,7 +359,8 @@ SinglePhaseParallelIndexTaskRunner createSinglePhaseTaskRunner(TaskToolbox toolb getGroupId(), baseSubtaskSpecName, ingestionSchema, - getContext() + getContext(), + toolbox.getCentralizedTableSchemaConfig() ); } @@ -439,7 +441,9 @@ PartialGenericSegmentMergeParallelIndexTaskRunner createPartialGenericSegmentMer ingestionSchema.getDataSchema(), ioConfigs, ingestionSchema.getTuningConfig(), - getContext() + getContext(), + toolbox.getJsonMapper(), + toolbox.getCentralizedTableSchemaConfig() ); } @@ -1147,11 +1151,16 @@ private void publishSegments( { final Set oldSegments = new HashSet<>(); final Set newSegments = new HashSet<>(); + final SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); + reportsMap .values() .forEach(report -> { oldSegments.addAll(report.getOldSegments()); newSegments.addAll(report.getNewSegments()); + if (report.getSegmentSchemaMapping() != null) { + segmentSchemaMapping.merge(report.getSegmentSchemaMapping()); + } }); final boolean storeCompactionState = getContextValue( Tasks.STORE_COMPACTION_STATE_KEY, @@ -1163,7 +1172,6 @@ private void publishSegments( ingestionSchema ); - Set tombStones = Collections.emptySet(); if 
(getIngestionMode() == IngestionMode.REPLACE) { TombstoneHelper tombstoneHelper = new TombstoneHelper(toolbox.getTaskActionClient()); @@ -1189,16 +1197,16 @@ private void publishSegments( final TaskLockType taskLockType = getTaskLockHelper().getLockTypeToUse(); final TransactionalSegmentPublisher publisher = - (segmentsToBeOverwritten, segmentsToPublish, commitMetadata) -> toolbox.getTaskActionClient().submit( - buildPublishAction(segmentsToBeOverwritten, segmentsToPublish, taskLockType) + (segmentsToBeOverwritten, segmentsToPublish, commitMetadata, map) -> toolbox.getTaskActionClient().submit( + buildPublishAction(segmentsToBeOverwritten, segmentsToPublish, map, taskLockType) ); final boolean published = newSegments.isEmpty() - || publisher.publishSegments(oldSegments, newSegments, annotateFunction, null).isSuccess(); + || publisher.publishSegments(oldSegments, newSegments, annotateFunction, null, segmentSchemaMapping).isSuccess(); if (published) { - LOG.info("Published [%d] segments", newSegments.size()); + LOG.info("Published [%d] segments & [%d] schemas", newSegments.size(), segmentSchemaMapping.getSchemaCount()); // segment metrics: emitMetric(toolbox.getEmitter(), "ingest/tombstones/count", tombStones.size()); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java index 8babf50d8265..0c743d1f1862 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java @@ -19,10 +19,12 @@ package org.apache.druid.indexing.common.task.batch.parallel; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import org.apache.druid.data.input.InputSplit; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import java.util.Iterator; import java.util.List; @@ -38,6 +40,8 @@ class PartialGenericSegmentMergeParallelIndexTaskRunner private final DataSchema dataSchema; private final List mergeIOConfigs; + private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; + private final ObjectMapper mapper; PartialGenericSegmentMergeParallelIndexTaskRunner( TaskToolbox toolbox, @@ -47,13 +51,17 @@ class PartialGenericSegmentMergeParallelIndexTaskRunner DataSchema dataSchema, List mergeIOConfigs, ParallelIndexTuningConfig tuningConfig, - Map context + Map context, + ObjectMapper mapper, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { super(toolbox, taskId, groupId, baseSubtaskSpecName, tuningConfig, context); this.dataSchema = dataSchema; this.mergeIOConfigs = mergeIOConfigs; + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + this.mapper = mapper; } @Override @@ -102,7 +110,9 @@ public PartialGenericSegmentMergeTask newSubTask(int numAttempts) subtaskSpecId, numAttempts, ingestionSpec, - getContext() + getContext(), + centralizedDatasourceSchemaConfig, + mapper ); } }; diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java index 989f0a77daab..be44fb282ef6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java @@ -19,9 +19,11 @@ package org.apache.druid.indexing.common.task.batch.parallel; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.collect.HashBasedTable; import com.google.common.collect.ImmutableSet; @@ -29,6 +31,7 @@ import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.server.security.ResourceAction; import org.apache.druid.timeline.partition.BuildingShardSpec; import org.apache.druid.timeline.partition.ShardSpec; @@ -50,6 +53,8 @@ public class PartialGenericSegmentMergeTask extends PartialSegmentMergeTask> intervalAndIntegerToShardSpec; + private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; + @JsonCreator public PartialGenericSegmentMergeTask( // id shouldn't be null except when this task is created by ParallelIndexSupervisorTask @@ -61,7 +66,9 @@ public PartialGenericSegmentMergeTask( @JsonProperty("subtaskSpecId") @Nullable final String subtaskSpecId, @JsonProperty("numAttempts") final int numAttempts, // zero-based counting @JsonProperty("spec") final PartialSegmentMergeIngestionSpec ingestionSchema, - @JsonProperty("context") final Map context + @JsonProperty("context") final Map context, + @JsonProperty("centralizedDatasourceSchemaConfig") CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig, + @JacksonInject ObjectMapper mapper ) { super( @@ -74,9 +81,12 @@ public PartialGenericSegmentMergeTask( ingestionSchema.getIOConfig(), ingestionSchema.getTuningConfig(), numAttempts, - context + context, + mapper, + centralizedDatasourceSchemaConfig ); + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; this.ingestionSchema = ingestionSchema; this.intervalAndIntegerToShardSpec = createIntervalAndIntegerToShardSpec( ingestionSchema.getIOConfig().getPartitionLocations() @@ -117,6 +127,12 @@ private PartialSegmentMergeIngestionSpec getIngestionSchema() return ingestionSchema; } + @JsonProperty("centralizedDatasourceSchemaConfig") + private CentralizedDatasourceSchemaConfig getCentralizedDatasourceSchemaConfig() + { + return centralizedDatasourceSchemaConfig; + } + @Override public String getType() { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java index 768d118d84a8..d051038634cf 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentGenerateTask.java @@ -40,6 +40,8 @@ import org.apache.druid.indexing.firehose.WindowedSegmentId; import org.apache.druid.indexing.input.DruidInputSource; import org.apache.druid.indexing.worker.shuffle.ShuffleDataSegmentPusher; +import org.apache.druid.java.util.common.Pair; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.ParseExceptionReport; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -217,7 +219,7 @@ private List generateSegments( try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) { driver.startJob(); - final SegmentsAndCommitMetadata pushed = InputSourceProcessor.process( + final Pair pushed = InputSourceProcessor.process( dataSchema, driver, partitionsSpec, @@ -230,7 +232,7 @@ private List generateSegments( parseExceptionHandler, pushTimeout ); - return pushed.getSegments(); + return pushed.lhs.getSegments(); } catch (Exception e) { exceptionOccurred = true; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java index e8f1effcfe1e..7f0208417ff1 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java @@ -20,6 +20,7 @@ package org.apache.druid.indexing.common.task.batch.parallel; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.collect.Maps; @@ -42,12 +43,18 @@ import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.BaseProgressIndicator; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.SchemaPayloadPlus; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.FingerprintGenerator; +import org.apache.druid.segment.realtime.appenderator.TaskSegmentSchemaUtil; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.ShardSpec; import org.joda.time.Interval; @@ -74,10 +81,11 @@ abstract class PartialSegmentMergeTask extends PerfectRollu { private static final Logger LOG = new Logger(PartialSegmentMergeTask.class); - private final PartialSegmentMergeIOConfig ioConfig; private final int numAttempts; private final String subtaskSpecId; + private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; + private final FingerprintGenerator fingerprintGenerator; PartialSegmentMergeTask( // id shouldn't be null except when this task is created by ParallelIndexSupervisorTask @@ 
-90,7 +98,9 @@ abstract class PartialSegmentMergeTask extends PerfectRollu PartialSegmentMergeIOConfig ioConfig, ParallelIndexTuningConfig tuningConfig, final int numAttempts, // zero-based counting - final Map context + final Map context, + final ObjectMapper mapper, + final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { super( @@ -110,6 +120,8 @@ abstract class PartialSegmentMergeTask extends PerfectRollu this.subtaskSpecId = subtaskSpecId; this.ioConfig = ioConfig; this.numAttempts = numAttempts; + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + this.fingerprintGenerator = new FingerprintGenerator(mapper); } @JsonProperty @@ -180,7 +192,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception org.apache.commons.io.FileUtils.deleteQuietly(persistDir); FileUtils.mkdirp(persistDir); - final Set pushedSegments = mergeAndPushSegments( + final DataSegmentsWithSchemas dataSegmentsWithSchemas = mergeAndPushSegments( toolbox, getDataSchema(), getTuningConfig(), @@ -190,7 +202,13 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ); taskClient.report( - new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, new TaskReport.ReportMap()) + new PushedSegmentsReport( + getId(), + Collections.emptySet(), + dataSegmentsWithSchemas.getSegments(), + new TaskReport.ReportMap(), + dataSegmentsWithSchemas.getSegmentSchemaMapping() + ) ); return TaskStatus.success(getId()); @@ -234,7 +252,7 @@ private Map>> fetchSegmentFiles( */ abstract S createShardSpec(TaskToolbox toolbox, Interval interval, int bucketId); - private Set mergeAndPushSegments( + private DataSegmentsWithSchemas mergeAndPushSegments( TaskToolbox toolbox, DataSchema dataSchema, ParallelIndexTuningConfig tuningConfig, @@ -245,12 +263,15 @@ private Set mergeAndPushSegments( { final DataSegmentPusher segmentPusher = toolbox.getSegmentPusher(); final Set pushedSegments = new HashSet<>(); + final SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); + for (Entry>> entryPerInterval : intervalToUnzippedFiles.entrySet()) { final Interval interval = entryPerInterval.getKey(); for (Int2ObjectMap.Entry> entryPerBucketId : entryPerInterval.getValue().int2ObjectEntrySet()) { long startTime = System.nanoTime(); final int bucketId = entryPerBucketId.getIntKey(); final List segmentFilesToMerge = entryPerBucketId.getValue(); + final Pair> mergedFileAndDimensionNames = mergeSegmentsInSamePartition( dataSchema, tuningConfig, @@ -261,6 +282,7 @@ private Set mergeAndPushSegments( persistDir, 0 ); + long mergeFinishTime = System.nanoTime(); LOG.info("Merged [%d] input segment(s) for interval [%s] in [%,d]ms.", segmentFilesToMerge.size(), @@ -292,6 +314,21 @@ private Set mergeAndPushSegments( ); long pushFinishTime = System.nanoTime(); pushedSegments.add(segment); + + if (centralizedDatasourceSchemaConfig.isEnabled()) { + SchemaPayloadPlus schemaPayloadPlus = + TaskSegmentSchemaUtil.getSegmentSchema(mergedFileAndDimensionNames.lhs, toolbox.getIndexIO()); + segmentSchemaMapping.addSchema( + segment.getId(), + schemaPayloadPlus, + fingerprintGenerator.generateFingerprint( + schemaPayloadPlus.getSchemaPayload(), + getDataSource(), + CentralizedDatasourceSchemaConfig.SCHEMA_VERSION + ) + ); + } + LOG.info("Built segment [%s] for interval [%s] (from [%d] input segment(s) in [%,d]ms) of " + "size [%d] bytes and pushed ([%,d]ms) to deep storage [%s].", segment.getId(), @@ -304,7 +341,10 @@ private Set 
mergeAndPushSegments( ); } } - return pushedSegments; + if (centralizedDatasourceSchemaConfig.isEnabled()) { + LOG.info("SegmentSchema for the pushed segments is [%s]", segmentSchemaMapping); + } + return new DataSegmentsWithSchemas(pushedSegments, segmentSchemaMapping.isNonEmpty() ? segmentSchemaMapping : null); } private static Pair> mergeSegmentsInSamePartition( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PushedSegmentsReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PushedSegmentsReport.java index c93906b11a2e..730752533498 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PushedSegmentsReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PushedSegmentsReport.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.druid.indexer.report.TaskReport; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.timeline.DataSegment; import java.util.Objects; @@ -40,6 +41,7 @@ public class PushedSegmentsReport implements SubTaskReport private final String taskId; private final Set oldSegments; private final Set newSegments; + private final SegmentSchemaMapping segmentSchemaMapping; private final TaskReport.ReportMap taskReport; @JsonCreator @@ -47,13 +49,15 @@ public PushedSegmentsReport( @JsonProperty("taskId") String taskId, @JsonProperty("oldSegments") Set oldSegments, @JsonProperty("segments") Set newSegments, - @JsonProperty("taskReport") TaskReport.ReportMap taskReport + @JsonProperty("taskReport") TaskReport.ReportMap taskReport, + @JsonProperty("segmentSchemaMapping") SegmentSchemaMapping segmentSchemaMapping ) { this.taskId = Preconditions.checkNotNull(taskId, "taskId"); this.oldSegments = Preconditions.checkNotNull(oldSegments, "oldSegments"); this.newSegments = Preconditions.checkNotNull(newSegments, "newSegments"); this.taskReport = taskReport; + this.segmentSchemaMapping = segmentSchemaMapping; } @Override @@ -81,6 +85,12 @@ public TaskReport.ReportMap getTaskReport() return taskReport; } + @JsonProperty("segmentSchemaMapping") + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + @Override public boolean equals(Object o) { @@ -94,12 +104,13 @@ public boolean equals(Object o) return Objects.equals(taskId, that.taskId) && Objects.equals(oldSegments, that.oldSegments) && Objects.equals(newSegments, that.newSegments) - && Objects.equals(taskReport, that.taskReport); + && Objects.equals(taskReport, that.taskReport) + && Objects.equals(segmentSchemaMapping, that.segmentSchemaMapping); } @Override public int hashCode() { - return Objects.hash(taskId, oldSegments, newSegments, taskReport); + return Objects.hash(taskId, oldSegments, newSegments, taskReport, segmentSchemaMapping); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java index 76311c0dbb6d..ce6aee98af35 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexTaskRunner.java @@ -35,6 +35,7 
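Stepping back from the hunks above: when the centralized datasource schema feature is enabled, mergeAndPushSegments reads the schema of each merged segment back from disk, fingerprints the payload, and records it in the SegmentSchemaMapping that ultimately travels inside PushedSegmentsReport. The sketch below restates that per-segment step outside the task class; the helper name and parameter list are illustrative, but every call it makes appears in this patch.

// Hedged sketch of the per-segment schema capture in mergeAndPushSegments().
import java.io.File;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.SchemaPayloadPlus;
import org.apache.druid.segment.SegmentSchemaMapping;
import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig;
import org.apache.druid.segment.metadata.FingerprintGenerator;
import org.apache.druid.segment.realtime.appenderator.TaskSegmentSchemaUtil;
import org.apache.druid.timeline.DataSegment;

final class SchemaCaptureSketch
{
  static void recordSchema(
      CentralizedDatasourceSchemaConfig schemaConfig,
      SegmentSchemaMapping segmentSchemaMapping,
      FingerprintGenerator fingerprintGenerator,
      DataSegment segment,
      File mergedSegmentDir,
      IndexIO indexIO,
      String dataSource
  ) throws Exception  // the exact checked-exception surface is an assumption
  {
    if (schemaConfig.isEnabled()) {
      // Read the schema of the just-merged segment from the persisted files.
      SchemaPayloadPlus schemaPayloadPlus =
          TaskSegmentSchemaUtil.getSegmentSchema(mergedSegmentDir, indexIO);

      // Key the schema by a fingerprint so identical payloads are stored once.
      segmentSchemaMapping.addSchema(
          segment.getId(),
          schemaPayloadPlus,
          fingerprintGenerator.generateFingerprint(
              schemaPayloadPlus.getSchemaPayload(),
              dataSource,
              CentralizedDatasourceSchemaConfig.SCHEMA_VERSION
          )
      );
    }
  }
}

When the feature is disabled the mapping stays empty, which is why the return statement above only attaches it to DataSegmentsWithSchemas when isNonEmpty() is true.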
@@ import org.apache.druid.indexing.common.task.batch.parallel.TaskMonitor.SubTaskCompleteEvent; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.NonnullPair; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.BuildingNumberedShardSpec; @@ -101,6 +102,7 @@ public class SinglePhaseParallelIndexTaskRunner extends ParallelIndexPhaseRunner private final ParallelIndexIngestionSpec ingestionSchema; private final SplittableInputSource baseInputSource; + private CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; SinglePhaseParallelIndexTaskRunner( TaskToolbox toolbox, @@ -108,7 +110,8 @@ public class SinglePhaseParallelIndexTaskRunner extends ParallelIndexPhaseRunner String groupId, String baseSubtaskSpecName, ParallelIndexIngestionSpec ingestionSchema, - Map context + Map context, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { super( @@ -121,6 +124,7 @@ public class SinglePhaseParallelIndexTaskRunner extends ParallelIndexPhaseRunner ); this.ingestionSchema = ingestionSchema; this.baseInputSource = (SplittableInputSource) ingestionSchema.getIOConfig().getNonNullInputSource(toolbox); + this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; } @VisibleForTesting @@ -129,10 +133,11 @@ public class SinglePhaseParallelIndexTaskRunner extends ParallelIndexPhaseRunner String taskId, String groupId, ParallelIndexIngestionSpec ingestionSchema, - Map context + Map context, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { - this(toolbox, taskId, groupId, taskId, ingestionSchema, context); + this(toolbox, taskId, groupId, taskId, ingestionSchema, context, centralizedDatasourceSchemaConfig); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java index e02d59936b20..0a1f00f90251 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseSubTask.java @@ -52,6 +52,8 @@ import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.segment.DataSegmentsWithSchemas; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.ParseExceptionReport; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -59,6 +61,7 @@ import org.apache.druid.segment.indexing.RealtimeIOConfig; import org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.FireDepartment; import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.appenderator.Appenderator; @@ -273,7 +276,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception 
ingestionSchema.getTuningConfig().getChatHandlerNumRetries() ); ingestionState = IngestionState.BUILD_SEGMENTS; - final Set pushedSegments = generateAndPushSegments( + final DataSegmentsWithSchemas dataSegmentsWithSchemas = generateAndPushSegments( toolbox, taskClient, inputSource, @@ -282,7 +285,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception // Find inputSegments overshadowed by pushedSegments final Set allSegments = new HashSet<>(getTaskLockHelper().getLockedExistingSegments()); - allSegments.addAll(pushedSegments); + allSegments.addAll(dataSegmentsWithSchemas.getSegments()); final SegmentTimeline timeline = SegmentTimeline.forSegments(allSegments); final Set oldSegments = FluentIterable.from(timeline.findFullyOvershadowed()) .transformAndConcat(TimelineObjectHolder::getObject) @@ -290,7 +293,7 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception .toSet(); TaskReport.ReportMap taskReport = getTaskCompletionReports(); - taskClient.report(new PushedSegmentsReport(getId(), oldSegments, pushedSegments, taskReport)); + taskClient.report(new PushedSegmentsReport(getId(), oldSegments, dataSegmentsWithSchemas.getSegments(), taskReport, dataSegmentsWithSchemas.getSegmentSchemaMapping())); toolbox.getTaskReportFileWriter().write(getId(), taskReport); @@ -363,7 +366,7 @@ public Granularity getSegmentGranularity() * * @return true if generated segments are successfully published, otherwise false */ - private Set generateAndPushSegments( + private DataSegmentsWithSchemas generateAndPushSegments( final TaskToolbox toolbox, final ParallelIndexSupervisorTaskClient taskClient, final InputSource inputSource, @@ -437,6 +440,7 @@ private Set generateAndPushSegments( driver.startJob(); final Set pushedSegments = new HashSet<>(); + final SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); while (inputRowIterator.hasNext()) { final InputRow inputRow = inputRowIterator.next(); @@ -456,8 +460,10 @@ private Set generateAndPushSegments( // which makes the size of segments smaller. final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout); pushedSegments.addAll(pushed.getSegments()); - LOG.info("Pushed [%s] segments", pushed.getSegments().size()); + segmentSchemaMapping.merge(pushed.getSegmentSchemaMapping()); + LOG.info("Pushed [%s] segments and [%s] schemas", pushed.getSegments().size(), segmentSchemaMapping.getSchemaCount()); LOG.infoSegments(pushed.getSegments(), "Pushed segments"); + LOG.info("SegmentSchema is [%s]", segmentSchemaMapping); } } else { throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp()); @@ -468,11 +474,13 @@ private Set generateAndPushSegments( final SegmentsAndCommitMetadata pushed = driver.pushAllAndClear(pushTimeout); pushedSegments.addAll(pushed.getSegments()); - LOG.info("Pushed [%s] segments", pushed.getSegments().size()); + segmentSchemaMapping.merge(pushed.getSegmentSchemaMapping()); + LOG.info("Pushed [%s] segments and [%s] schemas", pushed.getSegments().size(), segmentSchemaMapping.getSchemaCount()); LOG.infoSegments(pushed.getSegments(), "Pushed segments"); + LOG.info("SegmentSchema is [%s]", segmentSchemaMapping); appenderator.close(); - return pushedSegments; + return new DataSegmentsWithSchemas(pushedSegments, segmentSchemaMapping.isNonEmpty() ? 
segmentSchemaMapping : null); } catch (TimeoutException | ExecutionException e) { exceptionOccurred = true; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SequenceMetadata.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SequenceMetadata.java index b5a65e99462c..c3832391be88 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SequenceMetadata.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SequenceMetadata.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; import org.apache.druid.timeline.DataSegment; @@ -351,7 +352,8 @@ public SequenceMetadataTransactionalSegmentPublisher( public SegmentPublishResult publishAnnotatedSegments( @Nullable Set mustBeNullOrEmptyOverwriteSegments, Set segmentsToPush, - @Nullable Object commitMetadata + @Nullable Object commitMetadata, + SegmentSchemaMapping segmentSchemaMapping ) throws IOException { if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) { @@ -417,12 +419,16 @@ public SegmentPublishResult publishAnnotatedSegments( ); final DataSourceMetadata endMetadata = runner.createDataSourceMetadata(finalPartitions); action = taskLockType == TaskLockType.APPEND - ? SegmentTransactionalAppendAction.forSegmentsAndMetadata(segmentsToPush, startMetadata, endMetadata) - : SegmentTransactionalInsertAction.appendAction(segmentsToPush, startMetadata, endMetadata); + ? SegmentTransactionalAppendAction.forSegmentsAndMetadata(segmentsToPush, startMetadata, endMetadata, + segmentSchemaMapping + ) + : SegmentTransactionalInsertAction.appendAction(segmentsToPush, startMetadata, endMetadata, + segmentSchemaMapping + ); } else { action = taskLockType == TaskLockType.APPEND - ? SegmentTransactionalAppendAction.forSegments(segmentsToPush) - : SegmentTransactionalInsertAction.appendAction(segmentsToPush, null, null); + ? 
SegmentTransactionalAppendAction.forSegments(segmentsToPush, segmentSchemaMapping) + : SegmentTransactionalInsertAction.appendAction(segmentsToPush, null, null, segmentSchemaMapping); } return toolbox.getTaskActionClient().submit(action); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java index 63bad86c1952..e94ced42193d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestIndexTask.java @@ -28,13 +28,18 @@ import org.apache.druid.data.input.impl.LocalInputSource; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexing.common.actions.TaskAction; import org.apache.druid.indexing.common.task.IndexTask; import org.apache.druid.indexing.common.task.TaskResource; +import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.timeline.DataSegment; import java.io.File; +import java.util.Set; /** */ @@ -110,4 +115,14 @@ public TaskStatus runTask(TaskToolbox toolbox) { return status; } + + public TaskAction testBuildPublishAction( + Set segmentsToBeOverwritten, + Set segmentsToPublish, + SegmentSchemaMapping segmentSchemaMapping, + TaskLockType lockType + ) + { + return buildPublishAction(segmentsToBeOverwritten, segmentsToPublish, segmentSchemaMapping, lockType); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/LocalTaskActionClientTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/LocalTaskActionClientTest.java index 55307984824b..e3928d2f916a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/LocalTaskActionClientTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/LocalTaskActionClientTest.java @@ -33,7 +33,7 @@ public class LocalTaskActionClientTest @Test public void testGetActionType() { - final TaskAction action = SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null); + final TaskAction action = SegmentTransactionalInsertAction.appendAction(Collections.emptySet(), null, null, null); Assert.assertEquals("segmentTransactionalInsert", LocalTaskActionClient.getActionType(objectMapper, action)); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java index f0e16fc7d24e..915385e811e3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/RetrieveSegmentsActionsTest.java @@ -65,7 +65,7 @@ public static void setup() throws IOException expectedUnusedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), UNUSED_V1)); actionTestKit.getMetadataStorageCoordinator() - .commitSegments(expectedUnusedSegments); + .commitSegments(expectedUnusedSegments, null); expectedUnusedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); 
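The pattern in the surrounding hunks is uniform: the publish and commit entry points (SegmentTransactionalInsertAction.appendAction and overwriteAction, SegmentTransactionalAppendAction.forSegments, SegmentInsertAction, IndexerMetadataStorageCoordinator.commitSegments, and so on) gain a trailing SegmentSchemaMapping argument, and callers with no schema information, such as most of the existing tests, pass null. A hedged fragment showing the two call shapes; the surrounding variables and fixtures are assumed, and only the argument lists come from this patch.

// Schema-aware publish: the mapping built while pushing segments travels with the action.
toolbox.getTaskActionClient().submit(
    SegmentTransactionalInsertAction.appendAction(segmentsToPush, startMetadata, endMetadata, segmentSchemaMapping)
);

// Legacy or test callers that publish no schemas pass null in the new trailing position.
metadataStorageCoordinator.commitSegments(segments, null);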
@@ -75,7 +75,7 @@ public static void setup() throws IOException expectedUsedSegments.add(createSegment(Intervals.of("2017-10-07/2017-10-08"), "2")); actionTestKit.getMetadataStorageCoordinator() - .commitSegments(expectedUsedSegments); + .commitSegments(expectedUsedSegments, null); expectedUsedSegments.forEach(s -> actionTestKit.getTaskLockbox().unlock(task, s.getInterval())); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java index 05760fd46caa..f2da105d269c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentAllocateActionTest.java @@ -427,7 +427,7 @@ public void testSegmentIsAllocatedForLatestUsedSegmentVersion() throws IOExcepti .size(100) .build(); taskActionTestKit.getMetadataStorageCoordinator().commitSegments( - Collections.singleton(segmentV1) + Collections.singleton(segmentV1), null ); // Verify that new allocations use version V1 @@ -451,7 +451,7 @@ public void testSegmentIsAllocatedForLatestUsedSegmentVersion() throws IOExcepti .size(100) .build(); taskActionTestKit.getMetadataStorageCoordinator().commitSegments( - Collections.singleton(segmentV2) + Collections.singleton(segmentV2), null ); Assert.assertTrue(segmentV2.getVersion().compareTo(segmentV1.getVersion()) > 0); @@ -654,7 +654,8 @@ public void testAddToExistingLinearShardSpecsSameGranularity() throws Exception .shardSpec(new LinearShardSpec(1)) .size(0) .build() - ) + ), + null ); taskActionTestKit.getTaskLockbox().add(task); @@ -719,7 +720,8 @@ public void testAddToExistingNumberedShardSpecsSameGranularity() throws Exceptio .shardSpec(new NumberedShardSpec(1, 2)) .size(0) .build() - ) + ), + null ); taskActionTestKit.getTaskLockbox().add(task); @@ -782,7 +784,8 @@ public void testAddToExistingNumberedShardSpecsCoarserPreferredGranularity() thr .shardSpec(new NumberedShardSpec(1, 2)) .size(0) .build() - ) + ), + null ); taskActionTestKit.getTaskLockbox().add(task); @@ -821,7 +824,8 @@ public void testAddToExistingNumberedShardSpecsFinerPreferredGranularity() throw .shardSpec(new NumberedShardSpec(1, 2)) .size(0) .build() - ) + ), + null ); taskActionTestKit.getTaskLockbox().add(task); @@ -860,7 +864,8 @@ public void testCannotAddToExistingNumberedShardSpecsWithCoarserQueryGranularity .shardSpec(new NumberedShardSpec(1, 2)) .size(0) .build() - ) + ), + null ); taskActionTestKit.getTaskLockbox().add(task); @@ -909,7 +914,8 @@ public void testWithPartialShardSpecAndOvershadowingSegments() throws IOExceptio ) .size(0) .build() - ) + ), + null ); final SegmentAllocateAction action = new SegmentAllocateAction( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentInsertActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentInsertActionTest.java index 8ac5c6b517f9..c8999c2f5d46 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentInsertActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentInsertActionTest.java @@ -102,7 +102,7 @@ private LockResult acquireTimeChunkLock(TaskLockType lockType, Task task, Interv public void testSimple() throws Exception { final Task task = NoopTask.create(); - final SegmentInsertAction action = new 
SegmentInsertAction(ImmutableSet.of(SEGMENT1, SEGMENT2)); + final SegmentInsertAction action = new SegmentInsertAction(ImmutableSet.of(SEGMENT1, SEGMENT2), null); actionTestKit.getTaskLockbox().add(task); acquireTimeChunkLock(TaskLockType.EXCLUSIVE, task, INTERVAL, 5000); actionTestKit.getTaskLockbox().doInCriticalSection( @@ -129,7 +129,7 @@ public void testSimple() throws Exception public void testFailBadVersion() throws Exception { final Task task = NoopTask.create(); - final SegmentInsertAction action = new SegmentInsertAction(ImmutableSet.of(SEGMENT3)); + final SegmentInsertAction action = new SegmentInsertAction(ImmutableSet.of(SEGMENT3), null); actionTestKit.getTaskLockbox().add(task); acquireTimeChunkLock(TaskLockType.EXCLUSIVE, task, INTERVAL, 5000); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java index 847354706ba7..f158ef1980cb 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java @@ -107,7 +107,8 @@ public void testTransactionalUpdateDataSourceMetadata() throws Exception SegmentPublishResult result1 = SegmentTransactionalInsertAction.appendAction( ImmutableSet.of(SEGMENT1), new ObjectMetadata(null), - new ObjectMetadata(ImmutableList.of(1)) + new ObjectMetadata(ImmutableList.of(1)), + null ).perform( task, actionTestKit.getTaskActionToolbox() @@ -117,7 +118,8 @@ public void testTransactionalUpdateDataSourceMetadata() throws Exception SegmentPublishResult result2 = SegmentTransactionalInsertAction.appendAction( ImmutableSet.of(SEGMENT2), new ObjectMetadata(ImmutableList.of(1)), - new ObjectMetadata(ImmutableList.of(2)) + new ObjectMetadata(ImmutableList.of(2)), + null ).perform( task, actionTestKit.getTaskActionToolbox() @@ -145,7 +147,8 @@ public void testFailTransactionalUpdateDataSourceMetadata() throws Exception SegmentPublishResult result = SegmentTransactionalInsertAction.appendAction( ImmutableSet.of(SEGMENT1), new ObjectMetadata(ImmutableList.of(1)), - new ObjectMetadata(ImmutableList.of(2)) + new ObjectMetadata(ImmutableList.of(2)), + null ).perform( task, actionTestKit.getTaskActionToolbox() @@ -168,7 +171,8 @@ public void testFailBadVersion() throws Exception final Task task = NoopTask.create(); final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.overwriteAction( null, - ImmutableSet.of(SEGMENT3) + ImmutableSet.of(SEGMENT3), + null ); actionTestKit.getTaskLockbox().add(task); acquireTimeChunkLock(TaskLockType.EXCLUSIVE, task, INTERVAL, 5000); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index eebf78a7ddcb..ed9b0e501fda 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -38,6 +38,9 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import 
org.apache.druid.segment.metadata.SegmentSchemaCache; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.easymock.EasyMock; import org.joda.time.Period; @@ -53,6 +56,8 @@ public class TaskActionTestKit extends ExternalResource private IndexerMetadataStorageCoordinator metadataStorageCoordinator; private SegmentsMetadataManager segmentsMetadataManager; private TaskActionToolbox taskActionToolbox; + private SegmentSchemaManager segmentSchemaManager; + private SegmentSchemaCache segmentSchemaCache; public TaskLockbox getTaskLockbox() { @@ -83,10 +88,13 @@ public void before() Suppliers.ofInstance(metadataStorageTablesConfig) ); final ObjectMapper objectMapper = new TestUtils().getTestObjectMapper(); + segmentSchemaManager = new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( objectMapper, metadataStorageTablesConfig, - testDerbyConnector + testDerbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ) { @Override @@ -96,11 +104,14 @@ public int getSqlMetadataMaxRetry() } }; taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator); + segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, Suppliers.ofInstance(new SegmentsMetadataManagerConfig()), Suppliers.ofInstance(metadataStorageTablesConfig), - testDerbyConnector + testDerbyConnector, + segmentSchemaCache, + CentralizedDatasourceSchemaConfig.create() ); final ServiceEmitter noopEmitter = new NoopServiceEmitter(); final TaskLockConfig taskLockConfig = new TaskLockConfig() @@ -135,6 +146,7 @@ public long getBatchAllocationWaitTime() ); testDerbyConnector.createDataSourceTable(); testDerbyConnector.createPendingSegmentsTable(); + testDerbyConnector.createSegmentSchemasTable(); testDerbyConnector.createSegmentTable(); testDerbyConnector.createRulesTable(); testDerbyConnector.createConfigTable(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index 79c4ef86ec6c..a12a353e4c33 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -94,6 +94,7 @@ import org.apache.druid.java.util.metrics.MonitorScheduler; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; +import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; import org.apache.druid.query.DirectQueryProcessingPool; @@ -112,6 +113,7 @@ import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory; import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; @@ -121,6 +123,7 @@ import 
org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; @@ -170,6 +173,7 @@ public class AppenderatorDriverRealtimeIndexTaskTest extends InitializedNullHand "host", new NoopEmitter() ); + private static final ObjectMapper OBJECT_MAPPER = TestHelper.makeJsonMapper(); private static final String FAIL_DIM = "__fail__"; @@ -254,6 +258,7 @@ public Firehose connect(InputRowParser parser, File temporaryDirectory) throws P @Rule public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); + private final ObjectMapper mapper = TestHelper.makeJsonMapper(); private DateTime now; private ListeningExecutorService taskExec; @@ -266,6 +271,7 @@ public Firehose connect(InputRowParser parser, File temporaryDirectory) throws P private TaskToolboxFactory taskToolboxFactory; private File baseDir; private File reportsFile; + private SegmentSchemaManager segmentSchemaManager; @Before public void setUp() throws IOException @@ -278,12 +284,14 @@ public void setUp() throws IOException TestDerbyConnector derbyConnector = derbyConnectorRule.getConnector(); derbyConnector.createDataSourceTable(); derbyConnector.createTaskTables(); + derbyConnector.createSegmentSchemasTable(); derbyConnector.createSegmentTable(); derbyConnector.createPendingSegmentsTable(); baseDir = tempFolder.newFolder(); reportsFile = File.createTempFile("KafkaIndexTaskTestReports-" + System.currentTimeMillis(), "json"); makeToolboxFactory(baseDir); + segmentSchemaManager = new SegmentSchemaManager(MetadataStorageTablesConfig.fromBase(null), mapper, derbyConnector); } @After @@ -1505,13 +1513,15 @@ private void makeToolboxFactory(final File directory) IndexerSQLMetadataStorageCoordinator mdc = new IndexerSQLMetadataStorageCoordinator( mapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnectorRule.getConnector() + derbyConnectorRule.getConnector(), + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ) { @Override - public Set commitSegments(Set segments) throws IOException + public Set commitSegments(Set segments, SegmentSchemaMapping segmentSchemaMapping) throws IOException { - Set result = super.commitSegments(segments); + Set result = super.commitSegments(segments, segmentSchemaMapping); Assert.assertFalse( "Segment latch not initialized, did you forget to call expectPublishSegments?", @@ -1528,10 +1538,11 @@ public Set commitSegments(Set segments) throws IOExcep public SegmentPublishResult commitSegmentsAndMetadata( Set segments, DataSourceMetadata startMetadata, - DataSourceMetadata endMetadata + DataSourceMetadata endMetadata, + SegmentSchemaMapping segmentSchemaMapping ) throws IOException { - SegmentPublishResult result = super.commitSegmentsAndMetadata(segments, startMetadata, endMetadata); + SegmentPublishResult result = super.commitSegmentsAndMetadata(segments, startMetadata, endMetadata, segmentSchemaMapping); Assert.assertNotNull( "Segment latch not initialized, did you forget to call expectPublishSegments?", @@ -1561,11 +1572,13 @@ public SegmentPublishResult commitSegmentsAndMetadata( EasyMock.createMock(SupervisorManager.class), 
OBJECT_MAPPER ); + final TaskActionClientFactory taskActionClientFactory = new LocalTaskActionClientFactory( taskStorage, taskActionToolbox, new TaskAuditLogConfig(false) ); + final QueryRunnerFactoryConglomerate conglomerate = new DefaultQueryRunnerFactoryConglomerate( ImmutableMap.of( TimeseriesQuery.class, @@ -1578,6 +1591,7 @@ public SegmentPublishResult commitSegmentsAndMetadata( ) ) ); + handOffCallbacks = new ConcurrentHashMap<>(); final SegmentHandoffNotifierFactory handoffNotifierFactory = dataSource -> new SegmentHandoffNotifier() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java index 00e170d90f5d..12f82b84a2a6 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java @@ -47,6 +47,7 @@ import org.apache.druid.segment.indexing.TuningConfig; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.appenderator.Appenderator; import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; @@ -245,7 +246,8 @@ public Map makeLoadSpec(URI uri) indexMerger, rowIngestionMeters, new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), - false + false, + CentralizedDatasourceSchemaConfig.create() ); break; case "CLOSED_SEGMENTS": @@ -260,7 +262,8 @@ public Map makeLoadSpec(URI uri) indexMerger, rowIngestionMeters, new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), - false + false, + CentralizedDatasourceSchemaConfig.create() ); break; @@ -276,7 +279,8 @@ public Map makeLoadSpec(URI uri) indexMerger, rowIngestionMeters, new ParseExceptionHandler(rowIngestionMeters, false, Integer.MAX_VALUE, 0), - false + false, + CentralizedDatasourceSchemaConfig.create() ); break; default: diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java index 18371b2afd54..bbc873270489 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java @@ -51,6 +51,7 @@ import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.DataSegmentPusher; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.FireDepartmentMetrics; import org.apache.druid.segment.realtime.appenderator.Appenderator; import org.apache.druid.segment.realtime.appenderator.AppenderatorConfig; @@ -594,6 +595,7 @@ private static TaskToolbox makeTaskToolbox( .appenderatorsManager(new TestAppenderatorsManager()) .taskLogPusher(null) .attemptId("1") + .centralizedTableSchemaConfig(CentralizedDatasourceSchemaConfig.create()) .build(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java 
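The test-fixture edits above all wire in the same new collaborators: a SegmentSchemaManager and CentralizedDatasourceSchemaConfig for the metadata storage coordinator, a SegmentSchemaCache for the segments metadata manager, and a segment-schemas table created next to the segment table. A condensed sketch of that wiring, assuming an already-built TestDerbyConnector, metadata tables config, and ObjectMapper, with argument order taken from this patch; imports match the test classes touched above.

// Hedged fixture-wiring fragment for a metadata-store test.
SegmentSchemaManager segmentSchemaManager =
    new SegmentSchemaManager(metadataStorageTablesConfig, objectMapper, testDerbyConnector);
SegmentSchemaCache segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter());

IndexerSQLMetadataStorageCoordinator coordinator = new IndexerSQLMetadataStorageCoordinator(
    objectMapper,
    metadataStorageTablesConfig,
    testDerbyConnector,
    segmentSchemaManager,
    CentralizedDatasourceSchemaConfig.create()      // default config, as used by the updated tests
);

SqlSegmentsMetadataManager segmentsMetadataManager = new SqlSegmentsMetadataManager(
    objectMapper,
    Suppliers.ofInstance(new SegmentsMetadataManagerConfig()),
    Suppliers.ofInstance(metadataStorageTablesConfig),
    testDerbyConnector,
    segmentSchemaCache,
    CentralizedDatasourceSchemaConfig.create()
);

// The schema table must exist before segments (and their schemas) are committed.
testDerbyConnector.createSegmentSchemasTable();
testDerbyConnector.createSegmentTable();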
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java index e944ad2aac24..ba9a6e3e2be2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskParallelRunTest.java @@ -59,6 +59,7 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; @@ -167,7 +168,10 @@ public void testRunParallelWithDynamicPartitioningMatchCompactionState() throws .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); + for (DataSegment segment : compactedSegments) { Assert.assertSame( lockGranularity == LockGranularity.TIME_CHUNK ? NumberedShardSpec.class : NumberedOverwriteShardSpec.class, @@ -219,7 +223,9 @@ public void testRunParallelWithHashPartitioningMatchCompactionState() throws Exc .tuningConfig(newTuningConfig(new HashedPartitionsSpec(null, 3, null), 2, true)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -283,7 +289,9 @@ public void testRunParallelWithRangePartitioning() throws Exception .tuningConfig(newTuningConfig(new SingleDimensionPartitionsSpec(7, null, "dim", false), 2, true)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -342,7 +350,9 @@ public void testRunParallelWithRangePartitioningAndNoUpfrontSegmentFetching() th ) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -396,7 +406,9 @@ public void testRunParallelWithMultiDimensionRangePartitioning() throws Exceptio true )).build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = 
dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -445,7 +457,9 @@ public void testRunParallelWithRangePartitioningWithSingleTask() throws Exceptio .tuningConfig(newTuningConfig(new SingleDimensionPartitionsSpec(7, null, "dim", false), 1, true)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -497,7 +511,9 @@ public void testRunParallelWithMultiDimensionRangePartitioningWithSingleTask() t true )).build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { // Expect compaction state to exist as store compaction state by default Map expectedLongSumMetric = new HashMap<>(); @@ -544,7 +560,9 @@ public void testRunCompactionStateNotStoreIfContextSetToFalse() .context(ImmutableMap.of(Tasks.STORE_COMPACTION_STATE_KEY, false)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : compactedSegments) { Assert.assertSame( @@ -573,7 +591,9 @@ public void testRunCompactionWithFilterShouldStoreInState() throws Exception .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null))) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); Assert.assertEquals(3, compactedSegments.size()); @@ -631,7 +651,9 @@ public void testRunCompactionWithNewMetricsShouldStoreInState() throws Exception }) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); Assert.assertEquals(3, compactedSegments.size()); @@ -688,8 +710,12 @@ public void testCompactHashAndDynamicPartitionedSegments() .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING) .build(); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); + final Map> intervalToSegments = SegmentUtils.groupSegmentsByInterval( - runTask(compactionTask) + compactedSegments ); Assert.assertEquals(3, intervalToSegments.size()); Assert.assertEquals( @@ -734,8 +760,12 @@ public void testCompactRangeAndDynamicPartitionedSegments() .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING) .build(); + final 
DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); + final Set compactedSegments = dataSegmentsWithSchemas.getSegments(); + final Map> intervalToSegments = SegmentUtils.groupSegmentsByInterval( - runTask(compactionTask) + compactedSegments ); Assert.assertEquals(3, intervalToSegments.size()); Assert.assertEquals( @@ -824,7 +854,8 @@ public void testCompactionDropSegmentsOfInputIntervalIfDropFlagIsSet() throws Ex .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); usedSegments = getCoordinatorClient().fetchUsedSegments( DATA_SOURCE, @@ -869,7 +900,8 @@ public void testCompactionDoesNotDropSegmentsIfDropFlagNotSet() throws Exception .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null)) .build(); - final Set compactedSegments = runTask(compactionTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(compactionTask); + verifySchema(dataSegmentsWithSchemas); usedSegments = getCoordinatorClient().fetchUsedSegments( DATA_SOURCE, @@ -952,14 +984,15 @@ private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appen indexTask.getInputSourceResources() ); - runTask(indexTask); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(indexTask); + verifySchema(dataSegmentsWithSchemas); } - private Set runTask(Task task) + private DataSegmentsWithSchemas runTask(Task task) { task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); TaskStatus status = getIndexingServiceClient().runAndWait(task); Assert.assertEquals(status.toString(), TaskState.SUCCESS, status.getStatusCode()); - return getIndexingServiceClient().getPublishedSegments(task); + return getIndexingServiceClient().getSegmentAndSchemas(task); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java index 82c07687536a..2893ef476a63 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskRunTest.java @@ -75,6 +75,7 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndexStorageAdapter; @@ -92,6 +93,7 @@ import org.apache.druid.segment.loading.SegmentLocalCacheManager; import org.apache.druid.segment.loading.StorageLocationConfig; import org.apache.druid.segment.loading.TombstoneLoadSpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter; import org.apache.druid.server.security.AuthTestUtils; @@ -128,6 +130,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.CountDownLatch; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -273,7 +276,8 @@ public void teardown() @Test public void testRunWithDynamicPartitioning() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -285,11 +289,12 @@ public void testRunWithDynamicPartitioning() throws Exception .interval(Intervals.of("2014-01-01/2014-01-02")) .build(); - final Pair> resultPair = runTask(compactionTask); - + final Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - final List segments = resultPair.rhs; + final DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + final List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -339,7 +344,8 @@ public void testRunWithHashPartitioning() throws Exception if (lockGranularity == LockGranularity.SEGMENT) { return; } - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -387,11 +393,12 @@ public void testRunWithHashPartitioning() throws Exception ) .build(); - final Pair> resultPair = runTask(compactionTask); - + final Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - final List segments = resultPair.rhs; + final DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + final List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(6, segments.size()); for (int i = 0; i < 3; i++) { @@ -437,7 +444,8 @@ public void testRunWithHashPartitioning() throws Exception @Test public void testRunCompactionTwice() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -449,11 +457,12 @@ public void testRunCompactionTwice() throws Exception .interval(Intervals.of("2014-01-01/2014-01-02")) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -487,10 +496,11 @@ public void testRunCompactionTwice() throws Exception .build(); resultPair = runTask(compactionTask2); - + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - segments = resultPair.rhs; + dataSegmentsWithSchemas = resultPair.rhs; + segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -529,7 +539,8 @@ public void testRunCompactionTwice() throws Exception @Test public void testRunIndexAndCompactAtTheSameTimeForDifferentInterval() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -576,17 +587,19 @@ public void testRunIndexAndCompactAtTheSameTimeForDifferentInterval() throws Exc null ); - final Future>> compactionFuture = exec.submit( + final Future> compactionFuture = exec.submit( () -> 
runTask(compactionTask) ); - final Future>> indexFuture = exec.submit( + final Future> indexFuture = exec.submit( () -> runTask(indexTask) ); Assert.assertTrue(indexFuture.get().lhs.isSuccess()); - List segments = indexFuture.get().rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = indexFuture.get().rhs; + verifySchema(dataSegmentsWithSchemas); + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(6, segments.size()); for (int i = 0; i < 6; i++) { @@ -603,7 +616,9 @@ public void testRunIndexAndCompactAtTheSameTimeForDifferentInterval() throws Exc Assert.assertTrue(compactionFuture.get().lhs.isSuccess()); - segments = compactionFuture.get().rhs; + dataSegmentsWithSchemas = compactionFuture.get().rhs; + verifySchema(dataSegmentsWithSchemas); + segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -636,7 +651,8 @@ public void testRunIndexAndCompactAtTheSameTimeForDifferentInterval() throws Exc @Test public void testWithSegmentGranularity() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -650,11 +666,12 @@ public void testWithSegmentGranularity() throws Exception .segmentGranularity(Granularities.DAY) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -676,10 +693,12 @@ public void testWithSegmentGranularity() throws Exception .build(); resultPair = runTask(compactionTask2); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - segments = resultPair.rhs; + dataSegmentsWithSchemas = resultPair.rhs; + segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -702,7 +721,8 @@ public void testWithSegmentGranularity() throws Exception @Test public void testWithSegmentGranularityMisalignedInterval() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -736,7 +756,8 @@ public void testWithSegmentGranularityMisalignedInterval() throws Exception @Test public void testWithSegmentGranularityMisalignedIntervalAllowed() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -756,11 +777,12 @@ public void testWithSegmentGranularityMisalignedIntervalAllowed() throws Excepti .segmentGranularity(Granularities.WEEK) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -779,7 +801,8 @@ public void testWithSegmentGranularityMisalignedIntervalAllowed() throws Excepti @Test public void testCompactionWithFilterInTransformSpec() throws Exception { 
- runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -794,11 +817,12 @@ public void testCompactionWithFilterInTransformSpec() throws Exception .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null))) .build(); - Pair> resultPair = runTask(compactionTask); - + Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -837,7 +861,8 @@ public void testCompactionWithFilterInTransformSpec() throws Exception @Test public void testCompactionWithNewMetricInMetricsSpec() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -855,11 +880,12 @@ public void testCompactionWithNewMetricInMetricsSpec() throws Exception }) .build(); - Pair> resultPair = runTask(compactionTask); - + Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -901,7 +927,8 @@ public void testCompactionWithNewMetricInMetricsSpec() throws Exception @Test public void testWithGranularitySpecNonNullSegmentGranularityAndNullQueryGranularity() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -915,11 +942,12 @@ public void testWithGranularitySpecNonNullSegmentGranularityAndNullQueryGranular .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null)) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -941,10 +969,11 @@ public void testWithGranularitySpecNonNullSegmentGranularityAndNullQueryGranular .build(); resultPair = runTask(compactionTask2); - + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - segments = resultPair.rhs; + dataSegmentsWithSchemas = resultPair.rhs; + segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); for (int i = 0; i < 3; i++) { @@ -967,7 +996,8 @@ public void testWithGranularitySpecNonNullSegmentGranularityAndNullQueryGranular @Test public void testWithGranularitySpecNonNullQueryGranularityAndNullSegmentGranularity() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -981,11 +1011,12 @@ public void testWithGranularitySpecNonNullQueryGranularityAndNullSegmentGranular .granularitySpec(new ClientCompactionTaskGranularitySpec(null, Granularities.SECOND, null)) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair 
resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); @@ -1019,7 +1050,8 @@ public void testWithGranularitySpecNonNullQueryGranularityAndNullSegmentGranular @Test public void testWithGranularitySpecNonNullQueryGranularityAndNonNullSegmentGranularity() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1033,11 +1065,12 @@ public void testWithGranularitySpecNonNullQueryGranularityAndNonNullSegmentGranu .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.DAY, null)) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -1056,7 +1089,8 @@ public void testWithGranularitySpecNonNullQueryGranularityAndNonNullSegmentGranu @Test public void testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1069,11 +1103,12 @@ public void testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity .granularitySpec(new ClientCompactionTaskGranularitySpec(null, null, null)) .build(); - Pair> resultPair = runTask(compactionTask1); - + Pair resultPair = runTask(compactionTask1); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - List segments = resultPair.rhs; + DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); @@ -1107,7 +1142,8 @@ public void testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity @Test public void testCompactThenAppend() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1119,13 +1155,17 @@ public void testCompactThenAppend() throws Exception .interval(Intervals.of("2014-01-01/2014-01-02")) .build(); - final Pair> compactionResult = runTask(compactionTask); + final Pair compactionResult = runTask(compactionTask); + verifySchema(compactionResult.rhs); Assert.assertTrue(compactionResult.lhs.isSuccess()); - final Set expectedSegments = new HashSet<>(compactionResult.rhs); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = compactionResult.rhs; + final Set expectedSegments = dataSegmentsWithSchemas.getSegments(); - final Pair> appendResult = runAppendTask(); + final Pair appendResult = runAppendTask(); + verifySchema(appendResult.rhs); Assert.assertTrue(appendResult.lhs.isSuccess()); - expectedSegments.addAll(appendResult.rhs); + DataSegmentsWithSchemas dataSegmentsWithSchemasAppendResult = appendResult.rhs; + expectedSegments.addAll(dataSegmentsWithSchemasAppendResult.getSegments()); final Set usedSegments = new HashSet<>( 
getStorageCoordinator().retrieveUsedSegmentsForIntervals( @@ -1164,8 +1204,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva // there are 10 rows total in data set // maxRowsPerSegment is set to 2 inside the runIndexTask methods - Pair> result = runIndexTask(); - Assert.assertEquals(6, result.rhs.size()); + Pair result = runIndexTask(); + Assert.assertEquals(6, result.rhs.getSegments().size()); final Builder builder = new Builder( DATA_SOURCE, @@ -1187,7 +1227,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva // Set dropExisting to true .inputSpec(new CompactionIntervalSpec(compactionPartialInterval, null), true) .build(); - final Pair> partialCompactionResult = runTask(partialCompactionTask); + final Pair partialCompactionResult = runTask(partialCompactionTask); + verifySchema(partialCompactionResult.rhs); Assert.assertTrue(partialCompactionResult.lhs.isSuccess()); // Segments that did not belong in the compaction interval (hours 00 and 02) are expected unchanged @@ -1208,7 +1249,7 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva Segments.ONLY_VISIBLE ) ); - expectedSegments.addAll(partialCompactionResult.rhs); + expectedSegments.addAll(partialCompactionResult.rhs.getSegments()); Assert.assertEquals(64, expectedSegments.size()); // New segments that were compacted are expected. However, old segments of the compacted interval should be @@ -1250,7 +1291,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva .build(); // **** FULL COMPACTION **** - final Pair> fullCompactionResult = runTask(fullCompactionTask); + final Pair fullCompactionResult = runTask(fullCompactionTask); + verifySchema(fullCompactionResult.rhs); Assert.assertTrue(fullCompactionResult.lhs.isSuccess()); @@ -1318,8 +1360,8 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti // there are 10 rows total in data set // maxRowsPerSegment is set to 2 inside the runIndexTask methods - Pair> result = runIndexTask(); - Assert.assertEquals(6, result.rhs.size()); + Pair result = runIndexTask(); + Assert.assertEquals(6, result.rhs.getSegments().size()); final Builder builder = new Builder( DATA_SOURCE, @@ -1343,7 +1385,8 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti // Set dropExisting to true .inputSpec(new CompactionIntervalSpec(compactionPartialInterval, null), true) .build(); - final Pair> partialCompactionResult = runTask(partialCompactionTask); + final Pair partialCompactionResult = runTask(partialCompactionTask); + verifySchema(partialCompactionResult.rhs); Assert.assertTrue(partialCompactionResult.lhs.isSuccess()); // Segments that did not belong in the compaction interval (hours 00 and 02) are expected unchanged @@ -1364,7 +1407,7 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti Segments.ONLY_VISIBLE ) ); - expectedSegments.addAll(partialCompactionResult.rhs); + expectedSegments.addAll(partialCompactionResult.rhs.getSegments()); Assert.assertEquals(64, expectedSegments.size()); // New segments that were compacted are expected. 
However, old segments of the compacted interval should be @@ -1398,13 +1441,14 @@ public void testCompactDatasourceOverIntervalWithOnlyTombstones() throws Excepti .build(); // **** Compaction over tombstones **** - final Pair> resultOverOnlyTombstones = runTask(compactionTaskOverOnlyTombstones); + final Pair resultOverOnlyTombstones = runTask(compactionTaskOverOnlyTombstones); + verifySchema(resultOverOnlyTombstones.rhs); Assert.assertTrue(resultOverOnlyTombstones.lhs.isSuccess()); // compaction should not fail but since it is over the same granularity it should leave // the tombstones unchanged - Assert.assertEquals(59, resultOverOnlyTombstones.rhs.size()); - resultOverOnlyTombstones.rhs.forEach(t -> Assert.assertTrue(t.isTombstone())); + Assert.assertEquals(59, resultOverOnlyTombstones.rhs.getSegments().size()); + resultOverOnlyTombstones.rhs.getSegments().forEach(t -> Assert.assertTrue(t.isTombstone())); } @Test @@ -1416,7 +1460,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva return; } - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Set expectedSegments = new HashSet<>( getStorageCoordinator().retrieveUsedSegmentsForIntervals( @@ -1439,10 +1484,11 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva .inputSpec(new CompactionIntervalSpec(partialInterval, null), false) .build(); - final Pair> partialCompactionResult = runTask(partialCompactionTask); + final Pair partialCompactionResult = runTask(partialCompactionTask); + verifySchema(partialCompactionResult.rhs); Assert.assertTrue(partialCompactionResult.lhs.isSuccess()); // All segments in the previous expectedSegments should still appear as they have larger segment granularity. 
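For reference, the pattern this diff applies throughout the compaction and index test methods can be sketched as follows. This is an illustrative summary rather than part of the patch; the generic type parameters are inferred from the surrounding hunks, and runTask, verifySchema, DataSegmentsWithSchemas and the Pair lhs/rhs fields are the test helpers and types already shown in this file.

    // After the change, the task helpers return published segments together with their schemas.
    Pair<TaskStatus, DataSegmentsWithSchemas> resultPair = runTask(compactionTask);

    // Every published non-tombstone segment is expected to have an entry in the schema mapping.
    verifySchema(resultPair.rhs);
    Assert.assertTrue(resultPair.lhs.isSuccess());

    // Existing segment-level assertions keep working by unwrapping the segment set.
    List<DataSegment> segments = new ArrayList<>(resultPair.rhs.getSegments());
    Assert.assertEquals(1, segments.size());
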
- expectedSegments.addAll(partialCompactionResult.rhs); + expectedSegments.addAll(partialCompactionResult.rhs.getSegments()); final Set segmentsAfterPartialCompaction = new HashSet<>( getStorageCoordinator().retrieveUsedSegmentsForIntervals( @@ -1460,7 +1506,8 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva .inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), false) .build(); - final Pair> fullCompactionResult = runTask(fullCompactionTask); + final Pair fullCompactionResult = runTask(fullCompactionTask); + verifySchema(fullCompactionResult.rhs); Assert.assertTrue(fullCompactionResult.lhs.isSuccess()); final List segmentsAfterFullCompaction = new ArrayList<>( @@ -1486,12 +1533,13 @@ public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullInterva @Test public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); // make sure that indexTask becomes ready first, then compactionTask becomes ready, then indexTask runs final CountDownLatch compactionTaskReadyLatch = new CountDownLatch(1); final CountDownLatch indexTaskStartLatch = new CountDownLatch(1); - final Future>> indexFuture = exec.submit( + final Future> indexFuture = exec.submit( () -> runIndexTask(compactionTaskReadyLatch, indexTaskStartLatch, false) ); @@ -1505,7 +1553,7 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception .interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00")) .build(); - final Future>> compactionFuture = exec.submit( + final Future> compactionFuture = exec.submit( () -> { compactionTaskReadyLatch.await(); return runTask(compactionTask, indexTaskStartLatch, null); @@ -1513,8 +1561,9 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception ); Assert.assertTrue(indexFuture.get().lhs.isSuccess()); + verifySchema(indexFuture.get().rhs); - List segments = indexFuture.get().rhs; + List segments = new ArrayList<>(indexFuture.get().rhs.getSegments()); Assert.assertEquals(6, segments.size()); for (int i = 0; i < 6; i++) { @@ -1538,14 +1587,16 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception } } - final Pair> compactionResult = compactionFuture.get(); + final Pair compactionResult = compactionFuture.get(); + verifySchema(compactionResult.rhs); Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode()); } @Test public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exception { - runIndexTask(); + Pair indexTaskResult = runIndexTask(); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1560,9 +1611,9 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exceptio // make sure that compactionTask becomes ready first, then the indexTask becomes ready, then compactionTask runs final CountDownLatch indexTaskReadyLatch = new CountDownLatch(1); final CountDownLatch compactionTaskStartLatch = new CountDownLatch(1); - final Future>> compactionFuture = exec.submit( + final Future> compactionFuture = exec.submit( () -> { - final Pair> pair = runTask( + final Pair pair = runTask( compactionTask, indexTaskReadyLatch, compactionTaskStartLatch @@ -1571,7 +1622,7 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exceptio } ); - final Future>> indexFuture = exec.submit( + final Future> indexFuture = exec.submit( () -> { 
indexTaskReadyLatch.await(); return runIndexTask(compactionTaskStartLatch, null, false); @@ -1579,8 +1630,9 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exceptio ); Assert.assertTrue(indexFuture.get().lhs.isSuccess()); + verifySchema(indexFuture.get().rhs); - List segments = indexFuture.get().rhs; + List segments = new ArrayList<>(indexFuture.get().rhs.getSegments()); Assert.assertEquals(6, segments.size()); for (int i = 0; i < 6; i++) { @@ -1604,7 +1656,8 @@ public void testRunIndexAndCompactForSameSegmentAtTheSameTime2() throws Exceptio } } - final Pair> compactionResult = compactionFuture.get(); + final Pair compactionResult = compactionFuture.get(); + verifySchema(compactionResult.rhs); Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode()); } @@ -1633,7 +1686,8 @@ public void testRunWithSpatialDimensions() throws Exception false, 0 ); - runIndexTask(null, null, spatialSpec, spatialrows, false); + Pair indexTaskResult = runIndexTask(null, null, spatialSpec, spatialrows, false); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1645,11 +1699,12 @@ public void testRunWithSpatialDimensions() throws Exception .interval(Intervals.of("2014-01-01/2014-01-02")) .build(); - final Pair> resultPair = runTask(compactionTask); + final Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - final List segments = resultPair.rhs; + final List segments = new ArrayList<>(resultPair.rhs.getSegments()); Assert.assertEquals(2, segments.size()); for (int i = 0; i < 2; i++) { @@ -1763,7 +1818,8 @@ public void testRunWithAutoCastDimensions() throws Exception false, 0 ); - runIndexTask(null, null, spec, rows, false); + Pair indexTaskResult = runIndexTask(null, null, spec, rows, false); + verifySchema(indexTaskResult.rhs); final Builder builder = new Builder( DATA_SOURCE, @@ -1775,11 +1831,13 @@ public void testRunWithAutoCastDimensions() throws Exception .interval(Intervals.of("2014-01-01/2014-01-02")) .build(); - final Pair> resultPair = runTask(compactionTask); + final Pair resultPair = runTask(compactionTask); + verifySchema(resultPair.rhs); Assert.assertTrue(resultPair.lhs.isSuccess()); - final List segments = resultPair.rhs; + final DataSegmentsWithSchemas dataSegmentsWithSchemas = resultPair.rhs; + final List segments = new ArrayList<>(dataSegmentsWithSchemas.getSegments()); Assert.assertEquals(2, segments.size()); for (int i = 0; i < 2; i++) { @@ -1874,17 +1932,17 @@ public void testRunWithAutoCastDimensions() throws Exception Assert.assertEquals(rows, rowsFromSegment); } - private Pair> runIndexTask() throws Exception + private Pair runIndexTask() throws Exception { return runIndexTask(null, null, false); } - private Pair> runAppendTask() throws Exception + private Pair runAppendTask() throws Exception { return runIndexTask(null, null, true); } - private Pair> runIndexTask( + private Pair runIndexTask( @Nullable CountDownLatch readyLatchToCountDown, @Nullable CountDownLatch latchToAwaitBeforeRun, boolean appendToExisting @@ -1922,7 +1980,7 @@ private Pair> runIndexTask( return runTask(indexTask, readyLatchToCountDown, latchToAwaitBeforeRun); } - private Pair> runIndexTask( + private Pair runIndexTask( @Nullable CountDownLatch readyLatchToCountDown, @Nullable CountDownLatch latchToAwaitBeforeRun, ParseSpec parseSpec, @@ -1962,12 +2020,12 @@ private Pair> runIndexTask( return runTask(indexTask, readyLatchToCountDown, latchToAwaitBeforeRun); } - 
private Pair> runTask(Task task) throws Exception + private Pair runTask(Task task) throws Exception { return runTask(task, null, null); } - private Pair> runTask( + private Pair runTask( Task task, @Nullable CountDownLatch readyLatchToCountDown, @Nullable CountDownLatch latchToAwaitBeforeRun @@ -1993,11 +2051,12 @@ private Pair> runTask( } TaskStatus status = task.run(box); shutdownTask(task); - final List segments = new ArrayList<>( - ((TestLocalTaskActionClient) box.getTaskActionClient()).getPublishedSegments() + return Pair.of( + status, + new DataSegmentsWithSchemas( + new TreeSet<>(((TestLocalTaskActionClient) box.getTaskActionClient()).getPublishedSegments()), + ((TestLocalTaskActionClient) box.getTaskActionClient()).getSegmentSchemas()) ); - Collections.sort(segments); - return Pair.of(status, segments); } else { throw new ISE("task[%s] is not ready", task.getId()); } @@ -2020,6 +2079,8 @@ public List getLocations() final TaskConfig config = new TaskConfigBuilder() .setBatchProcessingMode(TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name()) .build(); + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = CentralizedDatasourceSchemaConfig.create(); + centralizedDatasourceSchemaConfig.setEnabled(true); return new TaskToolbox.Builder() .config(config) .taskActionClient(createActionClient(task)) @@ -2040,6 +2101,7 @@ public List getLocations() .coordinatorClient(coordinatorClient) .taskLogPusher(null) .attemptId("1") + .centralizedTableSchemaConfig(centralizedDatasourceSchemaConfig) .build(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index c90c08349c4d..ee49a0dc0b6c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -127,6 +127,7 @@ import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.NoopSegmentCacheManager; import org.apache.druid.segment.loading.SegmentCacheManager; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; @@ -1981,6 +1982,7 @@ public void cleanup(DataSegment segment) .taskLogPusher(null) .attemptId("1") .emitter(emitter) + .centralizedTableSchemaConfig(CentralizedDatasourceSchemaConfig.create()) .build(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index c83edca79b0e..336b4d499bc8 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -68,11 +68,15 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.Cursor; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndexStorageAdapter; +import 
org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.data.CompressionStrategy; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; @@ -129,6 +133,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import java.util.function.Function; @RunWith(Parameterized.class) @@ -295,10 +300,24 @@ public void testIngestNullOnlyColumns() throws Exception Assert.assertFalse(indexTask.supportsQueries()); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); + Assert.assertEquals(1, segments.size()); Assert.assertEquals(ImmutableList.of("ts", "dim", "valDim"), segments.get(0).getDimensions()); Assert.assertEquals(ImmutableList.of("valMet"), segments.get(0).getMetrics()); + + verifySchemaAndAggFactory( + segmentWithSchemas, + RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("ts", ColumnType.STRING) + .add("dim", ColumnType.STRING) + .add("valDim", ColumnType.LONG) + .add("valMet", ColumnType.LONG) + .build(), + Collections.singletonMap("valMet", new LongSumAggregatorFactory("valMet", "valMet")) + ); } @Test @@ -344,11 +363,23 @@ public void testIngestNullOnlyColumns_storeEmptyColumnsOff_shouldNotStoreEmptyCo Assert.assertFalse(indexTask.supportsQueries()); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); // only empty string dimensions are ignored currently Assert.assertEquals(ImmutableList.of("ts", "valDim"), segments.get(0).getDimensions()); Assert.assertEquals(ImmutableList.of("valMet"), segments.get(0).getMetrics()); + + verifySchemaAndAggFactory( + segmentWithSchemas, + RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("ts", ColumnType.STRING) + .add("valDim", ColumnType.LONG) + .add("valMet", ColumnType.LONG) + .build(), + Collections.singletonMap("valMet", new LongSumAggregatorFactory("valMet", "valMet")) + ); } @Test @@ -372,8 +403,8 @@ public void testDeterminePartitions() throws Exception Assert.assertFalse(indexTask.supportsQueries()); - final List segments = runSuccessfulTask(indexTask); - + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(2, segments.size()); Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource()); @@ -395,6 +426,34 @@ public void testDeterminePartitions() throws Exception HashPartitionFunction.MURMUR3_32_ABS, ((HashBasedNumberedShardSpec) segments.get(1).getShardSpec()).getPartitionFunction() ); + + Assert.assertEquals(2, segmentWithSchemas.getSegmentSchemaMapping().getSegmentIdToMetadataMap().size()); + Assert.assertEquals(1, segmentWithSchemas.getSegmentSchemaMapping().getSchemaFingerprintToPayloadMap().size()); + Assert.assertEquals( + RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("ts", ColumnType.STRING) + .add("dim", ColumnType.STRING) + .add("val", ColumnType.LONG) + .build(), + segmentWithSchemas.getSegmentSchemaMapping() + 
.getSchemaFingerprintToPayloadMap() + .values() + .stream() + .findAny() + .get() + .getRowSignature() + ); + Assert.assertEquals( + Collections.singletonMap("val", new LongSumAggregatorFactory("val", "val")), + segmentWithSchemas.getSegmentSchemaMapping() + .getSchemaFingerprintToPayloadMap() + .values() + .stream() + .findAny() + .get() + .getAggregatorFactories() + ); } @Test @@ -465,7 +524,8 @@ public void testTransformSpec() throws Exception Assert.assertEquals(indexTask.getId(), indexTask.getGroupId()); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); DataSegment segment = segments.get(0); @@ -523,6 +583,23 @@ public void testTransformSpec() throws Exception Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval()); Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass()); Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum()); + + verifySchemaAndAggFactory( + segmentWithSchemas, + RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("ts", ColumnType.STRING) + .add("dim", ColumnType.STRING) + .add("dim_array", ColumnType.STRING) + .add("dim_num_array", ColumnType.STRING) + .add("dimt", ColumnType.STRING) + .add("dimtarray1", ColumnType.STRING) + .add("dimtarray2", ColumnType.STRING) + .add("dimtnum_array", ColumnType.STRING) + .add("val", ColumnType.LONG) + .build(), + Collections.singletonMap("val", new LongSumAggregatorFactory("val", "val")) + ); } @Test @@ -547,7 +624,9 @@ public void testWithArbitraryGranularity() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); + Assert.assertEquals(1, segments.size()); invokeApi(req -> indexTask.getLiveReports(req, null)); @@ -578,7 +657,8 @@ public void testIntervalBucketing() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); } @@ -602,7 +682,8 @@ public void testNumShardsProvided() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -637,7 +718,8 @@ public void testNumShardsAndHashPartitionFunctionProvided() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -670,7 +752,8 @@ public void testNumShardsAndPartitionDimensionsProvided() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(2, segments.size()); @@ -737,7 +820,8 @@ public void testWriteNewSegmentsWithAppendToExistingWithLinearPartitioningSucces 
Assert.assertEquals("index_append_test", indexTask.getGroupId()); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(2, taskRunner.getTaskActionClient().getActionCount(SegmentAllocateAction.class)); Assert.assertEquals(2, segments.size()); @@ -776,7 +860,8 @@ public void testIntervalNotSpecified() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); @@ -868,7 +953,8 @@ public void testCSVFileWithHeader() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -918,7 +1004,8 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); @@ -956,7 +1043,8 @@ public void testWithSmallMaxTotalRows() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(6, segments.size()); @@ -992,7 +1080,8 @@ public void testPerfectRollup() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); @@ -1027,7 +1116,8 @@ public void testBestEffortRollup() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(5, segments.size()); @@ -1311,7 +1401,8 @@ public void testIgnoreParseException() throws Exception IndexTask indexTask = createIndexTask(parseExceptionIgnoreSpec, null); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(Collections.singletonList("d"), segments.get(0).getDimensions()); Assert.assertEquals(Collections.singletonList("val"), segments.get(0).getMetrics()); @@ -1868,7 +1959,9 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); + // the order of result segments can be changed because hash shardSpec is used. // the below loop is to make this test deterministic. 
Assert.assertEquals(2, segments.size()); @@ -1980,7 +2073,8 @@ public void testOverwriteWithSameSegmentGranularity() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(5, segments.size()); @@ -2037,7 +2131,8 @@ public void testOverwriteWithDifferentSegmentGranularity() throws Exception null ); - final List segments = runSuccessfulTask(indexTask); + final DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + final List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(5, segments.size()); @@ -2100,7 +2195,8 @@ public void testOldSegmentNotReplacedWhenDropFlagFalse() throws Exception ); // Ingest data with YEAR segment granularity - List segments = runSuccessfulTask(indexTask); + DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); Set usedSegmentsBeforeOverwrite = getAllUsedSegments(); @@ -2124,7 +2220,8 @@ public void testOldSegmentNotReplacedWhenDropFlagFalse() throws Exception ); // Ingest data with overwrite and MINUTE segment granularity - segments = runSuccessfulTask(indexTask); + segmentWithSchemas = runSuccessfulTask(indexTask); + segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(3, segments.size()); Set usedSegmentsBeforeAfterOverwrite = getAllUsedSegments(); @@ -2171,7 +2268,8 @@ public void testOldSegmentNotCoveredByTombstonesWhenDropFlagTrueSinceIngestionIn ); // Ingest data with DAY segment granularity - List segments = runSuccessfulTask(indexTask); + DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); Set usedSegmentsBeforeOverwrite = getAllUsedSegments(); @@ -2195,7 +2293,8 @@ public void testOldSegmentNotCoveredByTombstonesWhenDropFlagTrueSinceIngestionIn ); // Ingest data with overwrite and HOUR segment granularity - segments = runSuccessfulTask(indexTask); + segmentWithSchemas = runSuccessfulTask(indexTask); + segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); Set usedSegmentsBeforeAfterOverwrite = getAllUsedSegments(); @@ -2250,7 +2349,8 @@ public void testOldSegmentCoveredByTombstonesWhenDropFlagTrueSinceIngestionInter ); // Ingest data with DAY segment granularity - List segments = runSuccessfulTask(indexTask); + DataSegmentsWithSchemas segmentWithSchemas = runSuccessfulTask(indexTask); + List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); Set usedSegmentsBeforeOverwrite = getAllUsedSegments(); @@ -2274,7 +2374,8 @@ public void testOldSegmentCoveredByTombstonesWhenDropFlagTrueSinceIngestionInter ); // Ingest data with overwrite and HOUR segment granularity - segments = runSuccessfulTask(indexTask); + segmentWithSchemas = runSuccessfulTask(indexTask); + segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(24, segments.size()); Set usedSegmentsBeforeAfterOverwrite = getAllUsedSegments(); @@ -2314,7 +2415,8 @@ public void verifyPublishingOnlyTombstones() throws Exception ); // Ingest data with DAY segment granularity - List segments = runSuccessfulTask(indexTask); + DataSegmentsWithSchemas 
segmentWithSchemas = runSuccessfulTask(indexTask); + List segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); Set usedSegmentsBeforeOverwrite = getAllUsedSegments(); @@ -2347,7 +2449,8 @@ public void verifyPublishingOnlyTombstones() throws Exception ); // Ingest data with overwrite and same segment granularity - segments = runSuccessfulTask(indexTask); + segmentWithSchemas = runSuccessfulTask(indexTask); + segments = new ArrayList<>(segmentWithSchemas.getSegments()); Assert.assertEquals(1, segments.size()); // one tombstone Assert.assertTrue(segments.get(0).isTombstone()); @@ -2445,19 +2548,21 @@ public static void checkTaskStatusErrorMsgForParseExceptionsExceeded(TaskStatus ); } - private List runSuccessfulTask(IndexTask task) throws Exception + private DataSegmentsWithSchemas runSuccessfulTask(IndexTask task) throws Exception { - Pair> pair = runTask(task); + Pair pair = runTask(task); Assert.assertEquals(pair.lhs.toString(), TaskState.SUCCESS, pair.lhs.getStatusCode()); return pair.rhs; } - private Pair> runTask(IndexTask task) throws Exception + private Pair runTask(IndexTask task) throws Exception { task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); final TaskStatus status = taskRunner.run(task).get(); - final List segments = taskRunner.getPublishedSegments(); - return Pair.of(status, segments); + + final Set segments = new TreeSet<>(taskRunner.getPublishedSegments()); + final SegmentSchemaMapping segmentSchemaMapping = taskRunner.getSegmentSchemas(); + return Pair.of(status, new DataSegmentsWithSchemas(segments, segmentSchemaMapping)); } private static IndexTuningConfig createTuningConfigWithMaxRowsPerSegment( @@ -2732,4 +2837,34 @@ public void testEqualsAndHashCode() .usingGetClass() .verify(); } + + private void verifySchemaAndAggFactory( + DataSegmentsWithSchemas segmentWithSchemas, + RowSignature actualRowSignature, + Map aggregatorFactoryMap + ) + { + Assert.assertEquals(segmentWithSchemas.getSegments().size(), segmentWithSchemas.getSegmentSchemaMapping().getSegmentIdToMetadataMap().size()); + Assert.assertEquals(1, segmentWithSchemas.getSegmentSchemaMapping().getSchemaFingerprintToPayloadMap().size()); + Assert.assertEquals( + actualRowSignature, + segmentWithSchemas.getSegmentSchemaMapping() + .getSchemaFingerprintToPayloadMap() + .values() + .stream() + .findAny() + .get() + .getRowSignature() + ); + Assert.assertEquals( + aggregatorFactoryMap, + segmentWithSchemas.getSegmentSchemaMapping() + .getSchemaFingerprintToPayloadMap() + .values() + .stream() + .findAny() + .get() + .getAggregatorFactories() + ); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 44f4ee1ad932..6b093cd745f7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -72,14 +72,19 @@ import org.apache.druid.metadata.SegmentsMetadataManagerConfig; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9Factory; +import org.apache.druid.segment.SegmentSchemaMapping; import 
org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.join.NoopJoinableFactory; import org.apache.druid.segment.loading.LocalDataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.NoopDataSegmentKiller; import org.apache.druid.segment.loading.SegmentCacheManager; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaCache; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; import org.apache.druid.server.DruidNode; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -87,6 +92,7 @@ import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.rules.TemporaryFolder; @@ -109,7 +115,8 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest public TemporaryFolder temporaryFolder = new TemporaryFolder(); @Rule - public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); + public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = + new TestDerbyConnector.DerbyConnectorRule(CentralizedDatasourceSchemaConfig.create(true)); protected final TestUtils testUtils = new TestUtils(); private final ObjectMapper objectMapper = testUtils.getTestObjectMapper(); @@ -119,6 +126,8 @@ public abstract class IngestionTestBase extends InitializedNullHandlingTest private SegmentsMetadataManager segmentsMetadataManager; private TaskLockbox lockbox; private File baseDir; + private SegmentSchemaManager segmentSchemaManager; + private SegmentSchemaCache segmentSchemaCache; private SupervisorManager supervisorManager; protected File reportsFile; @@ -131,18 +140,30 @@ public void setUpIngestionTestBase() throws IOException final SQLMetadataConnector connector = derbyConnectorRule.getConnector(); connector.createTaskTables(); + connector.createSegmentSchemasTable(); connector.createSegmentTable(); taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null)); + segmentSchemaManager = new SegmentSchemaManager( + derbyConnectorRule.metadataTablesConfigSupplier().get(), + objectMapper, + derbyConnectorRule.getConnector() + ); + storageCoordinator = new IndexerSQLMetadataStorageCoordinator( objectMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnectorRule.getConnector() + derbyConnectorRule.getConnector(), + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); + segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, SegmentsMetadataManagerConfig::new, derbyConnectorRule.metadataTablesConfigSupplier(), - derbyConnectorRule.getConnector() + derbyConnectorRule.getConnector(), + segmentSchemaCache, + CentralizedDatasourceSchemaConfig.create() ); lockbox = new TaskLockbox(taskStorage, storageCoordinator); segmentCacheManagerFactory = new SegmentCacheManagerFactory(getObjectMapper()); @@ -236,6 +257,7 @@ public TaskActionToolbox createTaskActionToolbox() public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorManager supervisorManager) { + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = 
CentralizedDatasourceSchemaConfig.create(true); this.supervisorManager = supervisorManager; return new TaskToolbox.Builder() .config(config) @@ -256,6 +278,7 @@ public TaskToolbox createTaskToolbox(TaskConfig config, Task task, SupervisorMan .appenderatorsManager(new TestAppenderatorsManager()) .taskLogPusher(null) .attemptId("1") + .centralizedTableSchemaConfig(centralizedDatasourceSchemaConfig) .build(); } @@ -322,6 +345,7 @@ public TaskActionClient create(Task task) public class TestLocalTaskActionClient extends CountingLocalTaskActionClientForTest { private final Set publishedSegments = new HashSet<>(); + private SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); private TestLocalTaskActionClient(Task task) { @@ -334,8 +358,10 @@ public RetType submit(TaskAction taskAction) final RetType result = super.submit(taskAction); if (taskAction instanceof SegmentTransactionalInsertAction) { publishedSegments.addAll(((SegmentTransactionalInsertAction) taskAction).getSegments()); + segmentSchemaMapping.merge(((SegmentTransactionalInsertAction) taskAction).getSegmentSchemaMapping()); } else if (taskAction instanceof SegmentInsertAction) { publishedSegments.addAll(((SegmentInsertAction) taskAction).getSegments()); + segmentSchemaMapping.merge(((SegmentInsertAction) taskAction).getSegmentSchemaMapping()); } return result; } @@ -344,6 +370,11 @@ public Set getPublishedSegments() { return publishedSegments; } + + public SegmentSchemaMapping getSegmentSchemas() + { + return segmentSchemaMapping; + } } public class TestTaskRunner implements TaskRunner @@ -392,6 +423,11 @@ public List getPublishedSegments() return segments; } + public SegmentSchemaMapping getSegmentSchemas() + { + return taskActionClient.getSegmentSchemas(); + } + @Override public ListenableFuture run(Task task) { @@ -406,6 +442,8 @@ public ListenableFuture run(Task task) final TaskConfig config = new TaskConfigBuilder() .setBatchProcessingMode(TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name()) .build(); + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); + centralizedDatasourceSchemaConfig.setEnabled(true); final TaskToolbox box = new TaskToolbox.Builder() .config(config) .taskExecutorNode(new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false)) @@ -425,6 +463,7 @@ public ListenableFuture run(Task task) .appenderatorsManager(new TestAppenderatorsManager()) .taskLogPusher(null) .attemptId("1") + .centralizedTableSchemaConfig(centralizedDatasourceSchemaConfig) .build(); @@ -509,6 +548,21 @@ public Map getBlacklistedTaskSlotCount() } } + public void verifySchema(DataSegmentsWithSchemas dataSegmentsWithSchemas) + { + int nonTombstoneSegments = 0; + for (DataSegment segment : dataSegmentsWithSchemas.getSegments()) { + if (segment.isTombstone()) { + continue; + } + nonTombstoneSegments++; + Assert.assertTrue(dataSegmentsWithSchemas.getSegmentSchemaMapping() + .getSegmentIdToMetadataMap() + .containsKey(segment.getId().toString())); + } + Assert.assertEquals(nonTombstoneSegments, dataSegmentsWithSchemas.getSegmentSchemaMapping().getSegmentIdToMetadataMap().size()); + } + public TaskReport.ReportMap getReports() throws IOException { return objectMapper.readValue(reportsFile, TaskReport.ReportMap.class); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java index 855e9cbc70ce..54b1f35ea255 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/KillUnusedSegmentsTaskTest.java @@ -75,7 +75,7 @@ public void setup() public void testKill() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); Assert.assertTrue( @@ -124,7 +124,7 @@ public void testKill() throws Exception public void testKillWithMarkUnused() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); Assert.assertTrue( @@ -181,7 +181,7 @@ public void testKillSegmentsWithVersions() throws Exception final Set segments = ImmutableSet.of(segment1V1, segment2V1, segment3V1, segment4V2, segment5V3); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -229,7 +229,7 @@ public void testKillSegmentsWithEmptyVersions() throws Exception final Set segments = ImmutableSet.of(segment1V1, segment2V1, segment3V1, segment4V2, segment5V3); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -277,7 +277,7 @@ public void testKillSegmentsWithVersionsAndLimit() throws Exception final Set segments = ImmutableSet.of(segment1V1, segment2V1, segment3V1, segment4V2, segment5V3); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -326,7 +326,7 @@ public void testKillWithNonExistentVersion() throws Exception final Set segments = ImmutableSet.of(segment1V1, segment2V1, segment3V1, segment4V2, segment5V3); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( segments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -380,7 +380,7 @@ public void testKillUnusedSegmentsWithUsedLoadSpec() throws Exception final Set segments = ImmutableSet.of(segment1V1, segment2V2, segment3V3); final Set unusedSegments = ImmutableSet.of(segment1V1, segment2V2); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( unusedSegments.size(), getSegmentsMetadataManager().markSegmentsAsUnused( @@ -427,7 +427,7 @@ public void 
testGetInputSourceResources() public void testKillBatchSizeOneAndLimit4() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); Assert.assertEquals( @@ -474,7 +474,7 @@ public void testKillBatchSizeOneAndLimit4() throws Exception public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); @@ -551,7 +551,7 @@ public void testKillMultipleUnusedSegmentsWithNullMaxUsedStatusLastUpdatedTime() public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedTime() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); @@ -662,7 +662,7 @@ public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedT public void testKillMultipleUnusedSegmentsWithDifferentMaxUsedStatusLastUpdatedTime2() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); Assert.assertEquals(segments, announced); @@ -760,7 +760,7 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime final DataSegment segment5 = newSegment(Intervals.of("2019-04-01/2019-05-01"), version.minusHours(3).toString()); final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4, segment5); - Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments)); + Assert.assertEquals(segments, getMetadataStorageCoordinator().commitSegments(segments, null)); Assert.assertEquals( 3, @@ -845,7 +845,8 @@ public void testKillMultipleUnusedSegmentsWithVersionAndDifferentLastUpdatedTime public void testKillBatchSizeThree() throws Exception { final Set segments = ImmutableSet.of(segment1, segment2, segment3, segment4); - final Set announced = getMetadataStorageCoordinator().commitSegments(segments); + final Set announced = getMetadataStorageCoordinator().commitSegments(segments, null); + Assert.assertEquals(segments, announced); final KillUnusedSegmentsTask task = new KillUnusedSegmentsTaskBuilder() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TestAppenderatorsManager.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TestAppenderatorsManager.java index f54b0ecd2af2..515b9f350eff 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TestAppenderatorsManager.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/TestAppenderatorsManager.java @@ -112,7 +112,8 @@ public Appenderator createOpenSegmentsOfflineAppenderatorForTask( IndexMerger indexMerger, RowIngestionMeters rowIngestionMeters, ParseExceptionHandler 
parseExceptionHandler, - boolean useMaxMemoryEstimates + boolean useMaxMemoryEstimates, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { return Appenderators.createOpenSegmentsOffline( @@ -126,7 +127,8 @@ public Appenderator createOpenSegmentsOfflineAppenderatorForTask( indexMerger, rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + centralizedDatasourceSchemaConfig ); } @@ -142,7 +144,8 @@ public Appenderator createClosedSegmentsOfflineAppenderatorForTask( IndexMerger indexMerger, RowIngestionMeters rowIngestionMeters, ParseExceptionHandler parseExceptionHandler, - boolean useMaxMemoryEstimates + boolean useMaxMemoryEstimates, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { return Appenderators.createClosedSegmentsOffline( @@ -156,7 +159,8 @@ public Appenderator createClosedSegmentsOfflineAppenderatorForTask( indexMerger, rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + centralizedDatasourceSchemaConfig ); } @@ -172,7 +176,8 @@ public Appenderator createOfflineAppenderatorForTask( IndexMerger indexMerger, RowIngestionMeters rowIngestionMeters, ParseExceptionHandler parseExceptionHandler, - boolean useMaxMemoryEstimates + boolean useMaxMemoryEstimates, + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig ) { return Appenderators.createOffline( @@ -186,7 +191,8 @@ public Appenderator createOfflineAppenderatorForTask( indexMerger, rowIngestionMeters, parseExceptionHandler, - useMaxMemoryEstimates + useMaxMemoryEstimates, + centralizedDatasourceSchemaConfig ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java index dc1a1a31e272..ed07d8d79683 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractMultiPhaseParallelIndexingTest.java @@ -49,6 +49,7 @@ import org.apache.druid.query.scan.ScanQueryRunnerFactory; import org.apache.druid.query.scan.ScanResultValue; import org.apache.druid.query.spec.SpecificSegmentSpec; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentLazyLoadFailCallback; import org.apache.druid.segment.indexing.DataSchema; @@ -67,7 +68,6 @@ import java.io.File; import java.util.Collections; import java.util.List; -import java.util.Set; @SuppressWarnings("SameParameterValue") abstract class AbstractMultiPhaseParallelIndexingTest extends AbstractParallelIndexSupervisorTaskTest @@ -109,7 +109,7 @@ boolean isUseInputFormatApi() return useInputFormatApi; } - Set runTestTask( + DataSegmentsWithSchemas runTestTask( @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec, @Nullable InputFormat inputFormat, @@ -138,7 +138,7 @@ Set runTestTask( ); } - Set runTestTask( + DataSegmentsWithSchemas runTestTask( @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec, @Nullable InputFormat inputFormat, @@ -177,10 +177,10 @@ void runTaskAndVerifyStatus(Task task, TaskState expectedTaskStatus) Assert.assertEquals("Actual task status: " + taskStatus, expectedTaskStatus, taskStatus.getStatusCode()); } - Set 
runTask(Task task, TaskState expectedTaskStatus) + DataSegmentsWithSchemas runTask(Task task, TaskState expectedTaskStatus) { runTaskAndVerifyStatus(task, expectedTaskStatus); - return getIndexingServiceClient().getPublishedSegments(task); + return getIndexingServiceClient().getSegmentAndSchemas(task); } TaskReport.ReportMap runTaskAndGetReports(Task task, TaskState expectedTaskStatus) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java index 7caf64c4900d..29ed44f0ad0b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/AbstractParallelIndexSupervisorTaskTest.java @@ -83,6 +83,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.expression.LookupEnabledTestExprMacroTable; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.incremental.ParseExceptionReport; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; @@ -93,6 +94,7 @@ import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.loading.NoopDataSegmentKiller; import org.apache.druid.segment.loading.StorageLocationConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; @@ -120,7 +122,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -533,13 +534,13 @@ public TaskStatus getStatus(String taskId) } } - public Set getPublishedSegments(String taskId) + public DataSegmentsWithSchemas getPublishedSegments(String taskId) { final TaskContainer taskContainer = tasks.get(taskId); if (taskContainer == null || taskContainer.actionClient == null) { - return Collections.emptySet(); + return new DataSegmentsWithSchemas(CentralizedDatasourceSchemaConfig.SCHEMA_VERSION); } else { - return taskContainer.actionClient.getPublishedSegments(); + return new DataSegmentsWithSchemas(taskContainer.actionClient.getPublishedSegments(), taskContainer.actionClient.getSegmentSchemas()); } } } @@ -664,7 +665,7 @@ public ListenableFuture taskStatus(String taskId) } } - public Set getPublishedSegments(Task task) + public DataSegmentsWithSchemas getSegmentAndSchemas(Task task) { return taskRunner.getPublishedSegments(task.getId()); } @@ -711,6 +712,7 @@ protected TaskToolbox createTaskToolbox(Task task, TaskActionClient actionClient TaskConfig config = new TaskConfigBuilder() .setBatchProcessingMode(TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name()) .build(); + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = CentralizedDatasourceSchemaConfig.create(true); return new TaskToolbox.Builder() .config(config) .taskExecutorNode(new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false)) @@ -747,6 +749,7 @@ 
public File getStorageDirectory() .taskLogPusher(null) .attemptId("1") .emitter(new StubServiceEmitter()) + .centralizedTableSchemaConfig(centralizedDatasourceSchemaConfig) .build(); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java index b45af2af4e4a..0839ed044057 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionAdjustingCorePartitionSizeTest.java @@ -110,7 +110,7 @@ public void testLessPartitionsThanBuckets() throws IOException partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS - ) + ).getSegments() ); Assert.assertEquals(3, segments.size()); segments.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum())); @@ -152,7 +152,7 @@ public void testEqualNumberOfPartitionsToBuckets() throws IOException partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS - ); + ).getSegments(); Assert.assertEquals(5, segments.size()); segments.forEach(segment -> { Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java index b0376a675f63..cb58d6f79dfc 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionMultiPhaseParallelIndexingTest.java @@ -178,7 +178,7 @@ public void testRun() throws Exception inputDir, false, false - ), TaskState.SUCCESS); + ), TaskState.SUCCESS).getSegments(); final Map expectedIntervalToNumSegments = computeExpectedIntervalToNumSegments( maxRowsPerSegment, @@ -200,7 +200,7 @@ public void testRun() throws Exception newInputDirForReplace(), false, true - ), TaskState.SUCCESS); + ), TaskState.SUCCESS).getSegments(); final Map expectedIntervalToNumSegmentsAfterReplace = computeExpectedIntervalToNumSegments( maxRowsPerSegment, @@ -242,7 +242,7 @@ public void testRunWithHashPartitionFunction() throws Exception HashPartitionFunction.MURMUR3_32_ABS ), inputDir, false, false - ), TaskState.SUCCESS); + ), TaskState.SUCCESS).getSegments(); final Map expectedIntervalToNumSegments = computeExpectedIntervalToNumSegments( maxRowsPerSegment, numShards @@ -280,7 +280,7 @@ public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSucce new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), inputDir, false, false ), - TaskState.SUCCESS) + TaskState.SUCCESS).getSegments() ); // Append publishedSegments.addAll( @@ -289,7 +289,7 @@ public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSucce new DynamicPartitionsSpec(5, null), inputDir, true, false ), - TaskState.SUCCESS)); + TaskState.SUCCESS).getSegments()); // And append again publishedSegments.addAll( runTask( @@ -297,7 +297,7 @@ public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSucce new 
DynamicPartitionsSpec(10, null), inputDir, true, false ), - TaskState.SUCCESS) + TaskState.SUCCESS).getSegments() ); final Map> intervalToSegments = new HashMap<>(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest.java index 69bcde0487f3..1448a5723f36 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/MultiPhaseParallelIndexingWithNullColumnTest.java @@ -157,7 +157,7 @@ public void testIngestNullColumn() throws JsonProcessingException Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + Set segments = getIndexingServiceClient().getSegmentAndSchemas(task).getSegments(); Assert.assertFalse(segments.isEmpty()); for (DataSegment segment : segments) { Assert.assertEquals(dimensionSchemas.size(), segment.getDimensions().size()); @@ -214,7 +214,7 @@ public void testIngestNullColumn_useFieldDiscovery_includeAllDimensions_shouldSt Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + Set segments = getIndexingServiceClient().getSegmentAndSchemas(task).getSegments(); Assert.assertFalse(segments.isEmpty()); final List expectedExplicitDimensions = ImmutableList.of("ts", "unknownDim", "dim1"); final Set expectedImplicitDimensions = ImmutableSet.of("dim2", "dim3"); @@ -281,7 +281,7 @@ public void testIngestNullColumn_explicitPathSpec_useFieldDiscovery_includeAllDi Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + Set segments = getIndexingServiceClient().getSegmentAndSchemas(task).getSegments(); Assert.assertFalse(segments.isEmpty()); final List expectedExplicitDimensions = ImmutableList.of("dim1", "k"); final Set expectedImplicitDimensions = ImmutableSet.of("dim2", "dim3"); @@ -339,7 +339,7 @@ public void testIngestNullColumn_storeEmptyColumnsOff_shouldNotStoreEmptyColumns task.addToContext(Tasks.STORE_EMPTY_COLUMNS_KEY, false); Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + Set segments = getIndexingServiceClient().getSegmentAndSchemas(task).getSegments(); Assert.assertFalse(segments.isEmpty()); final List expectedDimensions = DimensionsSpec.getDefaultSchemas( Collections.singletonList("ts") diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java index be75a32c878c..a32aed819e0c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskKillTest.java @@ -39,6 +39,7 @@ import 
org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.hamcrest.CoreMatchers; import org.joda.time.Interval; import org.junit.After; @@ -311,7 +312,8 @@ private TestRunner( supervisorTask.getId(), supervisorTask.getGroupId(), supervisorTask.getIngestionSchema(), - supervisorTask.getContext() + supervisorTask.getContext(), + CentralizedDatasourceSchemaConfig.create() ); this.supervisorTask = supervisorTask; } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java index d2ba0af0873a..772bdafb2b17 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskResourceTest.java @@ -48,6 +48,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.SegmentAllocator; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.security.AuthConfig; @@ -543,7 +544,8 @@ private class TestRunner extends SinglePhaseParallelIndexTaskRunner supervisorTask.getId(), supervisorTask.getGroupId(), supervisorTask.getIngestionSchema(), - supervisorTask.getContext() + supervisorTask.getContext(), + CentralizedDatasourceSchemaConfig.create(true) ); this.supervisorTask = supervisorTask; } @@ -717,7 +719,8 @@ public TaskStatus runTask(final TaskToolbox toolbox) throws Exception getId(), Collections.emptySet(), Collections.singleton(segment), - new TaskReport.ReportMap() + new TaskReport.ReportMap(), + null ) ); return TaskStatus.fromCode(getId(), state); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java index 77841c9acd82..a14d11d6f784 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialCompactionTest.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.common.task.SpecificSegmentsSpec; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; @@ -53,7 +54,6 @@ import java.util.Comparator; import java.util.List; import java.util.Map; -import java.util.Set; public class PartialCompactionTest extends AbstractMultiPhaseParallelIndexingTest { @@ -97,20 +97,25 @@ public void setup() throws IOException @Test public void testPartialCompactHashAndDynamicPartitionedSegments() { - final Map> 
hashPartitionedSegments = SegmentUtils.groupSegmentsByInterval( + DataSegmentsWithSchemas dataSegmentsWithSchemas = runTestTask( new HashedPartitionsSpec(null, 3, null), TaskState.SUCCESS, false - ) - ); - final Map> linearlyPartitionedSegments = SegmentUtils.groupSegmentsByInterval( + ); + verifySchema(dataSegmentsWithSchemas); + final Map> hashPartitionedSegments = + SegmentUtils.groupSegmentsByInterval(dataSegmentsWithSchemas.getSegments()); + + dataSegmentsWithSchemas = runTestTask( new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true - ) - ); + ); + verifySchema(dataSegmentsWithSchemas); + final Map> linearlyPartitionedSegments = + SegmentUtils.groupSegmentsByInterval(dataSegmentsWithSchemas.getSegments()); // Pick half of each partition lists to compact together hashPartitionedSegments.values().forEach( segmentsInInterval -> segmentsInInterval.sort( @@ -137,8 +142,10 @@ public void testPartialCompactHashAndDynamicPartitionedSegments() .inputSpec(SpecificSegmentsSpec.fromSegments(segmentsToCompact)) .tuningConfig(newTuningConfig(new DynamicPartitionsSpec(20, null), 2, false)) .build(); + dataSegmentsWithSchemas = runTask(compactionTask, TaskState.SUCCESS); + verifySchema(dataSegmentsWithSchemas); final Map> compactedSegments = SegmentUtils.groupSegmentsByInterval( - runTask(compactionTask, TaskState.SUCCESS) + dataSegmentsWithSchemas.getSegments() ); for (List segmentsInInterval : compactedSegments.values()) { final int expectedAtomicUpdateGroupSize = segmentsInInterval.size(); @@ -151,20 +158,24 @@ public void testPartialCompactHashAndDynamicPartitionedSegments() @Test public void testPartialCompactRangeAndDynamicPartitionedSegments() { - final Map> rangePartitionedSegments = SegmentUtils.groupSegmentsByInterval( + DataSegmentsWithSchemas dataSegmentsWithSchemas = runTestTask( new SingleDimensionPartitionsSpec(10, null, "dim1", false), TaskState.SUCCESS, false - ) - ); - final Map> linearlyPartitionedSegments = SegmentUtils.groupSegmentsByInterval( + ); + final Map> rangePartitionedSegments = + SegmentUtils.groupSegmentsByInterval(dataSegmentsWithSchemas.getSegments()); + + dataSegmentsWithSchemas = runTestTask( new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true - ) - ); + ); + final Map> linearlyPartitionedSegments = + SegmentUtils.groupSegmentsByInterval(dataSegmentsWithSchemas.getSegments()); + // Pick half of each partition lists to compact together rangePartitionedSegments.values().forEach( segmentsInInterval -> segmentsInInterval.sort( @@ -191,8 +202,10 @@ public void testPartialCompactRangeAndDynamicPartitionedSegments() .inputSpec(SpecificSegmentsSpec.fromSegments(segmentsToCompact)) .tuningConfig(newTuningConfig(new DynamicPartitionsSpec(20, null), 2, false)) .build(); + + dataSegmentsWithSchemas = runTask(compactionTask, TaskState.SUCCESS); final Map> compactedSegments = SegmentUtils.groupSegmentsByInterval( - runTask(compactionTask, TaskState.SUCCESS) + dataSegmentsWithSchemas.getSegments() ); for (List segmentsInInterval : compactedSegments.values()) { final int expectedAtomicUpdateGroupSize = segmentsInInterval.size(); @@ -202,7 +215,7 @@ public void testPartialCompactRangeAndDynamicPartitionedSegments() } } - private Set runTestTask( + private DataSegmentsWithSchemas runTestTask( PartitionsSpec partitionsSpec, TaskState expectedTaskState, boolean appendToExisting diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java index a51856f7353a..d35bba9b4ac3 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.hamcrest.Matchers; import org.junit.Assert; import org.junit.Before; @@ -103,7 +104,9 @@ public void setup() ParallelIndexTestingFactory.SUBTASK_SPEC_ID, ParallelIndexTestingFactory.NUM_ATTEMPTS, ingestionSpec, - ParallelIndexTestingFactory.CONTEXT + ParallelIndexTestingFactory.CONTEXT, + CentralizedDatasourceSchemaConfig.create(), + null ); } @@ -140,7 +143,9 @@ public void requiresGranularitySpecInputIntervals() .partitionsSpec(partitionsSpec) .build() ), - ParallelIndexTestingFactory.CONTEXT + ParallelIndexTestingFactory.CONTEXT, + CentralizedDatasourceSchemaConfig.create(), + null ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java index f7be58aeae11..65b58fb6fd50 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionAdjustingCorePartitionSizeTest.java @@ -118,7 +118,7 @@ public void testLessPartitionsThanBuckets() throws IOException partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS - ) + ).getSegments() ); Assert.assertEquals(1, segments.size()); final DataSegment segment = segments.get(0); @@ -158,7 +158,7 @@ public void testEqualNumberOfPartitionsToBuckets() throws IOException partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS - ); + ).getSegments(); Assert.assertEquals(5, segments.size()); segments.forEach(segment -> { Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 759d70fe2f99..9c04ce6c6ee4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -41,6 +41,7 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.scan.ScanResultValue; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; @@ -264,7 +265,7 @@ public void 
createsCorrectRangePartitions() throws Exception int targetRowsPerSegment = NUM_ROW * 2 / DIM_FILE_CARDINALITY / NUM_PARTITION; // verify dropExisting false - final Set publishedSegments = runTask(runTestTask( + final DataSegmentsWithSchemas publishedDataSegmentsWithSchemas = runTask(runTestTask( new DimensionRangePartitionsSpec( targetRowsPerSegment, null, @@ -276,8 +277,14 @@ public void createsCorrectRangePartitions() throws Exception false ), useMultivalueDim ? TaskState.FAILED : TaskState.SUCCESS); + final Set publishedSegments = publishedDataSegmentsWithSchemas.getSegments(); if (!useMultivalueDim) { assertRangePartitions(publishedSegments); + Assert.assertEquals(1, publishedDataSegmentsWithSchemas.getSegmentSchemaMapping().getSchemaFingerprintToPayloadMap().size()); + Assert.assertEquals(publishedSegments.size(), publishedDataSegmentsWithSchemas.getSegmentSchemaMapping().getSegmentIdToMetadataMap().size()); + for (DataSegment segment : publishedSegments) { + Assert.assertTrue(publishedDataSegmentsWithSchemas.getSegmentSchemaMapping().getSegmentIdToMetadataMap().containsKey(segment.getId().toString())); + } } // verify dropExisting true @@ -289,7 +296,7 @@ public void createsCorrectRangePartitions() throws Exception File inputDirectory = temporaryFolder.newFolder("dataReplace"); createInputFilesForReplace(inputDirectory, useMultivalueDim); - final Set publishedSegmentsAfterReplace = runTask(runTestTask( + final DataSegmentsWithSchemas publishedDataSegmentsWithSchemasAfterReplace = runTask(runTestTask( new DimensionRangePartitionsSpec( targetRowsPerSegment, null, @@ -301,6 +308,8 @@ public void createsCorrectRangePartitions() throws Exception true ), useMultivalueDim ? TaskState.FAILED : TaskState.SUCCESS); + final Set publishedSegmentsAfterReplace = publishedDataSegmentsWithSchemasAfterReplace.getSegments(); + int tombstones = 0; for (DataSegment ds : publishedSegmentsAfterReplace) { if (ds.isTombstone()) { @@ -311,6 +320,13 @@ public void createsCorrectRangePartitions() throws Exception if (!useMultivalueDim) { Assert.assertEquals(11, tombstones); Assert.assertEquals(10, publishedSegmentsAfterReplace.size() - tombstones); + for (DataSegment segment : publishedSegmentsAfterReplace) { + if (!segment.isTombstone()) { + Assert.assertTrue(publishedDataSegmentsWithSchemasAfterReplace.getSegmentSchemaMapping().getSegmentIdToMetadataMap().containsKey(segment.getId().toString())); + } + } + Assert.assertEquals(10, publishedDataSegmentsWithSchemasAfterReplace.getSegmentSchemaMapping().getSegmentIdToMetadataMap().size()); + Assert.assertEquals(1, publishedDataSegmentsWithSchemasAfterReplace.getSegmentSchemaMapping().getSchemaFingerprintToPayloadMap().size()); } } @@ -321,8 +337,7 @@ public void testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSucc return; } final int targetRowsPerSegment = NUM_ROW / DIM_FILE_CARDINALITY / NUM_PARTITION; - final Set publishedSegments = new HashSet<>(); - publishedSegments.addAll( + DataSegmentsWithSchemas dataSegmentsWithSchemas = runTask(runTestTask( new SingleDimensionPartitionsSpec( targetRowsPerSegment, @@ -333,27 +348,32 @@ public void testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSucc inputDir, false, false - ), TaskState.SUCCESS) - ); + ), TaskState.SUCCESS); + verifySchema(dataSegmentsWithSchemas); + + final Set publishedSegments = new HashSet<>(dataSegmentsWithSchemas.getSegments()); // Append - publishedSegments.addAll( + dataSegmentsWithSchemas = runTask(runTestTask( new DynamicPartitionsSpec(5, null), inputDir, true, 
false - ), TaskState.SUCCESS) - ); + ), TaskState.SUCCESS); + publishedSegments.addAll(dataSegmentsWithSchemas.getSegments()); + verifySchema(dataSegmentsWithSchemas); + // And append again - publishedSegments.addAll( + dataSegmentsWithSchemas = runTask(runTestTask( new DynamicPartitionsSpec(10, null), inputDir, true, false - ), TaskState.SUCCESS) - ); + ), TaskState.SUCCESS); + verifySchema(dataSegmentsWithSchemas); + publishedSegments.addAll(dataSegmentsWithSchemas.getSegments()); final Map> intervalToSegments = new HashMap<>(); publishedSegments.forEach( segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java index 143e0b0474a2..5ad774386b65 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/SinglePhaseParallelIndexingTest.java @@ -45,6 +45,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; +import org.apache.druid.segment.DataSegmentsWithSchemas; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.incremental.ParseExceptionReport; import org.apache.druid.segment.incremental.RowIngestionMetersTotals; @@ -272,7 +273,9 @@ private void assertShardSpec( Collection originalSegmentsIfAppend ) { - final Collection segments = getIndexingServiceClient().getPublishedSegments(task); + final DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + final Collection segments = dataSegmentsWithSchemas.getSegments(); if (!appendToExisting && actualLockGranularity == LockGranularity.TIME_CHUNK) { // Initial write final Map> intervalToSegments = SegmentUtils.groupSegmentsByInterval(segments); @@ -303,7 +306,9 @@ private void assertShardSpec( private void assertShardSpecAfterOverwrite(ParallelIndexSupervisorTask task, LockGranularity actualLockGranularity) { - final Collection segments = getIndexingServiceClient().getPublishedSegments(task); + DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + final Collection segments = dataSegmentsWithSchemas.getSegments(); final Map> intervalToSegments = SegmentUtils.groupSegmentsByInterval(segments); if (actualLockGranularity != LockGranularity.SEGMENT) { // Check the core partition set in the shardSpec @@ -414,7 +419,9 @@ public void testRunInParallelIngestNullColumn() task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + Set segments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : segments) { for (int i = 0; i < dimensionSchemas.size(); i++) { 
Assert.assertEquals(dimensionSchemas.get(i).getName(), segment.getDimensions().get(i)); @@ -467,7 +474,9 @@ public void testRunInParallelIngestNullColumn_storeEmptyColumnsOff_shouldNotStor task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + Set segments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : segments) { Assert.assertFalse(segment.getDimensions().contains("unknownDim")); } @@ -903,7 +912,10 @@ public void testIngestBothExplicitAndImplicitDims() throws IOException task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + Set segments = dataSegmentsWithSchemas.getSegments(); + for (DataSegment segment : segments) { Assert.assertEquals(ImmutableList.of("ts", "explicitDim", "implicitDim"), segment.getDimensions()); } @@ -984,7 +996,9 @@ public void testIngestBothExplicitAndImplicitDimsSchemaDiscovery() throws IOExce task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK); Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode()); - Set segments = getIndexingServiceClient().getPublishedSegments(task); + DataSegmentsWithSchemas dataSegmentsWithSchemas = getIndexingServiceClient().getSegmentAndSchemas(task); + verifySchema(dataSegmentsWithSchemas); + Set segments = dataSegmentsWithSchemas.getSegments(); for (DataSegment segment : segments) { Assert.assertEquals(ImmutableList.of("ts", "explicitDim", "implicitDim"), segment.getDimensions()); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java index 230cfa4668c9..b80641fe94bf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ActionsTestTask.java @@ -78,7 +78,7 @@ public TaskLock acquireAppendLockOn(Interval interval) public SegmentPublishResult commitReplaceSegments(DataSegment... segments) { return runAction( - SegmentTransactionalReplaceAction.create(Sets.newHashSet(segments)) + SegmentTransactionalReplaceAction.create(Sets.newHashSet(segments), null) ); } @@ -90,7 +90,7 @@ public Map getAnnouncedSegmentsToParentSegments() public SegmentPublishResult commitAppendSegments(DataSegment... 
segments) { SegmentPublishResult publishResult = runAction( - SegmentTransactionalAppendAction.forSegments(Sets.newHashSet(segments)) + SegmentTransactionalAppendAction.forSegments(Sets.newHashSet(segments), null) ); for (DataSegment segment : publishResult.getSegments()) { announcedSegmentsToParentSegments.remove(segment.getId()); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java index 415c63a0ee26..273339fe7f3f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/concurrent/ConcurrentReplaceAndAppendTest.java @@ -54,6 +54,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.server.DruidNode; import org.apache.druid.server.metrics.NoopServiceEmitter; @@ -979,6 +980,7 @@ private void verifyIntervalHasVisibleSegments(Interval interval, DataSegment... private void verifySegments(Interval interval, Segments visibility, DataSegment... expectedSegments) { try { + Collection allUsedSegments = dummyTaskActionClient.submit( new RetrieveUsedSegmentsAction( WIKI, @@ -1016,10 +1018,13 @@ private TaskToolboxFactory createToolboxFactory( TaskActionClientFactory taskActionClientFactory ) { + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig = new CentralizedDatasourceSchemaConfig(); + centralizedDatasourceSchemaConfig.setEnabled(true); TestTaskToolboxFactory.Builder builder = new TestTaskToolboxFactory.Builder() .setConfig(taskConfig) .setIndexIO(new IndexIO(getObjectMapper(), ColumnConfig.DEFAULT)) - .setTaskActionClientFactory(taskActionClientFactory); + .setTaskActionClientFactory(taskActionClientFactory) + .setCentralizedTableSchemaConfig(centralizedDatasourceSchemaConfig); return new TestTaskToolboxFactory(builder) { @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RealtimeishTask.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RealtimeishTask.java index d2c3e7eecb59..68f0ff77bb9d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RealtimeishTask.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RealtimeishTask.java @@ -105,7 +105,8 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception .version(lock1.getVersion()) .size(0) .build() - ) + ), + null ); toolbox.getTaskActionClient().submit(firstSegmentInsertAction); @@ -125,7 +126,8 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception .version(lock2.getVersion()) .size(0) .build() - ) + ), + null ); toolbox.getTaskActionClient().submit(secondSegmentInsertAction); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java index db56811ee2c6..f4cb82dcd713 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -37,9 +37,13 @@ import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.IndexingServiceCondition; +import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.TestIndexTask; import org.apache.druid.indexing.common.TestTasks; import org.apache.druid.indexing.common.TestUtils; +import org.apache.druid.indexing.common.actions.SegmentTransactionalAppendAction; +import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction; +import org.apache.druid.indexing.common.actions.SegmentTransactionalReplaceAction; import org.apache.druid.indexing.common.task.Task; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.overlord.config.RemoteTaskRunnerConfig; @@ -72,6 +76,7 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.Future; @@ -1145,4 +1150,46 @@ public void testStreamTaskReportsKnownTask() throws Exception capturedRequest.getValue().getUrl().toString() ); } + + @Test + public void testBuildPublishAction() + { + TestIndexTask task = new TestIndexTask( + "test_index1", + new TaskResource("test_index1", 1), + "foo", + TaskStatus.success("test_index1"), + jsonMapper + ); + + Assert.assertEquals( + SegmentTransactionalAppendAction.class, + task.testBuildPublishAction( + Collections.emptySet(), + Collections.emptySet(), + null, + TaskLockType.APPEND + ).getClass() + ); + + Assert.assertEquals( + SegmentTransactionalReplaceAction.class, + task.testBuildPublishAction( + Collections.emptySet(), + Collections.emptySet(), + null, + TaskLockType.REPLACE + ).getClass() + ); + + Assert.assertEquals( + SegmentTransactionalInsertAction.class, + task.testBuildPublishAction( + Collections.emptySet(), + Collections.emptySet(), + null, + TaskLockType.EXCLUSIVE + ).getClass() + ); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java index 0046645106ca..ece18aa852d2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLifecycleTest.java @@ -125,6 +125,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9Factory; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.handoff.SegmentHandoffNotifier; import org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory; @@ -468,6 +469,7 @@ private TaskStorage setUpTaskStorage() new NamedType(NoopInputFormat.class, "noopInputFormat") ); testDerbyConnector.createTaskTables(); + testDerbyConnector.createSegmentSchemasTable(); testDerbyConnector.createSegmentTable(); taskStorage = new MetadataTaskStorage( testDerbyConnector, @@ -567,9 +569,9 @@ private TestIndexerMetadataStorageCoordinator setUpMetadataStorageCoordinator() return new TestIndexerMetadataStorageCoordinator() { @Override - public Set commitSegments(Set segments) + public Set commitSegments(Set segments, final SegmentSchemaMapping segmentSchemaMapping) { - Set retVal = super.commitSegments(segments); 
+ Set retVal = super.commitSegments(segments, segmentSchemaMapping); if (publishCountDown != null) { publishCountDown.countDown(); } @@ -1154,7 +1156,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception .size(0) .build(); - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment))); + toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment), null)); return TaskStatus.success(getId()); } }; @@ -1195,7 +1197,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception .size(0) .build(); - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment))); + toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment), null)); return TaskStatus.success(getId()); } }; @@ -1237,7 +1239,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception .size(0) .build(); - toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment))); + toolbox.getTaskActionClient().submit(new SegmentInsertAction(ImmutableSet.of(segment), null)); return TaskStatus.success(getId()); } }; diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java index 19af66254ba8..4dc0416cd1f2 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockBoxConcurrencyTest.java @@ -33,6 +33,8 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.joda.time.Interval; import org.junit.After; import org.junit.Assert; @@ -57,6 +59,7 @@ public class TaskLockBoxConcurrencyTest private ExecutorService service; private TaskStorage taskStorage; private TaskLockbox lockbox; + private SegmentSchemaManager segmentSchemaManager; @Before public void setup() @@ -73,9 +76,16 @@ public void setup() ) ); + segmentSchemaManager = new SegmentSchemaManager(derby.metadataTablesConfigSupplier().get(), objectMapper, derbyConnector); lockbox = new TaskLockbox( taskStorage, - new IndexerSQLMetadataStorageCoordinator(objectMapper, derby.metadataTablesConfigSupplier().get(), derbyConnector) + new IndexerSQLMetadataStorageCoordinator( + objectMapper, + derby.metadataTablesConfigSupplier().get(), + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() + ) ); service = Execs.multiThreaded(2, "TaskLockBoxConcurrencyTest-%d"); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java index ab4bf3a504fc..7c16e2efc240 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskLockboxTest.java @@ -57,6 +57,8 @@ import org.apache.druid.metadata.MetadataStorageTablesConfig; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import 
org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; @@ -96,6 +98,7 @@ public class TaskLockboxTest private IndexerMetadataStorageCoordinator metadataStorageCoordinator; private TaskLockbox lockbox; private TaskLockboxValidator validator; + private SegmentSchemaManager segmentSchemaManager; private final int HIGH_PRIORITY = 15; private final int MEDIUM_PRIORITY = 10; @@ -109,12 +112,15 @@ public void setup() { objectMapper = TestHelper.makeJsonMapper(); objectMapper.registerSubtypes(NumberedShardSpec.class, HashBasedNumberedShardSpec.class); - final TestDerbyConnector derbyConnector = derby.getConnector(); derbyConnector.createTaskTables(); derbyConnector.createPendingSegmentsTable(); + derbyConnector.createSegmentSchemasTable(); derbyConnector.createSegmentTable(); final MetadataStorageTablesConfig tablesConfig = derby.metadataTablesConfigSupplier().get(); + + segmentSchemaManager = new SegmentSchemaManager(tablesConfig, objectMapper, derbyConnector); + taskStorage = new MetadataTaskStorage( derbyConnector, new TaskStorageConfig(null), @@ -128,7 +134,13 @@ public void setup() EmittingLogger.registerEmitter(emitter); EasyMock.replay(emitter); - metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator(objectMapper, tablesConfig, derbyConnector); + metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( + objectMapper, + tablesConfig, + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() + ); lockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator); validator = new TaskLockboxValidator(lockbox, taskStorage); @@ -450,10 +462,13 @@ public void testSyncWithUnknownTaskTypesFromModuleNotLoaded() loadedMapper ) ); + IndexerMetadataStorageCoordinator loadedMetadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( loadedMapper, derby.metadataTablesConfigSupplier().get(), - derbyConnector + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); TaskLockbox theBox = new TaskLockbox(taskStorage, metadataStorageCoordinator); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java index 1caf74c6681b..b9dcf97a54ca 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TaskQueueScaleTest.java @@ -48,6 +48,8 @@ import org.apache.druid.metadata.TaskLookup; import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.server.metrics.NoopServiceEmitter; import org.joda.time.Duration; import org.joda.time.Period; @@ -85,6 +87,7 @@ public class TaskQueueScaleTest private TaskStorage taskStorage; private TestTaskRunner taskRunner; private Closer closer; + private SegmentSchemaManager segmentSchemaManager; @Before public void setUp() @@ -98,11 +101,13 @@ public void setUp() taskRunner = new TestTaskRunner(); closer.register(taskRunner::stop); final ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - + 
segmentSchemaManager = new SegmentSchemaManager(derbyConnectorRule.metadataTablesConfigSupplier().get(), jsonMapper, derbyConnectorRule.getConnector()); final IndexerSQLMetadataStorageCoordinator storageCoordinator = new IndexerSQLMetadataStorageCoordinator( jsonMapper, derbyConnectorRule.metadataTablesConfigSupplier().get(), - derbyConnectorRule.getConnector() + derbyConnectorRule.getConnector(), + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); final TaskActionClientFactory unsupportedTaskActionFactory = diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestTaskToolboxFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestTaskToolboxFactory.java index 0a108b412191..2ee1b19df86a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestTaskToolboxFactory.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/TestTaskToolboxFactory.java @@ -391,9 +391,10 @@ public Builder setAttemptId(String attemptId) return this; } - public void setCentralizedTableSchemaConfig(CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig) + public Builder setCentralizedTableSchemaConfig(CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig) { this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + return this; } } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java index eabb640c133e..06a4bcb5b759 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskTestBase.java @@ -116,6 +116,7 @@ import org.apache.druid.segment.loading.LocalDataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusherConfig; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; +import org.apache.druid.segment.metadata.SegmentSchemaManager; import org.apache.druid.segment.realtime.appenderator.StreamAppenderator; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; import org.apache.druid.server.DruidNode; @@ -205,6 +206,7 @@ public abstract class SeekableStreamIndexTaskTestBase extends EasyMockSupport protected TaskLockbox taskLockbox; protected IndexerMetadataStorageCoordinator metadataStorageCoordinator; protected final Set checkpointRequestsHash = new HashSet<>(); + protected SegmentSchemaManager segmentSchemaManager; static { OBJECT_MAPPER = new TestUtils().getTestObjectMapper(); @@ -576,6 +578,7 @@ protected void makeToolboxFactory(TestUtils testUtils, ServiceEmitter emitter, b final TestDerbyConnector derbyConnector = derby.getConnector(); derbyConnector.createDataSourceTable(); derbyConnector.createPendingSegmentsTable(); + derbyConnector.createSegmentSchemasTable(); derbyConnector.createSegmentTable(); derbyConnector.createRulesTable(); derbyConnector.createConfigTable(); @@ -590,10 +593,13 @@ protected void makeToolboxFactory(TestUtils testUtils, ServiceEmitter emitter, b objectMapper ) ); + segmentSchemaManager = new SegmentSchemaManager(derby.metadataTablesConfigSupplier().get(), objectMapper, derbyConnector); metadataStorageCoordinator = new IndexerSQLMetadataStorageCoordinator( objectMapper, derby.metadataTablesConfigSupplier().get(), - 
derbyConnector + derbyConnector, + segmentSchemaManager, + CentralizedDatasourceSchemaConfig.create() ); taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator); final TaskActionToolbox taskActionToolbox = new TaskActionToolbox( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SequenceMetadataTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SequenceMetadataTest.java index fbe63ffe2689..cae9ec1e686f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SequenceMetadataTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/seekablestream/SequenceMetadataTest.java @@ -21,10 +21,14 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import org.apache.druid.data.input.impl.ByteEntity; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord; +import org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.SegmentUtils; import org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; import org.apache.druid.timeline.DataSegment; @@ -37,6 +41,8 @@ import org.mockito.Mockito; import org.mockito.junit.MockitoJUnitRunner; +import java.math.BigInteger; +import java.util.Collections; import java.util.Set; @RunWith(MockitoJUnitRunner.class) @@ -80,7 +86,7 @@ public void testPublishAnnotatedSegmentsThrowExceptionIfOverwriteSegmentsNotNull ISE exception = Assert.assertThrows( ISE.class, - () -> transactionalSegmentPublisher.publishAnnotatedSegments(notNullNotEmptySegment, ImmutableSet.of(), null) + () -> transactionalSegmentPublisher.publishAnnotatedSegments(notNullNotEmptySegment, ImmutableSet.of(), null, null) ); Assert.assertEquals( "Stream ingestion task unexpectedly attempted to overwrite segments: " @@ -92,8 +98,14 @@ public void testPublishAnnotatedSegmentsThrowExceptionIfOverwriteSegmentsNotNull @Test public void testPublishAnnotatedSegmentsSucceedIfDropSegmentsAndOverwriteSegmentsNullAndEmpty() throws Exception { - Mockito.when(mockSeekableStreamIndexTaskRunner.deserializePartitionsFromMetadata(ArgumentMatchers.any(), ArgumentMatchers.any())).thenReturn(mockSeekableStreamEndSequenceNumbers); + Mockito.when( + mockSeekableStreamIndexTaskRunner.deserializePartitionsFromMetadata( + ArgumentMatchers.any(), + ArgumentMatchers.any() + )) + .thenReturn(mockSeekableStreamEndSequenceNumbers); Mockito.when(mockSeekableStreamEndSequenceNumbers.getPartitionSequenceNumberMap()).thenReturn(ImmutableMap.of()); + Mockito.when(mockSeekableStreamEndSequenceNumbers.getStream()).thenReturn("stream"); Mockito.when(mockTaskToolbox.getTaskActionClient()).thenReturn(mockTaskActionClient); DataSegment dataSegment = DataSegment.builder() .dataSource("foo") @@ -110,11 +122,70 @@ public void testPublishAnnotatedSegmentsSucceedIfDropSegmentsAndOverwriteSegment ImmutableMap.of(), ImmutableMap.of(), true, - ImmutableSet.of(), + ImmutableSet.of(0), null ); TransactionalSegmentPublisher transactionalSegmentPublisher = sequenceMetadata.createPublisher(mockSeekableStreamIndexTaskRunner, mockTaskToolbox, false); - transactionalSegmentPublisher.publishAnnotatedSegments(null, 
notNullNotEmptySegment, ImmutableMap.of()); + transactionalSegmentPublisher.publishAnnotatedSegments(null, notNullNotEmptySegment, ImmutableMap.of(), null); + + transactionalSegmentPublisher = sequenceMetadata.createPublisher(mockSeekableStreamIndexTaskRunner, mockTaskToolbox, true); + + transactionalSegmentPublisher.publishAnnotatedSegments(null, notNullNotEmptySegment, ImmutableMap.of(), null); + } + + @Test + public void testCanHandle() + { + SequenceMetadata sequenceMetadata = new SequenceMetadata<>( + 1, + "test", + ImmutableMap.of(0, 0), + ImmutableMap.of(), + true, + ImmutableSet.of(0), + null + ); + + OrderedPartitionableRecord record = new OrderedPartitionableRecord<>( + "stream", + 0, + 0, + Collections.singletonList(new ByteEntity(StringUtils.toUtf8("unparseable"))) + ); + + Mockito.when(mockSeekableStreamIndexTaskRunner.createSequenceNumber(ArgumentMatchers.any())).thenReturn(makeSequenceNumber("1", false)); + Mockito.when(mockSeekableStreamIndexTaskRunner.isEndOffsetExclusive()).thenReturn(true); + Assert.assertFalse(sequenceMetadata.canHandle(mockSeekableStreamIndexTaskRunner, record)); + + Mockito.when(mockSeekableStreamIndexTaskRunner.isEndOffsetExclusive()).thenReturn(false); + Assert.assertFalse(sequenceMetadata.canHandle(mockSeekableStreamIndexTaskRunner, record)); + } + + private OrderedSequenceNumber makeSequenceNumber(String seq, boolean isExclusive) + { + return new OrderedSequenceNumber(seq, isExclusive) + { + @Override + public int compareTo(OrderedSequenceNumber o) + { + return new BigInteger(this.get()).compareTo(new BigInteger(o.get())); + } + + @Override + public boolean equals(Object o) + { + if (o.getClass() != this.getClass()) { + return false; + } + return new BigInteger(this.get()).equals(new BigInteger(((OrderedSequenceNumber) o).get())); + } + + @Override + public int hashCode() + { + return super.hashCode(); + } + }; } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index f57494a1e03b..1de41bb43a0f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.Pair; import org.apache.druid.metadata.PendingSegmentRecord; import org.apache.druid.metadata.ReplaceTaskLock; +import org.apache.druid.segment.SegmentSchemaMapping; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.PartialShardSpec; @@ -142,7 +143,10 @@ public int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interv } @Override - public Set commitSegments(Set segments) + public Set commitSegments( + Set segments, + final SegmentSchemaMapping segmentSchemaMapping + ) { Set added = new HashSet<>(); for (final DataSegment segment : segments) { @@ -167,20 +171,22 @@ public Map allocatePendingSegments @Override public SegmentPublishResult commitReplaceSegments( Set replaceSegments, - Set locksHeldByReplaceTask + Set locksHeldByReplaceTask, + SegmentSchemaMapping segmentSchemaMapping ) { - return SegmentPublishResult.ok(commitSegments(replaceSegments)); + return SegmentPublishResult.ok(commitSegments(replaceSegments, segmentSchemaMapping)); } @Override public 
SegmentPublishResult commitAppendSegments( Set appendSegments, Map appendSegmentToReplaceLock, - String taskGroup + String taskGroup, + SegmentSchemaMapping segmentSchemaMapping ) { - return SegmentPublishResult.ok(commitSegments(appendSegments)); + return SegmentPublishResult.ok(commitSegments(appendSegments, segmentSchemaMapping)); } @Override @@ -189,21 +195,23 @@ public SegmentPublishResult commitAppendSegmentsAndMetadata( Map appendSegmentToReplaceLock, DataSourceMetadata startMetadata, DataSourceMetadata endMetadata, - String taskGroup + String taskGroup, + SegmentSchemaMapping segmentSchemaMapping ) { - return SegmentPublishResult.ok(commitSegments(appendSegments)); + return SegmentPublishResult.ok(commitSegments(appendSegments, segmentSchemaMapping)); } @Override public SegmentPublishResult commitSegmentsAndMetadata( Set segments, @Nullable DataSourceMetadata startMetadata, - @Nullable DataSourceMetadata endMetadata + @Nullable DataSourceMetadata endMetadata, + SegmentSchemaMapping segmentSchemaMapping ) { // Don't actually compare metadata, just do it! - return SegmentPublishResult.ok(commitSegments(segments)); + return SegmentPublishResult.ok(commitSegments(segments, segmentSchemaMapping)); } @Override diff --git a/integration-tests/docker/docker-compose.cds-coordinator-smq-disabled.yml b/integration-tests/docker/docker-compose.cds-coordinator-smq-disabled.yml new file mode 100644 index 000000000000..090a746750fe --- /dev/null +++ b/integration-tests/docker/docker-compose.cds-coordinator-smq-disabled.yml @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "2.2" +services: + druid-zookeeper-kafka: + extends: + file: docker-compose.base.yml + service: druid-zookeeper-kafka + + druid-metadata-storage: + extends: + file: docker-compose.base.yml + service: druid-metadata-storage + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + + druid-coordinator: + extends: + file: docker-compose.base.yml + service: druid-coordinator + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_centralizedDatasourceSchema_enabled=true + - druid_centralizedDatasourceSchema_backFillEnabled=true + - druid_centralizedDatasourceSchema_backFillPeriod=15000 + - druid_coordinator_segmentMetadata_metadataRefreshPeriod=PT15S + - druid_coordinator_segmentMetadata_disableSegmentMetadataQueries=true + depends_on: + - druid-overlord + - druid-metadata-storage + - druid-zookeeper-kafka + + druid-overlord: + extends: + file: docker-compose.base.yml + service: druid-overlord + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_centralizedDatasourceSchema_enabled=true + depends_on: + - druid-metadata-storage + - druid-zookeeper-kafka + + druid-historical: + extends: + file: docker-compose.base.yml + service: druid-historical + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + + druid-middlemanager: + extends: + file: docker-compose.base.yml + service: druid-middlemanager + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_indexer_fork_property_druid_centralizedDatasourceSchema_enabled=true + depends_on: + - druid-zookeeper-kafka + - druid-overlord + + druid-broker: + extends: + file: docker-compose.base.yml + service: druid-broker + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_sql_planner_metadataRefreshPeriod=PT20S + - druid_sql_planner_disableSegmentMetadataQueries=true + depends_on: + - druid-coordinator + - druid-zookeeper-kafka + - druid-middlemanager + - druid-historical + + druid-router: + extends: + file: docker-compose.base.yml + service: druid-router + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + - druid-coordinator + - druid-broker + - druid-overlord + +networks: + druid-it-net: + name: druid-it-net + ipam: + config: + - subnet: 172.172.172.0/24 diff --git a/integration-tests/docker/docker-compose.cds-task-schema-publish-disabled.yml b/integration-tests/docker/docker-compose.cds-task-schema-publish-disabled.yml new file mode 100644 index 000000000000..190f4eaf8bc2 --- /dev/null +++ b/integration-tests/docker/docker-compose.cds-task-schema-publish-disabled.yml @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +version: "2.2" +services: + druid-zookeeper-kafka: + extends: + file: docker-compose.base.yml + service: druid-zookeeper-kafka + + druid-metadata-storage: + extends: + file: docker-compose.base.yml + service: druid-metadata-storage + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + + druid-coordinator: + extends: + file: docker-compose.base.yml + service: druid-coordinator + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_centralizedDatasourceSchema_enabled=true + - druid_centralizedDatasourceSchema_taskSchemaPublishDisabled=true + - druid_centralizedDatasourceSchema_backFillEnabled=true + - druid_centralizedDatasourceSchema_backFillPeriod=15000 + - druid_coordinator_segmentMetadata_metadataRefreshPeriod=PT15S + depends_on: + - druid-overlord + - druid-metadata-storage + - druid-zookeeper-kafka + + druid-overlord: + extends: + file: docker-compose.base.yml + service: druid-overlord + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_centralizedDatasourceSchema_enabled=true + - druid_centralizedDatasourceSchema_taskSchemaPublishDisabled=true + depends_on: + - druid-metadata-storage + - druid-zookeeper-kafka + + druid-historical: + extends: + file: docker-compose.base.yml + service: druid-historical + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + + druid-middlemanager: + extends: + file: docker-compose.base.yml + service: druid-middlemanager + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_indexer_fork_property_druid_centralizedDatasourceSchema_enabled=true + depends_on: + - druid-zookeeper-kafka + - druid-overlord + + druid-broker: + extends: + file: docker-compose.base.yml + service: druid-broker + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_sql_planner_metadataRefreshPeriod=PT20S + - druid_sql_planner_disableSegmentMetadataQueries=true + depends_on: + - druid-coordinator + - druid-zookeeper-kafka + - druid-middlemanager + - druid-historical + + druid-router: + extends: + file: docker-compose.base.yml + service: druid-router + environment: + - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + depends_on: + - druid-zookeeper-kafka + - druid-coordinator + - druid-broker + - druid-overlord + +networks: + druid-it-net: + name: druid-it-net + ipam: + config: + - subnet: 172.172.172.0/24 diff --git a/integration-tests/docker/docker-compose.centralized-datasource-schema.yml b/integration-tests/docker/docker-compose.centralized-datasource-schema.yml index 2abcd4cc0e9e..39ce98b1302b 100644 --- a/integration-tests/docker/docker-compose.centralized-datasource-schema.yml +++ b/integration-tests/docker/docker-compose.centralized-datasource-schema.yml @@ -36,7 +36,8 @@ services: environment: - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} - druid_centralizedDatasourceSchema_enabled=true - - druid_centralizedDatasourceSchema_announceRealtimeSegmentSchema=true + - druid_centralizedDatasourceSchema_backFillEnabled=true + - druid_centralizedDatasourceSchema_backFillPeriod=15000 - druid_coordinator_segmentMetadata_metadataRefreshPeriod=PT15S depends_on: - druid-overlord @@ -49,6 +50,7 @@ services: service: druid-overlord environment: - 
DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} + - druid_centralizedDatasourceSchema_enabled=true depends_on: - druid-metadata-storage - druid-zookeeper-kafka @@ -68,7 +70,7 @@ services: service: druid-middlemanager environment: - DRUID_INTEGRATION_TEST_GROUP=${DRUID_INTEGRATION_TEST_GROUP} - - druid_centralizedDatasourceSchema_announceRealtimeSegmentSchema=true + - druid_indexer_fork_property_druid_centralizedDatasourceSchema_enabled=true depends_on: - druid-zookeeper-kafka - druid-overlord diff --git a/integration-tests/docker/druid.sh b/integration-tests/docker/druid.sh index 5aac15512f84..f112f91d1591 100755 --- a/integration-tests/docker/druid.sh +++ b/integration-tests/docker/druid.sh @@ -85,7 +85,7 @@ setupData() # The "query" and "security" test groups require data to be setup before running the tests. # In particular, they requires segments to be download from a pre-existing s3 bucket. # This is done by using the loadSpec put into metadatastore and s3 credientials set below. - if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-error" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "high-availability" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "ldap-security" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "upgrade" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "centralized-datasource-schema" ]; then + if [ "$DRUID_INTEGRATION_TEST_GROUP" = "query" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-retry" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "query-error" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "high-availability" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "security" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "ldap-security" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "upgrade" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "centralized-datasource-schema" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "cds-task-schema-publish-disabled" ] || [ "$DRUID_INTEGRATION_TEST_GROUP" = "cds-coordinator-smq-disabled" ]; then # touch is needed because OverlayFS's copy-up operation breaks POSIX standards. See https://github.com/docker/for-linux/issues/72. find /var/lib/mysql -type f -exec touch {} \; && service mysql start \ && cat /test-data/${DRUID_INTEGRATION_TEST_GROUP}-sample-data.sql | mysql -u root druid \ diff --git a/integration-tests/docker/test-data/cds-coordinator-smq-disabled-sample-data.sql b/integration-tests/docker/test-data/cds-coordinator-smq-disabled-sample-data.sql new file mode 100644 index 000000000000..abe0f115189b --- /dev/null +++ b/integration-tests/docker/test-data/cds-coordinator-smq-disabled-sample-data.sql @@ -0,0 +1,20 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9','twitterstream','2013-05-13T01:08:18.192Z','2013-01-01T00:00:00.000Z','2013-01-02T00:00:00.000Z',0,'2013-01-02T04:13:41.980Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z\",\"version\":\"2013-01-02T04:13:41.980Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/2013-01-02T04:13:41.980Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":445235220,\"identifier\":\"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9','twitterstream','2013-05-13T00:03:28.640Z','2013-01-02T00:00:00.000Z','2013-01-03T00:00:00.000Z',0,'2013-01-03T03:44:58.791Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z\",\"version\":\"2013-01-03T03:44:58.791Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z/2013-01-03T03:44:58.791Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":435325540,\"identifier\":\"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES 
('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload,used_status_last_updated) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}','1970-01-01T00:00:00.000Z'); diff --git a/integration-tests/docker/test-data/cds-task-schema-publish-disabled-sample-data.sql 
b/integration-tests/docker/test-data/cds-task-schema-publish-disabled-sample-data.sql new file mode 100644 index 000000000000..abe0f115189b --- /dev/null +++ b/integration-tests/docker/test-data/cds-task-schema-publish-disabled-sample-data.sql @@ -0,0 +1,20 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9','twitterstream','2013-05-13T01:08:18.192Z','2013-01-01T00:00:00.000Z','2013-01-02T00:00:00.000Z',0,'2013-01-02T04:13:41.980Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z\",\"version\":\"2013-01-02T04:13:41.980Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/2013-01-02T04:13:41.980Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":445235220,\"identifier\":\"twitterstream_2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z_2013-01-02T04:13:41.980Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9','twitterstream','2013-05-13T00:03:28.640Z','2013-01-02T00:00:00.000Z','2013-01-03T00:00:00.000Z',0,'2013-01-03T03:44:58.791Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z\",\"version\":\"2013-01-03T03:44:58.791Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z/2013-01-03T03:44:58.791Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":435325540,\"identifier\":\"twitterstream_2013-01-02T00:00:00.000Z_2013-01-03T00:00:00.000Z_2013-01-03T03:44:58.791Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments 
(id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9','twitterstream','2013-05-13T00:03:48.807Z','2013-01-03T00:00:00.000Z','2013-01-04T00:00:00.000Z',0,'2013-01-04T04:09:13.590Z_v9',1,'{\"dataSource\":\"twitterstream\",\"interval\":\"2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z\",\"version\":\"2013-01-04T04:09:13.590Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/twitterstream/2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z/2013-01-04T04:09:13.590Z_v9/0/index.zip\"},\"dimensions\":\"has_links,first_hashtag,user_time_zone,user_location,has_mention,user_lang,rt_name,user_name,is_retweet,is_viral,has_geo,url_domain,user_mention_name,reply_to_name\",\"metrics\":\"count,tweet_length,num_followers,num_links,num_mentions,num_hashtags,num_favorites,user_total_tweets\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":411651320,\"identifier\":\"twitterstream_2013-01-03T00:00:00.000Z_2013-01-04T00:00:00.000Z_2013-01-04T04:09:13.590Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id,dataSource,created_date,start,end,partitioned,version,used,payload,used_status_last_updated) VALUES ('wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9','wikipedia_editstream','2013-03-15T20:49:52.348Z','2012-12-29T00:00:00.000Z','2013-01-10T08:00:00.000Z',0,'2013-01-10T08:13:47.830Z_v9',1,'{\"dataSource\":\"wikipedia_editstream\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia_editstream/2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z/2013-01-10T08:13:47.830Z_v9/0/index.zip\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}','1970-01-01T00:00:00.000Z'); +INSERT INTO druid_segments (id, dataSource, created_date, start, end, partitioned, version, used, payload,used_status_last_updated) VALUES ('wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z', 'wikipedia', '2013-08-08T21:26:23.799Z', '2013-08-01T00:00:00.000Z', '2013-08-02T00:00:00.000Z', '0', '2013-08-08T21:22:48.989Z', '1', '{\"dataSource\":\"wikipedia\",\"interval\":\"2013-08-01T00:00:00.000Z/2013-08-02T00:00:00.000Z\",\"version\":\"2013-08-08T21:22:48.989Z\",\"loadSpec\":{\"type\":\"s3_zip\",\"bucket\":\"static.druid.io\",\"key\":\"data/segments/wikipedia/20130801T000000.000Z_20130802T000000.000Z/2013-08-08T21_22_48.989Z/0/index.zip\"},\"dimensions\":\"dma_code,continent_code,geo,area_code,robot,country_name,network,city,namespace,anonymous,unpatrolled,page,postal_code,language,newpage,user,region_lookup\",\"metrics\":\"count,delta,variation,added,deleted\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":24664730,\"identifier\":\"wikipedia_2013-08-01T00:00:00.000Z_2013-08-02T00:00:00.000Z_2013-08-08T21:22:48.989Z\"}','1970-01-01T00:00:00.000Z'); diff --git 
a/integration-tests/script/docker_compose_args.sh b/integration-tests/script/docker_compose_args.sh index b746a530a7d8..c37d22ca3144 100644 --- a/integration-tests/script/docker_compose_args.sh +++ b/integration-tests/script/docker_compose_args.sh @@ -75,6 +75,14 @@ getComposeArgs() then # cluster with overriden properties for broker and coordinator echo "-f ${DOCKERDIR}/docker-compose.centralized-datasource-schema.yml" + elif [ "$DRUID_INTEGRATION_TEST_GROUP" = "cds-task-schema-publish-disabled" ] + then + # cluster with overriden properties for broker and coordinator + echo "-f ${DOCKERDIR}/docker-compose.cds-task-schema-publish-disabled.yml" + elif [ "$DRUID_INTEGRATION_TEST_GROUP" = "cds-coordinator-smq-disabled" ] + then + # cluster with overriden properties for broker and coordinator + echo "-f ${DOCKERDIR}/docker-compose.cds-coordinator-smq-disabled.yml" else # default echo "-f ${DOCKERDIR}/docker-compose.yml" diff --git a/integration-tests/src/main/resources/log4j2.xml b/integration-tests/src/main/resources/log4j2.xml index dbce142e7f60..405619e31801 100644 --- a/integration-tests/src/main/resources/log4j2.xml +++ b/integration-tests/src/main/resources/log4j2.xml @@ -28,5 +28,14 @@ + + + + + + + + + diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index a54d22ef0216..516dcb65434a 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -163,4 +163,8 @@ public class TestNGGroup public static final String HTTP_ENDPOINT = "http-endpoint"; public static final String CENTRALIZED_DATASOURCE_SCHEMA = "centralized-datasource-schema"; + + public static final String CDS_TASK_SCHEMA_PUBLISH_DISABLED = "cds-task-schema-publish-disabled"; + + public static final String CDS_COORDINATOR_SMQ_DISABLED = "cds-coordinator-smq-disabled"; } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java index 5e9071e45507..84ddb7612e65 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppendBatchIndexTest.java @@ -40,7 +40,7 @@ import java.util.UUID; import java.util.function.Function; -@Test(groups = {TestNGGroup.APPEND_INGESTION}) +@Test(groups = {TestNGGroup.APPEND_INGESTION, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITAppendBatchIndexTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppenderatorDriverRealtimeIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppenderatorDriverRealtimeIndexTaskTest.java index ba97d77f44c3..a077dbd0a978 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppenderatorDriverRealtimeIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAppenderatorDriverRealtimeIndexTaskTest.java @@ -44,7 +44,7 @@ /** * See {@link AbstractITRealtimeIndexTaskTest} for test details. 
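// Illustrative sketch only (not part of this patch): a hypothetical integration test that opts into
// the two new TestNG groups, so it also runs against the cds-task-schema-publish-disabled and
// cds-coordinator-smq-disabled clusters selected by docker_compose_args.sh above. Class and method
// names here are made up; the real tests tagged in this patch extend helpers such as AbstractITBatchIndexTest.
package org.apache.druid.tests.indexer;

import org.apache.druid.testing.guice.DruidTestModuleFactory;
import org.apache.druid.tests.TestNGGroup;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED})
@Guice(moduleFactory = DruidTestModuleFactory.class)
public class ITExampleCentralizedSchemaIndexTest
{
  @Test
  public void testIngestionWithCentralizedSchema()
  {
    // Hypothetical body: submit an ingestion spec and verify query results, as the tagged tests above do.
  }
}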
*/ -@Test(groups = TestNGGroup.REALTIME_INDEX) +@Test(groups = {TestNGGroup.REALTIME_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITAppenderatorDriverRealtimeIndexTaskTest extends AbstractITRealtimeIndexTaskTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java index a463e389d79e..0ecfe7ed5c40 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java @@ -38,7 +38,7 @@ import java.io.Closeable; import java.util.function.Function; -@Test(groups = TestNGGroup.BATCH_INDEX) +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITBestEffortRollupParallelIndexTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java index 7619bcf8b6e0..40549a2685e3 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCombiningInputSourceParallelIndexTest.java @@ -32,7 +32,7 @@ import java.util.Map; import java.util.function.Function; -@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITCombiningInputSourceParallelIndexTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionSparseColumnTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionSparseColumnTest.java index 6039282c1185..27b771308b35 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionSparseColumnTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionSparseColumnTest.java @@ -41,7 +41,7 @@ import java.util.List; import java.util.Map; -@Test(groups = {TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE}) +@Test(groups = {TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITCompactionSparseColumnTest extends AbstractIndexerTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java index 90381bee1430..6dbcb90c3df5 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITCompactionTaskTest.java @@ -51,7 +51,7 @@ import java.util.Map; import java.util.Set; -@Test(groups = 
{TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE}) +@Test(groups = {TestNGGroup.COMPACTION, TestNGGroup.QUICKSTART_COMPATIBLE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITCompactionTaskTest extends AbstractIndexerTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java index bb0d7c5b9a21..11404bdd56e4 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHttpInputSourceTest.java @@ -29,7 +29,7 @@ import java.io.IOException; import java.util.UUID; -@Test(groups = TestNGGroup.INPUT_SOURCE) +@Test(groups = {TestNGGroup.INPUT_SOURCE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITHttpInputSourceTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java index 04222abad802..77c64733a622 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java @@ -41,7 +41,7 @@ import java.util.Map; import java.util.function.Function; -@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITIndexerTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceNonTransactionalSerializedTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceNonTransactionalSerializedTest.java index a3b845b87004..33bf5a5d79b3 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceNonTransactionalSerializedTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceNonTransactionalSerializedTest.java @@ -25,7 +25,7 @@ import org.testng.annotations.Guice; import org.testng.annotations.Test; -@Test(groups = TestNGGroup.KAFKA_INDEX_SLOW) +@Test(groups = {TestNGGroup.KAFKA_INDEX_SLOW, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITKafkaIndexingServiceNonTransactionalSerializedTest extends AbstractKafkaIndexingServiceTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceTransactionalSerializedTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceTransactionalSerializedTest.java index fdd06ff4f883..a50aa6ce10ef 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceTransactionalSerializedTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaIndexingServiceTransactionalSerializedTest.java @@ -25,7 +25,7 @@ import org.testng.annotations.Guice; import 
org.testng.annotations.Test; -@Test(groups = TestNGGroup.TRANSACTIONAL_KAFKA_INDEX_SLOW) +@Test(groups = {TestNGGroup.TRANSACTIONAL_KAFKA_INDEX_SLOW, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITKafkaIndexingServiceTransactionalSerializedTest extends AbstractKafkaIndexingServiceTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java index 34fa4d908188..0cc47b9bc630 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITLocalInputSourceAllInputFormatTest.java @@ -30,7 +30,7 @@ import java.util.List; import java.util.Map; -@Test(groups = TestNGGroup.INPUT_FORMAT) +@Test(groups = {TestNGGroup.INPUT_FORMAT, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITLocalInputSourceAllInputFormatTest extends AbstractLocalInputSourceParallelIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java index 80ed846b0787..e81cf74b4571 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITOverwriteBatchIndexTest.java @@ -33,7 +33,7 @@ import java.util.UUID; import java.util.function.Function; -@Test(groups = TestNGGroup.BATCH_INDEX) +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITOverwriteBatchIndexTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java index 1cd90f09ac06..ddae46b18dd8 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java @@ -36,7 +36,7 @@ import java.io.Closeable; import java.util.function.Function; -@Test(groups = {TestNGGroup.PERFECT_ROLLUP_PARALLEL_BATCH_INDEX, TestNGGroup.SHUFFLE_DEEP_STORE}) +@Test(groups = {TestNGGroup.PERFECT_ROLLUP_PARALLEL_BATCH_INDEX, TestNGGroup.SHUFFLE_DEEP_STORE, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITPerfectRollupParallelIndexTest extends AbstractITBatchIndexTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITRealtimeIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITRealtimeIndexTaskTest.java index 8cc25d9ff50e..6dc2988c3e01 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITRealtimeIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITRealtimeIndexTaskTest.java @@ -44,7 +44,7 @@ /** * See {@link 
AbstractITRealtimeIndexTaskTest} for test details. */ -@Test(groups = TestNGGroup.REALTIME_INDEX) +@Test(groups = {TestNGGroup.REALTIME_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITRealtimeIndexTaskTest extends AbstractITRealtimeIndexTaskTest { diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java index 7582ae46bf14..e5f60d87c482 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITSystemTableBatchIndexTaskTest.java @@ -30,7 +30,7 @@ import java.io.Closeable; import java.util.function.Function; -@Test(groups = TestNGGroup.BATCH_INDEX) +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.CDS_TASK_SCHEMA_PUBLISH_DISABLED, TestNGGroup.CDS_COORDINATOR_SMQ_DISABLED}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITSystemTableBatchIndexTaskTest extends AbstractITBatchIndexTest { diff --git a/processing/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java b/processing/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java index 911b6d6bc611..1c185f38575b 100644 --- a/processing/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java +++ b/processing/src/main/java/org/apache/druid/metadata/MetadataStorageConnector.java @@ -90,4 +90,9 @@ default void exportTable( void createSupervisorsTable(); void deleteAllRecords(String tableName); + + /** + * SegmentSchema table is created only when CentralizedDatasourceSchema feature is enabled. + */ + void createSegmentSchemasTable(); } diff --git a/processing/src/main/java/org/apache/druid/metadata/MetadataStorageTablesConfig.java b/processing/src/main/java/org/apache/druid/metadata/MetadataStorageTablesConfig.java index e9dc41ec1e11..b70c6894ba7e 100644 --- a/processing/src/main/java/org/apache/druid/metadata/MetadataStorageTablesConfig.java +++ b/processing/src/main/java/org/apache/druid/metadata/MetadataStorageTablesConfig.java @@ -34,7 +34,7 @@ public class MetadataStorageTablesConfig public static MetadataStorageTablesConfig fromBase(String base) { - return new MetadataStorageTablesConfig(base, null, null, null, null, null, null, null, null, null, null, null); + return new MetadataStorageTablesConfig(base, null, null, null, null, null, null, null, null, null, null, null, null); } public static final String TASK_ENTRY_TYPE = "task"; @@ -81,6 +81,9 @@ public static MetadataStorageTablesConfig fromBase(String base) @JsonProperty("supervisors") private final String supervisorTable; + @JsonProperty("segmentSchemas") + private final String segmentSchemasTable; + @JsonCreator public MetadataStorageTablesConfig( @JsonProperty("base") String base, @@ -94,7 +97,8 @@ public MetadataStorageTablesConfig( @JsonProperty("taskLock") String taskLockTable, @JsonProperty("audit") String auditTable, @JsonProperty("supervisors") String supervisorTable, - @JsonProperty("upgradeSegments") String upgradeSegmentsTable + @JsonProperty("upgradeSegments") String upgradeSegmentsTable, + @JsonProperty("segmentSchemas") String segmentSchemasTable ) { this.base = (base == null) ? 
DEFAULT_BASE : base; @@ -113,6 +117,7 @@ public MetadataStorageTablesConfig( lockTables.put(TASK_ENTRY_TYPE, this.taskLockTable); this.auditTable = makeTableName(auditTable, "audit"); this.supervisorTable = makeTableName(supervisorTable, "supervisors"); + this.segmentSchemasTable = makeTableName(segmentSchemasTable, "segmentSchemas"); } private String makeTableName(String explicitTableName, String defaultSuffix) @@ -206,4 +211,9 @@ public String getTaskLockTable() { return taskLockTable; } + + public String getSegmentSchemasTable() + { + return segmentSchemasTable; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/DataSegmentWithSchema.java b/processing/src/main/java/org/apache/druid/segment/DataSegmentWithSchema.java new file mode 100644 index 000000000000..b82b4d266538 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/DataSegmentWithSchema.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; + +/** + * Immutable wrapper class for segment and schema. + */ +public class DataSegmentWithSchema +{ + @Nullable + private final DataSegment dataSegment; + + @Nullable + private final SchemaPayloadPlus schemaPayloadPlus; + + public DataSegmentWithSchema( + @Nullable DataSegment dataSegment, + @Nullable SchemaPayloadPlus schemaPayloadPlus + ) + { + this.dataSegment = dataSegment; + this.schemaPayloadPlus = schemaPayloadPlus; + } + + @Nullable + public DataSegment getDataSegment() + { + return dataSegment; + } + + @Nullable + public SchemaPayloadPlus getSegmentSchemaMetadata() + { + return schemaPayloadPlus; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/DataSegmentsWithSchemas.java b/processing/src/main/java/org/apache/druid/segment/DataSegmentsWithSchemas.java new file mode 100644 index 000000000000..319e1665da96 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/DataSegmentsWithSchemas.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +/** + * This immutable class encapsulates segments metadata and corresponding schema. + */ +public class DataSegmentsWithSchemas +{ + private final Set segments; + + @Nullable + private final SegmentSchemaMapping segmentSchemaMapping; + + public DataSegmentsWithSchemas(int schemaVersion) + { + this.segments = new HashSet<>(); + this.segmentSchemaMapping = new SegmentSchemaMapping(schemaVersion); + } + + @JsonCreator + public DataSegmentsWithSchemas( + @JsonProperty("segments") Set segments, + @JsonProperty("segmentSchemaMapping") @Nullable SegmentSchemaMapping segmentSchemaMapping + ) + { + this.segments = segments; + this.segmentSchemaMapping = segmentSchemaMapping; + } + + @JsonProperty + public Set getSegments() + { + return segments; + } + + @Nullable + @JsonProperty + public SegmentSchemaMapping getSegmentSchemaMapping() + { + return segmentSchemaMapping; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DataSegmentsWithSchemas that = (DataSegmentsWithSchemas) o; + return Objects.equals(segments, that.segments) && Objects.equals( + segmentSchemaMapping, + that.segmentSchemaMapping + ); + } + + @Override + public int hashCode() + { + return Objects.hash(segments, segmentSchemaMapping); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/SchemaPayload.java b/processing/src/main/java/org/apache/druid/segment/SchemaPayload.java new file mode 100644 index 000000000000..2fb32d1fad8b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/SchemaPayload.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.column.RowSignature; + +import javax.annotation.Nullable; +import java.util.Map; +import java.util.Objects; + +/** + * Representation of schema payload, includes information like RowSignature and aggregator factories. 
+ */ +public class SchemaPayload +{ + private final RowSignature rowSignature; + @Nullable + private final Map aggregatorFactories; + + @JsonCreator + public SchemaPayload( + @JsonProperty("rowSignature") RowSignature rowSignature, + @JsonProperty("aggregatorFactories") @Nullable Map aggregatorFactories + ) + { + this.rowSignature = rowSignature; + this.aggregatorFactories = aggregatorFactories; + } + + public SchemaPayload(RowSignature rowSignature) + { + this.rowSignature = rowSignature; + this.aggregatorFactories = null; + } + + @JsonProperty + public RowSignature getRowSignature() + { + return rowSignature; + } + + @Nullable + @JsonProperty + public Map getAggregatorFactories() + { + return aggregatorFactories; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SchemaPayload that = (SchemaPayload) o; + return Objects.equals(rowSignature, that.rowSignature) + && Objects.equals(aggregatorFactories, that.aggregatorFactories); + } + + @Override + public int hashCode() + { + return Objects.hash(rowSignature, aggregatorFactories); + } + + @Override + public String toString() + { + return "SchemaPayload{" + + "rowSignature=" + rowSignature + + ", aggregatorFactories=" + aggregatorFactories + + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/SchemaPayloadPlus.java b/processing/src/main/java/org/apache/druid/segment/SchemaPayloadPlus.java new file mode 100644 index 000000000000..e1cc143a3bb8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/SchemaPayloadPlus.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +/** + * Wrapper over {@link SchemaPayload} to include {@code numRows} information. 
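// Illustrative sketch only (not part of this patch): constructing a SchemaPayload from a row
// signature plus an optional aggregator map. The column and aggregator names are made up.
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.SchemaPayload;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;

import java.util.Collections;
import java.util.Map;

public class SchemaPayloadSketch
{
  public static void main(String[] args)
  {
    // Hypothetical signature: one string dimension and one long metric.
    RowSignature signature = RowSignature.builder()
        .add("dim1", ColumnType.STRING)
        .add("count", ColumnType.LONG)
        .build();

    // Aggregator factories are optional; the single-argument constructor omits them.
    Map<String, AggregatorFactory> aggregators =
        Collections.singletonMap("count", new CountAggregatorFactory("count"));

    SchemaPayload payload = new SchemaPayload(signature, aggregators);
    System.out.println(payload);
  }
}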
+ */ +public class SchemaPayloadPlus +{ + private final SchemaPayload schemaPayload; + private final Long numRows; + + @JsonCreator + public SchemaPayloadPlus( + @JsonProperty("schemaPayload") SchemaPayload schemaPayload, + @JsonProperty("numRows") Long numRows + ) + { + this.numRows = numRows; + this.schemaPayload = schemaPayload; + } + + @JsonProperty + public SchemaPayload getSchemaPayload() + { + return schemaPayload; + } + + @JsonProperty + public Long getNumRows() + { + return numRows; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SchemaPayloadPlus that = (SchemaPayloadPlus) o; + return Objects.equals(schemaPayload, that.schemaPayload) + && Objects.equals(numRows, that.numRows); + } + + @Override + public int hashCode() + { + return Objects.hash(schemaPayload, numRows); + } + + @Override + public String toString() + { + return "SegmentSchemaMetadata{" + + "schemaPayload=" + schemaPayload + + ", numRows=" + numRows + + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/SegmentMetadata.java b/processing/src/main/java/org/apache/druid/segment/SegmentMetadata.java new file mode 100644 index 000000000000..f12a676907ec --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/SegmentMetadata.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +/** + * Encapsulates segment level information like numRows, schema fingerprint. 
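// Illustrative sketch only (not part of this patch): a SchemaPayloadPlus simply pairs a
// SchemaPayload with the segment's row count. The column name and row count are made up.
import org.apache.druid.segment.SchemaPayload;
import org.apache.druid.segment.SchemaPayloadPlus;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;

public class SchemaPayloadPlusSketch
{
  public static void main(String[] args)
  {
    SchemaPayload payload = new SchemaPayload(
        RowSignature.builder().add("page", ColumnType.STRING).build()
    );
    SchemaPayloadPlus payloadPlus = new SchemaPayloadPlus(payload, 1234L);
    System.out.println(payloadPlus.getNumRows() + " rows, signature: "
                       + payloadPlus.getSchemaPayload().getRowSignature());
  }
}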
+ */ +public class SegmentMetadata +{ + private final Long numRows; + private final String schemaFingerprint; + + @JsonCreator + public SegmentMetadata( + @JsonProperty("numRows") Long numRows, + @JsonProperty("schemaFingerprint") String schemaFingerprint + ) + { + this.numRows = numRows; + this.schemaFingerprint = schemaFingerprint; + } + + @JsonProperty + public long getNumRows() + { + return numRows; + } + + @JsonProperty + public String getSchemaFingerprint() + { + return schemaFingerprint; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SegmentMetadata that = (SegmentMetadata) o; + return Objects.equals(numRows, that.numRows) && Objects.equals( + schemaFingerprint, + that.schemaFingerprint + ); + } + + @Override + public int hashCode() + { + return Objects.hash(numRows, schemaFingerprint); + } + + @Override + public String toString() + { + return "SegmentStats{" + + "numRows=" + numRows + + ", fingerprint='" + schemaFingerprint + '\'' + + '}'; + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/SegmentSchemaMapping.java b/processing/src/main/java/org/apache/druid/segment/SegmentSchemaMapping.java new file mode 100644 index 000000000000..8069bd714934 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/SegmentSchemaMapping.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.timeline.SegmentId; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Compact representation of segment schema for multiple segments. Note, that this is a mutable class. 
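// Illustrative sketch only (not part of this patch): the per-segment record stored in a
// SegmentSchemaMapping, pairing a row count with the fingerprint of the schema the segment
// references. The fingerprint string is a placeholder; fingerprint computation is not shown here.
import org.apache.druid.segment.SegmentMetadata;

public class SegmentMetadataSketch
{
  public static void main(String[] args)
  {
    SegmentMetadata metadata = new SegmentMetadata(1234L, "fp-1");
    System.out.println(metadata.getNumRows() + " rows -> schema " + metadata.getSchemaFingerprint());
  }
}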
+ */ +public class SegmentSchemaMapping +{ + private final Map segmentIdToMetadataMap; + + private final Map schemaFingerprintToPayloadMap; + + private final int schemaVersion; + + @JsonCreator + public SegmentSchemaMapping( + @JsonProperty("segmentIdToMetadataMap") Map segmentIdToMetadataMap, + @JsonProperty("schemaFingerprintToPayloadMap") Map schemaFingerprintToPayloadMap, + @JsonProperty("schemaVersion") int schemaVersion + ) + { + this.segmentIdToMetadataMap = segmentIdToMetadataMap; + this.schemaFingerprintToPayloadMap = schemaFingerprintToPayloadMap; + this.schemaVersion = schemaVersion; + } + + public SegmentSchemaMapping(int schemaVersion) + { + this.segmentIdToMetadataMap = new HashMap<>(); + this.schemaFingerprintToPayloadMap = new HashMap<>(); + this.schemaVersion = schemaVersion; + } + + @JsonProperty + public Map getSegmentIdToMetadataMap() + { + return segmentIdToMetadataMap; + } + + @JsonProperty + public Map getSchemaFingerprintToPayloadMap() + { + return schemaFingerprintToPayloadMap; + } + + @JsonProperty + public int getSchemaVersion() + { + return schemaVersion; + } + + public boolean isNonEmpty() + { + return segmentIdToMetadataMap.size() > 0; + } + + /** + * Add schema information for the segment. + */ + public void addSchema( + SegmentId segmentId, + SchemaPayloadPlus schemaPayloadPlus, + String fingerprint + ) + { + segmentIdToMetadataMap.put(segmentId.toString(), new SegmentMetadata(schemaPayloadPlus.getNumRows(), fingerprint)); + schemaFingerprintToPayloadMap.put(fingerprint, schemaPayloadPlus.getSchemaPayload()); + } + + /** + * Merge with another instance. + */ + public void merge(SegmentSchemaMapping other) + { + this.segmentIdToMetadataMap.putAll(other.getSegmentIdToMetadataMap()); + this.schemaFingerprintToPayloadMap.putAll(other.getSchemaFingerprintToPayloadMap()); + } + + public int getSchemaCount() + { + return schemaFingerprintToPayloadMap.size(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SegmentSchemaMapping that = (SegmentSchemaMapping) o; + return schemaVersion == that.schemaVersion && Objects.equals( + segmentIdToMetadataMap, + that.segmentIdToMetadataMap + ) && Objects.equals(schemaFingerprintToPayloadMap, that.schemaFingerprintToPayloadMap); + } + + @Override + public int hashCode() + { + return Objects.hash(segmentIdToMetadataMap, schemaFingerprintToPayloadMap, schemaVersion); + } + + @Override + public String toString() + { + return "SegmentSchemaMapping{" + + "segmentIdToMetadataMap=" + segmentIdToMetadataMap + + ", schemaFingerprintToPayloadMap=" + schemaFingerprintToPayloadMap + + ", version='" + schemaVersion + '\'' + + '}'; + } +} diff --git a/processing/src/test/java/org/apache/druid/guice/MetadataStorageTablesConfigTest.java b/processing/src/test/java/org/apache/druid/guice/MetadataStorageTablesConfigTest.java index 242a2cf10334..6f49e94e4e9c 100644 --- a/processing/src/test/java/org/apache/druid/guice/MetadataStorageTablesConfigTest.java +++ b/processing/src/test/java/org/apache/druid/guice/MetadataStorageTablesConfigTest.java @@ -64,6 +64,7 @@ public ObjectMapper jsonMapper() Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.base"), config.getBase()); Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.segments"), config.getSegmentsTable()); + Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.segmentSchemas"), config.getSegmentSchemasTable()); 
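// Illustrative sketch only (not part of this patch): populating a SegmentSchemaMapping per task and
// merging mappings before publish, using the addSchema/merge methods defined above. The datasource,
// interval, version, fingerprint, and row count below are made up for illustration.
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.SchemaPayload;
import org.apache.druid.segment.SchemaPayloadPlus;
import org.apache.druid.segment.SegmentSchemaMapping;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.timeline.SegmentId;

import java.util.Collections;
import java.util.Map;

public class SegmentSchemaMappingSketch
{
  public static void main(String[] args)
  {
    RowSignature signature = RowSignature.builder()
        .add("dim1", ColumnType.STRING)
        .add("count", ColumnType.LONG)
        .build();
    Map<String, AggregatorFactory> aggregators =
        Collections.singletonMap("count", new CountAggregatorFactory("count"));
    SchemaPayloadPlus payloadPlus =
        new SchemaPayloadPlus(new SchemaPayload(signature, aggregators), 100L);

    // Each task can accumulate schema information keyed by segment id and schema fingerprint.
    SegmentSchemaMapping taskMapping = new SegmentSchemaMapping(0);
    taskMapping.addSchema(
        SegmentId.of("wiki", Intervals.of("2024-01-01/P1D"), "v1", 0),
        payloadPlus,
        "fp-1"
    );

    // Mappings from several tasks can then be merged into one before segments are committed.
    SegmentSchemaMapping merged = new SegmentSchemaMapping(0);
    merged.merge(taskMapping);
    System.out.println(merged.isNonEmpty() + ", distinct schemas: " + merged.getSchemaCount());
  }
}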
Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.rules"), config.getRulesTable()); Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.config"), config.getConfigTable()); Assert.assertEquals( @@ -82,4 +83,18 @@ public ObjectMapper jsonMapper() Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.supervisors"), config.getSupervisorTable()); Assert.assertEquals(props.getProperty("druid.metadata.storage.tables.upgradeSegments"), config.getUpgradeSegmentsTable()); } + + @Test + public void testReadConfig() + { + MetadataStorageTablesConfig fromBase = MetadataStorageTablesConfig.fromBase("druid.metadata.storage.tables"); + Assert.assertEquals("druid.metadata.storage.tables_segments", fromBase.getSegmentsTable()); + Assert.assertEquals("druid.metadata.storage.tables_segmentSchemas", fromBase.getSegmentSchemasTable()); + Assert.assertEquals("druid.metadata.storage.tables_tasklocks", fromBase.getTaskLockTable()); + Assert.assertEquals("druid.metadata.storage.tables_rules", fromBase.getRulesTable()); + Assert.assertEquals("druid.metadata.storage.tables_config", fromBase.getConfigTable()); + Assert.assertEquals("druid.metadata.storage.tables_dataSource", fromBase.getDataSourceTable()); + Assert.assertEquals("druid.metadata.storage.tables_supervisors", fromBase.getSupervisorTable()); + Assert.assertEquals("druid.metadata.storage.tables_upgradeSegments", fromBase.getUpgradeSegmentsTable()); + } } diff --git a/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageConnector.java b/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageConnector.java index 3c98e6bcdddc..d8722a2719f0 100644 --- a/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageConnector.java +++ b/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageConnector.java @@ -95,4 +95,10 @@ public void deleteAllRecords(String tableName) { throw new UnsupportedOperationException(); } + + @Override + public void createSegmentSchemasTable() + { + throw new UnsupportedOperationException(); + } } diff --git a/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageTablesConfig.java b/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageTablesConfig.java index 8a864f425548..784b7e2cad69 100644 --- a/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageTablesConfig.java +++ b/processing/src/test/java/org/apache/druid/metadata/TestMetadataStorageTablesConfig.java @@ -38,6 +38,7 @@ public TestMetadataStorageTablesConfig() null, null, null, + null, null ); } diff --git a/processing/src/test/java/org/apache/druid/segment/column/DataSegmentsWithSchemasTest.java b/processing/src/test/java/org/apache/druid/segment/column/DataSegmentsWithSchemasTest.java new file mode 100644 index 000000000000..2c93f3c3068d --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/column/DataSegmentsWithSchemasTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.column; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.DataSegmentsWithSchemas; +import org.apache.druid.segment.SchemaPayload; +import org.apache.druid.segment.SchemaPayloadPlus; +import org.apache.druid.segment.SegmentSchemaMapping; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.LinearShardSpec; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; + +public class DataSegmentsWithSchemasTest +{ + private ObjectMapper mapper = TestHelper.makeJsonMapper(); + + @Test + public void testSerde() throws IOException + { + final DataSegment segment = new DataSegment( + "foo", + Intervals.of("2023-01-01/2023-01-02"), + "2023-01-01", + ImmutableMap.of("path", "a-1"), + ImmutableList.of("dim1"), + ImmutableList.of("m1"), + new LinearShardSpec(0), + 9, + 100 + ); + + SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(0); + segmentSchemaMapping.addSchema( + segment.getId(), + new SchemaPayloadPlus( + new SchemaPayload( + RowSignature.builder().add("c", ColumnType.FLOAT).build()), + 20L + ), + "fp" + ); + + DataSegmentsWithSchemas dataSegmentsWithSchemas = new DataSegmentsWithSchemas(Collections.singleton(segment), segmentSchemaMapping); + + byte[] bytes = mapper.writeValueAsBytes(dataSegmentsWithSchemas); + + DataSegmentsWithSchemas deserialized = mapper.readValue(bytes, DataSegmentsWithSchemas.class); + + Assert.assertEquals(deserialized, dataSegmentsWithSchemas); + } + + @Test + public void testEquals() + { + final DataSegment segment = new DataSegment( + "foo", + Intervals.of("2023-01-01/2023-01-02"), + "2023-01-01", + ImmutableMap.of("path", "a-1"), + ImmutableList.of("dim1"), + ImmutableList.of("m1"), + new LinearShardSpec(0), + 9, + 100 + ); + + SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping(0); + segmentSchemaMapping.addSchema( + segment.getId(), + new SchemaPayloadPlus( + new SchemaPayload( + RowSignature.builder().add("c", ColumnType.FLOAT).build()), + 20L + ), + "fp" + ); + + DataSegmentsWithSchemas dataSegmentsWithSchemas = new DataSegmentsWithSchemas(Collections.singleton(segment), segmentSchemaMapping); + + DataSegmentsWithSchemas emptySegmentWithSchemas = new DataSegmentsWithSchemas(0); + + Assert.assertNotEquals(dataSegmentsWithSchemas, emptySegmentWithSchemas); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadPlusTest.java b/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadPlusTest.java new file mode 100644 index 000000000000..ac501c1524e8 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadPlusTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.column; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.last.StringLastAggregatorFactory; +import org.apache.druid.segment.SchemaPayload; +import org.apache.druid.segment.SchemaPayloadPlus; +import org.apache.druid.segment.TestHelper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; + +public class SchemaPayloadPlusTest +{ + + static { + NullHandling.initializeForTests(); + } + + private ObjectMapper mapper = TestHelper.makeJsonMapper(); + + @Test + public void testSerde() throws IOException + { + RowSignature rowSignature = RowSignature.builder().add("c", ColumnType.FLOAT).build(); + + StringLastAggregatorFactory factory = new StringLastAggregatorFactory("billy", "nilly", null, 20); + SchemaPayload payload = new SchemaPayload(rowSignature, Collections.singletonMap("twosum", factory)); + + SchemaPayloadPlus metadata = new SchemaPayloadPlus(payload, 20L); + + byte[] bytes = mapper.writeValueAsBytes(metadata); + SchemaPayloadPlus deserialized = mapper.readValue(bytes, SchemaPayloadPlus.class); + + Assert.assertEquals(metadata, deserialized); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadTest.java b/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadTest.java new file mode 100644 index 000000000000..0878e92d2d8c --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/column/SchemaPayloadTest.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.column; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.last.StringLastAggregatorFactory; +import org.apache.druid.segment.SchemaPayload; +import org.apache.druid.segment.TestHelper; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; + +public class SchemaPayloadTest +{ + + static { + NullHandling.initializeForTests(); + } + + private ObjectMapper mapper = TestHelper.makeJsonMapper(); + + @Test + public void testSerde() throws IOException + { + RowSignature rowSignature = RowSignature.builder().add("c", ColumnType.FLOAT).build(); + + StringLastAggregatorFactory factory = new StringLastAggregatorFactory("billy", "nilly", null, 20); + SchemaPayload payload = new SchemaPayload(rowSignature, Collections.singletonMap("twosum", factory)); + + SchemaPayload payloadCopy = new SchemaPayload( + RowSignature.builder().add("c", ColumnType.FLOAT).build(), + Collections.singletonMap("twosum", new StringLastAggregatorFactory("billy", "nilly", null, 20)) + ); + + Assert.assertEquals(payload, payloadCopy); + + byte[] bytes = mapper.writeValueAsBytes(payload); + SchemaPayload deserialized = mapper.readValue(bytes, SchemaPayload.class); + + Assert.assertEquals(payload, deserialized); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/column/SegmentSchemaMappingTest.java b/processing/src/test/java/org/apache/druid/segment/column/SegmentSchemaMappingTest.java new file mode 100644 index 000000000000..38c9362a11ce --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/column/SegmentSchemaMappingTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.column; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.aggregation.last.StringLastAggregatorFactory; +import org.apache.druid.segment.SchemaPayload; +import org.apache.druid.segment.SchemaPayloadPlus; +import org.apache.druid.segment.SegmentMetadata; +import org.apache.druid.segment.SegmentSchemaMapping; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.Collections; + +public class SegmentSchemaMappingTest +{ + static { + NullHandling.initializeForTests(); + } + + private ObjectMapper mapper = TestHelper.makeJsonMapper(); + + @Test + public void testSerde() throws IOException + { + RowSignature rowSignature = RowSignature.builder().add("c", ColumnType.FLOAT).build(); + + SegmentId segmentId = SegmentId.dummy("ds1"); + + StringLastAggregatorFactory factory = new StringLastAggregatorFactory("billy", "nilly", null, 20); + SchemaPayload payload = new SchemaPayload(rowSignature, Collections.singletonMap("twosum", factory)); + SchemaPayloadPlus schemaPayloadPlus = new SchemaPayloadPlus(payload, 20L); + + SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping( + Collections.singletonMap(segmentId.toString(), new SegmentMetadata(20L, "fp1")), + Collections.singletonMap("fp1", payload), + 1 + ); + + byte[] bytes = mapper.writeValueAsBytes(segmentSchemaMapping); + SegmentSchemaMapping deserialized = mapper.readValue(bytes, SegmentSchemaMapping.class); + + Assert.assertEquals(segmentSchemaMapping, deserialized); + + SegmentSchemaMapping copy = new SegmentSchemaMapping(1); + copy.merge(segmentSchemaMapping); + + Assert.assertEquals(segmentSchemaMapping, copy); + + SegmentSchemaMapping copy2 = new SegmentSchemaMapping(1); + copy2.addSchema(segmentId, schemaPayloadPlus, "fp1"); + + Assert.assertEquals(segmentSchemaMapping, copy2); + } + + @Test + public void testEquals() + { + RowSignature rowSignature = RowSignature.builder().add("c", ColumnType.FLOAT).build(); + + SegmentId segmentId = SegmentId.dummy("ds1"); + + StringLastAggregatorFactory factory = new StringLastAggregatorFactory("billy", "nilly", null, 20); + SchemaPayload payload = new SchemaPayload(rowSignature, Collections.singletonMap("twosum", factory)); + + SegmentSchemaMapping segmentSchemaMapping = new SegmentSchemaMapping( + Collections.singletonMap(segmentId.toString(), new SegmentMetadata(20L, "fp1")), + Collections.singletonMap("fp1", payload), + 1 + ); + + SegmentSchemaMapping segmentSchemaMappingWithDifferentVersion = new SegmentSchemaMapping( + Collections.singletonMap(segmentId.toString(), new SegmentMetadata(20L, "fp1")), + Collections.singletonMap("fp1", payload), + 0 + ); + + Assert.assertNotEquals(segmentSchemaMapping, segmentSchemaMappingWithDifferentVersion); + + SegmentSchemaMapping segmentSchemaMappingWithDifferentPayload = new SegmentSchemaMapping( + Collections.emptyMap(), + Collections.emptyMap(), + 0 + ); + + Assert.assertNotEquals(segmentSchemaMapping, segmentSchemaMappingWithDifferentPayload); + } +} diff --git a/processing/src/test/resources/test.runtime.properties b/processing/src/test/resources/test.runtime.properties index f3af08d0f92e..4f713bc66ddf 100644 --- a/processing/src/test/resources/test.runtime.properties +++ b/processing/src/test/resources/test.runtime.properties @@ -30,3 +30,4 @@ 
druid.metadata.storage.tables.supervisors=iii_supervisors
druid.metadata.storage.tables.upgradeSegments=jjj_upgradeSegments
druid.query.segmentMetadata.defaultAnalysisTypes=["cardinality", "size"]
druid.query.segmentMetadata.defaultHistory=P2W
+druid.metadata.storage.tables.segmentSchemas=kkk_segmentSchemas
diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/DataSourceMetadata.java b/server/src/main/java/org/apache/druid/indexing/overlord/DataSourceMetadata.java
index 1e5b3d1d5db8..7fd3d0668ca5 100644
--- a/server/src/main/java/org/apache/druid/indexing/overlord/DataSourceMetadata.java
+++ b/server/src/main/java/org/apache/druid/indexing/overlord/DataSourceMetadata.java
@@ -21,12 +21,13 @@
 import com.fasterxml.jackson.annotation.JsonSubTypes;
 import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import org.apache.druid.segment.SegmentSchemaMapping;
 import java.util.Set;
 /**
 * Commit metadata for a dataSource. Used by
- * {@link IndexerMetadataStorageCoordinator#commitSegmentsAndMetadata(Set, DataSourceMetadata, DataSourceMetadata)}
+ * {@link IndexerMetadataStorageCoordinator#commitSegmentsAndMetadata(Set, DataSourceMetadata, DataSourceMetadata, SegmentSchemaMapping)}
 * to provide metadata transactions for segment inserts.
 *
 * Two metadata instances can be added together, and any conflicts are resolved in favor of the right-hand side.
diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java
index 2390e7b55003..23513c82ad7b 100644
--- a/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java
+++ b/server/src/main/java/org/apache/druid/indexing/overlord/IndexerMetadataStorageCoordinator.java
@@ -22,6 +22,7 @@
 import org.apache.druid.java.util.common.Pair;
 import org.apache.druid.metadata.PendingSegmentRecord;
 import org.apache.druid.metadata.ReplaceTaskLock;
+import org.apache.druid.segment.SegmentSchemaMapping;
 import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.partition.PartialShardSpec;
@@ -189,14 +190,15 @@ List<DataSegment> retrieveUnusedSegmentsForInterval(
 int markSegmentsAsUnusedWithinInterval(String dataSource, Interval interval);
 /**
- * Attempts to insert a set of segments to the metadata storage. Returns the set of segments actually added (segments
- * with identifiers already in the metadata storage will not be added).
+ * Attempts to insert a set of segments and corresponding schema to the metadata storage.
+ * Returns the set of segments actually added (segments with identifiers already in the metadata storage will not be added).
 *
 * @param segments set of segments to add
+ * @param segmentSchemaMapping segment schema information to add
 *
 * @return set of segments actually added
 */
- Set<DataSegment> commitSegments(Set<DataSegment> segments) throws IOException;
+ Set<DataSegment> commitSegments(Set<DataSegment> segments, @Nullable SegmentSchemaMapping segmentSchemaMapping) throws IOException;
 /**
 * Allocates pending segments for the given requests in the pending segments table.
@@ -278,8 +280,8 @@ SegmentIdWithShardSpec allocatePendingSegment(
 int deletePendingSegments(String dataSource);
 /**
- * Attempts to insert a set of segments to the metadata storage. Returns the set of segments actually added (segments
- * with identifiers already in the metadata storage will not be added).
+ * Attempts to insert a set of segments and corresponding schema to the metadata storage. + * Returns the set of segments actually added (segments with identifiers already in the metadata storage will not be added). *

* If startMetadata and endMetadata are set, this insertion will be atomic with a compare-and-swap on dataSource
* commit metadata.
@@ -294,6 +296,7 @@ SegmentIdWithShardSpec allocatePendingSegment(
 * @param endMetadata dataSource metadata post-insert will have this endMetadata merged in with
 * {@link DataSourceMetadata#plus(DataSourceMetadata)}. If null, this insert will not
 * involve a metadata transaction
+ * @param segmentSchemaMapping segment schema information to persist.
 *
 * @return segment publish result indicating transaction success or failure, and set of segments actually published.
 * This method must only return a failure code if it is sure that the transaction did not happen. If it is not sure,
@@ -305,12 +308,14 @@ SegmentIdWithShardSpec allocatePendingSegment(
 SegmentPublishResult commitSegmentsAndMetadata(
 Set<DataSegment> segments,
 @Nullable DataSourceMetadata startMetadata,
- @Nullable DataSourceMetadata endMetadata
+ @Nullable DataSourceMetadata endMetadata,
+ @Nullable SegmentSchemaMapping segmentSchemaMapping
 ) throws IOException;
 /**
- * Commits segments created by an APPEND task. This method also handles segment
- * upgrade scenarios that may result from concurrent append and replace.
+ * Commits segments and corresponding schema created by an APPEND task.
+ * This method also handles segment upgrade scenarios that may result
+ * from concurrent append and replace.
 *