Skip to content

Commit

Permalink
Merge branch 'master' into array_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
vogievetsky committed Jan 30, 2024
2 parents 00852a6 + ef46d88 commit 49d1a01
Show file tree
Hide file tree
Showing 295 changed files with 8,378 additions and 2,487 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/static-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
if: ${{ matrix.java == '8' }}
run: |
echo 'Running Maven install...' &&
${MVN} clean install -q -ff -pl '!distribution,!:druid-it-image,!:druid-it-cases' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} clean install -q -ff -pl '!distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
- name: checkstyle
Expand Down Expand Up @@ -129,7 +129,7 @@ jobs:
- name: maven install
run: |
echo 'Running Maven install...' &&
${MVN} clean install -q -ff -pl '!distribution,!:druid-it-image,!:druid-it-cases' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} clean install -q -ff -pl '!distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
- name: intellij inspections
Expand Down
2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Apache Druid
Copyright 2018-2023 The Apache Software Foundation
Copyright 2018-2024 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>29.0.0-SNAPSHOT</version>
<version>30.0.0-SNAPSHOT</version>
</parent>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ public static Pair<PlannerFactory, SqlEngine> createSqlSystem(
)
{
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer);
final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate);
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate);
final PlannerConfig plannerConfig = new PlannerConfig();

for (final Map.Entry<DataSegment, QueryableIndex> segmentEntry : segmentMap.entrySet()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ public void setup()
PROCESSING_CONFIG
);

final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add(
dataSegment,
index
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public int getNumThreads()
{
return 1;
}

@Override
public String getFormatString()
{
Expand Down Expand Up @@ -326,7 +326,7 @@ public void setup()
PROCESSING_CONFIG
);

final SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
final SpecificSegmentsQuerySegmentWalker walker = SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add(
dataSegment,
index
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public void setup()
final QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(closer);
final PlannerConfig plannerConfig = new PlannerConfig();

this.walker = closer.register(new SpecificSegmentsQuerySegmentWalker(conglomerate).add(dataSegment, index));
this.walker = closer.register(SpecificSegmentsQuerySegmentWalker.createWalker(conglomerate).add(dataSegment, index));
final DruidSchemaCatalog rootSchema =
CalciteTests.createMockRootSchema(conglomerate, walker, plannerConfig, AuthTestUtils.TEST_AUTHORIZER_MAPPER);
engine = CalciteTests.createMockSqlEngine(walker, conglomerate);
Expand Down
2 changes: 1 addition & 1 deletion cloud/aws-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>29.0.0-SNAPSHOT</version>
<version>30.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion cloud/gcp-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>29.0.0-SNAPSHOT</version>
<version>30.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

Expand Down
10 changes: 5 additions & 5 deletions distribution/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ services:
- ZOO_MY_ID=1

coordinator:
image: apache/druid:29.0.0
image: apache/druid:30.0.0
container_name: coordinator
volumes:
- druid_shared:/opt/shared
Expand All @@ -67,7 +67,7 @@ services:
- environment

broker:
image: apache/druid:29.0.0
image: apache/druid:30.0.0
container_name: broker
volumes:
- broker_var:/opt/druid/var
Expand All @@ -83,7 +83,7 @@ services:
- environment

historical:
image: apache/druid:29.0.0
image: apache/druid:30.0.0
container_name: historical
volumes:
- druid_shared:/opt/shared
Expand All @@ -100,7 +100,7 @@ services:
- environment

middlemanager:
image: apache/druid:29.0.0
image: apache/druid:30.0.0
container_name: middlemanager
volumes:
- druid_shared:/opt/shared
Expand All @@ -118,7 +118,7 @@ services:
- environment

router:
image: apache/druid:29.0.0
image: apache/druid:30.0.0
container_name: router
volumes:
- router_var:/opt/druid/var
Expand Down
4 changes: 3 additions & 1 deletion distribution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>29.0.0-SNAPSHOT</version>
<version>30.0.0-SNAPSHOT</version>
</parent>

<dependencies>
Expand Down Expand Up @@ -443,6 +443,8 @@
<argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:druid-tdigestsketch</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:druid-ddsketch</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:gce-extensions</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:aliyun-oss-extensions</argument>
Expand Down
1 change: 1 addition & 0 deletions docs/configuration/extensions.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ All of these community extensions can be downloaded using [pull-deps](../operati
|druid-cassandra-storage|Apache Cassandra deep storage.|[link](../development/extensions-contrib/cassandra.md)|
|druid-cloudfiles-extensions|Rackspace Cloudfiles deep storage and firehose.|[link](../development/extensions-contrib/cloudfiles.md)|
|druid-compressed-bigdecimal|Compressed Big Decimal Type | [link](../development/extensions-contrib/compressed-big-decimal.md)|
|druid-ddsketch|Support for approximate quantiles based on [DDSketch](https://github.com/datadog/sketches-java) | [link](../development/extensions-contrib/ddsketch-quantiles.md)|
|druid-distinctcount|DistinctCount aggregator|[link](../development/extensions-contrib/distinctcount.md)|
|druid-redis-cache|A cache implementation for Druid based on Redis.|[link](../development/extensions-contrib/redis-cache.md)|
|druid-time-min-max|Min/Max aggregator for timestamp.|[link](../development/extensions-contrib/time-min-max.md)|
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ All Druid components can communicate with each other over HTTP.
|Property|Description|Default|
|--------|-----------|-------|
|`druid.global.http.numConnections`|Size of connection pool per destination URL. If there are more HTTP requests than this number that all need to speak to the same URL, then they will queue up.|`20`|
|`druid.global.http.eagerInitialization`|Indicates that http connections should be eagerly initialized. If set to true, `numConnections` connections are created upon initialization|`true`|
|`druid.global.http.eagerInitialization`|Indicates that http connections should be eagerly initialized. If set to true, `numConnections` connections are created upon initialization|`false`|
|`druid.global.http.compressionCodec`|Compression codec to communicate with others. May be "gzip" or "identity".|`gzip`|
|`druid.global.http.readTimeout`|The timeout for data reads.|`PT15M`|
|`druid.global.http.unusedConnectionTimeout`|The timeout for idle connections in connection pool. The connection in the pool will be closed after this timeout and a new one will be established. This timeout should be less than `druid.global.http.readTimeout`. Set this timeout = ~90% of `druid.global.http.readTimeout`|`PT4M`|
Expand Down
139 changes: 139 additions & 0 deletions docs/development/extensions-contrib/ddsketch-quantiles.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
---
id: ddsketch-quantiles
title: "DDSketches for Approximate Quantiles module"
---

<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->


This module provides aggregators for approximate quantile queries using the [DDSketch](https://github.com/datadog/sketches-java) library. The DDSketch library provides a fast and fully mergeable quantile sketch with relative-error guarantees. For example, if the true quantile is 100, a sketch with a relative error of 1% guarantees a quantile value between 99 and 101. This is important and highly valuable behavior for long-tailed distributions. The best use case for these sketches is accurately describing the upper quantiles of long-tailed distributions such as network latencies.

To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-ddsketch` in the extensions load list.

```
druid.extensions.loadList=["druid-ddsketch", ...]
```

### Aggregator

The result of the aggregation is a DDSketch that is the union of all sketches either built from raw data or read from the segments. The single number that is returned represents the total number of included data points. The default aggregator type of `ddSketch` uses the `collapsingLowestDense` strategy for storing and merging sketches. This means that, in favor of keeping the highest values represented at the highest accuracy, the sketch collapses and merges the lowest values in the sketch. Collapsed bins lose their accuracy guarantees. The default number of bins is 1000. Sketches can only be merged when they use the same `relativeError` value.

The `ddSketch` aggregator operates over raw data and precomputed sketches.

```json
{
"type" : "ddSketch",
"name" : <output_name>,
"fieldName" : <input_name>,
"relativeError" : <double(0, 1)>,
"numBins": <int>
}
```

|property|description|required?|
|--------|-----------|---------|
|type|Must be "ddSketch" |yes|
|name|A String for the output (result) name of the calculation.|yes|
|fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes|
|relativeError|Describes the precision with which to store the sketch. Must be a number between 0 and 1.|no, defaults to 0.01 (1% error)|
|numBins|Total number of bins the sketch is allowed to use to describe the distribution. This has a direct impact on max memory used. The more total bins available, the larger the range of accurate quantiles. With relative accuracy of 2%, only 275 bins are required to cover values between 1 millisecond and 1 minute. 800 bins are required to cover values between 1 nanosecond and 1 day.|no, defaults to 1000|


### Post Aggregators

To compute approximate quantiles, use `quantilesFromDDSketch` to query for a set of quantiles or `quantileFromDDSketch` to query for a single quantile. Call these post-aggregators on the sketches created by the `ddSketch` aggregators.


#### quantilesFromDDSketch

Use `quantilesFromDDSketch` to fetch multiple quantiles.

```json
{
"type" : "quantilesFromDDSketch",
"name" : <output_name>,
"field" : <reference to DDSketch>,
"fractions" : <array of doubles in [0,1]>
}
```

|property|description|required?|
|--------|-----------|---------|
|type|Must be "quantilesFromDDSketch" |yes|
|name|A String for the output (result) name of the calculation.|yes|
|field|A computed ddSketch.|yes|
|fractions|Array of doubles from 0 to 1 of the quantiles to compute|yes|

#### quantileFromDDSketch

Use `quantileFromDDSketch` to fetch a single quantile.

```json
{
"type" : "quantileFromDDSketch",
"name" : <output_name>,
"field" : <reference to DDSketch>,
"fraction" : <double [0,1]>
}
```

|property|description|required?|
|--------|-----------|---------|
|type|Must be "quantileFromDDSketch" |yes|
|name|A String for the output (result) name of the calculation.|yes|
|field|A computed ddSketch.|yes|
|fraction|A double from 0 to 1 of the quantile to compute|yes|


### Example

As an example of a query with sketches pre-aggregated at ingestion time, one could set up the following aggregator at ingest:

```json
{
"type": "ddSketch",
"name": "sketch",
"fieldName": "value",
"relativeError": 0.01,
"numBins": 1000,
}
```

Compute quantiles from the pre-aggregated sketches using the following aggregator and post-aggregator.

```json
{
"aggregations": [{
"type": "ddSketch",
"name": "sketch",
"fieldName": "sketch"
}],
"postAggregations": [
{
"type": "quantilesFromDDSketch",
"name": "quantiles",
"fractions": [0.5, 0.75, 0.9, 0.99],
"field": {
"type": "fieldAccess",
"fieldName": "sketch"
}
}]
}
```
Loading

0 comments on commit 49d1a01

Please sign in to comment.