diff --git a/integration-test/src/test/java/org/apache/iotdb/relational/it/query/recent/IoTDBTableAggregationIT.java b/integration-test/src/test/java/org/apache/iotdb/relational/it/query/recent/IoTDBTableAggregationIT.java index d684f83d36a2..860545fdb1f2 100644 --- a/integration-test/src/test/java/org/apache/iotdb/relational/it/query/recent/IoTDBTableAggregationIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/relational/it/query/recent/IoTDBTableAggregationIT.java @@ -4345,6 +4345,42 @@ public void approxPercentileTest() { DATABASE_NAME); } + @Test + public void percentileTest() { + tableResultSetEqualTest( + "select percentile(time, 0.5),percentile(s1,0.5),percentile(s2,0.5),percentile(s3,0.5),percentile(s4,0.5),percentile(s9,0.5) from table1", + buildHeaders(6), + new String[] {"2024-09-24T06:15:40.000Z,40,43000,37.5,43.0,2024-09-24T06:15:40.000Z,"}, + DATABASE_NAME); + + tableResultSetEqualTest( + "select time,province,percentile(time, 0.5),percentile(s1,0.5),percentile(s2,0.5) from table1 group by 1,2 order by 2,1", + new String[] {"time", "province", "_col2", "_col3", "_col4"}, + new String[] { + "2024-09-24T06:15:30.000Z,beijing,2024-09-24T06:15:30.000Z,30,null,", + "2024-09-24T06:15:31.000Z,beijing,2024-09-24T06:15:31.000Z,null,31000,", + "2024-09-24T06:15:35.000Z,beijing,2024-09-24T06:15:35.000Z,null,35000,", + "2024-09-24T06:15:36.000Z,beijing,2024-09-24T06:15:36.000Z,36,null,", + "2024-09-24T06:15:40.000Z,beijing,2024-09-24T06:15:40.000Z,40,40000,", + "2024-09-24T06:15:41.000Z,beijing,2024-09-24T06:15:41.000Z,41,null,", + "2024-09-24T06:15:46.000Z,beijing,2024-09-24T06:15:46.000Z,null,46000,", + "2024-09-24T06:15:50.000Z,beijing,2024-09-24T06:15:50.000Z,null,50000,", + "2024-09-24T06:15:51.000Z,beijing,2024-09-24T06:15:51.000Z,null,null,", + "2024-09-24T06:15:55.000Z,beijing,2024-09-24T06:15:55.000Z,55,null,", + "2024-09-24T06:15:30.000Z,shanghai,2024-09-24T06:15:30.000Z,30,null,", + "2024-09-24T06:15:31.000Z,shanghai,2024-09-24T06:15:31.000Z,null,31000,", + "2024-09-24T06:15:35.000Z,shanghai,2024-09-24T06:15:35.000Z,null,35000,", + "2024-09-24T06:15:36.000Z,shanghai,2024-09-24T06:15:36.000Z,36,null,", + "2024-09-24T06:15:40.000Z,shanghai,2024-09-24T06:15:40.000Z,40,40000,", + "2024-09-24T06:15:41.000Z,shanghai,2024-09-24T06:15:41.000Z,41,null,", + "2024-09-24T06:15:46.000Z,shanghai,2024-09-24T06:15:46.000Z,null,46000,", + "2024-09-24T06:15:50.000Z,shanghai,2024-09-24T06:15:50.000Z,null,50000,", + "2024-09-24T06:15:51.000Z,shanghai,2024-09-24T06:15:51.000Z,null,null,", + "2024-09-24T06:15:55.000Z,shanghai,2024-09-24T06:15:55.000Z,55,null,", + }, + DATABASE_NAME); + } + @Test public void exceptionTest() { tableAssertTestFail( @@ -4423,6 +4459,22 @@ public void exceptionTest() { "select approx_percentile(s5,0.5) from table1", "701: Aggregation functions [approx_percentile] should have value column as numeric type [INT32, INT64, FLOAT, DOUBLE, TIMESTAMP]", DATABASE_NAME); + tableAssertTestFail( + "select percentile() from table1", + "701: Aggregation functions [percentile] should only have two arguments", + DATABASE_NAME); + tableAssertTestFail( + "select percentile(s1,1.1) from table1", + "701: percentage should be in [0,1], got 1.1", + DATABASE_NAME); + tableAssertTestFail( + "select percentile(s1,'test') from table1", + "701: The second argument of 'percentile' function percentage must be a double literal", + DATABASE_NAME); + tableAssertTestFail( + "select percentile(s5,0.5) from table1", + "701: Aggregation functions [percentile] should have value column as numeric type [INT32, INT64, FLOAT, DOUBLE, TIMESTAMP]", + DATABASE_NAME); } // ================================================================== diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/Percentile.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/Percentile.java new file mode 100644 index 000000000000..60ade9a489cb --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/Percentile.java @@ -0,0 +1,164 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iotdb.db.queryengine.execution.operator.source.relational; + +import org.apache.iotdb.db.exception.sql.SemanticException; + +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.ReadWriteIOUtils; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class Percentile { + private double[] values; + private int size; + private int capacity; + private boolean sorted; + + private static final int INITIAL_CAPACITY = 32; + private static final double GROWTH_FACTOR = 1.5; + + public Percentile() { + this.capacity = INITIAL_CAPACITY; + this.values = new double[capacity]; + this.size = 0; + this.sorted = true; + } + + public void addValue(double value) { + ensureCapacity(); + values[size++] = value; + sorted = false; + } + + public void addValues(double... vals) { + if (vals == null || vals.length == 0) return; + + int newSize = size + vals.length; + if (newSize > capacity) { + grow(newSize); + } + + System.arraycopy(vals, 0, values, size, vals.length); + size = newSize; + sorted = false; + } + + public void merge(Percentile other) { + if (other == null || other.size == 0) { + return; + } + + int newSize = size + other.size; + if (newSize > capacity) { + grow(newSize); + } + + System.arraycopy(other.values, 0, values, size, other.size); + size = newSize; + sorted = false; + } + + public double getPercentile(double percentile) { + if (size == 0) { + return Double.NaN; + } + if (percentile < 0.0 || percentile > 1.0) { + throw new SemanticException("percentage should be in [0,1], got " + percentile); + } + + ensureSorted(); + + if (size == 1) { + return values[0]; + } + + double realIndex = percentile * (size - 1); + int index = (int) realIndex; + double fraction = realIndex - index; + + if (index >= size - 1) { + return values[size - 1]; + } + + return values[index] + fraction * (values[index + 1] - values[index]); + } + + public int getSize() { + return size; + } + + public void clear() { + size = 0; + sorted = true; + } + + private void ensureCapacity() { + if (size >= capacity) { + grow(size + 1); + } + } + + private void grow(int minCapacity) { + int newCapacity = Math.max((int) (capacity * GROWTH_FACTOR), minCapacity); + double[] newValues = new double[newCapacity]; + System.arraycopy(values, 0, newValues, 0, size); + values = newValues; + capacity = newCapacity; + } + + private void ensureSorted() { + if (!sorted && size > 1) { + Arrays.sort(values, 0, size); + sorted = true; + } + } + + public void serialize(ByteBuffer buffer) { + ReadWriteIOUtils.write(size, buffer); + ReadWriteIOUtils.write(capacity, buffer); + ReadWriteIOUtils.write(sorted, buffer); + for (int i = 0; i < size; i++) { + ReadWriteIOUtils.write(values[i], buffer); + } + } + + public static Percentile deserialize(ByteBuffer buffer) { + Percentile percentile = new Percentile(); + percentile.size = ReadWriteIOUtils.readInt(buffer); + percentile.capacity = ReadWriteIOUtils.readInt(buffer); + percentile.sorted = ReadWriteIOUtils.readBool(buffer); + + if (percentile.capacity != percentile.values.length) { + percentile.values = new double[percentile.capacity]; + } + + for (int i = 0; i < percentile.size; i++) { + percentile.values[i] = ReadWriteIOUtils.readDouble(buffer); + } + + return percentile; + } + + public int getSerializedSize() { + return Integer.BYTES + Integer.BYTES + 1 + (size * Double.BYTES); + } + + public long getEstimatedSize() { + long shallowSize = RamUsageEstimator.shallowSizeOfInstance(Percentile.class); + return shallowSize + getSerializedSize(); + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/AccumulatorFactory.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/AccumulatorFactory.java index d7195cc6dfb5..2e4a697af77c 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/AccumulatorFactory.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/AccumulatorFactory.java @@ -45,6 +45,7 @@ import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedMinAccumulator; import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedMinByAccumulator; import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedModeAccumulator; +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedPercentileAccumulator; import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedSumAccumulator; import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedUserDefinedAggregateAccumulator; import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.GroupedVarianceAccumulator; @@ -283,6 +284,8 @@ private static GroupedAccumulator createBuiltinGroupedAccumulator( } else { return new GroupedApproxPercentileWithWeightAccumulator(inputDataTypes.get(0)); } + case PERCENTILE: + return new GroupedPercentileAccumulator(inputDataTypes.get(0)); default: throw new IllegalArgumentException("Invalid Aggregation function: " + aggregationType); } @@ -367,6 +370,8 @@ public static TableAccumulator createBuiltinAccumulator( } else { return new ApproxPercentileWithWeightAccumulator(inputDataTypes.get(0)); } + case PERCENTILE: + return new PercentileAccumulator(inputDataTypes.get(0)); default: throw new IllegalArgumentException("Invalid Aggregation function: " + aggregationType); } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/PercentileAccumulator.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/PercentileAccumulator.java new file mode 100644 index 000000000000..234b4e15477d --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/PercentileAccumulator.java @@ -0,0 +1,226 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation; + +import org.apache.iotdb.db.exception.sql.SemanticException; +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.Percentile; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.file.metadata.statistics.Statistics; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.write.UnSupportedDataTypeException; + +import java.nio.ByteBuffer; + +public class PercentileAccumulator implements TableAccumulator { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(PercentileAccumulator.class); + + private final TSDataType seriesDataType; + private Percentile percentile = new Percentile(); + private double percentage; + + public PercentileAccumulator(TSDataType seriesDataType) { + this.seriesDataType = seriesDataType; + } + + @Override + public long getEstimatedSize() { + return INSTANCE_SIZE; + } + + @Override + public TableAccumulator copy() { + return new PercentileAccumulator(seriesDataType); + } + + @Override + public void addInput(Column[] arguments, AggregationMask mask) { + if (arguments.length != 2) { + throw new SemanticException( + String.format("PERCENTILE requires 2 arguments, but got %d", arguments.length)); + } + percentage = arguments[1].getDouble(0); + switch (seriesDataType) { + case INT32: + addIntInput(arguments[0], mask); + return; + case INT64: + case TIMESTAMP: + addLongInput(arguments[0], mask); + return; + case FLOAT: + addFloatInput(arguments[0], mask); + return; + case DOUBLE: + addDoubleInput(arguments[0], mask); + return; + default: + throw new UnSupportedDataTypeException( + String.format("Unsupported data type in Percentile Aggregation: %s", seriesDataType)); + } + } + + @Override + public void addIntermediate(Column argument) { + for (int i = 0; i < argument.getPositionCount(); i++) { + if (!argument.isNull(i)) { + byte[] data = argument.getBinary(i).getValues(); + ByteBuffer buffer = ByteBuffer.wrap(data); + this.percentage = ReadWriteIOUtils.readDouble(buffer); + percentile.merge(Percentile.deserialize(buffer)); + } + } + } + + @Override + public void evaluateIntermediate(ColumnBuilder columnBuilder) { + int percentileDataLength = percentile.getSerializedSize(); + ByteBuffer buffer = ByteBuffer.allocate(8 + percentileDataLength); + ReadWriteIOUtils.write(percentage, buffer); + percentile.serialize(buffer); + columnBuilder.writeBinary(new Binary(buffer.array())); + } + + @Override + public void evaluateFinal(ColumnBuilder columnBuilder) { + double result = percentile.getPercentile(percentage); + if (Double.isNaN(result)) { + columnBuilder.appendNull(); + return; + } + switch (seriesDataType) { + case INT32: + columnBuilder.writeInt((int) result); + break; + case INT64: + case TIMESTAMP: + columnBuilder.writeLong((long) result); + break; + case FLOAT: + columnBuilder.writeFloat((float) result); + break; + case DOUBLE: + columnBuilder.writeDouble(result); + break; + default: + throw new UnSupportedDataTypeException( + String.format("Unsupported data type in PERCENTILE Aggregation: %s", seriesDataType)); + } + } + + @Override + public boolean hasFinalResult() { + return false; + } + + @Override + public void addStatistics(Statistics[] statistics) { + throw new UnsupportedOperationException("PercentileAccumulator does not support statistics"); + } + + @Override + public void reset() { + percentile.clear(); + } + + private void addIntInput(Column column, AggregationMask mask) { + int positionCount = mask.getSelectedPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + if (!column.isNull(i)) { + percentile.addValue(column.getInt(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + if (!column.isNull(position)) { + percentile.addValue(column.getInt(position)); + } + } + } + } + + private void addLongInput(Column column, AggregationMask mask) { + int positionCount = mask.getSelectedPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + if (!column.isNull(i)) { + percentile.addValue(column.getLong(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + if (!column.isNull(position)) { + percentile.addValue(column.getLong(position)); + } + } + } + } + + private void addFloatInput(Column column, AggregationMask mask) { + int positionCount = mask.getSelectedPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + if (!column.isNull(i)) { + percentile.addValue(column.getFloat(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + if (!column.isNull(position)) { + percentile.addValue(column.getFloat(position)); + } + } + } + } + + private void addDoubleInput(Column column, AggregationMask mask) { + int positionCount = mask.getSelectedPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + if (!column.isNull(i)) { + percentile.addValue(column.getDouble(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + if (!column.isNull(position)) { + percentile.addValue(column.getDouble(position)); + } + } + } + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/GroupedPercentileAccumulator.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/GroupedPercentileAccumulator.java new file mode 100644 index 000000000000..50d34c771f54 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/GroupedPercentileAccumulator.java @@ -0,0 +1,253 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped; + +import org.apache.iotdb.db.exception.sql.SemanticException; +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.Percentile; +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.AggregationMask; +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.array.PercentileBigArray; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.ReadWriteIOUtils; +import org.apache.tsfile.write.UnSupportedDataTypeException; + +import java.nio.ByteBuffer; + +public class GroupedPercentileAccumulator implements GroupedAccumulator { + private static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(GroupedPercentileAccumulator.class); + private final TSDataType seriesDataType; + private double percentage; + private final PercentileBigArray array = new PercentileBigArray(); + + public GroupedPercentileAccumulator(TSDataType seriesDataType) { + this.seriesDataType = seriesDataType; + } + + @Override + public long getEstimatedSize() { + return INSTANCE_SIZE; + } + + @Override + public void setGroupCount(long groupCount) { + array.ensureCapacity(groupCount); + } + + @Override + public void addInput(int[] groupIds, Column[] arguments, AggregationMask mask) { + if (arguments.length != 2) { + throw new SemanticException( + String.format("PERCENTILE requires 2 arguments, but got %d", arguments.length)); + } + percentage = arguments[1].getDouble(0); + + switch (seriesDataType) { + case INT32: + addIntInput(groupIds, arguments, mask); + break; + case INT64: + case TIMESTAMP: + addLongInput(groupIds, arguments, mask); + break; + case FLOAT: + addFloatInput(groupIds, arguments, mask); + break; + case DOUBLE: + addDoubleInput(groupIds, arguments, mask); + break; + default: + throw new UnSupportedDataTypeException( + String.format( + "Unsupported data type in PERCENTILE Aggregation: %s", seriesDataType)); + } + } + + @Override + public void addIntermediate(int[] groupIds, Column argument) { + for (int i = 0; i < groupIds.length; i++) { + int groupId = groupIds[i]; + if (!argument.isNull(i)) { + byte[] data = argument.getBinary(i).getValues(); + ByteBuffer buffer = ByteBuffer.wrap(data); + this.percentage = ReadWriteIOUtils.readDouble(buffer); + Percentile other = Percentile.deserialize(buffer); + array.get(groupId).merge(other); + } + } + } + + @Override + public void evaluateIntermediate(int groupId, ColumnBuilder columnBuilder) { + Percentile percentile = array.get(groupId); + int percentileDataLength = percentile.getSerializedSize(); + ByteBuffer buffer = ByteBuffer.allocate(8 + percentileDataLength); + ReadWriteIOUtils.write(percentage, buffer); + percentile.serialize(buffer); + columnBuilder.writeBinary(new Binary(buffer.array())); + } + + @Override + public void evaluateFinal(int groupId, ColumnBuilder columnBuilder) { + Percentile percentile = array.get(groupId); + double result = percentile.getPercentile(percentage); + if (Double.isNaN(result)) { + columnBuilder.appendNull(); + return; + } + switch (seriesDataType) { + case INT32: + columnBuilder.writeInt((int) result); + break; + case INT64: + case TIMESTAMP: + columnBuilder.writeLong((long) result); + break; + case FLOAT: + columnBuilder.writeFloat((float) result); + break; + case DOUBLE: + columnBuilder.writeDouble(result); + break; + default: + throw new UnSupportedDataTypeException( + String.format("Unsupported data type in PERCENTILE Aggregation: %s", seriesDataType)); + } + } + + @Override + public void prepareFinal() {} + + @Override + public void reset() { + array.reset(); + } + + public void addIntInput(int[] groupIds, Column[] arguments, AggregationMask mask) { + Column valueColumn = arguments[0]; + + int positionCount = mask.getPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + int groupId = groupIds[i]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(i)) { + percentile.addValue(valueColumn.getInt(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + int groupId; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + groupId = groupIds[position]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(position)) { + percentile.addValue(valueColumn.getInt(position)); + } + } + } + } + + public void addLongInput(int[] groupIds, Column[] arguments, AggregationMask mask) { + Column valueColumn = arguments[0]; + + int positionCount = mask.getPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + int groupId = groupIds[i]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(i)) { + percentile.addValue(valueColumn.getLong(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + int groupId; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + groupId = groupIds[position]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(position)) { + percentile.addValue(valueColumn.getLong(position)); + } + } + } + } + + public void addFloatInput(int[] groupIds, Column[] arguments, AggregationMask mask) { + Column valueColumn = arguments[0]; + + int positionCount = mask.getPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + int groupId = groupIds[i]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(i)) { + percentile.addValue(valueColumn.getFloat(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + int groupId; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + groupId = groupIds[position]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(position)) { + percentile.addValue(valueColumn.getFloat(position)); + } + } + } + } + + public void addDoubleInput(int[] groupIds, Column[] arguments, AggregationMask mask) { + Column valueColumn = arguments[0]; + + int positionCount = mask.getPositionCount(); + + if (mask.isSelectAll()) { + for (int i = 0; i < positionCount; i++) { + int groupId = groupIds[i]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(i)) { + percentile.addValue(valueColumn.getDouble(i)); + } + } + } else { + int[] selectedPositions = mask.getSelectedPositions(); + int position; + int groupId; + for (int i = 0; i < positionCount; i++) { + position = selectedPositions[i]; + groupId = groupIds[position]; + Percentile percentile = array.get(groupId); + if (!valueColumn.isNull(position)) { + percentile.addValue(valueColumn.getDouble(position)); + } + } + } + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/array/PercentileBigArray.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/array/PercentileBigArray.java new file mode 100644 index 000000000000..ea0797b9ad79 --- /dev/null +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/source/relational/aggregation/grouped/array/PercentileBigArray.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iotdb.db.queryengine.execution.operator.source.relational.aggregation.grouped.array; + +import org.apache.iotdb.db.queryengine.execution.operator.source.relational.Percentile; + +import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf; +import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOfInstance; + +public final class PercentileBigArray { + private static final long INSTANCE_SIZE = shallowSizeOfInstance(PercentileBigArray.class); + private final ObjectBigArray array; + private long sizeOfTDigest; + + public PercentileBigArray() { + array = new ObjectBigArray<>(); + } + + public long sizeOf() { + return INSTANCE_SIZE + shallowSizeOf(array) + sizeOfTDigest; + } + + public Percentile get(long index) { + Percentile percentile = array.get(index); + if (percentile == null) { + percentile = new Percentile(); + set(index, percentile); + } + return percentile; + } + + public void set(long index, Percentile value) { + updateRetainedSize(index, value); + array.set(index, value); + } + + public boolean isEmpty() { + return sizeOfTDigest == 0; + } + + public void ensureCapacity(long length) { + array.ensureCapacity(length); + } + + public void updateRetainedSize(long index, Percentile value) { + Percentile percentile = array.get(index); + if (percentile != null) { + sizeOfTDigest -= percentile.getEstimatedSize(); + } + if (value != null) { + sizeOfTDigest += value.getEstimatedSize(); + } + } + + public void reset() { + array.forEach( + item -> { + if (item != null) { + item.clear(); + } + }); + } +} diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/TableMetadataImpl.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/TableMetadataImpl.java index a12add71ffc5..d3f0b3f9acbf 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/TableMetadataImpl.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/metadata/TableMetadataImpl.java @@ -741,6 +741,27 @@ && isIntegerNumber(argumentTypes.get(2)))) { functionName)); } + break; + case SqlConstant.PERCENTILE: + if (argumentTypes.size() != 2) { + throw new SemanticException( + String.format( + "Aggregation functions [%s] should only have two arguments", functionName)); + } + + if (!isNumericType(argumentTypes.get(0))) { + throw new SemanticException( + String.format( + "Aggregation functions [%s] should have value column as numeric type [INT32, INT64, FLOAT, DOUBLE, TIMESTAMP]", + functionName)); + } + // Validate percentage parameter + if (!isDecimalType(argumentTypes.get(1))) { + throw new SemanticException( + String.format( + "Aggregation functions [%s] should have percentage as decimal type", + functionName)); + } break; case SqlConstant.COUNT: break; @@ -766,6 +787,7 @@ && isIntegerNumber(argumentTypes.get(2)))) { case SqlConstant.MAX_BY: case SqlConstant.MIN_BY: case SqlConstant.APPROX_PERCENTILE: + case SqlConstant.PERCENTILE: return argumentTypes.get(0); case SqlConstant.AVG: case SqlConstant.SUM: diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/sql/parser/AstBuilder.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/sql/parser/AstBuilder.java index 431ed9d3d022..2867688245da 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/sql/parser/AstBuilder.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/sql/parser/AstBuilder.java @@ -333,6 +333,7 @@ import static org.apache.iotdb.db.utils.constant.SqlConstant.FIRST_BY_AGGREGATION; import static org.apache.iotdb.db.utils.constant.SqlConstant.LAST_AGGREGATION; import static org.apache.iotdb.db.utils.constant.SqlConstant.LAST_BY_AGGREGATION; +import static org.apache.iotdb.db.utils.constant.SqlConstant.PERCENTILE; public class AstBuilder extends RelationalSqlBaseVisitor { @@ -3390,6 +3391,11 @@ public Node visitFunctionCall(RelationalSqlParser.FunctionCallContext ctx) { throw new SemanticException( "The third argument of 'approx_percentile' function percentage must be a double literal"); } + } else if (name.toString().equalsIgnoreCase(PERCENTILE)) { + if (arguments.size() == 2 && !(arguments.get(1) instanceof DoubleLiteral)) { + throw new SemanticException( + "The second argument of 'percentile' function percentage must be a double literal"); + } } return new FunctionCall(getLocation(ctx), name, window, nulls, distinct, mode, arguments); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/constant/SqlConstant.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/constant/SqlConstant.java index 564185474ad3..ab75ae1f1264 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/constant/SqlConstant.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/utils/constant/SqlConstant.java @@ -81,6 +81,7 @@ protected SqlConstant() { public static final String APPROX_COUNT_DISTINCT = "approx_count_distinct"; public static final String APPROX_MOST_FREQUENT = "approx_most_frequent"; public static final String APPROX_PERCENTILE = "approx_percentile"; + public static final String PERCENTILE = "percentile"; // names of scalar functions public static final String DIFF = "diff"; diff --git a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/udf/builtin/relational/TableBuiltinAggregationFunction.java b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/udf/builtin/relational/TableBuiltinAggregationFunction.java index 1915ecfbb1cc..e12be7cd27be 100644 --- a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/udf/builtin/relational/TableBuiltinAggregationFunction.java +++ b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/udf/builtin/relational/TableBuiltinAggregationFunction.java @@ -59,7 +59,8 @@ public enum TableBuiltinAggregationFunction { VAR_SAMP("var_samp"), APPROX_COUNT_DISTINCT("approx_count_distinct"), APPROX_MOST_FREQUENT("approx_most_frequent"), - APPROX_PERCENTILE("approx_percentile"); + APPROX_PERCENTILE("approx_percentile"), + PERCENTILE("percentile"); private final String functionName; @@ -106,6 +107,7 @@ public static Type getIntermediateType(String name, List originalArgumentT case "var_samp": case "approx_count_distinct": case "approx_percentile": + case "percentile": return RowType.anonymous(Collections.emptyList()); case "extreme": case "max": diff --git a/iotdb-protocol/thrift-commons/src/main/thrift/common.thrift b/iotdb-protocol/thrift-commons/src/main/thrift/common.thrift index 58c4484a4ae8..fdd459bf72bb 100644 --- a/iotdb-protocol/thrift-commons/src/main/thrift/common.thrift +++ b/iotdb-protocol/thrift-commons/src/main/thrift/common.thrift @@ -295,6 +295,7 @@ enum TAggregationType { APPROX_COUNT_DISTINCT, APPROX_MOST_FREQUENT, APPROX_PERCENTILE, + PERCENTILE, } struct TShowConfigurationTemplateResp {