Skip to content

Commit

Permalink
[CALCITE-4644] Add PERCENTILE_CONT and PERCENTILE_DISC functions (Rafay)
Browse files Browse the repository at this point in the history
In this change, we can parse and validate those functions,
but execution will be implemented in [CALCITE-4666].

Validation is based on new method SqlAggFunction.isPercentile()
(experimental and subject to change without notice).

There are connections between the RANGE clause in windowed
aggregate functions and interpolation used by PERCENTILE_CONT.
Therefore use the same logic to determine whether "x" is a
valid type in "OVER (ORDER BY x RANGE r)" and
"PERCENTILE_CONT(f) WITHIN GROUP (ORDER BY x)".

Close #2444
  • Loading branch information
a-rafay authored and julianhyde committed Jul 5, 2021
1 parent f928e07 commit 85953ce
Show file tree
Hide file tree
Showing 18 changed files with 417 additions and 28 deletions.
2 changes: 2 additions & 0 deletions core/src/main/codegen/templates/Parser.jj
Expand Up @@ -6933,6 +6933,8 @@ SqlIdentifier ReservedFunctionName() :
| <NULLIF>
| <OCTET_LENGTH>
| <PERCENT_RANK>
| <PERCENTILE_CONT>
| <PERCENTILE_DISC>
| <POWER>
| <RANK>
| <REGR_COUNT>
Expand Down
Expand Up @@ -472,6 +472,12 @@ ExInst<SqlValidatorException> intervalFieldExceedsPrecision(Number a0,
@BaseMessage("Type ''{0}'' is not supported")
ExInst<SqlValidatorException> typeNotSupported(String a0);

@BaseMessage("Invalid type ''{0}'' in ORDER BY clause of ''{1}'' function. Only NUMERIC types are supported")
ExInst<SqlValidatorException> unsupportedTypeInOrderBy(String a0, String a1);

@BaseMessage("''{0}'' requires precisely one ORDER BY key")
ExInst<SqlValidatorException> orderByRequiresOneKey(String a0);

@BaseMessage("DISTINCT/ALL not allowed with {0} function")
ExInst<SqlValidatorException> functionQuantifierNotAllowed(String a0);

Expand Down Expand Up @@ -540,6 +546,9 @@ ExInst<SqlValidatorException> intervalFractionalSecondPrecisionOutOfRange(
@BaseMessage("Argument to function ''{0}'' must be a positive integer literal")
ExInst<SqlValidatorException> argumentMustBePositiveInteger(String a0);

@BaseMessage("Argument to function ''{0}'' must be a numeric literal between {1,number,#} and {2,number,#}")
ExInst<SqlValidatorException> argumentMustBeNumericLiteralInRange(String a0, int min, int max);

@BaseMessage("Validation Error: {0}")
ExInst<CalciteException> validationError(String a0);

Expand Down
11 changes: 11 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlAggFunction.java
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.calcite.sql;

import org.apache.calcite.linq4j.function.Experimental;
import org.apache.calcite.plan.Context;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
Expand Down Expand Up @@ -215,4 +216,14 @@ public boolean allowsNullTreatment() {
public @Nullable SqlAggFunction getRollup() {
return null;
}

/** Returns whether this aggregate function is a PERCENTILE function.
* Such functions require a {@code WITHIN GROUP} clause that has precisely
* one sort key.
*
* <p>This base implementation always returns {@code false}; a subclass that
* represents a percentile function may override it.
*
* <p>NOTE: This API is experimental and subject to change without notice. */
@Experimental
public boolean isPercentile() {
return false;
}
}
7 changes: 7 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Expand Up @@ -847,6 +847,12 @@ public enum SqlKind {
/** The {@code COLLECT} aggregate function. */
COLLECT,

/** The {@code PERCENTILE_CONT} aggregate function. */
PERCENTILE_CONT,

/** The {@code PERCENTILE_DISC} aggregate function. */
PERCENTILE_DISC,

/** The {@code FUSION} aggregate function. */
FUSION,

Expand Down Expand Up @@ -1074,6 +1080,7 @@ public enum SqlKind {
FUSION, SINGLE_VALUE, ROW_NUMBER, RANK, PERCENT_RANK, DENSE_RANK,
CUME_DIST, JSON_ARRAYAGG, JSON_OBJECTAGG, BIT_AND, BIT_OR, BIT_XOR,
LISTAGG, STRING_AGG, ARRAY_AGG, ARRAY_CONCAT_AGG, GROUP_CONCAT, COUNTIF,
PERCENTILE_CONT, PERCENTILE_DISC,
INTERSECTION, ANY_VALUE);

/**
Expand Down
29 changes: 10 additions & 19 deletions core/src/main/java/org/apache/calcite/sql/SqlWindow.java
Expand Up @@ -705,28 +705,19 @@ private static void validateFrameBoundary(
// if this is a range spec check and make sure the boundary type
// and order by type are compatible
if (orderTypeFam != null && !isRows) {
RelDataType bndType = validator.deriveType(scope, boundVal);
SqlTypeFamily bndTypeFam = bndType.getSqlTypeName().getFamily();
switch (orderTypeFam) {
case NUMERIC:
if (SqlTypeFamily.NUMERIC != bndTypeFam) {
throw validator.newValidationError(boundVal,
RESOURCE.orderByRangeMismatch());
}
break;
case DATE:
case TIME:
case TIMESTAMP:
if (SqlTypeFamily.INTERVAL_DAY_TIME != bndTypeFam
&& SqlTypeFamily.INTERVAL_YEAR_MONTH != bndTypeFam) {
throw validator.newValidationError(boundVal,
RESOURCE.orderByRangeMismatch());
}
break;
default:
final RelDataType boundType = validator.deriveType(scope, boundVal);
final SqlTypeFamily boundTypeFamily =
boundType.getSqlTypeName().getFamily();
final List<SqlTypeFamily> allowableBoundTypeFamilies =
orderTypeFam.allowableDifferenceTypes();
if (allowableBoundTypeFamilies.isEmpty()) {
throw validator.newValidationError(boundVal,
RESOURCE.orderByDataTypeProhibitsRange());
}
if (!allowableBoundTypeFamilies.contains(boundTypeFamily)) {
throw validator.newValidationError(boundVal,
RESOURCE.orderByRangeMismatch());
}
}
break;
default:
Expand Down
Expand Up @@ -51,6 +51,7 @@ public final class SqlBasicAggFunction extends SqlAggFunction {
private final SqlSyntax syntax;
private final boolean allowsNullTreatment;
private final boolean allowsSeparator;
private final boolean percentile;

//~ Constructors -----------------------------------------------------------

Expand All @@ -60,7 +61,8 @@ private SqlBasicAggFunction(String name, @Nullable SqlIdentifier sqlIdentifier,
SqlOperandTypeChecker operandTypeChecker, SqlFunctionCategory funcType,
boolean requiresOrder, boolean requiresOver,
Optionality requiresGroupOrder, Optionality distinctOptionality,
SqlSyntax syntax, boolean allowsNullTreatment, boolean allowsSeparator) {
SqlSyntax syntax, boolean allowsNullTreatment, boolean allowsSeparator,
boolean percentile) {
super(name, sqlIdentifier, kind,
requireNonNull(returnTypeInference, "returnTypeInference"), operandTypeInference,
requireNonNull(operandTypeChecker, "operandTypeChecker"),
Expand All @@ -70,6 +72,7 @@ private SqlBasicAggFunction(String name, @Nullable SqlIdentifier sqlIdentifier,
this.syntax = requireNonNull(syntax, "syntax");
this.allowsNullTreatment = allowsNullTreatment;
this.allowsSeparator = allowsSeparator;
this.percentile = percentile;
}

/** Creates a SqlBasicAggFunction whose name is the same as its kind. */
Expand All @@ -86,7 +89,7 @@ public static SqlBasicAggFunction create(String name, SqlKind kind,
return new SqlBasicAggFunction(name, null, kind, returnTypeInference, null,
operandTypeChecker, SqlFunctionCategory.NUMERIC, false, false,
Optionality.FORBIDDEN, Optionality.OPTIONAL, SqlSyntax.FUNCTION, false,
false);
false, false);
}

//~ Methods ----------------------------------------------------------------
Expand Down Expand Up @@ -131,7 +134,7 @@ SqlBasicAggFunction withDistinct(Optionality distinctOptionality) {
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
allowsNullTreatment, allowsSeparator, percentile);
}

/** Sets {@link #getFunctionType()}. */
Expand All @@ -140,7 +143,7 @@ public SqlBasicAggFunction withFunctionType(SqlFunctionCategory category) {
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), category, requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
allowsNullTreatment, allowsSeparator, percentile);
}

@Override public SqlSyntax getSyntax() {
Expand All @@ -153,7 +156,7 @@ public SqlBasicAggFunction withSyntax(SqlSyntax syntax) {
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
allowsNullTreatment, allowsSeparator, percentile);
}

@Override public boolean allowsNullTreatment() {
Expand All @@ -166,7 +169,7 @@ public SqlBasicAggFunction withAllowsNullTreatment(boolean allowsNullTreatment)
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
allowsNullTreatment, allowsSeparator, percentile);
}

/** Returns whether this aggregate function allows '{@code SEPARATOR string}'
Expand All @@ -180,8 +183,21 @@ public SqlBasicAggFunction withAllowsSeparator(boolean allowsSeparator) {
return new SqlBasicAggFunction(getName(), getSqlIdentifier(), kind,
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator, percentile);
}

@Override public boolean isPercentile() {
// The flag is a final field assigned in the constructor; withPercentile
// returns a copy of this function with a different value.
return percentile;
}

/** Sets {@link #isPercentile()}.
*
* <p>This function is not modified. The result is a new
* {@code SqlBasicAggFunction} whose other attributes (name, identifier, kind,
* type inference and checking strategies, category, ordering requirements,
* distinct optionality, syntax, null treatment and separator flags) are
* copied from this one, with the percentile flag set to the given value.
*
* @param percentile Whether the new function is a percentile function
* @return A copy of this function with the given percentile flag */
public SqlBasicAggFunction withPercentile(boolean percentile) {
return new SqlBasicAggFunction(getName(), getSqlIdentifier(), kind,
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), requiresGroupOrder(), distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator, percentile);
}

/** Sets {@link #requiresGroupOrder()}. */
Expand All @@ -190,6 +206,6 @@ public SqlBasicAggFunction withGroupOrder(Optionality groupOrder) {
getReturnTypeInference(), getOperandTypeInference(),
getOperandTypeChecker(), getFunctionType(), requiresOrder(),
requiresOver(), groupOrder, distinctOptionality, syntax,
allowsNullTreatment, allowsSeparator);
allowsNullTreatment, allowsSeparator, percentile);
}
}
Expand Up @@ -2233,6 +2233,36 @@ public class SqlStdOperatorTable extends ReflectiveSqlOperatorTable {
.withFunctionType(SqlFunctionCategory.SYSTEM)
.withGroupOrder(Optionality.OPTIONAL);

/**
* {@code PERCENTILE_CONT} inverse distribution aggregate function.
*
* <p>The argument must be a numeric literal in the range 0 to 1 inclusive
* (representing a percentage), and the return type is {@code DOUBLE}.
*
* <p>The function is declared with mandatory group order and is flagged as a
* percentile function (see {@link SqlAggFunction#isPercentile()}), which
* requires a {@code WITHIN GROUP} clause that has precisely one sort key.
*/
public static final SqlAggFunction PERCENTILE_CONT =
SqlBasicAggFunction
.create(SqlKind.PERCENTILE_CONT, ReturnTypes.DOUBLE,
OperandTypes.UNIT_INTERVAL_NUMERIC_LITERAL)
.withFunctionType(SqlFunctionCategory.SYSTEM)
.withGroupOrder(Optionality.MANDATORY)
.withPercentile(true);

/**
* {@code PERCENTILE_DISC} inverse distribution aggregate function.
*
* <p>The argument must be a numeric literal in the range 0 to 1 inclusive
* (representing a percentage), and the return type is {@code DOUBLE}.
* (The return type should be determined by the type of the {@code ORDER BY}
* expression, but this cannot be determined by the function itself.)
*
* <p>The function is declared with mandatory group order and is flagged as a
* percentile function (see {@link SqlAggFunction#isPercentile()}), which
* requires a {@code WITHIN GROUP} clause that has precisely one sort key.
*/
public static final SqlAggFunction PERCENTILE_DISC =
SqlBasicAggFunction
.create(SqlKind.PERCENTILE_DISC, ReturnTypes.DOUBLE,
OperandTypes.UNIT_INTERVAL_NUMERIC_LITERAL)
.withFunctionType(SqlFunctionCategory.SYSTEM)
.withGroupOrder(Optionality.MANDATORY)
.withPercentile(true);

/**
* The LISTAGG operator. String aggregator function.
*/
Expand Down
43 changes: 43 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
Expand Up @@ -369,6 +369,49 @@ private boolean hasFractionalPart(BigDecimal bd) {
}
};

/**
 * Operand type-checking strategy where the operand must be a numeric,
 * non-NULL literal whose value lies in the range 0 to 1 inclusive.
 *
 * <p>The checks run in order: the operand must pass the {@code LITERAL}
 * check, then the NUMERIC family check, and finally its value is compared
 * against the bounds of the unit interval.
 */
public static final SqlSingleOperandTypeChecker UNIT_INTERVAL_NUMERIC_LITERAL =
    new FamilyOperandTypeChecker(ImmutableList.of(SqlTypeFamily.NUMERIC),
        i -> false) {
      @Override public boolean checkSingleOperandType(
          SqlCallBinding callBinding,
          SqlNode node,
          int iFormalOperand,
          boolean throwOnFailure) {
        // Literal check first, then the family check; "&&" short-circuits so
        // the family check only runs on operands that are literals.
        final boolean numericLiteral =
            LITERAL.checkSingleOperandType(callBinding, node, iFormalOperand,
                throwOnFailure)
            && super.checkSingleOperandType(callBinding, node,
                iFormalOperand, throwOnFailure);
        if (!numericLiteral) {
          return false;
        }

        final BigDecimal fraction =
            ((SqlLiteral) node).getValueAs(BigDecimal.class);
        final boolean outsideUnitInterval =
            fraction.signum() < 0
                || fraction.compareTo(BigDecimal.ONE) > 0;
        if (!outsideUnitInterval) {
          return true;
        }
        if (throwOnFailure) {
          throw callBinding.newError(
              RESOURCE.argumentMustBeNumericLiteralInRange(
                  callBinding.getOperator().getName(), 0, 1));
        }
        return false;
      }
    };

/**
* Operand type-checking strategy where two operands must both be in the
* same type family.
Expand Down
36 changes: 36 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/type/SqlTypeFamily.java
Expand Up @@ -21,6 +21,7 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeFamily;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlWindow;
import org.apache.calcite.sql.parser.SqlParserPos;

import com.google.common.collect.ImmutableList;
Expand All @@ -30,6 +31,7 @@

import java.sql.Types;
import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
Expand Down Expand Up @@ -128,6 +130,40 @@ public enum SqlTypeFamily implements RelDataTypeFamily {
return JDBC_TYPE_TO_FAMILY.get(jdbcType);
}

/** Returns the type families that the difference between two values of this
 * type family may belong to.
 *
 * <p>Equivalently, given an {@code ORDER BY} expression with one key of this
 * family, returns the allowable type families of the difference between two
 * key values.
 *
 * <p>Example 1. For {@code ORDER BY empno}, a NUMERIC, the difference
 * between two {@code empno} values is also NUMERIC.
 *
 * <p>Example 2. For {@code ORDER BY hireDate}, a DATE, the difference
 * between two {@code hireDate} values might be an INTERVAL_DAY_TIME
 * or INTERVAL_YEAR_MONTH.
 *
 * <p>The result determines whether a {@link SqlWindow} with a {@code RANGE}
 * is valid (for example, {@code OVER (ORDER BY empno RANGE 10)} is valid
 * because {@code 10} is numeric); and whether a call to
 * {@link org.apache.calcite.sql.fun.SqlStdOperatorTable#PERCENTILE_CONT PERCENTILE_CONT}
 * is valid (for example,
 * {@code PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY hireDate)} is valid
 * because {@code hireDate} values may be interpolated by adding values of
 * type {@code INTERVAL_DAY_TIME}).
 *
 * <p>An empty list is returned for every family other than NUMERIC, DATE,
 * TIME and TIMESTAMP. */
public List<SqlTypeFamily> allowableDifferenceTypes() {
  if (this == NUMERIC) {
    return ImmutableList.of(NUMERIC);
  }
  final boolean datetime = this == DATE || this == TIME || this == TIMESTAMP;
  if (datetime) {
    return ImmutableList.of(INTERVAL_DAY_TIME, INTERVAL_YEAR_MONTH);
  }
  return ImmutableList.of();
}

/** Returns the collection of {@link SqlTypeName}s included in this family. */
public Collection<SqlTypeName> getTypeNames() {
switch (this) {
Expand Down

0 comments on commit 85953ce

Please sign in to comment.