SQL: Support GROUP BY and ORDER BY for NULL types. #16252

Open · wants to merge 8 commits into base: master
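The new tests added to CalciteQueryTest.java in this diff exercise statements like the following (quoted from those tests), where a constant NULL is the GROUP BY or ORDER BY key; supporting these is what the title refers to:

    SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1 ORDER BY 1

    SELECT dim1, NULL as nullcol FROM (SELECT DISTINCT dim1 FROM druid.foo LIMIT 1) ORDER BY 2
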
@@ -399,16 +399,16 @@ public void testNullInputs()
ImmutableList.of(
new ArrayOfDoublesSketchToMetricsSumEstimatePostAggregator(
"p1",
expressionPostAgg("p0", "null", null)
expressionPostAgg("p0", "null", ColumnType.STRING)
),
new ArrayOfDoublesSketchSetOpPostAggregator(
"p4",
ArrayOfDoublesSketchOperations.Operation.UNION.name(),
null,
null,
ImmutableList.of(
expressionPostAgg("p2", "null", null),
expressionPostAgg("p3", "null", null)
expressionPostAgg("p2", "null", ColumnType.STRING),
expressionPostAgg("p3", "null", ColumnType.STRING)
)
),
new ArrayOfDoublesSketchSetOpPostAggregator(
@@ -417,7 +417,7 @@ public void testNullInputs()
null,
null,
ImmutableList.of(
expressionPostAgg("p5", "null", null),
expressionPostAgg("p5", "null", ColumnType.STRING),
new FieldAccessPostAggregator("p6", "a1")
)
),
@@ -428,7 +428,7 @@ public void testNullInputs()
null,
ImmutableList.of(
new FieldAccessPostAggregator("p8", "a1"),
expressionPostAgg("p9", "null", null)
expressionPostAgg("p9", "null", ColumnType.STRING)
)
)
)
@@ -755,6 +755,11 @@ private static DimFilter toSimpleLeafFilter(

// Numeric lhs needs a numeric comparison.
final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
if (comparator == null) {
// Type is not comparable.
return null;
}

final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
final DimFilter filter;

@@ -206,7 +206,7 @@ public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type)
return ColumnType.DOUBLE;
} else if (isLongType(sqlTypeName)) {
return ColumnType.LONG;
} else if (isStringType(sqlTypeName)) {
} else if (isStringType(sqlTypeName) || sqlTypeName == SqlTypeName.NULL) {
return ColumnType.STRING;
} else if (SqlTypeName.OTHER == sqlTypeName) {
if (type instanceof RowSignatures.ComplexSqlType) {
@@ -244,12 +244,22 @@ public static boolean isLongType(SqlTypeName sqlTypeName)
}

/**
* Returns the natural StringComparator associated with the RelDataType
* Returns the natural StringComparator associated with the RelDataType, or null if the type is not convertible to
* {@link ColumnType} by {@link #getColumnTypeForRelDataType(RelDataType)}.
*/
@Nullable
public static StringComparator getStringComparatorForRelDataType(RelDataType dataType)
{
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
return getStringComparatorForValueType(valueType);
if (dataType.getSqlTypeName() == SqlTypeName.NULL) {
return StringComparators.NATURAL;
} else {
final ColumnType valueType = getColumnTypeForRelDataType(dataType);

Contributor: Should we add a new ColumnType called NULL, just like the RelDataType SqlTypeName.NULL? In that case, we could push this logic inside getStringComparatorForValueType() and remove this custom handling. Thoughts?

Contributor Author: ColumnType is meant to represent types that the native query engine can store and process. We don't really have a pure-NULL type there, so adding one may have larger consequences that I did not want to run into in this patch. I thought it would be better to keep the changes to the SQL layer.

if (valueType == null) {
return null;
}

return getStringComparatorForValueType(valueType);
}
}

/**
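Put together, the patched getStringComparatorForRelDataType reads roughly as follows; this is a sketch assembled from the added lines above, not a verbatim copy of the merged file:

    @Nullable
    public static StringComparator getStringComparatorForRelDataType(RelDataType dataType)
    {
      if (dataType.getSqlTypeName() == SqlTypeName.NULL) {
        // A NULL-typed expression sorts with the natural comparator.
        return StringComparators.NATURAL;
      } else {
        final ColumnType valueType = getColumnTypeForRelDataType(dataType);
        if (valueType == null) {
          // The type has no ColumnType mapping, so there is no comparator for it.
          return null;
        }
        return getStringComparatorForValueType(valueType);
      }
    }
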
sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java (110 additions, 0 deletions)
@@ -6647,6 +6647,116 @@ public void testCountStarWithTimeInIntervalFilterLosAngeles()
);
}

@Test
public void testGroupByNullType()
{
// Cannot vectorize due to null constant expression.
cannotVectorize();
testQuery(
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1",
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{null, 6L}
)
);
}

@Test
public void testOrderByNullType()
Comment on lines +6674 to +6675

Member: Instead of overriding and disabling the test case, it can be marked as unsupported with:

Suggested change:
-    @Test
-    public void testOrderByNullType()
+    @NotYetSupported(Modes.NOT_ENOUGH_RULES)
+    @Test
+    public void testOrderByNullType()

This has the benefit that it will still be verified to fail, and the test can also be located based on the type of failure it has.

Contributor Author: OK, thanks, cool idea! It is supported for the basic planner, so I kept the override in DecoupledPlanningCalciteQueryTest and changed the annotation from @Disabled to @NotYetSupported.

{
// Cannot vectorize due to null constant expression.
cannotVectorize();
testQuery(
// Order on subquery, since the native engine doesn't currently support ordering when selecting directly
// from a table.
"SELECT dim1, NULL as nullcol FROM (SELECT DISTINCT dim1 FROM druid.foo LIMIT 1) ORDER BY 2",
ImmutableList.of(
WindowOperatorQueryBuilder
.builder()
.setDataSource(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.dimension(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING))
.threshold(1)
.metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
.postAggregators(expressionPostAgg("s0", "null", ColumnType.STRING))
.context(QUERY_CONTEXT_DEFAULT)
.build()
)
.setSignature(
RowSignature.builder()
.add("d0", ColumnType.STRING)
.add("s0", ColumnType.STRING)
.build()
)
.setOperators(
OperatorFactoryBuilders.naiveSortOperator("s0", ColumnWithDirection.Direction.ASC)
)
.setLeafOperators(
OperatorFactoryBuilders
.scanOperatorFactoryBuilder()
.setOffsetLimit(0, Long.MAX_VALUE)
.setProjectedColumns("d0", "s0")
.build()
)
.build()
),
ImmutableList.of(
new Object[]{"", null}
)
);
}

@Test
public void testGroupByOrderByNullType()
{
// Cannot vectorize due to null constant expression.
cannotVectorize();

testQuery(
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1 ORDER BY 1",
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
.setLimitSpec(
queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
? NoopLimitSpec.instance()
: new DefaultLimitSpec(
ImmutableList.of(
new OrderByColumnSpec(
"d0",
Direction.ASCENDING,
StringComparators.NATURAL
)
),
Integer.MAX_VALUE
)
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{null, 6L}
)
);
}

@Test
public void testCountStarWithTimeInIntervalFilterInvalidInterval()
{
@@ -141,7 +141,7 @@ public void testValuesContainingNull()
ImmutableList.of(new Object[]{null, "United States"}),
RowSignature
.builder()
.add("EXPR$0", null)
.add("EXPR$0", ColumnType.STRING)
.add("EXPR$1", ColumnType.STRING)
.build()
)
@@ -46,9 +46,7 @@ public void testUnion()
.columns("dim1", "v0")
.context(QUERY_CONTEXT_DEFAULT)
.resultFormat(ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.virtualColumns(
expressionVirtualColumn("v0", "null", null)
)
.virtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.legacy(false)
.build(),
Druids.newScanQueryBuilder()
@@ -20,6 +20,7 @@
package org.apache.druid.sql.calcite;

import org.apache.druid.sql.calcite.NotYetSupported.NotYetSupportedProcessor;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.RegisterExtension;

@@ -34,4 +35,12 @@ protected QueryTestBuilder testBuilder()
{
return decoupledExtension.testBuilder();
}

@Override
@Test
@NotYetSupported(NotYetSupported.Modes.NOT_ENOUGH_RULES)
public void testOrderByNullType()
{
super.testOrderByNullType();
}
}
@@ -2859,7 +2859,7 @@ public void testCalciteLiteralToDruidLiteral()
);

assertDruidLiteral(
new DruidLiteral(null, null),
new DruidLiteral(ExpressionType.STRING, null),
Expressions.calciteLiteralToDruidLiteral(
plannerContext,
rexBuilder.makeNullLiteral(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL))