Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Grouping Engine fix when a limit spec with different order by columns is applied #16534

Merged
merged 9 commits into from
Jun 20, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -686,13 +686,27 @@ public Sequence<ResultRow> processSubtotalsSpec(
processingConfig.intermediateComputeSizeBytes()
);

List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName)
List<String> queryDimNamesInOrder = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand the reasons/etc here - but if these are the column the query's output will be ordered; can't we place these logics inside a member method as GroupByQuery#getOrderColumnNames or something ?

.collect(Collectors.toList());

// Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec.
Set<String> aggsAndPostAggs = null;
if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);

DefaultLimitSpec limitSpec = (DefaultLimitSpec) baseSubtotalQuery.getLimitSpec();
if (!limitSpec.getColumns().isEmpty() && limitSpec.isLimited()) {
Map<String, String> dimToOutputNames = baseSubtotalQuery.getDimensions()
.stream()
.collect(Collectors.toMap(
DimensionSpec::getDimension,
DimensionSpec::getOutputName
));
queryDimNamesInOrder = limitSpec.getColumns()
.stream()
.map(spec -> dimToOutputNames.get(spec.getDimension()))
.collect(Collectors.toList());
}
}

List<List<String>> subtotals = query.getSubtotalsSpec();
Expand Down Expand Up @@ -724,7 +738,7 @@ public Sequence<ResultRow> processSubtotalsSpec(
.withLimitSpec(subtotalQueryLimitSpec);

final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
if (Utils.isPrefix(subtotalSpec, queryDimNamesInOrder)) {
// Since subtotalSpec is a prefix of base query dimensions, so results from base query are also sorted
// by subtotalSpec as needed by stream merging.
subtotalsResults.add(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13795,10 +13795,8 @@ public void testGroupingSetsWithLimitOrderByGran()
.build()
),
ImmutableList.<Object[]>builder().add(
new Object[]{"", null, 2L},
new Object[]{"a", null, 1L},
new Object[]{"", null, 1L},
new Object[]{"a", null, 1L},
new Object[]{"", null, 3L},
new Object[]{"a", null, 2L},
new Object[]{"abc", null, 1L},
new Object[]{NULL_STRING, null, 6L},
new Object[]{"", timestamp("2000-01-01"), 2L},
Expand Down Expand Up @@ -16278,4 +16276,30 @@ public void testGroupingSetsWithAggrgateCase()
)
).run();
}

@SqlTestFrameworkConfig.NumMergeBuffers(3)
@Test
public void testGroupingSetsWithDifferentOrderLimitSpec()
{
cannotVectorize();
msqIncompatible();
testBuilder()
.sql(
"SELECT\n"
+ " isNew, isRobot, COUNT(*) AS \"Cnt\"\n"
+ "FROM \"wikipedia\"\n"
+ "GROUP BY GROUPING SETS ((isRobot), (isNew))\n"
+ "ORDER BY 2, 1\n"
+ "limit 100"
)
.expectedResults(
ResultMatchMode.RELAX_NULLS,
ImmutableList.of(
new Object[]{"false", null, 36966L},
new Object[]{"true", null, 2278L},
new Object[]{null, "false", 23824L},
new Object[]{null, "true", 15420L}
)
).run();
}
}