From 2d21f3d5d5b4cc6745022776c982a0d5ba6bea5b Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Tue, 19 May 2026 00:23:49 +0800 Subject: [PATCH 1/2] ORCA: align CBitSet vec_size for grouping-set bitsets CreateGroupingSetsForRollup / Cube and the GROUPING_SET_EMPTY case in GetColumnAttnosForGroupBy were constructing their accumulator/seed CBitSets via the default ctor (vec_size = 256), then Union'ing in per-grouping-set bitsets built with vec_size = num_cols. CBitSet::Union just splices in any missing CBitSetLinks wholesale, so the accumulator ended up with a link at offset 0 (vec_size 256) plus a stray link at offset num_cols (vec_size num_cols) covering the high tleSortGroupRef. CBitSet::Get then computed the offset using the destination's m_vector_size = 256 and never consulted the stray link, while CBitSetIter happily walked both -- so Get(k) disagreed with the iterator for any k >= num_cols. In CreateDXLProjectNullsForGroupingSets this caused tleSortGroupRefs >= num_cols to be misclassified as non-grouping columns and NULL'd out, even in grouping sets that included them. Visible as: select generate_series(1, a) g, a+b ab from (values (1,1),(2,2)) t(a,b) group by rollup(a, ab) order by 1,2; returning 0 rows instead of 6 -- the rollup(a, ab) branch projected the a column as NULL, so generate_series(1, NULL) produced no rows. Fix by passing num_cols when constructing the accumulator and seed bitsets so all participants in the Union share m_vector_size. Add the repro to groupingsets.sql. --- src/backend/gpopt/translate/CTranslatorUtils.cpp | 12 +++++++----- src/test/regress/expected/groupingsets.out | 14 ++++++++++++++ .../regress/expected/groupingsets_optimizer.out | 14 ++++++++++++++ src/test/regress/sql/groupingsets.sql | 5 +++++ 4 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp b/src/backend/gpopt/translate/CTranslatorUtils.cpp index 0887c72725c..90e0a70ba23 100644 --- a/src/backend/gpopt/translate/CTranslatorUtils.cpp +++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp @@ -918,7 +918,7 @@ CTranslatorUtils::GetColumnAttnosForGroupBy( case GROUPING_SET_EMPTY: { col_attnos_arr_current = GPOS_NEW(mp) CBitSetArray(mp); - CBitSet *bset = GPOS_NEW(mp) CBitSet(mp); + CBitSet *bset = GPOS_NEW(mp) CBitSet(mp, num_cols); col_attnos_arr_current->Append(bset); break; } @@ -1113,11 +1113,12 @@ CTranslatorUtils::CreateGroupingSetsForRollup(CMemoryPool *mp, GPOS_ASSERT(grouping_set->kind == GROUPING_SET_ROLLUP); CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp); ListCell *lc = nullptr; - CBitSet *current_result = GPOS_NEW(mp) CBitSet(mp); + + CBitSet *current_result = GPOS_NEW(mp) CBitSet(mp, num_cols); // Maintaining the order of grouping sets is essential because the // UnionAll operator matches each child's distribution with the // distribution of the first child - col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp)); + col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp, num_cols)); ForEach(lc, grouping_set->content) { GroupingSet *gs_current = (GroupingSet *) lfirst(lc); @@ -1152,8 +1153,9 @@ CTranslatorUtils::CreateGroupingSetsForCube(CMemoryPool *mp, GPOS_ASSERT(grouping_set->kind == GROUPING_SET_CUBE); CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp); - // add an empty set - col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp)); + // add an empty set — vec_size must match what CreateAttnoSetForGroupingSet + // produces (num_cols), otherwise Union below leaves misaligned links. + col_attnos_arr->Append(GPOS_NEW(mp) CBitSet(mp, num_cols)); ListCell *lc = nullptr; ForEach(lc, grouping_set->content) diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 5222cc22b70..30746f66f13 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -2470,4 +2470,18 @@ group by rollup (a,b) order by a; | | 6 (8 rows) +-- ORCA: rollup over a derived-expression group alias with a target-list SRF. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by rollup(a, ab) order by 1,2; + g | ab +---+---- + 1 | 2 + 1 | 4 + 1 | + 1 | + 2 | 4 + 2 | +(6 rows) + -- end diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index a07017eca32..8b492f86f72 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -2645,4 +2645,18 @@ group by rollup (a,b) order by a; | | 6 (8 rows) +-- ORCA: rollup over a derived-expression group alias with a target-list SRF. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by rollup(a, ab) order by 1,2; + g | ab +---+---- + 1 | 2 + 1 | 4 + 1 | + 1 | + 2 | 4 + 2 | +(6 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 851a1eea6bb..1bfd35925d1 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -723,4 +723,9 @@ select a, b, rank(b) within group (order by b nulls last) from (values (1,1),(1,4),(1,5),(3,1),(3,2)) v(a,b) group by rollup (a,b) order by a; +-- ORCA: rollup over a derived-expression group alias with a target-list SRF. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by rollup(a, ab) order by 1,2; + -- end From db6a19fff8f2d1b49c6906d1ff091ba23ec0d96f Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Thu, 21 May 2026 00:37:20 +0800 Subject: [PATCH 2/2] ORCA: assert matching CBitSet vec_size and cover cube() in test --- src/backend/gporca/libgpos/src/common/CBitSet.cpp | 6 ++++++ src/test/regress/expected/groupingsets.out | 14 ++++++++++++++ .../regress/expected/groupingsets_optimizer.out | 14 ++++++++++++++ src/test/regress/sql/groupingsets.sql | 5 +++++ 4 files changed, 39 insertions(+) diff --git a/src/backend/gporca/libgpos/src/common/CBitSet.cpp b/src/backend/gporca/libgpos/src/common/CBitSet.cpp index 1eb9d2c939f..c3d7703f59d 100644 --- a/src/backend/gporca/libgpos/src/common/CBitSet.cpp +++ b/src/backend/gporca/libgpos/src/common/CBitSet.cpp @@ -347,6 +347,8 @@ CBitSet::ExchangeClear(ULONG pos) void CBitSet::Union(const CBitSet *pbsOther) { + GPOS_ASSERT(m_vector_size == pbsOther->m_vector_size); + CBitSetLink *bsl = nullptr; CBitSetLink *bsl_other = nullptr; @@ -425,6 +427,10 @@ CBitSet::Intersection(const CBitSet *pbsOther) return; } + // See CBitSet::Union: link offsets depend on m_vector_size, so mixing + // bitsets with different sizes makes FindLinkByOffset miss links. + GPOS_ASSERT(m_vector_size == pbsOther->m_vector_size); + CBitSetLink *bsl_other = nullptr; CBitSetLink *bsl = m_bsllist.First(); diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 30746f66f13..a1cb32e2478 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -2484,4 +2484,18 @@ select generate_series(1, a) g, a+b ab 2 | (6 rows) +-- Same shape with cube(): exercises additional grouping-set combinations. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by cube(a, ab) order by 1,2; + g | ab +---+---- + 1 | 2 + 1 | 4 + 1 | + 1 | + 2 | 4 + 2 | +(6 rows) + -- end diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index 8b492f86f72..f02364c362b 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -2659,4 +2659,18 @@ select generate_series(1, a) g, a+b ab 2 | (6 rows) +-- Same shape with cube(): exercises additional grouping-set combinations. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by cube(a, ab) order by 1,2; + g | ab +---+---- + 1 | 2 + 1 | 4 + 1 | + 1 | + 2 | 4 + 2 | +(6 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 1bfd35925d1..f7bcba8a46a 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -728,4 +728,9 @@ select generate_series(1, a) g, a+b ab from (values (1,1),(2,2)) t(a,b) group by rollup(a, ab) order by 1,2; +-- Same shape with cube(): exercises additional grouping-set combinations. +select generate_series(1, a) g, a+b ab + from (values (1,1),(2,2)) t(a,b) + group by cube(a, ab) order by 1,2; + -- end