From b84fc3be6c49ede4238a111b47d26beb63c67655 Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Thu, 1 Jul 2021 10:24:42 +0500 Subject: [PATCH 1/4] Check if subpath can do motion. 'cdbpath_create_motion_path' function doesn't check subpath's params Relids. When Path contains index scan with quals it leads to segfault. Fix adds a few additional checks of can motion node be generated atop of subpath. Additional regression test included. Related to issue #12228. Partial backport of 9cc1da6 (#10012). --- src/backend/cdb/cdbpath.c | 21 +++++++++++++ src/test/regress/expected/rpt_joins.out | 41 ++++++++++++++++++++++++- src/test/regress/sql/rpt_joins.sql | 26 ++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 1a2f13a5c720..27f56a6d4e40 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -139,6 +139,11 @@ cdbpath_create_motion_path(PlannerInfo *root, /* singleQE-->entry? Don't move. Slice's QE will run on entry db. */ if (CdbPathLocus_IsSingleQE(subpath->locus)) { + /* + * If the subpath requires parameters, we cannot generate Motion atop of it. + */ + if (!bms_is_empty(PATH_REQ_OUTER(subpath))) + return NULL; /* * Create CdbMotionPath node to indicate that the slice must be * dispatched to a singleton gang running on the entry db. We @@ -170,6 +175,11 @@ cdbpath_create_motion_path(PlannerInfo *root, if (CdbPathLocus_IsSegmentGeneral(subpath->locus)) { + /* + * If the subpath requires parameters, we cannot generate Motion atop of it. + */ + if (!bms_is_empty(PATH_REQ_OUTER(subpath))) + return NULL; /* * Data is only available on segments, to distingush it with * CdbLocusType_General, adding a motion to indicated this @@ -304,6 +314,11 @@ cdbpath_create_motion_path(PlannerInfo *root, /* Does subpath produce same multiset of rows on every qExec of its gang? */ else if (CdbPathLocus_IsReplicated(subpath->locus)) { + /* + * If the subpath requires parameters, we cannot generate Motion atop of it. + */ + if (!bms_is_empty(PATH_REQ_OUTER(subpath))) + return NULL; /* No-op if replicated-->replicated. */ if (CdbPathLocus_IsReplicated(locus)) { @@ -365,6 +380,12 @@ cdbpath_create_motion_path(PlannerInfo *root, * materialize nodes on top of motion nodes */ + /* + * If the subpath requires parameters, we cannot generate Motion atop of it. + */ + if (!bms_is_empty(PATH_REQ_OUTER(subpath))) + return NULL; + /* Create CdbMotionPath node. */ pathnode = makeNode(CdbMotionPath); pathnode->path.pathtype = T_Motion; diff --git a/src/test/regress/expected/rpt_joins.out b/src/test/regress/expected/rpt_joins.out index 7ac230f23e69..e98bbb927e1f 100644 --- a/src/test/regress/expected/rpt_joins.out +++ b/src/test/regress/expected/rpt_joins.out @@ -2153,10 +2153,49 @@ select max(c1) from pg_class left join t_5628 on true; 2 (1 row) +-- +-- regression test for nest loop join of indexed rpt and entry, segment should not fall with segfault; see issue #12228 +-- +create table test_tz(tz interval) distributed replicated; +insert into test_tz +select i * '1 hour'::interval +from generate_series(0,23) i; +create index test_tz_idx on test_tz(tz); +set optimizer=off; +set enable_hashjoin=off; +set enable_seqscan=off; +EXPLAIN (costs off) +SELECT tzn.name, tst.tz +FROM test_tz tst +JOIN pg_timezone_names tzn ON tzn.name = 'UTC' AND tst.tz = tzn.utc_offset; + QUERY PLAN +-------------------------------------------------------------------- + Nested Loop + Join Filter: (tst.tz = pg_timezone_names.utc_offset) + -> Function Scan on pg_timezone_names + Filter: (name = 'UTC'::text) + -> Materialize + -> Gather Motion 1:1 (slice1; segments: 1) + -> Index Only Scan using test_tz_idx on test_tz tst + Optimizer: Postgres query optimizer +(8 rows) + +SELECT tzn.name, tst.tz +FROM test_tz tst +JOIN pg_timezone_names tzn ON tzn.name = 'UTC' AND tst.tz = tzn.utc_offset; + name | tz +------+----- + UTC | @ 0 +(1 row) + +reset optimizer; +reset enable_hashjoin; +reset enable_seqscan; drop schema rpt_joins cascade; -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table j1_tbl drop cascades to table j2_tbl drop cascades to table rpt_joins.t1 drop cascades to table rpt_joins.t2 drop cascades to table rpt_joins.t3 +drop cascades to table test_tz diff --git a/src/test/regress/sql/rpt_joins.sql b/src/test/regress/sql/rpt_joins.sql index e37f9d207664..eda069594775 100644 --- a/src/test/regress/sql/rpt_joins.sql +++ b/src/test/regress/sql/rpt_joins.sql @@ -450,4 +450,30 @@ insert into t_5628 values (1,1), (2,2); explain (costs off) select max(c1) from pg_class left join t_5628 on true; select max(c1) from pg_class left join t_5628 on true; +-- +-- regression test for nest loop join of indexed rpt and entry, segment should not fall with segfault; see issue #12228 +-- +create table test_tz(tz interval) distributed replicated; +insert into test_tz +select i * '1 hour'::interval +from generate_series(0,23) i; +create index test_tz_idx on test_tz(tz); + +set optimizer=off; +set enable_hashjoin=off; +set enable_seqscan=off; + +EXPLAIN (costs off) +SELECT tzn.name, tst.tz +FROM test_tz tst +JOIN pg_timezone_names tzn ON tzn.name = 'UTC' AND tst.tz = tzn.utc_offset; + +SELECT tzn.name, tst.tz +FROM test_tz tst +JOIN pg_timezone_names tzn ON tzn.name = 'UTC' AND tst.tz = tzn.utc_offset; + +reset optimizer; +reset enable_hashjoin; +reset enable_seqscan; + drop schema rpt_joins cascade; From cb0d03b2dba975b4bc95c2cd90226b47ec8f8014 Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Thu, 1 Jul 2021 14:22:48 +0500 Subject: [PATCH 2/4] Removing default(fallback) params check. --- src/backend/cdb/cdbpath.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 27f56a6d4e40..7b9a4634b8ca 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -380,12 +380,6 @@ cdbpath_create_motion_path(PlannerInfo *root, * materialize nodes on top of motion nodes */ - /* - * If the subpath requires parameters, we cannot generate Motion atop of it. - */ - if (!bms_is_empty(PATH_REQ_OUTER(subpath))) - return NULL; - /* Create CdbMotionPath node. */ pathnode = makeNode(CdbMotionPath); pathnode->path.pathtype = T_Motion; From ebfb060622bd4e0a789427e29dbd58e49ad28b5e Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Mon, 12 Jul 2021 11:23:03 +0500 Subject: [PATCH 3/4] Fixing PR #12238. Removing unnecessary check for replicated locus type. Adding missed blank lines. Mentioning broken regression test. (cherry picked from commit 323752317984de22aba579c13768e600dff274e2) --- src/backend/cdb/cdbpath.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 7b9a4634b8ca..346e0ec1297e 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -144,6 +144,7 @@ cdbpath_create_motion_path(PlannerInfo *root, */ if (!bms_is_empty(PATH_REQ_OUTER(subpath))) return NULL; + /* * Create CdbMotionPath node to indicate that the slice must be * dispatched to a singleton gang running on the entry db. We @@ -180,6 +181,7 @@ cdbpath_create_motion_path(PlannerInfo *root, */ if (!bms_is_empty(PATH_REQ_OUTER(subpath))) return NULL; + /* * Data is only available on segments, to distingush it with * CdbLocusType_General, adding a motion to indicated this @@ -314,11 +316,6 @@ cdbpath_create_motion_path(PlannerInfo *root, /* Does subpath produce same multiset of rows on every qExec of its gang? */ else if (CdbPathLocus_IsReplicated(subpath->locus)) { - /* - * If the subpath requires parameters, we cannot generate Motion atop of it. - */ - if (!bms_is_empty(PATH_REQ_OUTER(subpath))) - return NULL; /* No-op if replicated-->replicated. */ if (CdbPathLocus_IsReplicated(locus)) { @@ -380,6 +377,11 @@ cdbpath_create_motion_path(PlannerInfo *root, * materialize nodes on top of motion nodes */ + /* + * TODO: Check if subpath require parameters(like in PR #12238) when the following FIXME from regression tests will be resolved. + * FIXME: A process terminates during execution, see https://github.com/greenplum-db/gpdb/issues/10791 + */ + /* Create CdbMotionPath node. */ pathnode = makeNode(CdbMotionPath); pathnode->path.pathtype = T_Motion; From 66832ea95c117f3afde1940f74646fa2b71a463a Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Wed, 4 Aug 2021 17:57:10 +0500 Subject: [PATCH 4/4] Removing parts backported from 9cc1da6. 'if' conditions backported from 9cc1da6 partially duplicates the logic over require_existing_order flag. Instead of applying additional logic, we can move require_existing_order flag analysis higher, before SingleQE and SegmentGeneral, in the place where it really make sense. Adding additional comments and warning message, so we can find what going wrong even on release version. Adding missed Insist's to cdbpath_create_motion_path callers. --- src/backend/cdb/cdbpath.c | 58 ++++++++++++--------------- src/backend/optimizer/path/allpaths.c | 1 + src/backend/optimizer/util/pathnode.c | 1 + 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 346e0ec1297e..e08df698cba5 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -81,7 +81,9 @@ cdbpath_cost_motion(PlannerInfo *root, CdbMotionPath *motionpath) * If no motion is needed, the caller's subpath is returned unchanged. * Else if require_existing_order is true, NULL is returned if the * motion would not preserve an ordering at least as strong as the - * specified ordering; also NULL is returned if pathkeys is NIL + * specified ordering or if path has parameterization relids, + * which generally may indicate we can't add motion; + * also NULL is returned if pathkeys is NIL * meaning the caller is just checking and doesn't want to add motion. * Else a CdbMotionPath is returned having either the specified pathkeys * (if given and the motion uses Merge Receive), or the pathkeys @@ -136,15 +138,25 @@ cdbpath_create_motion_path(PlannerInfo *root, return subpath; } - /* singleQE-->entry? Don't move. Slice's QE will run on entry db. */ - if (CdbPathLocus_IsSingleQE(subpath->locus)) + /* No motion needed if subpath can run anywhere giving same output. */ + if (CdbPathLocus_IsGeneral(subpath->locus)) { /* - * If the subpath requires parameters, we cannot generate Motion atop of it. + * general-->(entry|singleqe), no motion is needed, can run + * directly on any of the common segments */ - if (!bms_is_empty(PATH_REQ_OUTER(subpath))) - return NULL; + subpath->locus.numsegments = numsegments; + return subpath; + } + + /* Fail if caller refuses motion. */ + if (require_existing_order && + !pathkeys) + return NULL; + /* singleQE-->entry? Don't move. Slice's QE will run on entry db. */ + if (CdbPathLocus_IsSingleQE(subpath->locus)) + { /* * Create CdbMotionPath node to indicate that the slice must be * dispatched to a singleton gang running on the entry db. We @@ -176,12 +188,6 @@ cdbpath_create_motion_path(PlannerInfo *root, if (CdbPathLocus_IsSegmentGeneral(subpath->locus)) { - /* - * If the subpath requires parameters, we cannot generate Motion atop of it. - */ - if (!bms_is_empty(PATH_REQ_OUTER(subpath))) - return NULL; - /* * Data is only available on segments, to distingush it with * CdbLocusType_General, adding a motion to indicated this @@ -212,22 +218,6 @@ cdbpath_create_motion_path(PlannerInfo *root, return (Path *) pathnode; } - /* No motion needed if subpath can run anywhere giving same output. */ - if (CdbPathLocus_IsGeneral(subpath->locus)) - { - /* - * general-->(entry|singleqe), no motion is needed, can run - * directly on any of the common segments - */ - subpath->locus.numsegments = numsegments; - return subpath; - } - - /* Fail if caller refuses motion. */ - if (require_existing_order && - !pathkeys) - return NULL; - /* replicated-->singleton would give redundant copies of the rows. */ if (CdbPathLocus_IsReplicated(subpath->locus)) goto invalid_motion_request; @@ -377,11 +367,6 @@ cdbpath_create_motion_path(PlannerInfo *root, * materialize nodes on top of motion nodes */ - /* - * TODO: Check if subpath require parameters(like in PR #12238) when the following FIXME from regression tests will be resolved. - * FIXME: A process terminates during execution, see https://github.com/greenplum-db/gpdb/issues/10791 - */ - /* Create CdbMotionPath node. */ pathnode = makeNode(CdbMotionPath); pathnode->path.pathtype = T_Motion; @@ -403,6 +388,7 @@ cdbpath_create_motion_path(PlannerInfo *root, /* Unexpected source or destination locus. */ invalid_motion_request: Assert(0); + elog(WARNING, "cdbpath_create_motion_path can't apply valid motion"); return NULL; } /* cdbpath_create_motion_path */ @@ -902,6 +888,11 @@ cdbpath_distkeys_from_preds(PlannerInfo *root, * mergeclause_list is a List of RestrictInfo. Its members are * the equijoin predicates between the outer and inner rel. * It comes from select_mergejoin_clauses() in joinpath.c. + * + * Besides ordering logic, + * outer_require_existing_order and inner_require_existing_order + * indicates path has parameterization relids, so we can decide + * can we add a motion node atop of processing node */ typedef struct @@ -2100,6 +2091,7 @@ turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) CdbPathLocus_MakeSingleQE(&singleQE, CdbPathLocus_NumSegments(path->locus)); mpath = cdbpath_create_motion_path(root, path, NIL, false, singleQE); + Insist(mpath); ppath = create_projection_path_with_quals(root, mpath->parent, mpath, NIL); ppath->force = true; return (Path *) ppath; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c169bb80624c..fbec3c62848d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -437,6 +437,7 @@ bring_to_singleQE(PlannerInfo *root, RelOptInfo *rel, List *outer_quals) NIL, // DESTROY pathkeys false, target_locus); + Insist(path); path = (Path *) create_material_path(root, rel, path); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 231fc53cfb03..a5b65f6b543c 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1581,6 +1581,7 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, else { subpath = cdbpath_create_motion_path(root, subpath, subpath->pathkeys, false, targetlocus); + Insist(subpath); } pathnode->sameslice_relids = bms_union(pathnode->sameslice_relids, subpath->sameslice_relids);