Fix corruption in gapfill plan

This change fixes a bug with gapfill that led to certain query plans failing with the error "could not find pathkey item to sort". The error was caused by a corruption of the query plan that resulted from removing the last two arguments of the `time_bucket_gapfill` function call during query planning. Since the function's expression was modified via a reference to the original expression in the query plan, the modification also affected the target list. The error occurred because the planner couldn't match the function (including arguments) with the corresponding equivalence member (which still included the two removed arguments). The reason the two arguments were removed, originally, was to avoid passing them on to `time_bucket`, which is called internally by `time_bucket_gapfill`. However, the last two arguments aren't passed on anyway, so it isn't necessary to modify the original argument list. Fixes timescale#2232
erimatnor · Jan 27, 2021 · 9ab333d · 9ab333d
1 parent 126f1c8
commit 9ab333d
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 47 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,11 @@ accidentally triggering the load of a previous DB version.**
 **Minor features**
 * #2736 Support adding columns to hypertables with compression enabled
 
+**Thanks**
+* @WarriorOfWire for reporting the bug with gapfill queries not being
+  able to find pathkey item to sort
+
+
 ## 2.0.0 (2020-12-18)
 
 With this release, we are officially moving TimescaleDB 2.0 to GA, 

diff --git a/tsl/src/nodes/gapfill/planner.c b/tsl/src/nodes/gapfill/planner.c
@@ -196,9 +196,6 @@ gapfill_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *path, List *
 	cscan->custom_private =
 		list_make4(gfpath->func, root->parse->groupClause, root->parse->jointree, args);
 
-	/* remove start and end argument from time_bucket call */
-	gfpath->func->args = list_make2(linitial(gfpath->func->args), lsecond(gfpath->func->args));
-
 	return &cscan->scan.plan;
 }
 

diff --git a/tsl/test/shared/expected/gapfill.out b/tsl/test/shared/expected/gapfill.out
@@ -10,13 +10,13 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 1;
-                                       QUERY PLAN                                        
------------------------------------------------------------------------------------------
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Values Scan on "*VALUES*"
 (6 rows)
 
@@ -28,15 +28,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 2;
-                                          QUERY PLAN                                           
------------------------------------------------------------------------------------------------
+                                                 QUERY PLAN                                                  
+-------------------------------------------------------------------------------------------------------------
  Sort
    Sort Key: (avg("*VALUES*".column2))
    ->  Custom Scan (GapFill)
          ->  GroupAggregate
-               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -48,15 +48,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 1 DESC;
-                                             QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
  Sort
-   Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)) DESC
+   Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())) DESC
    ->  Custom Scan (GapFill)
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)) NULLS FIRST
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())) NULLS FIRST
                ->  HashAggregate
-                     Group Key: time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)
+                     Group Key: time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -68,15 +68,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 2,1;
-                                               QUERY PLAN                                               
---------------------------------------------------------------------------------------------------------
+                                                      QUERY PLAN                                                      
+----------------------------------------------------------------------------------------------------------------------
  Sort
-   Sort Key: (avg("*VALUES*".column2)), (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+   Sort Key: (avg("*VALUES*".column2)), (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
    ->  Custom Scan (GapFill)
          ->  GroupAggregate
-               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -87,13 +87,13 @@ SELECT
   avg(c2)
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1;
-                                       QUERY PLAN                                        
------------------------------------------------------------------------------------------
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Values Scan on "*VALUES*"
 (6 rows)
 
@@ -105,17 +105,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -132,17 +132,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -159,17 +159,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -188,20 +188,20 @@ FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 2
 LIMIT 1;
-                                                      QUERY PLAN                                                      
-----------------------------------------------------------------------------------------------------------------------
+                                                                                                                QUERY PLAN                                                                                                                
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Limit
    ->  Sort
          Sort Key: (interpolate(avg(value), NULL::record, NULL::record))
          ->  Custom Scan (GapFill)
                ->  Finalize GroupAggregate
-                     Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Gather Merge
                            Workers Planned: 2
                            ->  Sort
-                                 Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                                 Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                                  ->  Partial HashAggregate
-                                       Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                                       Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                                        ->  Result
                                              ->  Parallel Append
                                                    ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -249,7 +249,7 @@ ORDER BY 1;
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time")
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Custom Scan (ChunkAppend) on gapfill_plan_test
                Order: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                ->  Index Scan Backward using _hyper_8_46_chunk_gapfill_plan_test_time_idx on _hyper_8_46_chunk
@@ -267,7 +267,7 @@ ORDER BY 1;
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time")
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Custom Scan (ChunkAppend) on gapfill_plan_test
                Order: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                ->  Index Scan Backward using _hyper_8_46_chunk_gapfill_plan_test_time_idx on _hyper_8_46_chunk
@@ -3070,3 +3070,25 @@ GROUP BY 1 ORDER BY 1;
   9223372036854775807 |    32767 |  2147483647 |  9223372036854775807 |  2147483647 |  Infinity
 (3 rows)
 
+-- issue #2232: This query used to trigger error "could not find
+-- pathkey item to sort" due to a corrupt query plan
+SELECT time_bucket_gapfill('60 seconds', time) AS time
+FROM metrics_tstz
+WHERE time >= '2017-01-01' AND time < '2018-01-01'
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 10;
+             time             
+------------------------------
+ Sun Dec 31 23:59:00 2017 PST
+ Sun Dec 31 23:58:00 2017 PST
+ Sun Dec 31 23:57:00 2017 PST
+ Sun Dec 31 23:56:00 2017 PST
+ Sun Dec 31 23:55:00 2017 PST
+ Sun Dec 31 23:54:00 2017 PST
+ Sun Dec 31 23:53:00 2017 PST
+ Sun Dec 31 23:52:00 2017 PST
+ Sun Dec 31 23:51:00 2017 PST
+ Sun Dec 31 23:50:00 2017 PST
+(10 rows)
+
diff --git a/tsl/test/shared/sql/gapfill.sql b/tsl/test/shared/sql/gapfill.sql
@@ -1389,3 +1389,11 @@ FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2
              (:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
 GROUP BY 1 ORDER BY 1;
 
+-- issue #2232: This query used to trigger error "could not find
+-- pathkey item to sort" due to a corrupt query plan
+SELECT time_bucket_gapfill('60 seconds', time) AS time
+FROM metrics_tstz
+WHERE time >= '2017-01-01' AND time < '2018-01-01'
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 10;