From ca131f74b21b08e787580f6fe8339991aeb38df4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Nordstr=C3=B6m?= <erik@timescale.com>
Date: Wed, 27 Jan 2021 16:11:55 +0100
Subject: [PATCH] Fix corruption in gapfill plan

This change fixes a bug with gapfill that caused certain query plans
to fail with the error "could not find pathkey item to sort". This was
caused by a corruption in the query plan which happened as a result of
removing the last two arguments to the `time_bucket_gapfill` function
expression during query planning. Since the function expression was
modified via a reference to the original query plan tree, it affected
also the expression in the target list. When the planner couldn't
match the target list with the corresponding equivalence member (which
still included the two removed arguments), the error was generated.

The original reason for removing the two arguments was to avoid
passing them on to `time_bucket`, which is called internally by
`time_bucket_gapfill`. However, the last to arguments aren't passed on
anyway, so it isn't necessary to modify the original argument list.

Fixes #2232
---
 CHANGELOG.md                         |   6 ++
 tsl/src/nodes/gapfill/planner.c      |   3 -
 tsl/test/shared/expected/gapfill.out | 133 ++++++++++++++++++---------
 tsl/test/shared/sql/gapfill.sql      |  23 +++++
 4 files changed, 118 insertions(+), 47 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9638f427fe4..1f3b95080a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,16 @@ accidentally triggering the load of a previous DB version.**
 * #2842 Do not mark job as started when seting next_start field
 * #2845 Fix continuous aggregate privileges during upgrade
 * #2865 Apply volatile function quals at decompresschunk node
+* #2868 Fix corruption in gapfill plan
 
 **Minor features**
 * #2736 Support adding columns to hypertables with compression enabled
 
+**Thanks**
+* @WarriorOfWire for reporting the bug with gapfill queries not being
+  able to find pathkey item to sort
+
+
 ## 2.0.0 (2020-12-18)
 
 With this release, we are officially moving TimescaleDB 2.0 to GA, 
diff --git a/tsl/src/nodes/gapfill/planner.c b/tsl/src/nodes/gapfill/planner.c
index 765a14cec1f..ab913dca5ee 100644
--- a/tsl/src/nodes/gapfill/planner.c
+++ b/tsl/src/nodes/gapfill/planner.c
@@ -196,9 +196,6 @@ gapfill_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *path, List *
 	cscan->custom_private =
 		list_make4(gfpath->func, root->parse->groupClause, root->parse->jointree, args);
 
-	/* remove start and end argument from time_bucket call */
-	gfpath->func->args = list_make2(linitial(gfpath->func->args), lsecond(gfpath->func->args));
-
 	return &cscan->scan.plan;
 }
 
diff --git a/tsl/test/shared/expected/gapfill.out b/tsl/test/shared/expected/gapfill.out
index 3d348e3c2ca..f9f0b2f82c8 100644
--- a/tsl/test/shared/expected/gapfill.out
+++ b/tsl/test/shared/expected/gapfill.out
@@ -10,13 +10,13 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 1;
-                                       QUERY PLAN                                        
------------------------------------------------------------------------------------------
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Values Scan on "*VALUES*"
 (6 rows)
 
@@ -28,15 +28,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 2;
-                                          QUERY PLAN                                           
------------------------------------------------------------------------------------------------
+                                                 QUERY PLAN                                                  
+-------------------------------------------------------------------------------------------------------------
  Sort
    Sort Key: (avg("*VALUES*".column2))
    ->  Custom Scan (GapFill)
          ->  GroupAggregate
-               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -48,15 +48,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 1 DESC;
-                                             QUERY PLAN                                              
------------------------------------------------------------------------------------------------------
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
  Sort
-   Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)) DESC
+   Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())) DESC
    ->  Custom Scan (GapFill)
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)) NULLS FIRST
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())) NULLS FIRST
                ->  HashAggregate
-                     Group Key: time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1)
+                     Group Key: time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now())
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -68,15 +68,15 @@ SELECT
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1
 ORDER BY 2,1;
-                                               QUERY PLAN                                               
---------------------------------------------------------------------------------------------------------
+                                                      QUERY PLAN                                                      
+----------------------------------------------------------------------------------------------------------------------
  Sort
-   Sort Key: (avg("*VALUES*".column2)), (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+   Sort Key: (avg("*VALUES*".column2)), (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
    ->  Custom Scan (GapFill)
          ->  GroupAggregate
-               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                      ->  Values Scan on "*VALUES*"
 (8 rows)
 
@@ -87,13 +87,13 @@ SELECT
   avg(c2)
 FROM (VALUES (now(),1),(now(),NULL),(now(),NULL)) as t(time,c2)
 GROUP BY 1;
-                                       QUERY PLAN                                        
------------------------------------------------------------------------------------------
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
          ->  Sort
-               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1))
+               Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, "*VALUES*".column1, now(), now()))
                ->  Values Scan on "*VALUES*"
 (6 rows)
 
@@ -105,17 +105,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -132,17 +132,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -159,17 +159,17 @@ SELECT
 FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 1;
-                                                QUERY PLAN                                                
-----------------------------------------------------------------------------------------------------------
+                                                                                                          QUERY PLAN                                                                                                          
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  Finalize GroupAggregate
-         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Gather Merge
                Workers Planned: 2
                ->  Sort
-                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Partial HashAggregate
-                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                           Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                            ->  Result
                                  ->  Parallel Append
                                        ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -188,20 +188,20 @@ FROM gapfill_plan_test
 GROUP BY 1
 ORDER BY 2
 LIMIT 1;
-                                                      QUERY PLAN                                                      
-----------------------------------------------------------------------------------------------------------------------
+                                                                                                                QUERY PLAN                                                                                                                
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Limit
    ->  Sort
          Sort Key: (interpolate(avg(value), NULL::record, NULL::record))
          ->  Custom Scan (GapFill)
                ->  Finalize GroupAggregate
-                     Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                     Group Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                      ->  Gather Merge
                            Workers Planned: 2
                            ->  Sort
-                                 Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time"))
+                                 Sort Key: (time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
                                  ->  Partial HashAggregate
-                                       Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time")
+                                       Group Key: time_bucket_gapfill('@ 5 mins'::interval, _hyper_8_47_chunk."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                                        ->  Result
                                              ->  Parallel Append
                                                    ->  Parallel Seq Scan on _hyper_8_47_chunk
@@ -249,7 +249,7 @@ ORDER BY 1;
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time")
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Custom Scan (ChunkAppend) on gapfill_plan_test
                Order: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                ->  Index Scan Backward using _hyper_8_46_chunk_gapfill_plan_test_time_idx on _hyper_8_46_chunk
@@ -267,7 +267,7 @@ ORDER BY 1;
 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Custom Scan (GapFill)
    ->  GroupAggregate
-         Group Key: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time")
+         Group Key: (time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone))
          ->  Custom Scan (ChunkAppend) on gapfill_plan_test
                Order: time_bucket_gapfill('@ 5 mins'::interval, gapfill_plan_test."time", 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone, 'Wed Dec 31 16:00:00 1969 PST'::timestamp with time zone)
                ->  Index Scan Backward using _hyper_8_46_chunk_gapfill_plan_test_time_idx on _hyper_8_46_chunk
@@ -3070,3 +3070,48 @@ GROUP BY 1 ORDER BY 1;
   9223372036854775807 |    32767 |  2147483647 |  9223372036854775807 |  2147483647 |  Infinity
 (3 rows)
 
+-- issue #2232: This query used to trigger error "could not find
+-- pathkey item to sort" due to a corrupt query plan
+SELECT time_bucket_gapfill('1 h', time) AS time,
+       locf(sum(v1)) AS v1_sum,
+	   interpolate(sum(v2)) AS v2_sum
+FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+GROUP BY 1
+ORDER BY 1 DESC;
+             time             | v1_sum | v2_sum 
+------------------------------+--------+--------
+ Mon Jan 01 07:00:00 2018 PST |    2.3 |     70
+ Mon Jan 01 06:00:00 2018 PST |    2.1 |     65
+ Mon Jan 01 05:00:00 2018 PST |    2.1 |     60
+ Mon Jan 01 04:00:00 2018 PST |        |       
+(4 rows)
+
+-- query without gapfill:
+SELECT time_bucket('1 h', time) AS time,
+       sum(v1) AS v1_sum,
+	   sum(v2) AS v1_sum
+FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+GROUP BY 1
+ORDER BY 1 DESC;
+             time             | v1_sum | v1_sum 
+------------------------------+--------+--------
+ Mon Jan 01 07:00:00 2018 PST |    2.3 |     70
+ Mon Jan 01 05:00:00 2018 PST |    2.1 |     60
+(2 rows)
+
+-- query to show original data
+SELECT * FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+ORDER BY 1 DESC;
+             time             | device_id | v1  | v2 
+------------------------------+-----------+-----+----
+ Mon Jan 01 07:00:00 2018 PST |         1 |   0 |  0
+ Mon Jan 01 07:00:00 2018 PST |         2 | 1.4 | 40
+ Mon Jan 01 07:00:00 2018 PST |         3 | 0.9 | 30
+ Mon Jan 01 05:00:00 2018 PST |         1 | 0.5 | 10
+ Mon Jan 01 05:00:00 2018 PST |         2 | 0.7 | 20
+ Mon Jan 01 05:00:00 2018 PST |         3 | 0.9 | 30
+(6 rows)
+
diff --git a/tsl/test/shared/sql/gapfill.sql b/tsl/test/shared/sql/gapfill.sql
index 931091ef50b..d545e2f4451 100644
--- a/tsl/test/shared/sql/gapfill.sql
+++ b/tsl/test/shared/sql/gapfill.sql
@@ -1389,3 +1389,26 @@ FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2
              (:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
 GROUP BY 1 ORDER BY 1;
 
+-- issue #2232: This query used to trigger error "could not find
+-- pathkey item to sort" due to a corrupt query plan
+SELECT time_bucket_gapfill('1 h', time) AS time,
+       locf(sum(v1)) AS v1_sum,
+	   interpolate(sum(v2)) AS v2_sum
+FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+GROUP BY 1
+ORDER BY 1 DESC;
+
+-- query without gapfill:
+SELECT time_bucket('1 h', time) AS time,
+       sum(v1) AS v1_sum,
+	   sum(v2) AS v1_sum
+FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+GROUP BY 1
+ORDER BY 1 DESC;
+
+-- query to show original data
+SELECT * FROM metrics_tstz
+WHERE time >= '2018-01-01 04:00' AND time < '2018-01-01 08:00'
+ORDER BY 1 DESC;