diff --git a/pkg/sql/logictest/testdata/logic_test/suboperators b/pkg/sql/logictest/testdata/logic_test/suboperators
index 1f1495e81d01..ef5385766955 100644
--- a/pkg/sql/logictest/testdata/logic_test/suboperators
+++ b/pkg/sql/logictest/testdata/logic_test/suboperators
@@ -495,8 +495,16 @@ SELECT a, b FROM abc WHERE (a, b) = ANY(ARRAY(SELECT ROW(a, b) FROM abc LIMIT 1)
 ----
 1  10

+# The next query might error out when spilling to disk in the row-by-row engine,
+# so we only use the vectorized engine for it.
+statement ok
+SET vectorize = on;
+
 query T rowsort
 SELECT * FROM comp WHERE v IN (SELECT v FROM comp);
 ----
 (1,2)
 (3,4)
+
+statement ok
+RESET vectorize;
diff --git a/pkg/sql/logictest/testdata/logic_test/tuple b/pkg/sql/logictest/testdata/logic_test/tuple
index 317236a44a37..30748cea7434 100644
--- a/pkg/sql/logictest/testdata/logic_test/tuple
+++ b/pkg/sql/logictest/testdata/logic_test/tuple
@@ -1283,3 +1283,27 @@ NULL
 (,,1100)
 (,1.1,)
 (1,,)
+
+# Regression test for incorrectly spilling tuples to disk, which produces
+# "encoding corruption" of other columns (#125367).
+statement ok
+CREATE TABLE t125367 AS SELECT
+    g AS _int8, g * '1 day'::INTERVAL AS _interval, g::DECIMAL AS _decimal
+  FROM generate_series(1, 5) AS g;
+UPDATE t125367 SET _interval = '7 years 1 mon 887 days 18:22:39.99567';
+SET testing_optimizer_random_seed = 1481092000980190599;
+SET testing_optimizer_disable_rule_probability = 1.000000;
+SET vectorize = off;
+SET distsql_workmem = '2B';
+
+statement error pgcode 0A000 unimplemented: can't spill column type RECORD to disk
+SELECT tab_1._interval AS col_1, (NULL:::STRING, NULL:::JSONB, NULL:::TIME) AS col_2
+  FROM t125367 AS tab_2 JOIN t125367 AS tab_1 ON (tab_2._int8) = (tab_1._int8)
+  ORDER BY col_2, tab_2._interval, tab_2._decimal, col_1 DESC
+  LIMIT 1000;
+
+statement ok
+RESET testing_optimizer_random_seed;
+RESET testing_optimizer_disable_rule_probability;
+RESET vectorize;
+RESET distsql_workmem;
diff --git a/pkg/sql/logictest/testdata/logic_test/void b/pkg/sql/logictest/testdata/logic_test/void
index 0dbd6a619071..a245e02d6de9 100644
--- a/pkg/sql/logictest/testdata/logic_test/void
+++ b/pkg/sql/logictest/testdata/logic_test/void
@@ -139,6 +139,7 @@ ORDER BY

 # Regression test for #83754.
 # Non-vectorized and vectorized results should match.
+skipif config fakedist-disk
 query T
 SELECT COALESCE(tab_115318.col_199168, NULL) AS col_199169
diff --git a/pkg/sql/rowcontainer/disk_row_container.go b/pkg/sql/rowcontainer/disk_row_container.go
index c2e36baeb0fe..b214a0e9cabf 100644
--- a/pkg/sql/rowcontainer/disk_row_container.go
+++ b/pkg/sql/rowcontainer/disk_row_container.go
@@ -102,7 +102,7 @@ func MakeDiskRowContainer(
 	typs []*types.T,
 	ordering colinfo.ColumnOrdering,
 	e diskmap.Factory,
-) (DiskRowContainer, error) {
+) (_ DiskRowContainer, retErr error) {
 	diskMap := e.NewSortedDiskMap()
 	d := DiskRowContainer{
 		diskMap: diskMap,
@@ -114,6 +114,13 @@ func MakeDiskRowContainer(
 		engine:     e,
 		datumAlloc: &tree.DatumAlloc{},
 	}
+	defer func() {
+		if retErr != nil {
+			// Make sure to close the container since we're not returning it
+			// to the caller.
+			d.Close(ctx)
+		}
+	}()
 	// The ordering is specified for a subset of the columns. These will be
 	// encoded as a key in the given order according to the given direction so
@@ -144,12 +151,13 @@ func MakeDiskRowContainer(
 		d.encodings[i] = rowenc.EncodingDirToDatumEncoding(orderInfo.Direction)
 		switch t := typs[orderInfo.ColIdx]; t.Family() {
 		case types.TSQueryFamily, types.TSVectorFamily:
-			// Ensure to close the container since we're not returning it to the
-			// caller.
-			d.Close(ctx)
 			return DiskRowContainer{}, unimplemented.NewWithIssueDetailf(
 				92165, "", "can't order by column type %s", t.SQLStringForError(),
 			)
+		case types.TupleFamily:
+			return DiskRowContainer{}, unimplemented.NewWithIssueDetailf(
+				49975, "", "can't spill column type %s to disk", t.SQLStringForError(),
+			)
 		}
 	}
diff --git a/pkg/sql/rowenc/keyside/encode.go b/pkg/sql/rowenc/keyside/encode.go
index e08c556982a9..7d1791559a04 100644
--- a/pkg/sql/rowenc/keyside/encode.go
+++ b/pkg/sql/rowenc/keyside/encode.go
@@ -145,6 +145,12 @@ func Encode(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) {
 		}
 		return encoding.EncodeBytesDescending(b, data), nil
 	case *tree.DTuple:
+		// TODO(49975): because we're not adding any "tuple marker", this
+		// encoding is faulty and can lead to incorrect decoding: e.g. the
+		// tuple (NULL, NULL) is encoded as [0, 0], but when decoding it,
+		// encoding.PeekLength will return 1, leaving the second zero in the
+		// buffer, which could later result in corruption of the datum for
+		// the next column.
 		for _, datum := range t.D {
 			var err error
 			b, err = Encode(b, datum, dir)
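
Note on the disk_row_container.go hunks above: naming the error result (retErr) lets a single deferred closure close the container on every early-return error path, which is why the explicit d.Close(ctx) call could be dropped from the TSQuery/TSVector branch and no Close call is needed in the new TupleFamily branch. The following is a minimal, self-contained Go sketch of that pattern, using toy types rather than the CockroachDB code.

package main

import (
	"context"
	"errors"
	"fmt"
)

// toyContainer stands in for DiskRowContainer; Close just reports that the
// cleanup path actually ran.
type toyContainer struct{}

func (c *toyContainer) Close(ctx context.Context) {
	fmt.Println("container closed")
}

// makeContainer mirrors the shape of MakeDiskRowContainer after the patch:
// the error result is named (retErr) so a single deferred closure can release
// the container on any error path, without repeating a Close call before
// every return statement.
func makeContainer(ctx context.Context, unsupported bool) (_ *toyContainer, retErr error) {
	c := &toyContainer{}
	defer func() {
		if retErr != nil {
			// We're not handing c to the caller, so close it here.
			c.Close(ctx)
		}
	}()
	if unsupported {
		// Early error return: the deferred cleanup above fires.
		return nil, errors.New("can't spill column type RECORD to disk")
	}
	return c, nil
}

func main() {
	if _, err := makeContainer(context.Background(), true); err != nil {
		fmt.Println("got error:", err) // printed after "container closed"
	}
}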
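
Note on the keyside/encode.go TODO above: the sketch below illustrates the failure mode it describes. It assumes pkg/util/encoding's EncodeNullAscending and PeekLength behave as stated in the comment (each NULL encodes to a single 0x00 byte, and PeekLength reports the length of only the first encoded value); it is an illustration of why tuple columns are now rejected before spilling, not part of the patch.

package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

func main() {
	// Encode the tuple (NULL, NULL) the way keyside.Encode currently does:
	// element by element, with no tuple marker or terminator around them.
	var b []byte
	b = encoding.EncodeNullAscending(b)  // first NULL
	b = encoding.EncodeNullAscending(b)  // second NULL
	fmt.Printf("encoded tuple: %v\n", b) // expected [0 0]

	// A decoder that walks a key column by column via PeekLength sees only a
	// single NULL here and consumes one byte.
	n, err := encoding.PeekLength(b)
	fmt.Println("peeked length:", n, err) // expected 1 <nil>

	// The leftover 0x00 is then misread as the start of the next column's
	// encoding; that is the corruption seen in #125367, and the reason tuple
	// columns now hit the unimplemented error in disk_row_container.go.
	fmt.Printf("leftover bytes: %v\n", b[n:]) // expected [0]
}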