From 4279e15ef8c11830caf66fe2b2616fb736dbb799 Mon Sep 17 00:00:00 2001 From: Andrew Werner Date: Tue, 20 Jul 2021 10:51:38 -0400 Subject: [PATCH] sql/sem/builtins: add crdb_internal.datums_to_bytes, use in hash sharded indexes This new builtin function encodes datums using the key encoding. It is useful in particular because it is a function with immutable volatility from a large number of data types to bytes. Release note (sql change): Added a builtin function, crdb_internal.datums_to_bytes, which can encode any data type which can be used in an forward index key into bytes in an immutable way. This function is now used in the expression for hash sharded indexes. --- .../settings/settings-for-tenants.txt | 2 +- docs/generated/settings/settings.html | 2 +- pkg/clusterversion/cockroach_versions.go | 9 +- pkg/sql/create_index.go | 7 +- pkg/sql/create_table.go | 69 +++++++- .../testdata/logic_test/alter_primary_key | 18 +- .../testdata/logic_test/create_table | 2 +- .../testdata/logic_test/hash_sharded_index | 48 +++--- .../execbuilder/testdata/hash_sharded_index | 54 +++++- pkg/sql/sem/builtins/builtins.go | 31 ++++ .../sem/builtins/key_encode_builtin_test.go | 158 ++++++++++++++++++ 11 files changed, 352 insertions(+), 48 deletions(-) create mode 100644 pkg/sql/sem/builtins/key_encode_builtin_test.go diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 4d9b97757596..d0ced466d8bc 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -152,4 +152,4 @@ trace.datadog.project string CockroachDB the project under which traces will be trace.debug.enable boolean false if set, traces for recent requests can be seen at https:///debug/requests trace.lightstep.token string if set, traces go to Lightstep using this token trace.zipkin.collector string if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time. -version version 21.1-140 set the active cluster version in the format '.' +version version 21.1-142 set the active cluster version in the format '.' diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 166ffa36283a..9bdc63fe4d12 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -156,6 +156,6 @@ trace.debug.enablebooleanfalseif set, traces for recent requests can be seen at https:///debug/requests trace.lightstep.tokenstringif set, traces go to Lightstep using this token trace.zipkin.collectorstringif set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time. -versionversion21.1-140set the active cluster version in the format '.' +versionversion21.1-142set the active cluster version in the format '.' diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index e5e0413b00e8..3ff1f1fe92cf 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -288,13 +288,15 @@ const ( // AutoSpanConfigReconciliationJob adds the AutoSpanConfigReconciliationJob // type. AutoSpanConfigReconciliationJob - // PreventNewInterleavedTables interleaved table creation is completely // blocked on this version. PreventNewInterleavedTables // EnsureNoInterleavedTables interleaved tables no longer exist in // this version. EnsureNoInterleavedTables + // UseKeyEncodeForHashShardedIndexes changes the expression used in hash + // sharded indexes from string casts to crdb_internal.datums_to_bytes. + UseKeyEncodeForHashShardedIndexes // Step (1): Add new versions here. ) @@ -494,7 +496,10 @@ var versionsSingleton = keyedVersions{ Key: EnsureNoInterleavedTables, Version: roachpb.Version{Major: 21, Minor: 1, Internal: 140}, }, - + { + Key: UseKeyEncodeForHashShardedIndexes, + Version: roachpb.Version{Major: 21, Minor: 1, Internal: 142}, + }, // Step (2): Add new versions here. } diff --git a/pkg/sql/create_index.go b/pkg/sql/create_index.go index 768b6557dfd3..3abf91b9b0fa 100644 --- a/pkg/sql/create_index.go +++ b/pkg/sql/create_index.go @@ -526,7 +526,8 @@ func setupShardedIndex( return nil, nil, false, err } shardCol, newColumn, err := maybeCreateAndAddShardCol(int(buckets), tableDesc, - colNames, isNewTable) + colNames, isNewTable, + evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes)) if err != nil { return nil, nil, false, err } @@ -548,9 +549,9 @@ func setupShardedIndex( // `desc`, if one doesn't already exist for the given index column set and number of shard // buckets. func maybeCreateAndAddShardCol( - shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable bool, + shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable, useKeyEncodeInExpr bool, ) (col catalog.Column, created bool, err error) { - shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets) + shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets, useKeyEncodeInExpr) if err != nil { return nil, false, err } diff --git a/pkg/sql/create_table.go b/pkg/sql/create_table.go index f937cc61f7d2..deea8046ea6d 100644 --- a/pkg/sql/create_table.go +++ b/pkg/sql/create_table.go @@ -18,6 +18,7 @@ import ( "strings" "github.com/cockroachdb/cockroach/pkg/build" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/server/telemetry" @@ -1571,7 +1572,9 @@ func NewTableDesc( return nil, err } shardCol, _, err := maybeCreateAndAddShardCol(int(buckets), &desc, - []string{string(d.Name)}, true /* isNewTable */) + []string{string(d.Name)}, true, /* isNewTable */ + evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes), + ) if err != nil { return nil, err } @@ -2563,24 +2566,80 @@ func replaceLikeTableOpts(n *tree.CreateTable, params runParams) (tree.TableDefs // makeShardColumnDesc returns a new column descriptor for a hidden computed shard column // based on all the `colNames`. -func makeShardColumnDesc(colNames []string, buckets int) (*descpb.ColumnDescriptor, error) { +func makeShardColumnDesc( + colNames []string, buckets int, useKeyEncodeInExpr bool, +) (*descpb.ColumnDescriptor, error) { col := &descpb.ColumnDescriptor{ Hidden: true, Nullable: false, Type: types.Int4, } col.Name = tabledesc.GetShardColumnName(colNames, int32(buckets)) - col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets) + if useKeyEncodeInExpr { + col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets) + } else { + col.ComputeExpr = makeDeprecatedHashShardComputeExpr(colNames, buckets) + } + return col, nil } -// makeHashShardComputeExpr creates the serialized computed expression for a hash shard +// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard // column based on the column names and the number of buckets. The expression will be // of the form: // -// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets) +// mod(fnv32(crdb_internal.datums_to_bytes(...)),buckets) // func makeHashShardComputeExpr(colNames []string, buckets int) *string { + unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference { + return tree.ResolvableFunctionReference{ + FunctionReference: &tree.UnresolvedName{ + NumParts: 1, + Parts: tree.NameParts{funcName}, + }, + } + } + columnItems := func() tree.Exprs { + exprs := make(tree.Exprs, len(colNames)) + for i := range exprs { + exprs[i] = &tree.ColumnItem{ColumnName: tree.Name(colNames[i])} + } + return exprs + } + hashedColumnsExpr := func() tree.Expr { + return &tree.FuncExpr{ + Func: unresolvedFunc("fnv32"), + Exprs: tree.Exprs{ + &tree.FuncExpr{ + Func: unresolvedFunc("crdb_internal.datums_to_bytes"), + Exprs: columnItems(), + }, + }, + } + } + modBuckets := func(expr tree.Expr) tree.Expr { + return &tree.FuncExpr{ + Func: unresolvedFunc("mod"), + Exprs: tree.Exprs{ + expr, + &tree.CastExpr{ + Expr: tree.NewDInt(tree.DInt(buckets)), + Type: types.Int4, + }, + }, + } + } + res := tree.Serialize(modBuckets(hashedColumnsExpr())) + return &res +} + +// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard +// column based on the column names and the number of buckets. The expression will be +// of the form: +// +// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets) +// +func makeDeprecatedHashShardComputeExpr(colNames []string, buckets int) *string { unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference { return tree.ResolvableFunctionReference{ FunctionReference: &tree.UnresolvedName{ diff --git a/pkg/sql/logictest/testdata/logic_test/alter_primary_key b/pkg/sql/logictest/testdata/logic_test/alter_primary_key index d97b89412138..e2c53d0d6bdb 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_primary_key +++ b/pkg/sql/logictest/testdata/logic_test/alter_primary_key @@ -208,7 +208,7 @@ t CREATE TABLE public.t ( z INT8 NOT NULL, w INT8 NULL, v JSONB NULL, - crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC), UNIQUE INDEX i3 (z ASC) STORING (y), UNIQUE INDEX t_x_key (x ASC), @@ -363,8 +363,8 @@ t CREATE TABLE public.t ( x INT8 NOT NULL, y INT8 NOT NULL, z INT8 NULL, - crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 5:::INT8)) STORED, - crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(5:::INT8 AS INT4))) STORED, + crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 10, UNIQUE INDEX t_x_key (x ASC), INDEX i1 (z ASC) USING HASH WITH BUCKET_COUNT = 5, @@ -422,7 +422,7 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 5:::INT8)) STORED, + crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(5:::INT8 AS INT4))) STORED, x INT8 NOT NULL, y INT8 NOT NULL, z INT8 NULL, @@ -554,7 +554,7 @@ SHOW CREATE t t CREATE TABLE public.t ( x INT8 NOT NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (x, rowid, crdb_internal_x_shard_4), CONSTRAINT check_crdb_internal_x_shard_4 CHECK (crdb_internal_x_shard_4 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8)) @@ -926,9 +926,9 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED, x INT8 NOT NULL, - crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 3:::INT8)) STORED, + crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(3:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 3, FAMILY "primary" (crdb_internal_x_shard_2, x, crdb_internal_x_shard_3), CONSTRAINT check_crdb_internal_x_shard_2 CHECK (crdb_internal_x_shard_2 IN (0:::INT8, 1:::INT8)), @@ -946,10 +946,10 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED, x INT8 NOT NULL, y INT8 NOT NULL, - crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(2:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 2, UNIQUE INDEX t_x_key (x ASC) USING HASH WITH BUCKET_COUNT = 2, FAMILY fam_0_x_y_crdb_internal_x_shard_2 (x, y, crdb_internal_x_shard_2, crdb_internal_y_shard_2), diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index b97d3d1c017c..5285ca99f5a2 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -356,7 +356,7 @@ SHOW CREATE TABLE like_hash ---- like_hash CREATE TABLE public.like_hash ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX like_hash_base_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, diff --git a/pkg/sql/logictest/testdata/logic_test/hash_sharded_index b/pkg/sql/logictest/testdata/logic_test/hash_sharded_index index d17cd85783cb..dddac2e4d180 100644 --- a/pkg/sql/logictest/testdata/logic_test/hash_sharded_index +++ b/pkg/sql/logictest/testdata/logic_test/hash_sharded_index @@ -9,7 +9,7 @@ query TT SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, a INT8 NOT NULL, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), @@ -44,7 +44,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), CONSTRAINT check_crdb_internal_a_shard_10 CHECK (crdb_internal_a_shard_10 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8, 8:::INT8, 9:::INT8)) @@ -99,7 +99,7 @@ SHOW CREATE TABLE specific_family specific_family CREATE TABLE public.specific_family ( a INT8 NULL, b INT8 NULL, - crdb_internal_b_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(b AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_b_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX specific_family_b_idx (b ASC) USING HASH WITH BUCKET_COUNT = 10, @@ -117,7 +117,7 @@ SHOW CREATE TABLE sharded_secondary ---- sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -140,7 +140,7 @@ SHOW CREATE TABLE sharded_secondary ---- sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_crdb_internal_a_shard_4_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -171,7 +171,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (a, rowid, crdb_internal_a_shard_10), @@ -191,8 +191,8 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -211,7 +211,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (a, rowid, crdb_internal_a_shard_4), @@ -271,7 +271,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 10, @@ -295,8 +295,8 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_primary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (crdb_internal_a_shard_10, a, crdb_internal_a_shard_4), @@ -315,7 +315,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), CONSTRAINT check_crdb_internal_a_shard_10 CHECK (crdb_internal_a_shard_10 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8, 8:::INT8, 9:::INT8)) @@ -329,7 +329,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_primary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), @@ -409,7 +409,7 @@ SHOW CREATE TABLE column_used_on_unsharded ---- column_used_on_unsharded CREATE TABLE public.column_used_on_unsharded ( a INT8 NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX column_used_on_unsharded_crdb_internal_a_shard_10_idx (crdb_internal_a_shard_10 ASC), @@ -435,7 +435,7 @@ SHOW CREATE TABLE column_used_on_unsharded_create_table ---- column_used_on_unsharded_create_table CREATE TABLE public.column_used_on_unsharded_create_table ( a INT8 NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX column_used_on_unsharded_create_table_crdb_internal_a_shard_10_idx (crdb_internal_a_shard_10 ASC), @@ -490,10 +490,10 @@ query TT SHOW CREATE TABLE weird_names ---- weird_names CREATE TABLE public.weird_names ( - "crdb_internal_I am a column with spaces_shard_12" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST("I am a column with spaces" AS STRING), '':::STRING)), 12:::INT8)) STORED, + "crdb_internal_I am a column with spaces_shard_12" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes("I am a column with spaces")), CAST(12:::INT8 AS INT4))) STORED, "I am a column with spaces" INT8 NOT NULL, "'quotes' in the column's name" INT8 NULL, - "crdb_internal_'quotes' in the column's name_shard_4" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST("'quotes' in the column's name" AS STRING), '':::STRING)), 4:::INT8)) STORED, + "crdb_internal_'quotes' in the column's name_shard_4" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes("'quotes' in the column's name")), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY ("I am a column with spaces" ASC) USING HASH WITH BUCKET_COUNT = 12, INDEX foo ("'quotes' in the column's name" ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" ("I am a column with spaces", "'quotes' in the column's name", "crdb_internal_I am a column with spaces_shard_12", "crdb_internal_'quotes' in the column's name_shard_4"), @@ -570,8 +570,8 @@ rename_column CREATE TABLE public.rename_column ( c0 INT8 NOT NULL, c1 INT8 NOT NULL, c2 INT8 NULL, - crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c0 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c0, c1)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c2)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c0 ASC, c1 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c2 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c0, c1, c2, crdb_internal_c0_c1_shard_8, crdb_internal_c2_shard_8), @@ -596,8 +596,8 @@ rename_column CREATE TABLE public.rename_column ( c1 INT8 NOT NULL, c2 INT8 NOT NULL, c3 INT8 NULL, - crdb_internal_c1_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c3_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c3 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c1_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c1, c2)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c3_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c3)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c1 ASC, c2 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c3 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c1, c2, c3, crdb_internal_c1_c2_shard_8, crdb_internal_c3_shard_8), @@ -621,8 +621,8 @@ rename_column CREATE TABLE public.rename_column ( c0 INT8 NOT NULL, c1 INT8 NOT NULL, c2 INT8 NULL, - crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c0 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c0, c1)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c2)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c0 ASC, c1 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c2 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c0, c1, c2, crdb_internal_c0_c1_shard_8, crdb_internal_c2_shard_8), diff --git a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index index 35d39ff35bb0..5e85a7f6f58c 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index @@ -28,7 +28,7 @@ vectorized: true └── • render │ columns: (crdb_internal_a_shard_11_comp, column1) │ estimated row count: 2 - │ render crdb_internal_a_shard_11_comp: mod(fnv32(COALESCE(column1::STRING, '')), 11)::INT4 + │ render crdb_internal_a_shard_11_comp: mod(fnv32(crdb_internal.datums_to_bytes(column1)), 11)::INT4 │ render column1: column1 │ └── • values @@ -63,7 +63,7 @@ vectorized: true └── • render │ columns: (crdb_internal_a_shard_12_comp, rowid_default, column1) │ estimated row count: 2 - │ render crdb_internal_a_shard_12_comp: mod(fnv32(COALESCE(column1::STRING, '')), 12)::INT4 + │ render crdb_internal_a_shard_12_comp: mod(fnv32(crdb_internal.datums_to_bytes(column1)), 12)::INT4 │ render rowid_default: unique_rowid() │ render column1: column1 │ @@ -72,3 +72,53 @@ vectorized: true size: 1 column, 2 rows row 0, expr 0: 1 row 1, expr 0: 2 + + +statement ok +CREATE TABLE sharded_primary_with_many_column_types ( + i2 INT2, + i4 INT4, + i8 INT8, + f4 FLOAT4, + f8 FLOAT8, + s STRING, + c CHAR, + b BYTES, + dc DECIMAL, + ival INTERVAL, + oid OID, + tstz TIMESTAMPTZ, + ts TIMESTAMP, + da DATE, + inet INET, + vb VARBIT, + FAMILY (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb), + PRIMARY KEY (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) USING HASH WITH BUCKET_COUNT = 7 +); + +query T +EXPLAIN (VERBOSE) SELECT * FROM sharded_primary_with_many_column_types WHERE (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) = (1::INT2, + 1::INT4, + 1::INT8, + 1::FLOAT4, + 1::FLOAT8, + 1::STRING, + 1::CHAR, + 1::STRING::BYTES, + 1::DECIMAL, + 1::INTERVAL, + 1::OID, + 1::TIMESTAMPTZ, + 1::TIMESTAMP, + 1::DATE, + ('127.0.0.' || 1::STRING)::INET, + 1::VARBIT); +---- +distribution: local +vectorized: true +· +• scan + columns: (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) + estimated row count: 1 (missing stats) + table: sharded_primary_with_many_column_types@primary + spans: /3/1/1/1/1/1/"1"/"1"/"1"/1/00:00:01/1/1970-01-01T00:00:01Z/1970-01-01T00:00:01Z/1/"\x00 \u007f\x00\x00\x01"/B/0 diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 5054cf298683..ee11b59e9b11 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -64,6 +64,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/streaming" "github.com/cockroachdb/cockroach/pkg/util/duration" + "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" "github.com/cockroachdb/cockroach/pkg/util/fuzzystrmatch" "github.com/cockroachdb/cockroach/pkg/util/hlc" @@ -3923,6 +3924,36 @@ value if you rely on the HLC for accuracy.`, Volatility: tree.VolatilityVolatile, }), + "crdb_internal.datums_to_bytes": makeBuiltin( + tree.FunctionProperties{ + Category: categorySystemInfo, + NullableArgs: true, + Undocumented: true, + }, + tree.Overload{ + Types: tree.VariadicType{VarType: types.Any}, + ReturnType: tree.FixedReturnType(types.Bytes), + Fn: func(_ *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + var out []byte + for i, arg := range args { + var err error + out, err = rowenc.EncodeTableKey(out, arg, encoding.Ascending) + if err != nil { + return nil, pgerror.Newf( + pgcode.DatatypeMismatch, + "illegal argument %d of type %s", + i, arg.ResolvedType(), + ) + } + } + return tree.NewDBytes(tree.DBytes(out)), nil + }, + Volatility: tree.VolatilityImmutable, + Info: "Converts datums into key-encoded bytes. " + + "Supports NULLs and all data types which may be used in index keys", + }, + ), + // Enum functions. "enum_first": makeBuiltin( tree.FunctionProperties{NullableArgs: true, Category: categoryEnum}, diff --git a/pkg/sql/sem/builtins/key_encode_builtin_test.go b/pkg/sql/sem/builtins/key_encode_builtin_test.go new file mode 100644 index 000000000000..86e86bcef6a9 --- /dev/null +++ b/pkg/sql/sem/builtins/key_encode_builtin_test.go @@ -0,0 +1,158 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package builtins_test + +import ( + "context" + "fmt" + "math/rand" + "strings" + "sync" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" + "github.com/cockroachdb/cockroach/pkg/sql/randgen" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/require" +) + +// Ensure that we can generate a bunch of rows of all of the relevant data +// types and get reasonable values out of them with no errors. We do this by +// first creating tables with a single column, one table per type and add +// values to that table, ensuring that we don't get an error and that we get +// unique values. Then we exercise random combinations of these types in the +// same way. +func TestCrdbInternalDatumsToBytes(t *testing.T) { + + defer leaktest.AfterTest(t)() + + ctx := context.Background() + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + types := []string{ + "INT2", "INT4", "INT8", + "FLOAT4", "FLOAT8", + "STRING", "CHAR", "BYTES", + "DECIMAL", + "INTERVAL", + "OID", + "TIMESTAMPTZ", "TIMESTAMP", "DATE", + "INET", + "VARBIT", + "STRING[]", + "INT[]", + } + r := rand.New(rand.NewSource(timeutil.Now().UnixNano())) + insertRows := func(t *testing.T, tdb *sqlutils.SQLRunner, columnNames []string) { + tab := catalogkv.TestingGetTableDescriptor(kvDB, keys.SystemSQLCodec, "defaultdb", t.Name()) + const numRows = 100 + for i := 0; i < numRows; i++ { + var row []string + for _, col := range tab.WritableColumns() { + if col.GetName() == "rowid" { + continue + } + var d tree.Datum + if i == 0 { + d = tree.DNull + } else { + const nullOk = false + d = randgen.RandDatum(r, col.GetType(), nullOk) + } + row = append(row, tree.AsStringWithFlags(d, tree.FmtParsable)) + } + tdb.Exec(t, fmt.Sprintf(`INSERT INTO "%s" VALUES (%s) ON CONFLICT (%s) DO NOTHING`, + t.Name(), strings.Join(row, ", "), strings.Join(columnNames, ", "))) + } + } + createTable := func(t *testing.T, tdb *sqlutils.SQLRunner, typ []string) (columnNames []string) { + columnNames = make([]string, len(typ)) + columnSpecs := make([]string, len(typ)) + for i := range typ { + columnNames[i] = fmt.Sprintf("c%d", i) + columnSpecs[i] = fmt.Sprintf("c%d %s", i, typ[i]) + } + tdb.Exec(t, "SET experimental_enable_unique_without_index_constraints = true") + + // Create the table. + createStmt := fmt.Sprintf(`CREATE TABLE "%s" (%s, UNIQUE WITHOUT INDEX (%s))`, + t.Name(), + strings.Join(columnSpecs, ", "), + strings.Join(columnNames, ", ")) + tdb.Exec(t, createStmt) + return columnNames + } + testTableWithColumnTypes := func(t *testing.T, typ ...string) { + conn, err := sqlDB.Conn(ctx) + require.NoError(t, err) + tdb := sqlutils.MakeSQLRunner(conn) + columnNames := createTable(t, tdb, typ) + insertRows(t, tdb, columnNames) + read := fmt.Sprintf(` +WITH t AS ( + SELECT (t.*) AS cols, crdb_internal.datums_to_bytes(t.*) AS encoded + FROM "%s" AS t + ) +SELECT (SELECT count(DISTINCT (cols)) FROM t) - + (SELECT count(DISTINCT (encoded)) FROM t);`, + t.Name()) + tdb.CheckQueryResults(t, read, [][]string{{"0"}}) + } + t.Run("testTableWithColumnTypes values and nulls all work", func(t *testing.T) { + var wg sync.WaitGroup + defer wg.Wait() + for i := range types { + typ := types[i] + wg.Add(1) + go func() { + defer wg.Done() + t.Run(typ, func(t *testing.T) { testTableWithColumnTypes(t, typ) }) + }() + } + }) + t.Run("testTableWithColumnTypes various combinations work and seem unique", func(t *testing.T) { + const numCombinations = 10 + for i := 0; i < numCombinations; i++ { + t.Run("", func(t *testing.T) { + numColumns := r.Intn(len(types)*3) + 1 // arbitrary, at least 1 + colTypes := make([]string, numColumns) + for i := range colTypes { + colTypes[i] = types[r.Intn(len(types))] + } + testTableWithColumnTypes(t, colTypes...) + }) + } + }) +} + +// Test that some data types cannot be key encoded. +func TestCrdbInternalDatumsToBytesIllegalType(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + tdb := sqlutils.MakeSQLRunner(sqlDB) + for _, val := range []string{ + "'{\"a\": 1}'::JSONB", + } { + t.Run(val, func(t *testing.T) { + tdb.ExpectErr(t, ".*illegal argument.*", + fmt.Sprintf("SELECT crdb_internal.datums_to_bytes(%s)", val)) + }) + } +}