diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 4d9b97757596..d0ced466d8bc 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -152,4 +152,4 @@ trace.datadog.project string CockroachDB the project under which traces will be trace.debug.enable boolean false if set, traces for recent requests can be seen at https:///debug/requests trace.lightstep.token string if set, traces go to Lightstep using this token trace.zipkin.collector string if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time. -version version 21.1-140 set the active cluster version in the format '.' +version version 21.1-142 set the active cluster version in the format '.' diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 166ffa36283a..9bdc63fe4d12 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -156,6 +156,6 @@ trace.debug.enablebooleanfalseif set, traces for recent requests can be seen at https:///debug/requests trace.lightstep.tokenstringif set, traces go to Lightstep using this token trace.zipkin.collectorstringif set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time. -versionversion21.1-140set the active cluster version in the format '.' +versionversion21.1-142set the active cluster version in the format '.' diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index e5e0413b00e8..3ff1f1fe92cf 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -288,13 +288,15 @@ const ( // AutoSpanConfigReconciliationJob adds the AutoSpanConfigReconciliationJob // type. AutoSpanConfigReconciliationJob - // PreventNewInterleavedTables interleaved table creation is completely // blocked on this version. PreventNewInterleavedTables // EnsureNoInterleavedTables interleaved tables no longer exist in // this version. EnsureNoInterleavedTables + // UseKeyEncodeForHashShardedIndexes changes the expression used in hash + // sharded indexes from string casts to crdb_internal.datums_to_bytes. + UseKeyEncodeForHashShardedIndexes // Step (1): Add new versions here. ) @@ -494,7 +496,10 @@ var versionsSingleton = keyedVersions{ Key: EnsureNoInterleavedTables, Version: roachpb.Version{Major: 21, Minor: 1, Internal: 140}, }, - + { + Key: UseKeyEncodeForHashShardedIndexes, + Version: roachpb.Version{Major: 21, Minor: 1, Internal: 142}, + }, // Step (2): Add new versions here. } diff --git a/pkg/sql/create_index.go b/pkg/sql/create_index.go index 768b6557dfd3..3abf91b9b0fa 100644 --- a/pkg/sql/create_index.go +++ b/pkg/sql/create_index.go @@ -526,7 +526,8 @@ func setupShardedIndex( return nil, nil, false, err } shardCol, newColumn, err := maybeCreateAndAddShardCol(int(buckets), tableDesc, - colNames, isNewTable) + colNames, isNewTable, + evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes)) if err != nil { return nil, nil, false, err } @@ -548,9 +549,9 @@ func setupShardedIndex( // `desc`, if one doesn't already exist for the given index column set and number of shard // buckets. func maybeCreateAndAddShardCol( - shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable bool, + shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable, useKeyEncodeInExpr bool, ) (col catalog.Column, created bool, err error) { - shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets) + shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets, useKeyEncodeInExpr) if err != nil { return nil, false, err } diff --git a/pkg/sql/create_table.go b/pkg/sql/create_table.go index f937cc61f7d2..deea8046ea6d 100644 --- a/pkg/sql/create_table.go +++ b/pkg/sql/create_table.go @@ -18,6 +18,7 @@ import ( "strings" "github.com/cockroachdb/cockroach/pkg/build" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/geo/geoindex" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/server/telemetry" @@ -1571,7 +1572,9 @@ func NewTableDesc( return nil, err } shardCol, _, err := maybeCreateAndAddShardCol(int(buckets), &desc, - []string{string(d.Name)}, true /* isNewTable */) + []string{string(d.Name)}, true, /* isNewTable */ + evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes), + ) if err != nil { return nil, err } @@ -2563,24 +2566,80 @@ func replaceLikeTableOpts(n *tree.CreateTable, params runParams) (tree.TableDefs // makeShardColumnDesc returns a new column descriptor for a hidden computed shard column // based on all the `colNames`. -func makeShardColumnDesc(colNames []string, buckets int) (*descpb.ColumnDescriptor, error) { +func makeShardColumnDesc( + colNames []string, buckets int, useKeyEncodeInExpr bool, +) (*descpb.ColumnDescriptor, error) { col := &descpb.ColumnDescriptor{ Hidden: true, Nullable: false, Type: types.Int4, } col.Name = tabledesc.GetShardColumnName(colNames, int32(buckets)) - col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets) + if useKeyEncodeInExpr { + col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets) + } else { + col.ComputeExpr = makeDeprecatedHashShardComputeExpr(colNames, buckets) + } + return col, nil } -// makeHashShardComputeExpr creates the serialized computed expression for a hash shard +// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard // column based on the column names and the number of buckets. The expression will be // of the form: // -// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets) +// mod(fnv32(crdb_internal.datums_to_bytes(...)),buckets) // func makeHashShardComputeExpr(colNames []string, buckets int) *string { + unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference { + return tree.ResolvableFunctionReference{ + FunctionReference: &tree.UnresolvedName{ + NumParts: 1, + Parts: tree.NameParts{funcName}, + }, + } + } + columnItems := func() tree.Exprs { + exprs := make(tree.Exprs, len(colNames)) + for i := range exprs { + exprs[i] = &tree.ColumnItem{ColumnName: tree.Name(colNames[i])} + } + return exprs + } + hashedColumnsExpr := func() tree.Expr { + return &tree.FuncExpr{ + Func: unresolvedFunc("fnv32"), + Exprs: tree.Exprs{ + &tree.FuncExpr{ + Func: unresolvedFunc("crdb_internal.datums_to_bytes"), + Exprs: columnItems(), + }, + }, + } + } + modBuckets := func(expr tree.Expr) tree.Expr { + return &tree.FuncExpr{ + Func: unresolvedFunc("mod"), + Exprs: tree.Exprs{ + expr, + &tree.CastExpr{ + Expr: tree.NewDInt(tree.DInt(buckets)), + Type: types.Int4, + }, + }, + } + } + res := tree.Serialize(modBuckets(hashedColumnsExpr())) + return &res +} + +// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard +// column based on the column names and the number of buckets. The expression will be +// of the form: +// +// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets) +// +func makeDeprecatedHashShardComputeExpr(colNames []string, buckets int) *string { unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference { return tree.ResolvableFunctionReference{ FunctionReference: &tree.UnresolvedName{ diff --git a/pkg/sql/logictest/testdata/logic_test/alter_primary_key b/pkg/sql/logictest/testdata/logic_test/alter_primary_key index d97b89412138..e2c53d0d6bdb 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_primary_key +++ b/pkg/sql/logictest/testdata/logic_test/alter_primary_key @@ -208,7 +208,7 @@ t CREATE TABLE public.t ( z INT8 NOT NULL, w INT8 NULL, v JSONB NULL, - crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC), UNIQUE INDEX i3 (z ASC) STORING (y), UNIQUE INDEX t_x_key (x ASC), @@ -363,8 +363,8 @@ t CREATE TABLE public.t ( x INT8 NOT NULL, y INT8 NOT NULL, z INT8 NULL, - crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 5:::INT8)) STORED, - crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(5:::INT8 AS INT4))) STORED, + crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 10, UNIQUE INDEX t_x_key (x ASC), INDEX i1 (z ASC) USING HASH WITH BUCKET_COUNT = 5, @@ -422,7 +422,7 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 5:::INT8)) STORED, + crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(5:::INT8 AS INT4))) STORED, x INT8 NOT NULL, y INT8 NOT NULL, z INT8 NULL, @@ -554,7 +554,7 @@ SHOW CREATE t t CREATE TABLE public.t ( x INT8 NOT NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (x, rowid, crdb_internal_x_shard_4), CONSTRAINT check_crdb_internal_x_shard_4 CHECK (crdb_internal_x_shard_4 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8)) @@ -926,9 +926,9 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED, x INT8 NOT NULL, - crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 3:::INT8)) STORED, + crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(3:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 3, FAMILY "primary" (crdb_internal_x_shard_2, x, crdb_internal_x_shard_3), CONSTRAINT check_crdb_internal_x_shard_2 CHECK (crdb_internal_x_shard_2 IN (0:::INT8, 1:::INT8)), @@ -946,10 +946,10 @@ query TT SHOW CREATE t ---- t CREATE TABLE public.t ( - crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED, x INT8 NOT NULL, y INT8 NOT NULL, - crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 2:::INT8)) STORED, + crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(2:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 2, UNIQUE INDEX t_x_key (x ASC) USING HASH WITH BUCKET_COUNT = 2, FAMILY fam_0_x_y_crdb_internal_x_shard_2 (x, y, crdb_internal_x_shard_2, crdb_internal_y_shard_2), diff --git a/pkg/sql/logictest/testdata/logic_test/create_table b/pkg/sql/logictest/testdata/logic_test/create_table index b97d3d1c017c..5285ca99f5a2 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_table +++ b/pkg/sql/logictest/testdata/logic_test/create_table @@ -356,7 +356,7 @@ SHOW CREATE TABLE like_hash ---- like_hash CREATE TABLE public.like_hash ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX like_hash_base_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, diff --git a/pkg/sql/logictest/testdata/logic_test/hash_sharded_index b/pkg/sql/logictest/testdata/logic_test/hash_sharded_index index d17cd85783cb..dddac2e4d180 100644 --- a/pkg/sql/logictest/testdata/logic_test/hash_sharded_index +++ b/pkg/sql/logictest/testdata/logic_test/hash_sharded_index @@ -9,7 +9,7 @@ query TT SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, a INT8 NOT NULL, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), @@ -44,7 +44,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), CONSTRAINT check_crdb_internal_a_shard_10 CHECK (crdb_internal_a_shard_10 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8, 8:::INT8, 9:::INT8)) @@ -99,7 +99,7 @@ SHOW CREATE TABLE specific_family specific_family CREATE TABLE public.specific_family ( a INT8 NULL, b INT8 NULL, - crdb_internal_b_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(b AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_b_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX specific_family_b_idx (b ASC) USING HASH WITH BUCKET_COUNT = 10, @@ -117,7 +117,7 @@ SHOW CREATE TABLE sharded_secondary ---- sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -140,7 +140,7 @@ SHOW CREATE TABLE sharded_secondary ---- sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_crdb_internal_a_shard_4_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -171,7 +171,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (a, rowid, crdb_internal_a_shard_10), @@ -191,8 +191,8 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 4, @@ -211,7 +211,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (a, rowid, crdb_internal_a_shard_4), @@ -271,7 +271,7 @@ SHOW CREATE TABLE sharded_secondary sharded_secondary CREATE TABLE public.sharded_secondary ( a INT8 NULL, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX sharded_secondary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_secondary_a_idx1 (a ASC) USING HASH WITH BUCKET_COUNT = 10, @@ -295,8 +295,8 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, - crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, + crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_primary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" (crdb_internal_a_shard_10, a, crdb_internal_a_shard_4), @@ -315,7 +315,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), CONSTRAINT check_crdb_internal_a_shard_10 CHECK (crdb_internal_a_shard_10 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8, 8:::INT8, 9:::INT8)) @@ -329,7 +329,7 @@ SHOW CREATE TABLE sharded_primary ---- sharded_primary CREATE TABLE public.sharded_primary ( a INT8 NOT NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (a ASC) USING HASH WITH BUCKET_COUNT = 10, INDEX sharded_primary_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 10, FAMILY "primary" (crdb_internal_a_shard_10, a), @@ -409,7 +409,7 @@ SHOW CREATE TABLE column_used_on_unsharded ---- column_used_on_unsharded CREATE TABLE public.column_used_on_unsharded ( a INT8 NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX column_used_on_unsharded_crdb_internal_a_shard_10_idx (crdb_internal_a_shard_10 ASC), @@ -435,7 +435,7 @@ SHOW CREATE TABLE column_used_on_unsharded_create_table ---- column_used_on_unsharded_create_table CREATE TABLE public.column_used_on_unsharded_create_table ( a INT8 NULL, - crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 10:::INT8)) STORED, + crdb_internal_a_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(10:::INT8 AS INT4))) STORED, rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(), CONSTRAINT "primary" PRIMARY KEY (rowid ASC), INDEX column_used_on_unsharded_create_table_crdb_internal_a_shard_10_idx (crdb_internal_a_shard_10 ASC), @@ -490,10 +490,10 @@ query TT SHOW CREATE TABLE weird_names ---- weird_names CREATE TABLE public.weird_names ( - "crdb_internal_I am a column with spaces_shard_12" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST("I am a column with spaces" AS STRING), '':::STRING)), 12:::INT8)) STORED, + "crdb_internal_I am a column with spaces_shard_12" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes("I am a column with spaces")), CAST(12:::INT8 AS INT4))) STORED, "I am a column with spaces" INT8 NOT NULL, "'quotes' in the column's name" INT8 NULL, - "crdb_internal_'quotes' in the column's name_shard_4" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST("'quotes' in the column's name" AS STRING), '':::STRING)), 4:::INT8)) STORED, + "crdb_internal_'quotes' in the column's name_shard_4" INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes("'quotes' in the column's name")), CAST(4:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY ("I am a column with spaces" ASC) USING HASH WITH BUCKET_COUNT = 12, INDEX foo ("'quotes' in the column's name" ASC) USING HASH WITH BUCKET_COUNT = 4, FAMILY "primary" ("I am a column with spaces", "'quotes' in the column's name", "crdb_internal_I am a column with spaces_shard_12", "crdb_internal_'quotes' in the column's name_shard_4"), @@ -570,8 +570,8 @@ rename_column CREATE TABLE public.rename_column ( c0 INT8 NOT NULL, c1 INT8 NOT NULL, c2 INT8 NULL, - crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c0 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c0, c1)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c2)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c0 ASC, c1 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c2 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c0, c1, c2, crdb_internal_c0_c1_shard_8, crdb_internal_c2_shard_8), @@ -596,8 +596,8 @@ rename_column CREATE TABLE public.rename_column ( c1 INT8 NOT NULL, c2 INT8 NOT NULL, c3 INT8 NULL, - crdb_internal_c1_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c3_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c3 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c1_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c1, c2)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c3_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c3)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c1 ASC, c2 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c3 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c1, c2, c3, crdb_internal_c1_c2_shard_8, crdb_internal_c3_shard_8), @@ -621,8 +621,8 @@ rename_column CREATE TABLE public.rename_column ( c0 INT8 NOT NULL, c1 INT8 NOT NULL, c2 INT8 NULL, - crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c0 AS STRING), '':::STRING)) + fnv32(COALESCE(CAST(c1 AS STRING), '':::STRING)), 8:::INT8)) STORED, - crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(c2 AS STRING), '':::STRING)), 8:::INT8)) STORED, + crdb_internal_c0_c1_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c0, c1)), CAST(8:::INT8 AS INT4))) STORED, + crdb_internal_c2_shard_8 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c2)), CAST(8:::INT8 AS INT4))) STORED, CONSTRAINT "primary" PRIMARY KEY (c0 ASC, c1 ASC) USING HASH WITH BUCKET_COUNT = 8, INDEX rename_column_c2_idx (c2 ASC) USING HASH WITH BUCKET_COUNT = 8, FAMILY "primary" (c0, c1, c2, crdb_internal_c0_c1_shard_8, crdb_internal_c2_shard_8), diff --git a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index index 35d39ff35bb0..5e85a7f6f58c 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index @@ -28,7 +28,7 @@ vectorized: true └── • render │ columns: (crdb_internal_a_shard_11_comp, column1) │ estimated row count: 2 - │ render crdb_internal_a_shard_11_comp: mod(fnv32(COALESCE(column1::STRING, '')), 11)::INT4 + │ render crdb_internal_a_shard_11_comp: mod(fnv32(crdb_internal.datums_to_bytes(column1)), 11)::INT4 │ render column1: column1 │ └── • values @@ -63,7 +63,7 @@ vectorized: true └── • render │ columns: (crdb_internal_a_shard_12_comp, rowid_default, column1) │ estimated row count: 2 - │ render crdb_internal_a_shard_12_comp: mod(fnv32(COALESCE(column1::STRING, '')), 12)::INT4 + │ render crdb_internal_a_shard_12_comp: mod(fnv32(crdb_internal.datums_to_bytes(column1)), 12)::INT4 │ render rowid_default: unique_rowid() │ render column1: column1 │ @@ -72,3 +72,53 @@ vectorized: true size: 1 column, 2 rows row 0, expr 0: 1 row 1, expr 0: 2 + + +statement ok +CREATE TABLE sharded_primary_with_many_column_types ( + i2 INT2, + i4 INT4, + i8 INT8, + f4 FLOAT4, + f8 FLOAT8, + s STRING, + c CHAR, + b BYTES, + dc DECIMAL, + ival INTERVAL, + oid OID, + tstz TIMESTAMPTZ, + ts TIMESTAMP, + da DATE, + inet INET, + vb VARBIT, + FAMILY (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb), + PRIMARY KEY (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) USING HASH WITH BUCKET_COUNT = 7 +); + +query T +EXPLAIN (VERBOSE) SELECT * FROM sharded_primary_with_many_column_types WHERE (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) = (1::INT2, + 1::INT4, + 1::INT8, + 1::FLOAT4, + 1::FLOAT8, + 1::STRING, + 1::CHAR, + 1::STRING::BYTES, + 1::DECIMAL, + 1::INTERVAL, + 1::OID, + 1::TIMESTAMPTZ, + 1::TIMESTAMP, + 1::DATE, + ('127.0.0.' || 1::STRING)::INET, + 1::VARBIT); +---- +distribution: local +vectorized: true +· +• scan + columns: (i2, i4, i8, f4, f8, s, c, b, dc, ival, oid, tstz, ts, da, inet, vb) + estimated row count: 1 (missing stats) + table: sharded_primary_with_many_column_types@primary + spans: /3/1/1/1/1/1/"1"/"1"/"1"/1/00:00:01/1/1970-01-01T00:00:01Z/1970-01-01T00:00:01Z/1/"\x00 \u007f\x00\x00\x01"/B/0 diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 5054cf298683..ee11b59e9b11 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -64,6 +64,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/streaming" "github.com/cockroachdb/cockroach/pkg/util/duration" + "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" "github.com/cockroachdb/cockroach/pkg/util/fuzzystrmatch" "github.com/cockroachdb/cockroach/pkg/util/hlc" @@ -3923,6 +3924,36 @@ value if you rely on the HLC for accuracy.`, Volatility: tree.VolatilityVolatile, }), + "crdb_internal.datums_to_bytes": makeBuiltin( + tree.FunctionProperties{ + Category: categorySystemInfo, + NullableArgs: true, + Undocumented: true, + }, + tree.Overload{ + Types: tree.VariadicType{VarType: types.Any}, + ReturnType: tree.FixedReturnType(types.Bytes), + Fn: func(_ *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + var out []byte + for i, arg := range args { + var err error + out, err = rowenc.EncodeTableKey(out, arg, encoding.Ascending) + if err != nil { + return nil, pgerror.Newf( + pgcode.DatatypeMismatch, + "illegal argument %d of type %s", + i, arg.ResolvedType(), + ) + } + } + return tree.NewDBytes(tree.DBytes(out)), nil + }, + Volatility: tree.VolatilityImmutable, + Info: "Converts datums into key-encoded bytes. " + + "Supports NULLs and all data types which may be used in index keys", + }, + ), + // Enum functions. "enum_first": makeBuiltin( tree.FunctionProperties{NullableArgs: true, Category: categoryEnum}, diff --git a/pkg/sql/sem/builtins/key_encode_builtin_test.go b/pkg/sql/sem/builtins/key_encode_builtin_test.go new file mode 100644 index 000000000000..86e86bcef6a9 --- /dev/null +++ b/pkg/sql/sem/builtins/key_encode_builtin_test.go @@ -0,0 +1,158 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package builtins_test + +import ( + "context" + "fmt" + "math/rand" + "strings" + "sync" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" + "github.com/cockroachdb/cockroach/pkg/sql/randgen" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/require" +) + +// Ensure that we can generate a bunch of rows of all of the relevant data +// types and get reasonable values out of them with no errors. We do this by +// first creating tables with a single column, one table per type and add +// values to that table, ensuring that we don't get an error and that we get +// unique values. Then we exercise random combinations of these types in the +// same way. +func TestCrdbInternalDatumsToBytes(t *testing.T) { + + defer leaktest.AfterTest(t)() + + ctx := context.Background() + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + types := []string{ + "INT2", "INT4", "INT8", + "FLOAT4", "FLOAT8", + "STRING", "CHAR", "BYTES", + "DECIMAL", + "INTERVAL", + "OID", + "TIMESTAMPTZ", "TIMESTAMP", "DATE", + "INET", + "VARBIT", + "STRING[]", + "INT[]", + } + r := rand.New(rand.NewSource(timeutil.Now().UnixNano())) + insertRows := func(t *testing.T, tdb *sqlutils.SQLRunner, columnNames []string) { + tab := catalogkv.TestingGetTableDescriptor(kvDB, keys.SystemSQLCodec, "defaultdb", t.Name()) + const numRows = 100 + for i := 0; i < numRows; i++ { + var row []string + for _, col := range tab.WritableColumns() { + if col.GetName() == "rowid" { + continue + } + var d tree.Datum + if i == 0 { + d = tree.DNull + } else { + const nullOk = false + d = randgen.RandDatum(r, col.GetType(), nullOk) + } + row = append(row, tree.AsStringWithFlags(d, tree.FmtParsable)) + } + tdb.Exec(t, fmt.Sprintf(`INSERT INTO "%s" VALUES (%s) ON CONFLICT (%s) DO NOTHING`, + t.Name(), strings.Join(row, ", "), strings.Join(columnNames, ", "))) + } + } + createTable := func(t *testing.T, tdb *sqlutils.SQLRunner, typ []string) (columnNames []string) { + columnNames = make([]string, len(typ)) + columnSpecs := make([]string, len(typ)) + for i := range typ { + columnNames[i] = fmt.Sprintf("c%d", i) + columnSpecs[i] = fmt.Sprintf("c%d %s", i, typ[i]) + } + tdb.Exec(t, "SET experimental_enable_unique_without_index_constraints = true") + + // Create the table. + createStmt := fmt.Sprintf(`CREATE TABLE "%s" (%s, UNIQUE WITHOUT INDEX (%s))`, + t.Name(), + strings.Join(columnSpecs, ", "), + strings.Join(columnNames, ", ")) + tdb.Exec(t, createStmt) + return columnNames + } + testTableWithColumnTypes := func(t *testing.T, typ ...string) { + conn, err := sqlDB.Conn(ctx) + require.NoError(t, err) + tdb := sqlutils.MakeSQLRunner(conn) + columnNames := createTable(t, tdb, typ) + insertRows(t, tdb, columnNames) + read := fmt.Sprintf(` +WITH t AS ( + SELECT (t.*) AS cols, crdb_internal.datums_to_bytes(t.*) AS encoded + FROM "%s" AS t + ) +SELECT (SELECT count(DISTINCT (cols)) FROM t) - + (SELECT count(DISTINCT (encoded)) FROM t);`, + t.Name()) + tdb.CheckQueryResults(t, read, [][]string{{"0"}}) + } + t.Run("testTableWithColumnTypes values and nulls all work", func(t *testing.T) { + var wg sync.WaitGroup + defer wg.Wait() + for i := range types { + typ := types[i] + wg.Add(1) + go func() { + defer wg.Done() + t.Run(typ, func(t *testing.T) { testTableWithColumnTypes(t, typ) }) + }() + } + }) + t.Run("testTableWithColumnTypes various combinations work and seem unique", func(t *testing.T) { + const numCombinations = 10 + for i := 0; i < numCombinations; i++ { + t.Run("", func(t *testing.T) { + numColumns := r.Intn(len(types)*3) + 1 // arbitrary, at least 1 + colTypes := make([]string, numColumns) + for i := range colTypes { + colTypes[i] = types[r.Intn(len(types))] + } + testTableWithColumnTypes(t, colTypes...) + }) + } + }) +} + +// Test that some data types cannot be key encoded. +func TestCrdbInternalDatumsToBytesIllegalType(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{}) + defer s.Stopper().Stop(ctx) + tdb := sqlutils.MakeSQLRunner(sqlDB) + for _, val := range []string{ + "'{\"a\": 1}'::JSONB", + } { + t.Run(val, func(t *testing.T) { + tdb.ExpectErr(t, ".*illegal argument.*", + fmt.Sprintf("SELECT crdb_internal.datums_to_bytes(%s)", val)) + }) + } +}