Skip to content

Commit

Permalink
sql/sem/builtins: add crdb_internal.datums_to_bytes, use in hash shar…
Browse files Browse the repository at this point in the history
…ded indexes

This new builtin function encodes datums using the key encoding. It is useful
in particular because it is a function with immutable volatility from a large
number of data types to bytes.

Release note (sql change): Added a builtin function,
crdb_internal.datums_to_bytes, which can encode any data type which can be
used in a forward index key into bytes in an immutable way. This function
is now used in the expression for hash sharded indexes.
  • Loading branch information
ajwerner committed Aug 19, 2021
1 parent df6603d commit 4279e15
Show file tree
Hide file tree
Showing 11 changed files with 352 additions and 48 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,4 @@ trace.datadog.project string CockroachDB the project under which traces will be
trace.debug.enable boolean false if set, traces for recent requests can be seen at https://<ui>/debug/requests
trace.lightstep.token string if set, traces go to Lightstep using this token
trace.zipkin.collector string if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time.
version version 21.1-140 set the active cluster version in the format '<major>.<minor>'
version version 21.1-142 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,6 @@
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen at https://<ui>/debug/requests</td></tr>
<tr><td><code>trace.lightstep.token</code></td><td>string</td><td><code></code></td><td>if set, traces go to Lightstep using this token</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'). Only one tracer can be configured at a time.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>21.1-140</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>21.1-142</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
9 changes: 7 additions & 2 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,15 @@ const (
// AutoSpanConfigReconciliationJob adds the AutoSpanConfigReconciliationJob
// type.
AutoSpanConfigReconciliationJob

// PreventNewInterleavedTables interleaved table creation is completely
// blocked on this version.
PreventNewInterleavedTables
// EnsureNoInterleavedTables interleaved tables no longer exist in
// this version.
EnsureNoInterleavedTables
// UseKeyEncodeForHashShardedIndexes changes the expression used in hash
// sharded indexes from string casts to crdb_internal.datums_to_bytes.
UseKeyEncodeForHashShardedIndexes
// Step (1): Add new versions here.
)

Expand Down Expand Up @@ -494,7 +496,10 @@ var versionsSingleton = keyedVersions{
Key: EnsureNoInterleavedTables,
Version: roachpb.Version{Major: 21, Minor: 1, Internal: 140},
},

{
Key: UseKeyEncodeForHashShardedIndexes,
Version: roachpb.Version{Major: 21, Minor: 1, Internal: 142},
},
// Step (2): Add new versions here.
}

Expand Down
7 changes: 4 additions & 3 deletions pkg/sql/create_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,8 @@ func setupShardedIndex(
return nil, nil, false, err
}
shardCol, newColumn, err := maybeCreateAndAddShardCol(int(buckets), tableDesc,
colNames, isNewTable)
colNames, isNewTable,
evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes))
if err != nil {
return nil, nil, false, err
}
Expand All @@ -548,9 +549,9 @@ func setupShardedIndex(
// `desc`, if one doesn't already exist for the given index column set and number of shard
// buckets.
func maybeCreateAndAddShardCol(
shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable bool,
shardBuckets int, desc *tabledesc.Mutable, colNames []string, isNewTable, useKeyEncodeInExpr bool,
) (col catalog.Column, created bool, err error) {
shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets)
shardColDesc, err := makeShardColumnDesc(colNames, shardBuckets, useKeyEncodeInExpr)
if err != nil {
return nil, false, err
}
Expand Down
69 changes: 64 additions & 5 deletions pkg/sql/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"strings"

"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
Expand Down Expand Up @@ -1571,7 +1572,9 @@ func NewTableDesc(
return nil, err
}
shardCol, _, err := maybeCreateAndAddShardCol(int(buckets), &desc,
[]string{string(d.Name)}, true /* isNewTable */)
[]string{string(d.Name)}, true, /* isNewTable */
evalCtx.Settings.Version.IsActive(ctx, clusterversion.UseKeyEncodeForHashShardedIndexes),
)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -2563,24 +2566,80 @@ func replaceLikeTableOpts(n *tree.CreateTable, params runParams) (tree.TableDefs

// makeShardColumnDesc returns a new column descriptor for a hidden computed shard column
// based on all the `colNames`.
func makeShardColumnDesc(colNames []string, buckets int) (*descpb.ColumnDescriptor, error) {
func makeShardColumnDesc(
colNames []string, buckets int, useKeyEncodeInExpr bool,
) (*descpb.ColumnDescriptor, error) {
col := &descpb.ColumnDescriptor{
Hidden: true,
Nullable: false,
Type: types.Int4,
}
col.Name = tabledesc.GetShardColumnName(colNames, int32(buckets))
col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets)
if useKeyEncodeInExpr {
col.ComputeExpr = makeHashShardComputeExpr(colNames, buckets)
} else {
col.ComputeExpr = makeDeprecatedHashShardComputeExpr(colNames, buckets)
}

return col, nil
}

// makeHashShardComputeExpr creates the serialized computed expression for a hash shard
// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard
// column based on the column names and the number of buckets. The expression will be
// of the form:
//
// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets)
// mod(fnv32(crdb_internal.datums_to_bytes(...)),buckets)
//
func makeHashShardComputeExpr(colNames []string, buckets int) *string {
	// fnRef wraps a function name in a single-part unresolved function reference.
	fnRef := func(name string) tree.ResolvableFunctionReference {
		ref := &tree.UnresolvedName{NumParts: 1, Parts: tree.NameParts{name}}
		return tree.ResolvableFunctionReference{FunctionReference: ref}
	}

	// Reference every sharded column, preserving the order of colNames.
	cols := make(tree.Exprs, 0, len(colNames))
	for _, name := range colNames {
		cols = append(cols, &tree.ColumnItem{ColumnName: tree.Name(name)})
	}

	// fnv32(crdb_internal.datums_to_bytes(<cols>...))
	encoded := &tree.FuncExpr{Func: fnRef("crdb_internal.datums_to_bytes"), Exprs: cols}
	hashed := &tree.FuncExpr{Func: fnRef("fnv32"), Exprs: tree.Exprs{encoded}}

	// The bucket count is emitted as an integer constant cast to types.Int4.
	bucketCount := &tree.CastExpr{
		Expr: tree.NewDInt(tree.DInt(buckets)),
		Type: types.Int4,
	}

	// mod(<hash>, <buckets>), returned in its serialized string form.
	shardExpr := &tree.FuncExpr{Func: fnRef("mod"), Exprs: tree.Exprs{hashed, bucketCount}}
	serialized := tree.Serialize(shardExpr)
	return &serialized
}

// makeDeprecatedHashShardComputeExpr creates the serialized computed expression for a hash shard
// column based on the column names and the number of buckets. The expression will be
// of the form:
//
// mod(fnv32(colNames[0]::STRING)+fnv32(colNames[1])+...,buckets)
//
func makeDeprecatedHashShardComputeExpr(colNames []string, buckets int) *string {
unresolvedFunc := func(funcName string) tree.ResolvableFunctionReference {
return tree.ResolvableFunctionReference{
FunctionReference: &tree.UnresolvedName{
Expand Down
18 changes: 9 additions & 9 deletions pkg/sql/logictest/testdata/logic_test/alter_primary_key
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ t CREATE TABLE public.t (
z INT8 NOT NULL,
w INT8 NULL,
v JSONB NULL,
crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 4:::INT8)) STORED,
crdb_internal_z_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(4:::INT8 AS INT4))) STORED,
CONSTRAINT "primary" PRIMARY KEY (y ASC),
UNIQUE INDEX i3 (z ASC) STORING (y),
UNIQUE INDEX t_x_key (x ASC),
Expand Down Expand Up @@ -363,8 +363,8 @@ t CREATE TABLE public.t (
x INT8 NOT NULL,
y INT8 NOT NULL,
z INT8 NULL,
crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(z AS STRING), '':::STRING)), 5:::INT8)) STORED,
crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 10:::INT8)) STORED,
crdb_internal_z_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(z)), CAST(5:::INT8 AS INT4))) STORED,
crdb_internal_y_shard_10 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(10:::INT8 AS INT4))) STORED,
CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 10,
UNIQUE INDEX t_x_key (x ASC),
INDEX i1 (z ASC) USING HASH WITH BUCKET_COUNT = 5,
Expand Down Expand Up @@ -422,7 +422,7 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 5:::INT8)) STORED,
crdb_internal_x_shard_5 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(5:::INT8 AS INT4))) STORED,
x INT8 NOT NULL,
y INT8 NOT NULL,
z INT8 NULL,
Expand Down Expand Up @@ -554,7 +554,7 @@ SHOW CREATE t
t CREATE TABLE public.t (
x INT8 NOT NULL,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 4:::INT8)) STORED,
crdb_internal_x_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(4:::INT8 AS INT4))) STORED,
CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 4,
FAMILY "primary" (x, rowid, crdb_internal_x_shard_4),
CONSTRAINT check_crdb_internal_x_shard_4 CHECK (crdb_internal_x_shard_4 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8))
Expand Down Expand Up @@ -926,9 +926,9 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED,
crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED,
x INT8 NOT NULL,
crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 3:::INT8)) STORED,
crdb_internal_x_shard_3 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(3:::INT8 AS INT4))) STORED,
CONSTRAINT "primary" PRIMARY KEY (x ASC) USING HASH WITH BUCKET_COUNT = 3,
FAMILY "primary" (crdb_internal_x_shard_2, x, crdb_internal_x_shard_3),
CONSTRAINT check_crdb_internal_x_shard_2 CHECK (crdb_internal_x_shard_2 IN (0:::INT8, 1:::INT8)),
Expand All @@ -946,10 +946,10 @@ query TT
SHOW CREATE t
----
t CREATE TABLE public.t (
crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(x AS STRING), '':::STRING)), 2:::INT8)) STORED,
crdb_internal_x_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(x)), CAST(2:::INT8 AS INT4))) STORED,
x INT8 NOT NULL,
y INT8 NOT NULL,
crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(y AS STRING), '':::STRING)), 2:::INT8)) STORED,
crdb_internal_y_shard_2 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(y)), CAST(2:::INT8 AS INT4))) STORED,
CONSTRAINT "primary" PRIMARY KEY (y ASC) USING HASH WITH BUCKET_COUNT = 2,
UNIQUE INDEX t_x_key (x ASC) USING HASH WITH BUCKET_COUNT = 2,
FAMILY fam_0_x_y_crdb_internal_x_shard_2 (x, y, crdb_internal_x_shard_2, crdb_internal_y_shard_2),
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/create_table
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ SHOW CREATE TABLE like_hash
----
like_hash CREATE TABLE public.like_hash (
a INT8 NULL,
crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(COALESCE(CAST(a AS STRING), '':::STRING)), 4:::INT8)) STORED,
crdb_internal_a_shard_4 INT4 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), CAST(4:::INT8 AS INT4))) STORED,
rowid INT8 NOT VISIBLE NOT NULL DEFAULT unique_rowid(),
CONSTRAINT "primary" PRIMARY KEY (rowid ASC),
INDEX like_hash_base_a_idx (a ASC) USING HASH WITH BUCKET_COUNT = 4,
Expand Down
Loading

0 comments on commit 4279e15

Please sign in to comment.