Skip to content

Commit

Permalink
catalog: change hash-sharded indexes to use md5
Browse files (browse the repository at this point in the history)
Release note (sql change): The hash function used by hash-sharded
indexes was changed to
`mod(fnv32(md5(crdb_internal.datums_to_bytes(columns))), bucket_count)`.
(Previously, it was
`mod(fnv32(crdb_internal.datums_to_bytes(columns)), bucket_count)`, with no
`md5` step.) This change was made to improve the uniformity of bucket
distribution when the bucket count is a power of 2 and the sharded
columns have numerical properties that cause the `fnv32` result, taken
modulo the bucket count, to be non-uniformly distributed.
  • Loading branch information
rafiss committed Sep 6, 2023
1 parent 7227ee8 commit 73af3bb
Show file tree
Hide file tree
Showing 16 changed files with 297 additions and 287 deletions.
Expand Up @@ -126,7 +126,7 @@ CREATE TABLE public.t_to_be_hashed (
a INT8 NOT NULL,
b STRING NOT NULL,
c INT8 NULL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c)), 16:::INT8)) VIRTUAL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(c))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_to_be_hashed_pkey PRIMARY KEY (a ASC),
INDEX t_to_be_hashed_c_idx (c ASC) USING HASH WITH (bucket_count=16),
FAMILY fam_0_a_b_c (a, b, c)
Expand All @@ -136,6 +136,7 @@ CREATE TABLE public.t_to_be_hashed (
)
-- Warning: Partitioned table with no zone configurations.


statement ok
CREATE UNIQUE INDEX ON t_to_be_hashed (c) USING HASH;

Expand All @@ -146,7 +147,7 @@ CREATE TABLE public.t_to_be_hashed (
a INT8 NOT NULL,
b STRING NOT NULL,
c INT8 NULL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c)), 16:::INT8)) VIRTUAL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(c))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_to_be_hashed_pkey PRIMARY KEY (a ASC),
INDEX t_to_be_hashed_c_idx (c ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX t_to_be_hashed_c_key (c ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -157,6 +158,7 @@ CREATE TABLE public.t_to_be_hashed (
)
-- Warning: Partitioned table with no zone configurations.


statement ok
ALTER TABLE t_to_be_hashed ALTER PRIMARY KEY USING COLUMNS (a) USING HASH;

Expand All @@ -167,8 +169,8 @@ CREATE TABLE public.t_to_be_hashed (
a INT8 NOT NULL,
b STRING NOT NULL,
c INT8 NULL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(c))), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_to_be_hashed_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX t_to_be_hashed_c_idx (c ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX t_to_be_hashed_c_key (c ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -179,6 +181,7 @@ CREATE TABLE public.t_to_be_hashed (
)
-- Warning: Partitioned table with no zone configurations.


statement ok
CREATE TABLE t_idx_pk_hashed_1 (
a INT PRIMARY KEY USING HASH,
Expand All @@ -195,11 +198,11 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_idx_pk_hashed_1];
----
CREATE TABLE public.t_idx_pk_hashed_1 (
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b STRING NOT NULL,
c INT8 NULL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c)), 16:::INT8)) VIRTUAL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(c))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_idx_pk_hashed_1_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX t_idx_pk_hashed_1_c_idx (c ASC) USING HASH WITH (bucket_count=16),
FAMILY fam_0_a_b_c (a, b, c)
Expand All @@ -209,6 +212,7 @@ CREATE TABLE public.t_idx_pk_hashed_1 (
)
-- Warning: Partitioned table with no zone configurations.


statement ok
CREATE TABLE t_idx_pk_hashed_2 (
a INT,
Expand All @@ -229,8 +233,8 @@ CREATE TABLE public.t_idx_pk_hashed_2 (
a INT8 NOT NULL,
b STRING NOT NULL,
c INT8 NULL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(c)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_c_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(c))), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_idx_pk_hashed_2_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX t_idx_pk_hashed_2_c_idx (c ASC) USING HASH WITH (bucket_count=16),
FAMILY fam_0_a_b_c (a, b, c)
Expand All @@ -240,6 +244,7 @@ CREATE TABLE public.t_idx_pk_hashed_2 (
)
-- Warning: Partitioned table with no zone configurations.


subtest test_presplit_with_partitioning

statement ok
Expand Down
Expand Up @@ -22,10 +22,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_test_hsi_change_locality]
----
CREATE TABLE public.t_test_hsi_change_locality (
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b INT8 NULL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 16:::INT8)) VIRTUAL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(b))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_test_hsi_change_locality_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX idx_b (b ASC) USING HASH WITH (bucket_count=16),
FAMILY fam_0 (a, b)
Expand All @@ -49,10 +49,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_test_hsi_change_locality]
----
CREATE TABLE public.t_test_hsi_change_locality (
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b INT8 NULL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 16:::INT8)) VIRTUAL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(b))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_test_hsi_change_locality_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX idx_b (b ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -77,11 +77,11 @@ SELECT crdb_region, crdb_internal_a_shard_16, a, b
FROM t_test_hsi_change_locality
ORDER BY crdb_region, crdb_internal_a_shard_16, a, b;
----
ca-central-1 0 7 8
ca-central-1 2 5 6
ca-central-1 4 3 4
ca-central-1 6 1 2
ca-central-1 14 9 10
ca-central-1 3 9 10
ca-central-1 11 1 2
ca-central-1 11 5 6
ca-central-1 11 7 8
ca-central-1 13 3 4

# Make sure switching back and forward between different localities is ok.
statement ok
Expand All @@ -91,10 +91,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_test_hsi_change_locality]
----
CREATE TABLE public.t_test_hsi_change_locality (
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b INT8 NULL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 16:::INT8)) VIRTUAL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(b))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_test_hsi_change_locality_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX idx_b (b ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -119,10 +119,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_test_hsi_change_locality]
----
CREATE TABLE public.t_test_hsi_change_locality (
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(a)), 16:::INT8)) VIRTUAL,
crdb_internal_a_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(a))), 16:::INT8)) VIRTUAL,
a INT8 NOT NULL,
b INT8 NULL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(b)), 16:::INT8)) VIRTUAL,
crdb_internal_b_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(b))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_test_hsi_change_locality_pkey PRIMARY KEY (a ASC) USING HASH WITH (bucket_count=16),
INDEX idx_b (b ASC) USING HASH WITH (bucket_count=16),
Expand Down Expand Up @@ -168,10 +168,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_parent]
----
CREATE TABLE public.t_parent (
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id)), 16:::INT8)) VIRTUAL,
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id))), 16:::INT8)) VIRTUAL,
id INT8 NOT NULL,
id2 INT8 NOT NULL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id2)), 16:::INT8)) VIRTUAL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id2))), 16:::INT8)) VIRTUAL,
CONSTRAINT t_parent_pkey PRIMARY KEY (id ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX uniq_id2 (id2 ASC) USING HASH WITH (bucket_count=16),
FAMILY fam_0_id_id2 (id, id2)
Expand All @@ -184,10 +184,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_parent]
----
CREATE TABLE public.t_parent (
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id)), 16:::INT8)) VIRTUAL,
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id))), 16:::INT8)) VIRTUAL,
id INT8 NOT NULL,
id2 INT8 NOT NULL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id2)), 16:::INT8)) VIRTUAL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id2))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_parent_pkey PRIMARY KEY (id ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX uniq_id2 (id2 ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -199,11 +199,11 @@ SELECT crdb_region, crdb_internal_id_shard_16, id, id2
FROM t_parent
ORDER BY crdb_region, crdb_internal_id_shard_16, id, id2;
----
ca-central-1 0 7 8
ca-central-1 2 5 6
ca-central-1 4 3 4
ca-central-1 6 1 2
ca-central-1 14 9 10
ca-central-1 3 9 10
ca-central-1 11 1 2
ca-central-1 11 5 6
ca-central-1 11 7 8
ca-central-1 13 3 4

query ITITTITTB colnames,rowsort
SELECT * FROM crdb_internal.index_columns WHERE descriptor_name = 't_parent'
Expand All @@ -226,10 +226,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_parent]
----
CREATE TABLE public.t_parent (
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id)), 16:::INT8)) VIRTUAL,
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id))), 16:::INT8)) VIRTUAL,
id INT8 NOT NULL,
id2 INT8 NOT NULL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id2)), 16:::INT8)) VIRTUAL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id2))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_parent_pkey PRIMARY KEY (id ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX uniq_id2 (id2 ASC) USING HASH WITH (bucket_count=16),
Expand All @@ -254,10 +254,10 @@ query T
SELECT create_statement FROM [SHOW CREATE TABLE t_parent]
----
CREATE TABLE public.t_parent (
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id)), 16:::INT8)) VIRTUAL,
crdb_internal_id_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id))), 16:::INT8)) VIRTUAL,
id INT8 NOT NULL,
id2 INT8 NOT NULL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(crdb_internal.datums_to_bytes(id2)), 16:::INT8)) VIRTUAL,
crdb_internal_id2_shard_16 INT8 NOT VISIBLE NOT NULL AS (mod(fnv32(md5(crdb_internal.datums_to_bytes(id2))), 16:::INT8)) VIRTUAL,
crdb_region testdb.public.crdb_internal_region NOT VISIBLE NOT NULL DEFAULT default_to_database_primary_region(gateway_region())::testdb.public.crdb_internal_region,
CONSTRAINT t_parent_pkey PRIMARY KEY (id ASC) USING HASH WITH (bucket_count=16),
UNIQUE INDEX uniq_id2 (id2 ASC) USING HASH WITH (bucket_count=16),
Expand Down

0 comments on commit 73af3bb

Please sign in to comment.