From 38581fcda8daf335f22938a754e97daaf67507a5 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 12 Feb 2025 22:26:19 +0000 Subject: [PATCH 1/2] Optimize TPCH DDL for faster MySQL loading --- config/mysql/sample_tpch_config.xml | 6 +++++ .../resources/benchmarks/tpch/ddl-mysql.sql | 25 +------------------ .../benchmarks/tpch/postload-mysql.sql | 24 ++++++++++++++++++ 3 files changed, 31 insertions(+), 24 deletions(-) create mode 100644 src/main/resources/benchmarks/tpch/postload-mysql.sql diff --git a/config/mysql/sample_tpch_config.xml b/config/mysql/sample_tpch_config.xml index ea37bceb3..aca83f8f9 100644 --- a/config/mysql/sample_tpch_config.xml +++ b/config/mysql/sample_tpch_config.xml @@ -14,6 +14,12 @@ 0.1 + + + + + /benchmarks/tpch/postload-mysql.sql + 1 diff --git a/src/main/resources/benchmarks/tpch/ddl-mysql.sql b/src/main/resources/benchmarks/tpch/ddl-mysql.sql index 7a7da2edc..52c8524f4 100644 --- a/src/main/resources/benchmarks/tpch/ddl-mysql.sql +++ b/src/main/resources/benchmarks/tpch/ddl-mysql.sql @@ -16,7 +16,6 @@ CREATE TABLE region ( r_comment varchar(152), PRIMARY KEY (r_regionkey) ); -CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); CREATE TABLE nation ( n_nationkey integer NOT NULL, @@ -26,8 +25,6 @@ CREATE TABLE nation ( PRIMARY KEY (n_nationkey), FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); -CREATE INDEX n_rk ON nation (n_regionkey ASC); CREATE TABLE part ( p_partkey integer NOT NULL, @@ -41,7 +38,6 @@ CREATE TABLE part ( p_comment varchar(23) NOT NULL, PRIMARY KEY (p_partkey) ); -CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); CREATE TABLE supplier ( s_suppkey integer NOT NULL, @@ -54,8 +50,6 @@ CREATE TABLE supplier ( PRIMARY KEY (s_suppkey), FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); -CREATE INDEX s_nk ON supplier (s_nationkey ASC); CREATE TABLE partsupp ( ps_partkey integer NOT NULL, @@ -67,10 +61,6 @@ CREATE TABLE partsupp ( FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey) ON DELETE CASCADE, FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey) ON DELETE CASCADE ); -CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); -CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); -CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); -CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); CREATE TABLE customer ( c_custkey integer NOT NULL, @@ -84,8 +74,6 @@ CREATE TABLE customer ( PRIMARY KEY (c_custkey), FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); -CREATE INDEX c_nk ON customer (c_nationkey ASC); CREATE TABLE orders ( o_orderkey integer NOT NULL, @@ -100,9 +88,6 @@ CREATE TABLE orders ( PRIMARY KEY (o_orderkey), FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); -CREATE INDEX o_ck ON orders (o_custkey ASC); -CREATE INDEX o_od ON orders (o_orderdate ASC); CREATE TABLE lineitem ( l_orderkey integer NOT NULL, @@ -125,14 +110,6 @@ CREATE TABLE lineitem ( FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE, FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE ); -CREATE INDEX l_ok ON lineitem (l_orderkey ASC); -CREATE INDEX l_pk ON lineitem (l_partkey ASC); -CREATE INDEX l_sk ON lineitem (l_suppkey ASC); -CREATE INDEX l_sd ON lineitem (l_shipdate ASC); -CREATE INDEX l_cd ON lineitem (l_commitdate ASC); -CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); -CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); -CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC); SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS; -SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; \ No newline at end of file +SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; diff --git a/src/main/resources/benchmarks/tpch/postload-mysql.sql b/src/main/resources/benchmarks/tpch/postload-mysql.sql new file mode 100644 index 000000000..6817daa85 --- /dev/null +++ b/src/main/resources/benchmarks/tpch/postload-mysql.sql @@ -0,0 +1,24 @@ + +CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); +CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); +CREATE INDEX n_rk ON nation (n_regionkey ASC); +CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); +CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); +CREATE INDEX s_nk ON supplier (s_nationkey ASC); +CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); +CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); +CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); +CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); +CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); +CREATE INDEX c_nk ON customer (c_nationkey ASC); +CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); +CREATE INDEX o_ck ON orders (o_custkey ASC); +CREATE INDEX o_od ON orders (o_orderdate ASC); +CREATE INDEX l_ok ON lineitem (l_orderkey ASC); +CREATE INDEX l_pk ON lineitem (l_partkey ASC); +CREATE INDEX l_sk ON lineitem (l_suppkey ASC); +CREATE INDEX l_sd ON lineitem (l_shipdate ASC); +CREATE INDEX l_cd ON lineitem (l_commitdate ASC); +CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); +CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); +CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC); From 019582f89ebf00b35ce1e9f3c861041f5d02fff8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 13 Feb 2025 00:06:18 +0000 Subject: [PATCH 2/2] Addressed Brian's review comments: Making TPCH load faster by 30% for SF=5) --- config/mysql/sample_tpch_config.xml | 10 ++++++---- src/main/resources/benchmarks/tpch/ddl-mysql.sql | 10 ++++++++++ src/main/resources/benchmarks/tpch/postload-mysql.sql | 7 ++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/config/mysql/sample_tpch_config.xml b/config/mysql/sample_tpch_config.xml index aca83f8f9..b63627805 100644 --- a/config/mysql/sample_tpch_config.xml +++ b/config/mysql/sample_tpch_config.xml @@ -14,10 +14,12 @@ 0.1 - - - - + /benchmarks/tpch/postload-mysql.sql diff --git a/src/main/resources/benchmarks/tpch/ddl-mysql.sql b/src/main/resources/benchmarks/tpch/ddl-mysql.sql index 52c8524f4..90e67b098 100644 --- a/src/main/resources/benchmarks/tpch/ddl-mysql.sql +++ b/src/main/resources/benchmarks/tpch/ddl-mysql.sql @@ -1,3 +1,13 @@ +/* +For MySQL, TPCH indices are created post-load. which improves load +performance. See src/main/resources/benchmarks/tpch/postload-mysql.sql +(specified in in mysql/sample_tpch_config.xml). When indices +are created before the load, the insert operations increases overall +load time by >30%. This happens because every insert needs to update +all table indices, which results into additional binlog/redo log updates, +index seeks, and more data IOPS (if data does not fit in memory). +*/ + SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0; SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0; diff --git a/src/main/resources/benchmarks/tpch/postload-mysql.sql b/src/main/resources/benchmarks/tpch/postload-mysql.sql index 6817daa85..793b848bb 100644 --- a/src/main/resources/benchmarks/tpch/postload-mysql.sql +++ b/src/main/resources/benchmarks/tpch/postload-mysql.sql @@ -1,4 +1,9 @@ - +/* +This script runs after TPCH table creation and data loading. +It improves overall load performance by approximately 30%. +This script is referenced by the parameter in +mysql/sample_tpch_config.xml. +*/ CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); CREATE INDEX n_rk ON nation (n_regionkey ASC);