diff --git a/config/mysql/sample_tpch_config.xml b/config/mysql/sample_tpch_config.xml index ea37bceb3..b63627805 100644 --- a/config/mysql/sample_tpch_config.xml +++ b/config/mysql/sample_tpch_config.xml @@ -14,6 +14,14 @@ 0.1 + + /benchmarks/tpch/postload-mysql.sql + 1 diff --git a/src/main/resources/benchmarks/tpch/ddl-mysql.sql b/src/main/resources/benchmarks/tpch/ddl-mysql.sql index 7a7da2edc..90e67b098 100644 --- a/src/main/resources/benchmarks/tpch/ddl-mysql.sql +++ b/src/main/resources/benchmarks/tpch/ddl-mysql.sql @@ -1,3 +1,13 @@ +/* +For MySQL, TPCH indices are created post-load. which improves load +performance. See src/main/resources/benchmarks/tpch/postload-mysql.sql +(specified in in mysql/sample_tpch_config.xml). When indices +are created before the load, the insert operations increases overall +load time by >30%. This happens because every insert needs to update +all table indices, which results into additional binlog/redo log updates, +index seeks, and more data IOPS (if data does not fit in memory). +*/ + SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0; SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0; @@ -16,7 +26,6 @@ CREATE TABLE region ( r_comment varchar(152), PRIMARY KEY (r_regionkey) ); -CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); CREATE TABLE nation ( n_nationkey integer NOT NULL, @@ -26,8 +35,6 @@ CREATE TABLE nation ( PRIMARY KEY (n_nationkey), FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); -CREATE INDEX n_rk ON nation (n_regionkey ASC); CREATE TABLE part ( p_partkey integer NOT NULL, @@ -41,7 +48,6 @@ CREATE TABLE part ( p_comment varchar(23) NOT NULL, PRIMARY KEY (p_partkey) ); -CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); CREATE TABLE supplier ( s_suppkey integer NOT NULL, @@ -54,8 +60,6 @@ CREATE TABLE supplier ( PRIMARY KEY (s_suppkey), FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); -CREATE INDEX s_nk ON supplier (s_nationkey ASC); CREATE TABLE partsupp ( ps_partkey integer NOT NULL, @@ -67,10 +71,6 @@ CREATE TABLE partsupp ( FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey) ON DELETE CASCADE, FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey) ON DELETE CASCADE ); -CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); -CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); -CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); -CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); CREATE TABLE customer ( c_custkey integer NOT NULL, @@ -84,8 +84,6 @@ CREATE TABLE customer ( PRIMARY KEY (c_custkey), FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); -CREATE INDEX c_nk ON customer (c_nationkey ASC); CREATE TABLE orders ( o_orderkey integer NOT NULL, @@ -100,9 +98,6 @@ CREATE TABLE orders ( PRIMARY KEY (o_orderkey), FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey) ON DELETE CASCADE ); -CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); -CREATE INDEX o_ck ON orders (o_custkey ASC); -CREATE INDEX o_od ON orders (o_orderdate ASC); CREATE TABLE lineitem ( l_orderkey integer NOT NULL, @@ -125,14 +120,6 @@ CREATE TABLE lineitem ( FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE, FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE ); -CREATE INDEX l_ok ON lineitem (l_orderkey ASC); -CREATE INDEX l_pk ON lineitem (l_partkey ASC); -CREATE INDEX l_sk ON lineitem (l_suppkey ASC); -CREATE INDEX l_sd ON lineitem (l_shipdate ASC); -CREATE INDEX l_cd ON lineitem (l_commitdate ASC); -CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); -CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); -CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC); SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS; -SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; \ No newline at end of file +SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS; diff --git a/src/main/resources/benchmarks/tpch/postload-mysql.sql b/src/main/resources/benchmarks/tpch/postload-mysql.sql new file mode 100644 index 000000000..793b848bb --- /dev/null +++ b/src/main/resources/benchmarks/tpch/postload-mysql.sql @@ -0,0 +1,29 @@ +/* +This script runs after TPCH table creation and data loading. +It improves overall load performance by approximately 30%. +This script is referenced by the parameter in +mysql/sample_tpch_config.xml. +*/ +CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC); +CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC); +CREATE INDEX n_rk ON nation (n_regionkey ASC); +CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC); +CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC); +CREATE INDEX s_nk ON supplier (s_nationkey ASC); +CREATE INDEX ps_pk ON partsupp (ps_partkey ASC); +CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC); +CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC); +CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC); +CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC); +CREATE INDEX c_nk ON customer (c_nationkey ASC); +CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC); +CREATE INDEX o_ck ON orders (o_custkey ASC); +CREATE INDEX o_od ON orders (o_orderdate ASC); +CREATE INDEX l_ok ON lineitem (l_orderkey ASC); +CREATE INDEX l_pk ON lineitem (l_partkey ASC); +CREATE INDEX l_sk ON lineitem (l_suppkey ASC); +CREATE INDEX l_sd ON lineitem (l_shipdate ASC); +CREATE INDEX l_cd ON lineitem (l_commitdate ASC); +CREATE INDEX l_rd ON lineitem (l_receiptdate ASC); +CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC); +CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);