Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions config/mysql/sample_tpch_config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
<!-- Control scale factor to generate different amount of data -->
<scalefactor>0.1</scalefactor>

<!--
The post load script is crucial for creating TPCH indices after loading the database.
Index creation was removed from the DDL script to speed up the load process.
Creating indices post load improves performance by nearly 30%.
See src/main/resources/benchmarks/tpch/postload-mysql.sql
-->
<afterload>/benchmarks/tpch/postload-mysql.sql</afterload>

<!-- The workload -->
<terminals>1</terminals>
<works>
Expand Down
35 changes: 11 additions & 24 deletions src/main/resources/benchmarks/tpch/ddl-mysql.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/*
For MySQL, TPCH indices are created post-load. which improves load
performance. See src/main/resources/benchmarks/tpch/postload-mysql.sql
(specified in <afterload> in mysql/sample_tpch_config.xml). When indices
are created before the load, the insert operations increases overall
load time by >30%. This happens because every insert needs to update
all table indices, which results into additional binlog/redo log updates,
index seeks, and more data IOPS (if data does not fit in memory).
*/

SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;

Expand All @@ -16,7 +26,6 @@ CREATE TABLE region (
r_comment varchar(152),
PRIMARY KEY (r_regionkey)
);
CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);

CREATE TABLE nation (
n_nationkey integer NOT NULL,
Expand All @@ -26,8 +35,6 @@ CREATE TABLE nation (
PRIMARY KEY (n_nationkey),
FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey) ON DELETE CASCADE
);
CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
CREATE INDEX n_rk ON nation (n_regionkey ASC);

CREATE TABLE part (
p_partkey integer NOT NULL,
Expand All @@ -41,7 +48,6 @@ CREATE TABLE part (
p_comment varchar(23) NOT NULL,
PRIMARY KEY (p_partkey)
);
CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);

CREATE TABLE supplier (
s_suppkey integer NOT NULL,
Expand All @@ -54,8 +60,6 @@ CREATE TABLE supplier (
PRIMARY KEY (s_suppkey),
FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE
);
CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
CREATE INDEX s_nk ON supplier (s_nationkey ASC);

CREATE TABLE partsupp (
ps_partkey integer NOT NULL,
Expand All @@ -67,10 +71,6 @@ CREATE TABLE partsupp (
FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey) ON DELETE CASCADE,
FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey) ON DELETE CASCADE
);
CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);

CREATE TABLE customer (
c_custkey integer NOT NULL,
Expand All @@ -84,8 +84,6 @@ CREATE TABLE customer (
PRIMARY KEY (c_custkey),
FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE
);
CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
CREATE INDEX c_nk ON customer (c_nationkey ASC);

CREATE TABLE orders (
o_orderkey integer NOT NULL,
Expand All @@ -100,9 +98,6 @@ CREATE TABLE orders (
PRIMARY KEY (o_orderkey),
FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey) ON DELETE CASCADE
);
CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
CREATE INDEX o_ck ON orders (o_custkey ASC);
CREATE INDEX o_od ON orders (o_orderdate ASC);

CREATE TABLE lineitem (
l_orderkey integer NOT NULL,
Expand All @@ -125,14 +120,6 @@ CREATE TABLE lineitem (
FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE,
FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE
);
CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
CREATE INDEX l_pk ON lineitem (l_partkey ASC);
CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);

SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
29 changes: 29 additions & 0 deletions src/main/resources/benchmarks/tpch/postload-mysql.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
This script runs after TPCH table creation and data loading.
It improves overall load performance by approximately 30%.
This script is referenced by the <afterLoad> parameter in
mysql/sample_tpch_config.xml.
*/
CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);
CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
CREATE INDEX n_rk ON nation (n_regionkey ASC);
CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);
CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
CREATE INDEX s_nk ON supplier (s_nationkey ASC);
CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);
CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
CREATE INDEX c_nk ON customer (c_nationkey ASC);
CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
CREATE INDEX o_ck ON orders (o_custkey ASC);
CREATE INDEX o_od ON orders (o_orderdate ASC);
CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
CREATE INDEX l_pk ON lineitem (l_partkey ASC);
CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);