Skip to content

Commit

Permalink
A new session variable to control SELECT .. LIMIT cost heuristic
Browse files Browse the repository at this point in the history
Summary:
This diff adds a new session variable
optimizer_limit_heuristic_multiplier_pct to increase
estimated number of rows to scan for SELECT .. LIMIT query.
With optimizer_switch='prefer_ordering_index=on', there
were cases that SELECT .. LIMIT 1 underestimated the number of
rows to scan, which ended up choosing a wrong index.
This diff allows to set a minimum percentages of rows to scan
from the given table.

This diff was basically a port from facebook@5867296 , with some changes, notably configurable with percent rather than on/off, and adjusting some test queries for MySQL 8.0.

Differential Revision: D51729868

fbshipit-source-id: 887901d
  • Loading branch information
Yoshinori Matsunobu authored and inikep committed Aug 2, 2024
1 parent 13c4bf2 commit 675f82e
Show file tree
Hide file tree
Showing 8 changed files with 393 additions and 1 deletion.
4 changes: 4 additions & 0 deletions mysql-test/r/mysqld--help-notwin.result
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,9 @@ The following options may be given as the first argument:
Detect groups implied by equality conditions when costing
group by loose index scans.
(Defaults to on; use --skip-optimizer-implicit-groups-for-lis to disable.)
--optimizer-limit-heuristic-multiplier-pct[=#]
Percent of the table rows that SELECT ... LIMIT is
estimated to scan.
--optimizer-max-subgraph-pairs=#
Maximum depth of subgraph pairs a query can have before
the hypergraph join optimizer starts reducing the search
Expand Down Expand Up @@ -3455,6 +3458,7 @@ optimizer-force-index-rewrite (No default value)
optimizer-full-scan TRUE
optimizer-group-by-cost-adjust 1
optimizer-implicit-groups-for-lis TRUE
optimizer-limit-heuristic-multiplier-pct 0
optimizer-max-subgraph-pairs 100000
optimizer-prune-level 1
optimizer-search-depth 62
Expand Down
167 changes: 167 additions & 0 deletions mysql-test/r/optimizer_limit_heuristic_multiplier_pct.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
SET optimizer_limit_heuristic_multiplier_pct = 0;
CREATE TABLE t1 (
pk INT PRIMARY KEY AUTO_INCREMENT,
i INT,
j INT,
INDEX (i),
INDEX (j)
);
INSERT INTO t1 (i,j) VALUES (1,1);
SET @d=1;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
SET @d=@d*2;
ANALYZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
EXPLAIN SELECT * FROM t1
WHERE i<100 AND j<40
ORDER BY i LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL range i,j i 5 NULL 99 30.47 Using index condition; Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i` AS `i`,`test`.`t1`.`j` AS `j` from `test`.`t1` where ((`test`.`t1`.`i` < 100) and (`test`.`t1`.`j` < 40)) order by `test`.`t1`.`i` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
100
EXPLAIN SELECT * FROM t1
WHERE i<100 AND j<40
ORDER BY i LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL range i,j j 5 NULL 39 77.34 Using index condition; Using where; Using filesort
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i` AS `i`,`test`.`t1`.`j` AS `j` from `test`.`t1` where ((`test`.`t1`.`i` < 100) and (`test`.`t1`.`j` < 40)) order by `test`.`t1`.`i` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
DROP TABLE t1;
CREATE TABLE t0 (
i0 INTEGER NOT NULL
);
INSERT INTO t0 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE TABLE t1 (
pk INTEGER PRIMARY KEY,
i1 INTEGER NOT NULL,
i2 INTEGER NOT NULL,
INDEX k1 (i1),
INDEX k2 (i1,i2)
) ENGINE=InnoDB;
INSERT INTO t1
SELECT a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0
FROM t0 AS a0, t0 AS a1;
CREATE TABLE t2 (
pk INTEGER PRIMARY KEY,
i1 INTEGER NOT NULL,
INDEX k1 (i1)
) ENGINE=InnoDB;
INSERT INTO t2
SELECT a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0
FROM t0 AS a0, t0 AS a1;
ANALYZE TABLE t1,t2;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
# Query 1
# Query should use index to optimize the ORDER BY LIMIT
select @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 7000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 2;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL index PRIMARY,k1,k2 k1 4 NULL 6 29.70 Using where
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 7000) and (`test`.`t1`.`i1` > 2)) order by `test`.`t1`.`i1` limit 2
SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
100
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 7000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 2;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL range PRIMARY,k1,k2 PRIMARY 4 NULL 30 99.00 Using where; Using filesort
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 7000) and (`test`.`t1`.`i1` > 2)) order by `test`.`t1`.`i1` limit 2
SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
# Query 2
# Query should use index to optimize the ORDER BY LIMIT
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL index PRIMARY,k1,k2 k1 4 NULL 10 49.50 Using where
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 5000) and (`test`.`t1`.`i1` > 2)) order by `test`.`t1`.`i1` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
100
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL range PRIMARY,k1,k2 PRIMARY 4 NULL 50 99.00 Using where; Using filesort
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 5000) and (`test`.`t1`.`i1` > 2)) order by `test`.`t1`.`i1` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
# Query 3
# Changing range condition on i1 should make
# key on i1 get picked to give the order.
# Unaffected by system variable.
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 800
ORDER BY t1.i1 LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL index PRIMARY,k1,k2 k1 4 NULL 10 46.00 Using where
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 5000) and (`test`.`t1`.`i1` > 800)) order by `test`.`t1`.`i1` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
100
EXPLAIN SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 800
ORDER BY t1.i1 LIMIT 5;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 NULL range PRIMARY,k1,k2 PRIMARY 4 NULL 50 92.00 Using where; Using filesort
1 SIMPLE t2 NULL ref k1 k1 4 test.t1.i1 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`i1` AS `i1`,`test`.`t1`.`i2` AS `i2`,`test`.`t2`.`pk` AS `pk`,`test`.`t2`.`i1` AS `i1` from `test`.`t1` straight_join `test`.`t2` where ((`test`.`t2`.`i1` = `test`.`t1`.`i1`) and (`test`.`t1`.`pk` > 5000) and (`test`.`t1`.`i1` > 800)) order by `test`.`t1`.`i1` limit 5
SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;
@@optimizer_limit_heuristic_multiplier_pct
0
DROP TABLE t0, t1, t2;
SET @@optimizer_limit_heuristic_multiplier_pct = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Default value of optimizer_limit_heuristic_multiplier_pct is 0
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
@@global.optimizer_limit_heuristic_multiplier_pct
0
optimizer_limit_heuristic_multiplier_pct is a dynamic variable (change to 100)
set @@global.optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
@@global.optimizer_limit_heuristic_multiplier_pct
100
restore the default value
SET @@global.optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
@@global.optimizer_limit_heuristic_multiplier_pct
0
restart the server with non default value (1)
# restart: --optimizer_limit_heuristic_multiplier_pct=1
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
@@global.optimizer_limit_heuristic_multiplier_pct
1
restart the server with the default value (0)
# restart:
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
@@global.optimizer_limit_heuristic_multiplier_pct
0
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- source include/load_sysvars.inc

####
# Verify default value is 0
####
--echo Default value of optimizer_limit_heuristic_multiplier_pct is 0
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;

####
## Verify that the variable is dynamic
####
--echo optimizer_limit_heuristic_multiplier_pct is a dynamic variable (change to 100)
set @@global.optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;

####
## Restore the default value
####
--echo restore the default value
SET @@global.optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;

####
## Restart the server with a non default value of the variable
####
--echo restart the server with non default value (1)
--let $restart_parameters = restart: --optimizer_limit_heuristic_multiplier_pct=1
--source include/restart_mysqld.inc

SELECT @@global.optimizer_limit_heuristic_multiplier_pct;

--echo restart the server with the default value (0)
--let $restart_parameters = restart:
--source include/restart_mysqld.inc

# check value is default (0)
SELECT @@global.optimizer_limit_heuristic_multiplier_pct;
143 changes: 143 additions & 0 deletions mysql-test/t/optimizer_limit_heuristic_multiplier_pct.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Ported from 5.6 optimizer_low_limit_heuristic.test
# optimizer_low_limit_heuristic=0 means optimizer_limit_heuristic_multiplier_pct = 100.
# optimizer_low_limit_heuristic=1 means optimizer_limit_heuristic_multiplier_pct = 0.
# Even with optimizer_limit_heuristic_multiplier_pct = 0, some plans changed from 5.6

let $start_value= `SELECT @@optimizer_limit_heuristic_multiplier_pct`;
SET optimizer_limit_heuristic_multiplier_pct = 0;

CREATE TABLE t1 (
pk INT PRIMARY KEY AUTO_INCREMENT,
i INT,
j INT,
INDEX (i),
INDEX (j)
);

INSERT INTO t1 (i,j) VALUES (1,1);

let $1=7;
SET @d=1;
while ($1)
{
eval INSERT INTO t1 (i,j) SELECT i+@d, j+@d from t1;
eval SET @d=@d*2;
dec $1;
}

ANALYZE TABLE t1;

let $query= SELECT * FROM t1
WHERE i<100 AND j<40
ORDER BY i LIMIT 5;

SELECT @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

DROP TABLE t1;

CREATE TABLE t0 (
i0 INTEGER NOT NULL
);

INSERT INTO t0 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

CREATE TABLE t1 (
pk INTEGER PRIMARY KEY,
i1 INTEGER NOT NULL,
i2 INTEGER NOT NULL,
INDEX k1 (i1),
INDEX k2 (i1,i2)
) ENGINE=InnoDB;

INSERT INTO t1
SELECT a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0
FROM t0 AS a0, t0 AS a1;

CREATE TABLE t2 (
pk INTEGER PRIMARY KEY,
i1 INTEGER NOT NULL,
INDEX k1 (i1)
) ENGINE=InnoDB;

INSERT INTO t2
SELECT a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0,
a0.i0 + 10*a1.i0 + 100*a0.i0 + 1000*a1.i0
FROM t0 AS a0, t0 AS a1;

ANALYZE TABLE t1,t2;

--echo # Query 1
--echo # Query should use index to optimize the ORDER BY LIMIT

let query=
SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 7000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 2;

select @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;


--echo # Query 2
--echo # Query should use index to optimize the ORDER BY LIMIT

let query=
SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 2
ORDER BY t1.i1 LIMIT 5;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;


--echo # Query 3
--echo # Changing range condition on i1 should make
--echo # key on i1 get picked to give the order.
--echo # Unaffected by system variable.

let query=
SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.i1=t2.i1
WHERE t1.pk > 5000 and t1.i1 > 800
ORDER BY t1.i1 LIMIT 5;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 100;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

eval EXPLAIN $query;

SET optimizer_limit_heuristic_multiplier_pct = 0;
SELECT @@optimizer_limit_heuristic_multiplier_pct;

DROP TABLE t0, t1, t2;

eval SET @@optimizer_limit_heuristic_multiplier_pct = $start_value;
Loading

0 comments on commit 675f82e

Please sign in to comment.