Skip to content

Commit

Permalink
Bug#84958 - InnoDB's MVCC has O(N^2) behaviors (#975)
Browse files Browse the repository at this point in the history
Summary:
(Also fixes PS-4712 - Assertion after bug#84958 fix on FB tree)

Problem:

If there're multiple row versions in InnoDB, reading one row
from PK may have O(N) complexity and reading from secondary keys may have
O(N^2) complexity.

Fix:

The problem occurs when there are many pending versions of the same row,
meaning that the primary key is the same, but a secondary key is different.
The slowdown occurs when the secondary index is traversed. This patch
creates a helper class for the function row_sel_get_clust_rec_for_mysql()
which can remember and re-use cached_clust_rec & cached_old_vers so that
rec_get_offsets() does not need to be called over and over for the
clustered record.

PS-4712 : Assertion after bug#84958 fix on FB tree

Problem:

The function row_sel_build_prev_vers_for_mysql() goes through the old versions
of the clustered index record and identifies the one which can be viewed by the
current transaction (MVCC).  The result is then cached in an object of
Row_sel_get_clust_rec_for_mysql.

The two things that are cached by the helper class
Row_sel_get_clust_rec_for_mysql are cached_clust_rec and cached_old_vers.  If
the latest record is cached_clust_rec, its old version for the given
transaction that is viewable is cached_old_vers. The important point here is
that the record offsets for these two records are not cached.

The record offsets for cached_clust_rec and the cached_old_vers need not be the
same.  This is confirmed by looking at row_sel_build_prev_vers_for_mysql().
The 6th argument which contains the record offsets is an input/output argument.
The input is the offsets of the latest version of clust_rec, and the output is
the record offsets of the older version.  So the record offsets of different
versions of clust_rec can change.

Solution:

Re-calculate the record offsets for the cached old_vers.
Pull Request resolved: #975

Reviewed By: lth

Differential Revision: D14282030

Pulled By: lth

fbshipit-source-id: 7dc2d23
  • Loading branch information
satya-bodapati authored and facebook-github-bot committed Apr 2, 2019
1 parent 7703200 commit f7d9a8d
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 27 deletions.
51 changes: 51 additions & 0 deletions mysql-test/suite/innodb/r/innodb_bug84958.result
@@ -0,0 +1,51 @@
create procedure insert_n(n int)
begin
declare i int default 0;
while i < n do
insert into t1 values (1, 2, 3) on duplicate key update c= c + 1;
set i = i + 1;
end while;
end~~
create function bprrs()
returns int
begin
declare ret int;
select variable_value
from information_schema.global_status
where
variable_name = 'innodb_buffer_pool_read_requests'
into ret;
return ret;
end~~
create table t1 (a int, b int, c int, primary key(a,b), key (b,c)) engine=InnoDB;
begin;
select * from t1;
a b c
call insert_n(100);
set @rr1= bprrs();
select * from t1 force index (b);
a b c
set @rr2= bprrs();
select @rr2 - @rr1 < 1000;
@rr2 - @rr1 < 1000
1
drop table t1;
create table t1 (a int, b int, c int, primary key(a,b), key (b,c)) engine=innodb;
insert into t1 values (1,2,3) on duplicate key update c=c+1;
begin;
select * from t1;
a b c
1 2 3
insert into t1 values (1,2,3) on duplicate key update c=null;
insert into t1 values (1,2,3) on duplicate key update c=null;
insert into t1 values (1,2,3) on duplicate key update c=null;
select * from t1 force index (primary);
a b c
1 2 3
select * from t1 force index (b);
a b c
1 2 3
commit;
drop table t1;
drop procedure insert_n;
drop function bprrs;
86 changes: 86 additions & 0 deletions mysql-test/suite/innodb/t/innodb_bug84958.test
@@ -0,0 +1,86 @@
#
# Bug #84958 InnoDB's MVCC has O(N^2) behaviors
# https://bugs.mysql.com/bug.php?id=84958
#

--source include/have_innodb.inc
--source include/count_sessions.inc

delimiter ~~;
create procedure insert_n(n int)
begin
declare i int default 0;
while i < n do
insert into t1 values (1, 2, 3) on duplicate key update c= c + 1;
set i = i + 1;
end while;
end~~

create function bprrs()
returns int
begin
declare ret int;
select variable_value
from information_schema.global_status
where
variable_name = 'innodb_buffer_pool_read_requests'
into ret;
return ret;
end~~
delimiter ;~~

create table t1 (a int, b int, c int, primary key(a,b), key (b,c)) engine=InnoDB;

connect (con2, localhost, root,,);

begin;
select * from t1;

connection con2;
call insert_n(100);

connection default;
set @rr1= bprrs();

select * from t1 force index (b);
set @rr2= bprrs();
select @rr2 - @rr1 < 1000;

disconnect con2;

connection default;
drop table t1;

# This testcase verifies that we return correct offsets when returning old_version
# of a cluster index record.

create table t1 (a int, b int, c int, primary key(a,b), key (b,c)) engine=innodb;

insert into t1 values (1,2,3) on duplicate key update c=c+1;

begin;
select * from t1;

connect (con2, localhost, root,,);
--connection con2

--let $i=3
while ($i)
{
insert into t1 values (1,2,3) on duplicate key update c=null;
--dec $i
}

--connection default
select * from t1 force index (primary);
select * from t1 force index (b);

commit;

disconnect con2;

connection default;
drop table t1;
drop procedure insert_n;
drop function bprrs;
--source include/wait_until_count_sessions.inc
129 changes: 102 additions & 27 deletions storage/innobase/row/row0sel.cc
Expand Up @@ -3021,35 +3021,90 @@ row_sel_build_prev_vers_for_mysql(
return(err);
}

/*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a
/** Helper class to cache clust_rec and old_ver bug 84958 */

class
Row_sel_get_clust_rec_for_mysql
{
const rec_t* cached_clust_rec;
rec_t* cached_old_vers;
public:
Row_sel_get_clust_rec_for_mysql() :
cached_clust_rec(NULL),
cached_old_vers(NULL) {}

/** Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
interface.
@param[in] prebuilt prebuilt struct in the handle
@param[in] sec_index secondary index where rec resides
@param[in] rec record in a non-clustered index; if
this is a locking read, then rec is not
allowed to be delete-marked, and that would
not make sense either
@param[in] thr query thread
@param[out] out_rec clustered record or an old version of
it, NULL if the old version did not exist
in the read view, i.e., it was a fresh
inserted version
@param[in,out] offsets offsets returned by
rec_get_offsets(rec, sec_index);
out: offsets returned by
rec_get_offsets(out_rec, clust_index)
or offsets of old version of clust_rec
@param[in,out] offset_heap memory heap from which
the offsets are allocated
@param[in] mtr mtr used to get access to the
non-clustered record; the same mtr is used to
access the clustered index
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
dberr_t operator()(
row_prebuilt_t* prebuilt,
dict_index_t* sec_index,
const rec_t* rec,
que_thr_t* thr,
const rec_t** out_rec,
ulint** offsets,
mem_heap_t** offset_heap,
mtr_t* mtr);
};

/** Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
interface.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_sel_get_clust_rec_for_mysql(
/*============================*/
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
dict_index_t* sec_index,/*!< in: secondary index where rec resides */
const rec_t* rec, /*!< in: record in a non-clustered index; if
@param[in] prebuilt prebuilt struct in the handle
@param[in] sec_index secondary index where rec resides
@param[in] rec record in a non-clustered index; if
this is a locking read, then rec is not
allowed to be delete-marked, and that would
not make sense either */
que_thr_t* thr, /*!< in: query thread */
const rec_t** out_rec,/*!< out: clustered record or an old version of
not make sense either
@param[in] thr query thread
@param[out] out_rec clustered record or an old version of
it, NULL if the old version did not exist
in the read view, i.e., it was a fresh
inserted version */
ulint** offsets,/*!< in: offsets returned by
inserted version
@param[in,out] offsets offsets returned by
rec_get_offsets(rec, sec_index);
out: offsets returned by
rec_get_offsets(out_rec, clust_index) */
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
the offsets are allocated */
mtr_t* mtr) /*!< in: mtr used to get access to the
rec_get_offsets(out_rec, clust_index)
or offsets of old version of clust_rec
@param[in,out] offset_heap memory heap from which
the offsets are allocated
@param[in] mtr mtr used to get access to the
non-clustered record; the same mtr is used to
access the clustered index */
access the clustered index
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
Row_sel_get_clust_rec_for_mysql::operator()(
row_prebuilt_t* prebuilt,
dict_index_t* sec_index,
const rec_t* rec,
que_thr_t* thr,
const rec_t** out_rec,
ulint** offsets,
mem_heap_t** offset_heap,
mtr_t* mtr)
{
dict_index_t* clust_index;
const rec_t* clust_rec;
Expand Down Expand Up @@ -3157,15 +3212,34 @@ row_sel_get_clust_rec_for_mysql(
clust_rec, clust_index, *offsets,
trx->read_view)) {

/* The following call returns 'offsets' associated with
'old_vers' */
err = row_sel_build_prev_vers_for_mysql(
trx->read_view, clust_index, prebuilt,
clust_rec, offsets, offset_heap, &old_vers,
mtr);
if (clust_rec != cached_clust_rec) {

/* The following call returns 'offsets'
associated with old_vers' */
err = row_sel_build_prev_vers_for_mysql(
trx->read_view, clust_index, prebuilt,
clust_rec, offsets, offset_heap, &old_vers,
mtr);

if (err != DB_SUCCESS) {
goto err_exit;
}

if (err != DB_SUCCESS || old_vers == NULL) {
cached_clust_rec = clust_rec;
cached_old_vers = old_vers;
} else {
err = DB_SUCCESS;
old_vers = cached_old_vers;
/* When returning old_version, we should return
the offsets of old version cluster index record. */
if (old_vers != NULL) {
*offsets = rec_get_offsets(
old_vers, clust_index, *offsets,
ULINT_UNDEFINED, offset_heap);
}
}

if (old_vers == NULL) {
goto err_exit;
}

Expand Down Expand Up @@ -4025,6 +4099,7 @@ row_search_for_mysql(
const rec_t* rec;
const rec_t* result_rec = NULL;
const rec_t* clust_rec;
Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql;
dberr_t err = DB_SUCCESS;
ibool unique_search = FALSE;
ibool mtr_has_extra_clust_latch = FALSE;
Expand Down

0 comments on commit f7d9a8d

Please sign in to comment.