diff --git a/mysql-test/suite/rocksdb/r/add_index_inplace.result b/mysql-test/suite/rocksdb/r/add_index_inplace.result new file mode 100644 index 000000000000..4a707d3a6f42 --- /dev/null +++ b/mysql-test/suite/rocksdb/r/add_index_inplace.result @@ -0,0 +1,378 @@ +drop table if exists t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `ka` (`a`), + KEY `kab` (`a`,`b`), + KEY `kb` (`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +a b +2 6 +3 7 +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +a b +3 7 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `kab` (`a`,`b`), + KEY `kb` (`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +a b +2 6 +3 7 +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +a b +3 7 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 DROP INDEX ka, DROP INDEX kab, ALGORITHM=INPLACE; +ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `kb` (`b`), + KEY `kab` (`a`,`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +a b +2 6 +3 7 +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +a b +3 7 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ADD INDEX kba(b,a), DROP INDEX kab, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `kb` (`b`), + KEY `kba` (`b`,`a`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +a b +2 6 +3 7 +SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2; +a b +3 7 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `kab` (`a`,`b`), + KEY `ka` (`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(ka) WHERE b > 5; +a b +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +a b +DROP TABLE t1; +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +SHOW COLUMNS IN t1; +Field Type Null Key Default Extra +pk char(8) NO PRI NULL +a varchar(11) YES NULL +b int(10) unsigned YES NULL +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`), + KEY `kab` (`a`,`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3; +pk a b +bbb 2222 2 +DROP TABLE t1; +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +SHOW COLUMNS IN t1; +Field Type Null Key Default Extra +pk char(8) NO PRI NULL +a varchar(11) YES NULL +b int(10) unsigned YES NULL +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE; +ALTER TABLE t1 ADD INDEX ka(a), DROP INDEX kab, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`), + KEY `ka` (`a`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(ka) WHERE a > '2' AND b < 3; +pk a b +bbb 2222 2 +DROP TABLE t1; +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +SHOW COLUMNS IN t1; +Field Type Null Key Default Extra +pk char(8) NO PRI NULL +a varchar(11) YES NULL +b int(10) unsigned YES NULL +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ADD INDEX ka(a), ADD INDEX kb(b), ALGORITHM=INPLACE; +ALTER TABLE t1 DROP INDEX ka, DROP INDEX kb, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `pk` char(8) COLLATE utf8_bin NOT NULL, + `a` varchar(11) COLLATE utf8_bin DEFAULT NULL, + `b` int(10) unsigned DEFAULT NULL, + PRIMARY KEY (`pk`), + KEY `kab` (`a`,`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3; +pk a b +bbb 2222 2 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +CREATE INDEX kb on t1 (b); +CREATE INDEX kba on t1 (b,a); +DROP INDEX ka on t1; +DROP INDEX kab on t1; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `kb` (`b`), + KEY `kba` (`b`,`a`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +a b +2 6 +3 7 +SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2; +a b +3 7 +DROP TABLE t1; +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +DROP INDEX kij ON t1; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL DEFAULT '0', + `j` int(11) DEFAULT NULL, + `k` int(11) DEFAULT NULL, + PRIMARY KEY (`i`), + KEY `j` (`j`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY KEY (i) +PARTITIONS 4 */ +SELECT * FROM t1 ORDER BY i LIMIT 10; +i j k +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 +9 9 9 +10 10 10 +SELECT COUNT(*) FROM t1; +COUNT(*) +100 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +# crash_during_online_index_creation +flush logs; +SET SESSION debug="+d,crash_during_online_index_creation"; +ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; +ERROR HY000: Lost connection to MySQL server during query +SET SESSION debug="-d,crash_during_online_index_creation"; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `ka` (`a`), + KEY `kab` (`a`,`b`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; +# crash_during_index_creation_partition +flush logs; +SET SESSION debug="+d,crash_during_index_creation_partition"; +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +ERROR HY000: Lost connection to MySQL server during query +SET SESSION debug="-d,crash_during_index_creation_partition"; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL DEFAULT '0', + `j` int(11) DEFAULT NULL, + `k` int(11) DEFAULT NULL, + PRIMARY KEY (`i`), + KEY `j` (`j`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY KEY (i) +PARTITIONS 4 */ +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +SELECT * FROM t1 ORDER BY i LIMIT 10; +i j k +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 +9 9 9 +10 10 10 +SELECT COUNT(*) FROM t1; +COUNT(*) +100 +DROP TABLE t1; +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; +# crash_during_index_creation_partition +flush logs; +SET SESSION debug="+d,myrocks_simulate_index_create_rollback"; +# expected assertion failure from sql layer here for alter rollback +call mtr.add_suppression("Assertion `0' failed."); +call mtr.add_suppression("Attempting backtrace. You can use the following information to find out"); +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +ERROR HY000: Lost connection to MySQL server during query +SET SESSION debug="-d,myrocks_simulate_index_create_rollback"; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL DEFAULT '0', + `j` int(11) DEFAULT NULL, + `k` int(11) DEFAULT NULL, + PRIMARY KEY (`i`), + KEY `j` (`j`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY KEY (i) +PARTITIONS 4 */ +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `i` int(11) NOT NULL DEFAULT '0', + `j` int(11) DEFAULT NULL, + `k` int(11) DEFAULT NULL, + PRIMARY KEY (`i`), + KEY `j` (`j`), + KEY `kij` (`i`,`j`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 +/*!50100 PARTITION BY KEY (i) +PARTITIONS 4 */ +SELECT COUNT(*) FROM t1; +COUNT(*) +100 +DROP TABLE t1; +CREATE TABLE t1 (a INT, b TEXT); +ALTER TABLE t1 ADD KEY kb(b(10)); +ERROR HY000: Unsupported collation on string indexed column test.t1.b Use binary collation (binary, latin1_bin, utf8_bin). +ALTER TABLE t1 ADD PRIMARY KEY(a); +DROP TABLE t1; diff --git a/mysql-test/suite/rocksdb/r/information_schema.result b/mysql-test/suite/rocksdb/r/information_schema.result index 526abf425f78..d6177a3f0193 100644 --- a/mysql-test/suite/rocksdb/r/information_schema.result +++ b/mysql-test/suite/rocksdb/r/information_schema.result @@ -63,7 +63,7 @@ SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK'; Variable_name Value rocksdb_pause_background_work ON DROP TABLE t3; -cf_id:0,index_id:271 +cf_id:0,index_id:268 SET GLOBAL ROCKSDB_PAUSE_BACKGROUND_WORK=0; SHOW GLOBAL VARIABLES LIKE 'ROCKSDB_PAUSE_BACKGROUND_WORK'; Variable_name Value diff --git a/mysql-test/suite/rocksdb/t/add_index_inplace.cnf b/mysql-test/suite/rocksdb/t/add_index_inplace.cnf new file mode 100644 index 000000000000..45ec29033c68 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/add_index_inplace.cnf @@ -0,0 +1,5 @@ +[mysql] +no-defaults + +[mysqld.1] +rocksdb_strict_collation_check=1 diff --git a/mysql-test/suite/rocksdb/t/add_index_inplace.test b/mysql-test/suite/rocksdb/t/add_index_inplace.test new file mode 100644 index 000000000000..e0d7a4465c85 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/add_index_inplace.test @@ -0,0 +1,290 @@ +--source include/have_rocksdb.inc +--source include/have_debug.inc + +--disable_warnings +drop table if exists t1; +--enable_warnings + +## +## test adding index inplace +## + +# test basic add +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +DROP TABLE t1; + +# test add + drop (simultaneous) +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +DROP TABLE t1; + +## test multi-drop + multi-add +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 DROP INDEX ka, DROP INDEX kab, ALGORITHM=INPLACE; +ALTER TABLE t1 ADD INDEX kb(b), ADD INDEX kab(a,b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +DROP TABLE t1; + +# test multi add + drop (simultaneous) +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +ALTER TABLE t1 ADD INDEX kb(b), DROP INDEX ka, ADD INDEX kba(b,a), DROP INDEX kab, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2; +DROP TABLE t1; + +# test dropping and adding a key simultaneously w/ same name but different col +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(ka) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > 2; +DROP TABLE t1; + +## +## test adding index inplace w/ various column types +## + +# test basic add +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +SHOW COLUMNS IN t1; +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3; +DROP TABLE t1; + +## test add + drop (simultaneous) +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +SHOW COLUMNS IN t1; +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ALGORITHM=INPLACE; +ALTER TABLE t1 ADD INDEX ka(a), DROP INDEX kab, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(ka) WHERE a > '2' AND b < 3; +DROP TABLE t1; + +### test multi-drop + multi-add +CREATE TABLE t1 (pk CHAR(8) PRIMARY KEY, a VARCHAR(11), b INT UNSIGNED) ENGINE=rocksdb charset utf8 collate utf8_bin; +SHOW CREATE TABLE t1; +SHOW COLUMNS IN t1; +INSERT INTO t1 VALUES ('aaa', '1111', 1); +INSERT INTO t1 VALUES ('bbb', '2222', 2); +INSERT INTO t1 VALUES ('ccc', '3333', 3); +ALTER TABLE t1 ADD INDEX kab(a,b), ADD INDEX ka(a), ADD INDEX kb(b), ALGORITHM=INPLACE; +ALTER TABLE t1 DROP INDEX ka, DROP INDEX kb, ALGORITHM=INPLACE; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kab) WHERE a > '2' AND b < 3; +DROP TABLE t1; + +## +## test adding via CREATE/DROP index syntax +## +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); +CREATE INDEX kb on t1 (b); +CREATE INDEX kba on t1 (b,a); +DROP INDEX ka on t1; +DROP INDEX kab on t1; +SHOW CREATE TABLE t1; +CHECK TABLE t1; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kb) WHERE b > 5; +--sorted_result +SELECT * FROM t1 FORCE INDEX(kba) WHERE a > 2; +DROP TABLE t1; + +# +# Create tables with partitions and try to update/select from them. +# +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; + +--disable_query_log +let $max = 100; +let $i = 1; +while ($i <= $max) { + let $insert = INSERT INTO t1 VALUES ($i, $i, $i); + inc $i; + eval $insert; +} +--enable_query_log + +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; +DROP INDEX kij ON t1; +SHOW CREATE TABLE t1; + +SELECT * FROM t1 ORDER BY i LIMIT 10; +SELECT COUNT(*) FROM t1; + +DROP TABLE t1; + +# +# test crash recovery +# + +CREATE TABLE t1 (a INT, b INT, KEY ka(a), KEY kab(a,b)) ENGINE=RocksDB; +INSERT INTO t1 (a, b) VALUES (1, 5); +INSERT INTO t1 (a, b) VALUES (2, 6); +INSERT INTO t1 (a, b) VALUES (3, 7); + +--echo # crash_during_online_index_creation +flush logs; + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +SET SESSION debug="+d,crash_during_online_index_creation"; +--error 2013 +ALTER TABLE t1 ADD INDEX kb(b), ALGORITHM=INPLACE; + +--enable_reconnect +--source include/wait_until_connected_again.inc + +SET SESSION debug="-d,crash_during_online_index_creation"; + +SHOW CREATE TABLE t1; +CHECK TABLE t1; + +DROP TABLE t1; + +# +# Test crash recovery with partitioned tables +# +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; + +--disable_query_log +let $max = 100; +let $i = 1; +while ($i <= $max) { + let $insert = INSERT INTO t1 VALUES ($i, $i, $i); + inc $i; + eval $insert; +} +--enable_query_log + +--echo # crash_during_index_creation_partition +flush logs; + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +SET SESSION debug="+d,crash_during_index_creation_partition"; +--error 2013 +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; + +--enable_reconnect +--source include/wait_until_connected_again.inc + +SET SESSION debug="-d,crash_during_index_creation_partition"; + +SHOW CREATE TABLE t1; + +# here, the index numbers should be higher because previously 4 index numbers +# were allocated for the partitioned table +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; + +SELECT * FROM t1 ORDER BY i LIMIT 10; +SELECT COUNT(*) FROM t1; + +DROP TABLE t1; + +# +# Test rollback on partitioned tables for inplace alter +# +CREATE TABLE t1 (i INT, j INT, k INT, PRIMARY KEY (i), KEY(j)) ENGINE = ROCKSDB PARTITION BY KEY(i) PARTITIONS 4; + +--disable_query_log +let $max = 100; +let $i = 1; +while ($i <= $max) { + let $insert = INSERT INTO t1 VALUES ($i, $i, $i); + inc $i; + eval $insert; +} +--enable_query_log + +--echo # crash_during_index_creation_partition +flush logs; + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +SET SESSION debug="+d,myrocks_simulate_index_create_rollback"; + +--echo # expected assertion failure from sql layer here for alter rollback +call mtr.add_suppression("Assertion `0' failed."); +call mtr.add_suppression("Attempting backtrace. You can use the following information to find out"); + +--error 2013 + +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; + +--enable_reconnect +--source include/wait_until_connected_again.inc + +SET SESSION debug="-d,myrocks_simulate_index_create_rollback"; + +SHOW CREATE TABLE t1; + +# here, the index numbers should be higher because previously 4 index numbers +# were allocated for the partitioned table +ALTER TABLE t1 ADD INDEX kij(i,j), ALGORITHM=INPLACE; + +SHOW CREATE TABLE t1; +SELECT COUNT(*) FROM t1; + +DROP TABLE t1; + +# test failure in prepare phase (due to collation) +CREATE TABLE t1 (a INT, b TEXT); + +--error 1105 +ALTER TABLE t1 ADD KEY kb(b(10)); +ALTER TABLE t1 ADD PRIMARY KEY(a); +DROP TABLE t1; + + diff --git a/rocksdb b/rocksdb index f9b14be4951e..a4955b39ac48 160000 --- a/rocksdb +++ b/rocksdb @@ -1 +1 @@ -Subproject commit f9b14be4951e9aad73b7fcbc120b04127fe8511f +Subproject commit a4955b39ac4849e1dea33d70cd17f0008c90a117 diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index db0919b30f8a..98ae9519fd2a 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -4206,49 +4206,17 @@ void ha_rocksdb::setup_field_converters() m_null_bytes_in_rec= null_bytes; } - -int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) +int ha_rocksdb::alloc_key_buffers(const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg) { - DBUG_ENTER("ha_rocksdb::open"); - - if (m_table_handler != nullptr) - { - rdb_open_tables.release_table_handler(m_table_handler); - } - m_table_handler= rdb_open_tables.get_table_handler(name); - if (m_table_handler == nullptr) - { - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - - my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock, - nullptr); - m_io_perf.init(&m_table_handler->m_table_perf_context, - &m_table_handler->m_io_perf_read, &stats); - - /* - note: m_pk_descr may be non-NULL here, when using partitions. It seems, - it's ok to discard it - */ - std::string fullname; - if (rdb_normalize_tablename(name, &fullname)) { - return HA_ERR_INTERNAL_ERROR; - } - - m_tbl_def= ddl_manager.find(fullname); - if (m_tbl_def == nullptr) - { - my_error(ER_INTERNAL_ERROR, MYF(0), - "Attempt to open a table that is not present in RocksDB-SE data dictionary"); - DBUG_RETURN(HA_ERR_INTERNAL_ERROR); - } - m_lock_rows= RDB_LOCK_NONE; + DBUG_ENTER("ha_rocksdb::alloc_key_buffers"); + DBUG_ASSERT(m_pk_tuple == nullptr); - m_key_descr_arr= m_tbl_def->m_key_descr_arr; + std::shared_ptr* kd_arr= tbl_def_arg->m_key_descr_arr; uint key_len= 0; - m_pk_descr= m_key_descr_arr[pk_index(table, m_tbl_def)]; - if (has_hidden_pk(table)) + m_pk_descr= kd_arr[pk_index(table_arg, tbl_def_arg)]; + if (has_hidden_pk(table_arg)) { m_pk_key_parts= 1; } @@ -4259,7 +4227,8 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) key_len= table->key_info[table->s->primary_key].key_length; } - m_pk_descr->setup(table, m_tbl_def); // move this into get_table_handler() ?? + // move this into get_table_handler() ?? + m_pk_descr->setup(table_arg, tbl_def_arg); m_pk_tuple= reinterpret_cast(my_malloc(key_len, MYF(0))); if (m_pk_tuple == nullptr) @@ -4274,32 +4243,23 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } - /* - Full table scan actually uses primary key - (UPDATE needs to know this, otherwise it will go into infinite loop on - queries like "UPDATE tbl SET pk=pk+100") - */ - key_used_on_scan= table->s->primary_key; - /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */ uint max_packed_sk_len= pack_key_len; - for (uint i= 0; i < table->s->keys; i++) + for (uint i= 0; i < table_arg->s->keys; i++) { - if (i == table->s->primary_key) /* Primary key was processed above */ + if (i == table_arg->s->primary_key) /* Primary key was processed above */ continue; // TODO: move this into get_table_handler() ?? - m_key_descr_arr[i]->setup(table, m_tbl_def); + kd_arr[i]->setup(table_arg, tbl_def_arg); - uint packed_len= m_key_descr_arr[i]->max_storage_fmt_length(); + uint packed_len= kd_arr[i]->max_storage_fmt_length(); if (packed_len > max_packed_sk_len) { max_packed_sk_len= packed_len; } } - setup_field_converters(); - size_t tails_size= max_packed_sk_len + RDB_CHECKSUM_CHUNK_SIZE + RDB_UNPACK_HEADER_SIZE; @@ -4321,6 +4281,95 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); } + DBUG_RETURN(0); +} + +void ha_rocksdb::free_key_buffers() +{ + my_free(m_pk_tuple); + m_pk_tuple= nullptr; + + my_free(m_pk_packed_tuple); + m_pk_packed_tuple= nullptr; + + my_free(m_sk_packed_tuple); + m_sk_packed_tuple= nullptr; + + my_free(m_sk_match_prefix_buf); + m_sk_match_prefix_buf= nullptr; + + my_free(m_sk_packed_tuple_old); + m_sk_packed_tuple_old= nullptr; + + my_free(m_end_key_packed_tuple); + m_end_key_packed_tuple= nullptr; + + my_free(m_sk_tails); + m_sk_tails= nullptr; + + my_free(m_sk_tails_old); + m_sk_tails_old= nullptr; + + my_free(m_pack_buffer); + m_pack_buffer= nullptr; +} + +int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) +{ + DBUG_ENTER("ha_rocksdb::open"); + + if (m_table_handler != nullptr) + { + rdb_open_tables.release_table_handler(m_table_handler); + } + m_table_handler= rdb_open_tables.get_table_handler(name); + if (m_table_handler == nullptr) + { + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + + my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock, + nullptr); + m_io_perf.init(&m_table_handler->m_table_perf_context, + &m_table_handler->m_io_perf_read, &stats); + + /* + note: m_pk_descr may be non-NULL here, when using partitions. It seems, + it's ok to discard it + */ + std::string fullname; + if (rdb_normalize_tablename(name, &fullname)) + { + return HA_ERR_INTERNAL_ERROR; + } + + m_tbl_def= ddl_manager.find(fullname); + if (m_tbl_def == nullptr) + { + my_error(ER_INTERNAL_ERROR, MYF(0), + "Attempt to open a table that is not present in RocksDB-SE data " + "dictionary"); + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + m_lock_rows= RDB_LOCK_NONE; + + m_key_descr_arr= m_tbl_def->m_key_descr_arr; + + /* + Full table scan actually uses primary key + (UPDATE needs to know this, otherwise it will go into infinite loop on + queries like "UPDATE tbl SET pk=pk+100") + */ + key_used_on_scan= table->s->primary_key; + + int err; + if ((err = alloc_key_buffers(table, m_tbl_def))) + { + DBUG_RETURN(err); + } + + setup_field_converters(); + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); /* @@ -4354,32 +4403,7 @@ int ha_rocksdb::close(void) m_pk_descr= nullptr; m_key_descr_arr= nullptr; - my_free(m_pk_tuple); - m_pk_tuple= nullptr; - - my_free(m_pk_packed_tuple); - m_pk_packed_tuple= nullptr; - - my_free(m_sk_packed_tuple); - m_sk_packed_tuple= nullptr; - - my_free(m_sk_match_prefix_buf); - m_sk_match_prefix_buf= nullptr; - - my_free(m_sk_packed_tuple_old); - m_sk_packed_tuple_old= nullptr; - - my_free(m_end_key_packed_tuple); - m_end_key_packed_tuple= nullptr; - - my_free(m_sk_tails); - m_sk_tails= nullptr; - - my_free(m_sk_tails_old); - m_sk_tails_old= nullptr; - - my_free(m_pack_buffer); - m_pack_buffer= nullptr; + free_key_buffers(); my_free(m_encoder_arr); m_encoder_arr= nullptr; @@ -4485,7 +4509,9 @@ int ha_rocksdb::create_key_defs(const TABLE *table_arg, const std::string& db_table, HA_CREATE_INFO *create_info, Rdb_tbl_def *tbl_def_arg, - const Rdb_tbl_def *old_tbl_def_arg /* = null */) + const TABLE *old_table_arg /* = nullptr */, + const Rdb_tbl_def *old_tbl_def_arg + /* = nullptr */) { DBUG_ASSERT(table_arg != nullptr); DBUG_ASSERT(create_info != nullptr); @@ -4538,7 +4564,8 @@ int ha_rocksdb::create_key_defs(const TABLE *table_arg, in-place alter table. Copy over existing keys from the old_tbl_def and generate the necessary new key definitions if any. */ - if (create_inplace_key_defs(table_arg, tbl_def_arg, old_tbl_def_arg, cfs)) + if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg, + old_tbl_def_arg, cfs)) { DBUG_RETURN(1); } @@ -4625,15 +4652,8 @@ int ha_rocksdb::create_cfs(const TABLE *table_arg, const std::string& db_table, index comment has Column Family name. If there was no comment, we get NULL, and it means use the default column family. */ - const char *comment; - const char *key_name; - if (is_hidden_pk(i, table_arg, tbl_def_arg)) { - comment= nullptr; - key_name= HIDDEN_PK_NAME; - } else { - comment= table_arg->key_info[i].comment.str; - key_name= table_arg->key_info[i].name; - } + const char *comment = get_key_comment(i, table_arg, tbl_def_arg); + const char *key_name = get_key_name(i, table_arg, tbl_def_arg); if (looks_like_per_index_cf_typo(comment)) { @@ -4680,6 +4700,7 @@ int ha_rocksdb::create_cfs(const TABLE *table_arg, const std::string& db_table, */ int ha_rocksdb::create_inplace_key_defs(const TABLE *table_arg, Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg, const std::array& cfs) { @@ -4693,29 +4714,15 @@ int ha_rocksdb::create_inplace_key_defs(const TABLE *table_arg, old_tbl_def_arg->m_key_descr_arr; std::shared_ptr* new_key_descr= tbl_def_arg->m_key_descr_arr; + std::unordered_map old_key_pos = + get_old_key_positions(table_arg, tbl_def_arg, old_table_arg, + old_tbl_def_arg); - std::unordered_map name_pos; uint i; - - for (i= 0; i < old_tbl_def_arg->m_key_count; i++) - { - name_pos[old_key_descr[i]->m_name] = i; - } - for (i= 0; i < tbl_def_arg->m_key_count; i++) { - const char* key_name; - if (is_hidden_pk(i, table_arg, tbl_def_arg)) - { - key_name= HIDDEN_PK_NAME; - } - else - { - key_name= table_arg->key_info[i].name; - } - - auto it = name_pos.find(key_name); - if (it != name_pos.end()) + auto it = old_key_pos.find(get_key_name(i, table_arg, tbl_def_arg)); + if (it != old_key_pos.end()) { /* Found matching index in old table definition, so copy it over to the @@ -4769,6 +4776,92 @@ int ha_rocksdb::create_inplace_key_defs(const TABLE *table_arg, DBUG_RETURN(0); } +std::unordered_map ha_rocksdb::get_old_key_positions( + const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg, + const TABLE* old_table_arg, + const Rdb_tbl_def* old_tbl_def_arg) +{ + DBUG_ASSERT(table_arg != nullptr); + DBUG_ASSERT(old_table_arg != nullptr); + DBUG_ASSERT(tbl_def_arg != nullptr); + DBUG_ASSERT(old_tbl_def_arg != nullptr); + + DBUG_ENTER("get_old_key_positions"); + + std::shared_ptr* old_key_descr= + old_tbl_def_arg->m_key_descr_arr; + std::unordered_map old_key_pos; + std::unordered_map new_key_pos; + uint i; + + for (i= 0; i < tbl_def_arg->m_key_count; i++) + { + new_key_pos[get_key_name(i, table_arg, tbl_def_arg)] = i; + } + + for (i= 0; i < old_tbl_def_arg->m_key_count; i++) + { + if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) + { + old_key_pos[old_key_descr[i]->m_name] = i; + continue; + } + + /* + In case of matching key name, need to check key parts of keys as well, + in case a simultaneous drop + add is performed, where the key name is the + same but the key parts are different. + + Example: + CREATE TABLE t1 (a INT, b INT, KEY ka(a)) ENGINE=RocksDB; + ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE; + */ + const KEY* old_key = &old_table_arg->key_info[i]; + auto it = new_key_pos.find(old_key->name); + if (it == new_key_pos.end()) + { + continue; + } + + KEY* new_key = &table_arg->key_info[it->second]; + + if (!compare_key_parts(old_key, new_key)) + { + old_key_pos[old_key->name] = i; + } + } + + DBUG_RETURN(old_key_pos); +} + +/* Check two keys to ensure that key parts within keys match */ +int ha_rocksdb::compare_key_parts(const KEY* old_key, const KEY* new_key) +{ + DBUG_ASSERT(old_key != nullptr); + DBUG_ASSERT(new_key != nullptr); + + DBUG_ENTER("compare_key_parts"); + + /* Skip if key parts do not match, as it is a different key */ + if (new_key->user_defined_key_parts != old_key->user_defined_key_parts) + { + DBUG_RETURN(1); + } + + /* Check to see that key parts themselves match */ + for (uint i= 0; i < old_key->user_defined_key_parts; i++) + { + if (strcmp(old_key->key_part[i].field->field_name, + new_key->key_part[i].field->field_name) != 0) + { + DBUG_RETURN(1); + } + } + + DBUG_RETURN(0); +} + /* Create key definition needed for storing data in rocksdb. This can be called either during CREATE table or doing ADD index operations. @@ -4821,13 +4914,7 @@ int ha_rocksdb::create_key_def(const TABLE *table_arg, uint i, kv_version= Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; } - const char *key_name; - if (is_hidden_pk(i, table_arg, m_tbl_def)) { - key_name= HIDDEN_PK_NAME; - } else { - key_name= table_arg->key_info[i].name; - } - + const char *key_name = get_key_name(i, table_arg, m_tbl_def); *new_key_def= std::make_shared( index_id, i, cf_info.cf_handle, index_dict_version, index_type, kv_version, cf_info.is_reverse_cf, cf_info.is_auto_cf, key_name); @@ -6291,8 +6378,10 @@ int ha_rocksdb::index_next_with_direction(uchar *buf, bool move_forward) int rc; DBUG_ENTER("ha_rocksdb::index_next"); - if (active_index == table->s->primary_key) + if (active_index == pk_index(table, m_tbl_def)) + { rc= rnd_next_with_direction(buf, move_forward); + } else { if (m_skip_scan_it_next_call) @@ -6671,6 +6760,32 @@ bool ha_rocksdb::is_pk(const uint index, const TABLE* table_arg, is_hidden_pk(index, table_arg, tbl_def_arg); } +const char* ha_rocksdb::get_key_name(const uint index, const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg) +{ + DBUG_ASSERT(table_arg != nullptr); + + if (is_hidden_pk(index, table_arg, tbl_def_arg)) + { + return HIDDEN_PK_NAME; + } + + return table_arg->key_info[index].name; +} + +const char* ha_rocksdb::get_key_comment(const uint index, + const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg) +{ + DBUG_ASSERT(table_arg != nullptr); + + if (is_hidden_pk(index, table_arg, tbl_def_arg)) + { + return nullptr; + } + + return table_arg->key_info[index].comment.str; +} int ha_rocksdb::write_row(uchar *buf) { @@ -7062,7 +7177,9 @@ int ha_rocksdb::update_pk(uint key_id, const struct update_row_info& row_info, return 0; } -int ha_rocksdb::update_sk(uint key_id, const struct update_row_info& row_info) +int ha_rocksdb::update_sk(const TABLE* table_arg, + const std::shared_ptr& kd, + const struct update_row_info& row_info) { int new_packed_size; int new_key_tail_size; @@ -7074,6 +7191,7 @@ int ha_rocksdb::update_sk(uint key_id, const struct update_row_info& row_info) rocksdb::Slice new_value_slice; rocksdb::Slice old_key_slice; + uint key_id = kd->get_keyno(); /* Can skip updating this key if none of the key fields have changed. */ @@ -7084,8 +7202,7 @@ int ha_rocksdb::update_sk(uint key_id, const struct update_row_info& row_info) bool store_checksums= should_store_checksums(); - const std::shared_ptr& kd= m_key_descr_arr[key_id]; - new_packed_size= kd->pack_record(table, m_pack_buffer, row_info.new_data, + new_packed_size= kd->pack_record(table_arg, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, m_sk_tails, &new_key_tail_size, store_checksums, row_info.hidden_pk_id); @@ -7093,7 +7210,8 @@ int ha_rocksdb::update_sk(uint key_id, const struct update_row_info& row_info) if (row_info.old_data != nullptr) { // The old value - old_packed_size= kd->pack_record(table, m_pack_buffer, row_info.old_data, + old_packed_size= kd->pack_record(table_arg, m_pack_buffer, + row_info.old_data, m_sk_packed_tuple_old, m_sk_tails_old, &old_key_tail_size, store_checksums, row_info.hidden_pk_id); @@ -7145,7 +7263,7 @@ int ha_rocksdb::update_sk(uint key_id, const struct update_row_info& row_info) */ rocksdb::WriteBatchBase *write_batch; if ((row_info.tx->m_ddl_transaction && - !(table->key_info[key_id].flags & HA_NOSAME)) || + !(table_arg->key_info[key_id].flags & HA_NOSAME)) || row_info.skip_unique_check) { write_batch= row_info.tx->get_blind_write_batch(); @@ -7172,13 +7290,14 @@ int ha_rocksdb::update_indexes(const struct update_row_info& row_info, for (uint key_id= 0; key_id < m_tbl_def->m_key_count; key_id++) { + const std::shared_ptr& kd= m_key_descr_arr[key_id]; if (is_pk(key_id, table, m_tbl_def)) { rc= update_pk(key_id, row_info, pk_changed); } else { - rc= update_sk(key_id, row_info); + rc= update_sk(table, kd, row_info); } if (rc != 0) @@ -8866,7 +8985,8 @@ my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( if (ha_alter_info->handler_flags & ~(my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX)) + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX)) { DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); } @@ -8908,6 +9028,7 @@ bool ha_rocksdb::prepare_inplace_alter_table( { DBUG_ENTER("prepare_inplace_alter_table"); uint n_dropped_keys= ha_alter_info->index_drop_count; + uint n_added_keys= ha_alter_info->index_add_count; uint old_n_keys= m_tbl_def->m_key_count; uint new_n_keys= altered_table->s->keys; if (has_hidden_pk(altered_table)) @@ -8927,13 +9048,39 @@ bool ha_rocksdb::prepare_inplace_alter_table( new_tdef->m_hidden_pk_val= m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed); + if (ha_alter_info->handler_flags & + (my_core::Alter_inplace_info::DROP_INDEX | + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX) + && create_key_defs(altered_table, + m_tbl_def->m_dbname_tablename, + ha_alter_info->create_info, + new_tdef, table, m_tbl_def)) + { + /* Delete the new key descriptors */ + delete[] new_key_descr; + + /* + Explicitly mark as nullptr so we don't accidentally remove entries + from data dictionary on cleanup (or cause double delete[]). + */ + new_tdef->m_key_descr_arr= nullptr; + delete new_tdef; + DBUG_RETURN(1); + } + + std::unordered_set> added_indexes; std::unordered_set dropped_index_ids; + uint i; + uint j; + const KEY* key; + /* Determine which(if any) key definition(s) need to be dropped */ - for (uint i= 0; i < ha_alter_info->index_drop_count; i++) + for (i = 0; i < ha_alter_info->index_drop_count; i++) { const KEY* key = ha_alter_info->index_drop_buffer[i]; - for (uint j= 0; j < old_n_keys; j++) + for (j = 0; j < old_n_keys; j++) { if (!old_key_descr[j]->m_name.compare(key->name)) { @@ -8945,30 +9092,26 @@ bool ha_rocksdb::prepare_inplace_alter_table( DBUG_ASSERT(dropped_index_ids.size() == ha_alter_info->index_drop_count); - /* Check if indexes need to be dropped */ - if (ha_alter_info->handler_flags & - (my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX) - && create_key_defs(altered_table, - m_tbl_def->m_dbname_tablename, - ha_alter_info->create_info, - new_tdef, m_tbl_def)) + /* Determine which(if any) key definitions(s) need to be added */ + for (i = 0; i < ha_alter_info->index_add_count; i++) { - /* Delete the new key descriptors */ - for (uint i= 0; i < new_n_keys; i++) { - if (new_key_descr[i]) + key = &ha_alter_info->key_info_buffer[ + ha_alter_info->index_add_buffer[i]]; + for (j = 0; j < new_n_keys; j++) + { + if (!new_key_descr[j]->m_name.compare(key->name)) { - new_key_descr[i] = nullptr; + added_indexes.insert(new_key_descr[j]); + break; } } - - delete[] new_key_descr; - DBUG_RETURN(1); } + DBUG_ASSERT(added_indexes.size() == ha_alter_info->index_add_count); + ha_alter_info->handler_ctx= new Rdb_inplace_alter_ctx( new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys, - dropped_index_ids, n_dropped_keys); + added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys); DBUG_RETURN(0); } @@ -8993,12 +9136,120 @@ bool ha_rocksdb::prepare_inplace_alter_table( @retval true Error @retval false Success */ -bool ha_rocksdb::inplace_alter_table(TABLE* /* altered_table */, +bool ha_rocksdb::inplace_alter_table(TABLE* altered_table, my_core::Alter_inplace_info *ha_alter_info) { /* Do nothing for now */ DBUG_ENTER("inplace_alter_table"); DBUG_ASSERT(ha_alter_info->handler_ctx); + + Rdb_inplace_alter_ctx* ctx= + static_cast (ha_alter_info->handler_ctx); + + if (ha_alter_info->handler_flags & my_core::Alter_inplace_info::ADD_INDEX) + { + /* + Buffers need to be set up again to account for new, possibly longer + secondary keys. + */ + free_key_buffers(); + if (alloc_key_buffers(altered_table, ctx->m_new_tdef)) + { + DBUG_RETURN(1); + } + + /* Populate all new secondary keys by scanning primary key */ + if (inplace_populate_sk(altered_table, ctx->m_added_indexes)) + { + DBUG_RETURN(1); + } + } + DBUG_EXECUTE_IF("myrocks_simulate_index_create_rollback", DBUG_RETURN(1);); + + DBUG_RETURN(0); +} + +/** + Scan the Primary Key index entries and populate the new secondary keys. +*/ +int ha_rocksdb::inplace_populate_sk(const TABLE* new_table_arg, + const std::unordered_set>& indexes) +{ + DBUG_ENTER("ha_rocksdb::inplace_populate_sk"); + std::unique_ptr wb= dict_manager.begin(); + rocksdb::WriteBatch *batch= wb.get(); + + /* Update the data dictionary */ + std::unordered_set create_index_ids; + for (auto& index : indexes) + { + create_index_ids.insert(index->get_gl_index_id()); + } + dict_manager.add_create_index(create_index_ids, batch); + dict_manager.commit(batch); + + /* + Note: We pass in the currently existing table + tbl_def object here, as the + pk index position may have changed in the case of hidden primary keys. + */ + uint pk= pk_index(table, m_tbl_def); + ha_index_init(pk, true); + + const bool hidden_pk_exists = has_hidden_pk(table); + struct update_row_info row_info; + + row_info.tx = get_or_create_tx(table->in_use); + row_info.new_data = table->record[0]; + row_info.old_data = nullptr; + /* TODO(alexyang): no support for unique secondary keys yet */ + row_info.skip_unique_check = skip_unique_check(); + + int res; + for (res = index_first(table->record[0]); res == 0; + res = index_next(table->record[0])) + { + for (auto& index : indexes) + { + if (hidden_pk_exists && + read_hidden_pk_id_from_rowkey(&row_info.hidden_pk_id)) + { + // NO_LINT_DEBUG + sql_print_error("Error retrieving hidden pk id."); + ha_index_end(); + DBUG_RETURN(1); + } + + res = update_sk(new_table_arg, index, row_info); + if (res != 0) + { + // NO_LINT_DEBUG + sql_print_error("Failed to create new secondary key entry."); + ha_index_end(); + DBUG_RETURN(res); + } + + if (do_bulk_commit(row_info.tx)) + { + // NO_LINT_DEBUG + sql_print_error("Bulk commit failed during index creation."); + ha_index_end(); + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + } + } + + if (res != 0 && res != HA_ERR_END_OF_FILE) + { + // NO_LINT_DEBUG + sql_print_error("Error retrieving index entry from primary key."); + ha_index_end(); + DBUG_RETURN(1); + } + + DBUG_EXECUTE_IF("crash_during_online_index_creation", DBUG_SUICIDE();); + + ha_index_end(); + DBUG_RETURN(0); } @@ -9039,19 +9290,25 @@ bool ha_rocksdb::commit_inplace_alter_table( my_core::Alter_inplace_info *ha_alter_info, bool commit) { - DBUG_ENTER("commit_inplace_alter_table"); - Rdb_inplace_alter_ctx* ctx= + Rdb_inplace_alter_ctx* ctx0= static_cast (ha_alter_info->handler_ctx); + DBUG_ENTER("commit_inplace_alter_table"); + /* IMPORTANT: When rollback is requested, mysql will abort with an assertion failure. That means every failed commit during inplace alter - table will result in a fatal error on the server. + table will result in a fatal error on the server. Indexes ongoing creation + will be detected when the server restarts, and dropped. + + For partitioned tables, a rollback call to this function (commit == false) + is done for each partition. A successful commit call only executes once + for all partitions. */ if (!commit) { /* If ctx has not been created yet, nothing to do here */ - if (!ctx) + if (!ctx0) { DBUG_RETURN(0); } @@ -9061,47 +9318,98 @@ bool ha_rocksdb::commit_inplace_alter_table( erase the mappings inside the ddl_manager, as the old_key_descr is still using them. */ - if (ctx->m_new_key_descr) + if (ctx0->m_new_key_descr) { /* Delete the new key descriptors */ - for (uint i= 0; i < ctx->m_new_n_keys; i++) { - if (ctx->m_new_key_descr[i]) - { - ctx->m_new_key_descr[i] = nullptr; - } + for (uint i = 0; i < ctx0->m_new_tdef->m_key_count; i++) + { + ctx0->m_new_key_descr[i]= nullptr; } - delete[] ctx->m_new_key_descr; - ctx->m_new_key_descr = nullptr; + delete[] ctx0->m_new_key_descr; + ctx0->m_new_key_descr = nullptr; + ctx0->m_new_tdef->m_key_descr_arr = nullptr; + + delete ctx0->m_new_tdef; } DBUG_RETURN(0); } + DBUG_ASSERT(ctx0); + + /* + For partitioned tables, we need to commit all changes to all tables at + once, unlike in the other inplace alter API methods. + */ + inplace_alter_handler_ctx** ctx_array; + inplace_alter_handler_ctx* ctx_single[2]; + + if (ha_alter_info->group_commit_ctx) + { + DBUG_EXECUTE_IF("crash_during_index_creation_partition", + DBUG_SUICIDE();); + ctx_array = ha_alter_info->group_commit_ctx; + } + else + { + ctx_single[0] = ctx0; + ctx_single[1] = nullptr; + ctx_array = ctx_single; + } + + DBUG_ASSERT(ctx0 == ctx_array[0]); + ha_alter_info->group_commit_ctx = nullptr; + if (ha_alter_info->handler_flags & (my_core::Alter_inplace_info::DROP_INDEX | - my_core::Alter_inplace_info::DROP_UNIQUE_INDEX)) + my_core::Alter_inplace_info::DROP_UNIQUE_INDEX | + my_core::Alter_inplace_info::ADD_INDEX)) { std::unique_ptr wb= dict_manager.begin(); rocksdb::WriteBatch *batch= wb.get(); + std::unordered_set create_index_ids; - dict_manager.add_drop_index(ctx->m_dropped_index_ids, batch); - - m_tbl_def= ctx->m_new_tdef; + m_tbl_def= ctx0->m_new_tdef; m_key_descr_arr= m_tbl_def->m_key_descr_arr; m_pk_descr= m_key_descr_arr[pk_index(altered_table, m_tbl_def)]; dict_manager.lock(); - if (ddl_manager.put_and_write(ctx->m_new_tdef, batch) || - dict_manager.commit(batch)) + for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) + { + Rdb_inplace_alter_ctx* ctx= static_cast (*pctx); + + /* Mark indexes to be dropped */ + dict_manager.add_drop_index(ctx->m_dropped_index_ids, batch); + + for (auto& index : ctx->m_added_indexes) + { + create_index_ids.insert(index->get_gl_index_id()); + } + + if (ddl_manager.put_and_write(ctx->m_new_tdef, batch)) + { + /* + Failed to write new entry into data dictionary, this should never + happen. + */ + DBUG_ASSERT(0); + } + } + + if (dict_manager.commit(batch)) { /* Should never reach here. We assume MyRocks will abort if commit fails. */ DBUG_ASSERT(0); } + dict_manager.unlock(); + /* Mark ongoing create indexes as finished/remove from data dictionary */ + dict_manager.finish_indexes_operation(create_index_ids, + Rdb_key_def::DDL_CREATE_INDEX_ONGOING); rdb_drop_idx_thread.signal(); } diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 1953eebd9dbc..4db69e5619d5 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -394,6 +394,7 @@ class ha_rocksdb: public my_core::handler int create_key_defs(const TABLE *table_arg, const std::string& db_table, HA_CREATE_INFO *create_info, Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg= nullptr, const Rdb_tbl_def *old_tbl_def_arg= nullptr) MY_ATTRIBUTE((__nonnull__(2, 4, 5), __warn_unused_result__)); int secondary_index_read(const int keyno, uchar *buf) @@ -485,6 +486,9 @@ class ha_rocksdb: public my_core::handler void get_storage_type(Rdb_field_encoder *encoder, uint kp); void setup_field_converters(); + int alloc_key_buffers(const TABLE* table_arg, const Rdb_tbl_def* tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void free_key_buffers(); // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE rocksdb::Range get_range(int i, uchar buf[]) const; @@ -605,6 +609,14 @@ class ha_rocksdb: public my_core::handler rocksdb::Slice *packed_rec) MY_ATTRIBUTE((__nonnull__)); + static const char* get_key_name(const uint index, const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static const char* get_key_comment(const uint index, const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + static bool is_hidden_pk(const uint index, const TABLE* table_arg, const Rdb_tbl_def* tbl_def_arg) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -719,17 +731,31 @@ class ha_rocksdb: public my_core::handler int create_cfs(const TABLE *table_arg, const std::string& db_table, Rdb_tbl_def *tbl_def_arg, std::array* cfs); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int create_key_def(const TABLE *table_arg, uint i, const Rdb_tbl_def* tbl_def_arg, std::shared_ptr* new_key_def, const struct key_def_cf_info& cf_info); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int create_inplace_key_defs(const TABLE *table_arg, Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg, const std::array& cfs); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + std::unordered_map get_old_key_positions( + const TABLE* table_arg, + const Rdb_tbl_def* tbl_def_arg, + const TABLE* old_table_arg, + const Rdb_tbl_def* old_tbl_def_arg) + MY_ATTRIBUTE((__nonnull__)); + + int compare_key_parts(const KEY* old_key, const KEY* new_key); + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); int index_first_intern(uchar *buf) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); @@ -754,7 +780,9 @@ class ha_rocksdb: public my_core::handler bool* pk_changed); int update_pk(uint key_id, const struct update_row_info& row_info, bool pk_changed); - int update_sk(uint key_id, const struct update_row_info& row_info); + int update_sk(const TABLE* table_arg, + const std::shared_ptr& kd, + const struct update_row_info& row_info); int update_indexes(const struct update_row_info& row_info, bool pk_changed); int read_key_exact(const std::shared_ptr& kd, @@ -801,6 +829,10 @@ class ha_rocksdb: public my_core::handler MY_ATTRIBUTE((__nonnull__)); bool contains_foreign_key(THD* thd) MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int inplace_populate_sk(const TABLE* table_arg, + const std::unordered_set>& indexes); + public: int index_init(uint idx, bool sorted) MY_ATTRIBUTE((__warn_unused_result__)); int index_end() MY_ATTRIBUTE((__warn_unused_result__)); @@ -935,21 +967,31 @@ struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx /* Stores the new number of key definitions */ const uint m_new_n_keys; + /* Stores the added key glids */ + std::unordered_set> m_added_indexes; + /* Stores the dropped key glids */ std::unordered_set m_dropped_index_ids; + /* Stores number of keys to add */ + const uint m_n_added_keys; + /* Stores number of keys to drop */ const uint m_n_dropped_keys; Rdb_inplace_alter_ctx( Rdb_tbl_def* new_tdef, std::shared_ptr* old_key_descr, std::shared_ptr* new_key_descr, uint old_n_keys, - uint new_n_keys, std::unordered_set dropped_index_ids, - uint n_dropped_keys) : + uint new_n_keys, + std::unordered_set> added_indexes, + std::unordered_set dropped_index_ids, + uint n_added_keys, uint n_dropped_keys) : my_core::inplace_alter_handler_ctx(), m_new_tdef(new_tdef), m_old_key_descr(old_key_descr), m_new_key_descr(new_key_descr), m_old_n_keys(old_n_keys), m_new_n_keys(new_n_keys), + m_added_indexes(added_indexes), m_dropped_index_ids(dropped_index_ids), + m_n_added_keys(n_added_keys), m_n_dropped_keys(n_dropped_keys) { } diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index bada3f9f0685..c46d2df44780 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -537,7 +537,7 @@ int Rdb_key_def::successor(uchar *packed_tuple, uint len) Length of the packed tuple */ -uint Rdb_key_def::pack_record(TABLE *tbl, uchar *pack_buffer, +uint Rdb_key_def::pack_record(const TABLE *tbl, uchar *pack_buffer, const uchar *record, uchar *packed_tuple, uchar *unpack_info, int *unpack_info_len, bool should_store_checksums, @@ -2079,7 +2079,7 @@ bool Rdb_field_packing::setup(const Rdb_key_def *key_descr, const Field *field, } -Field *Rdb_field_packing::get_field_in_table(TABLE *tbl) const +Field *Rdb_field_packing::get_field_in_table(const TABLE *tbl) const { return tbl->key_info[m_keynr].key_part[m_key_part].field; } @@ -3647,6 +3647,10 @@ void Rdb_dict_manager::finish_indexes_operation( gl_index_id.cf_id, gl_index_id.index_id); end_ongoing_index_operation(batch, gl_index_id, dd_type); + } + + if (dd_type == Rdb_key_def::DDL_DROP_INDEX_ONGOING) + { delete_index_info(batch, gl_index_id); } } diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 78ce09295a50..b0fed7446d64 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -140,7 +140,7 @@ class Rdb_key_def const uchar *key_tuple, key_part_map keypart_map) const; /* Convert a key from Table->record format to mem-comparable form */ - uint pack_record(TABLE *tbl, uchar *pack_buffer, const uchar *record, + uint pack_record(const TABLE *tbl, uchar *pack_buffer, const uchar *record, uchar *packed_tuple, uchar *unpack_info, int *unpack_info_len, bool should_store_checksums, longlong hidden_pk_id= 0, uint n_key_parts= 0, @@ -212,6 +212,11 @@ class Rdb_key_def return covers_key(value) && !cmp_full_keys(value, prefix); } + uint32 get_keyno() const + { + return m_keyno; + } + uint32 get_index_number() const { return m_index_number; @@ -526,7 +531,7 @@ class Rdb_field_packing public: bool setup(const Rdb_key_def *key_descr, const Field *field, uint keynr_arg, uint key_part_arg, uint16 key_length); - Field *get_field_in_table(TABLE *tbl) const; + Field *get_field_in_table(const TABLE *tbl) const; void fill_hidden_pk_val(uchar **dst, longlong hidden_pk_id) const; };