Skip to content

Commit 45751b9

Browse files
bladepanfacebook-github-bot
authored andcommitted
fix bypass point lookup not checking ttl
Summary: - bypass PK point lookup use txn get/multiget methods directly, which does not check for ttl - change the point lookup to use rdb iterator, which has the ttl logic - add a multiget method for rdb iterator - extract the logic of converting rocksdb status to return codes for get api Differential Revision: D54371088 fbshipit-source-id: b0d807d
1 parent f87e564 commit 45751b9

File tree

6 files changed

+214
-43
lines changed

6 files changed

+214
-43
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
disable_query_log;
2+
SELECT TYPE, VALUE FROM THRIFT_SERVER_PLUGIN_OUTPUT WHERE TYPE IN ('header', 'row') ORDER BY SEQ_NUMBER;
3+
4+
# empty result set
5+
TRUNCATE TABLE THRIFT_SERVER_PLUGIN_OUTPUT;
6+
enable_query_log;
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
set global rocksdb_pause_ttl_compaction_filter=on;
2+
set global rocksdb_select_bypass_multiget_min=2;
3+
CREATE TABLE t1 (
4+
id BIGINT NOT NULL, val VARCHAR(64), ts BIGINT UNSIGNED NOT NULL, PRIMARY KEY (id)
5+
) ENGINE=ROCKSDB COMMENT='ttl_duration=1;ttl_col=ts;';
6+
insert into t1 values (1, 'v1', 42), (2, 'v2', 42), (3, 'v3', 42), (4, 'v4', UNIX_TIMESTAMP()+360000);
7+
8+
point query multi get
9+
10+
set global rocksdb_enable_ttl_read_filtering=off;
11+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":2}, {"signedIntVal":3}, {"signedIntVal":4}, {"signedIntVal":5}]}]}';
12+
TYPE VALUE
13+
header [{"type":2,"name":"id"}, {"type":4,"name":"val"}]
14+
row [{"signedIntVal":1}, {"stringVal":"v1"}]
15+
row [{"signedIntVal":2}, {"stringVal":"v2"}]
16+
row [{"signedIntVal":3}, {"stringVal":"v3"}]
17+
row [{"signedIntVal":4}, {"stringVal":"v4"}]
18+
set global rocksdb_enable_ttl_read_filtering=on;
19+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":2}, {"signedIntVal":3}, {"signedIntVal":4}, {"signedIntVal":5}]}]}';
20+
TYPE VALUE
21+
header [{"type":2,"name":"id"}, {"type":4,"name":"val"}]
22+
row [{"signedIntVal":4}, {"stringVal":"v4"}]
23+
24+
point query single get
25+
26+
set global rocksdb_enable_ttl_read_filtering=off;
27+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where":[{"column":"id","op":0,"value":{"signedIntVal":1}}]}';
28+
TYPE VALUE
29+
header [{"type":2,"name":"id"}, {"type":4,"name":"val"}]
30+
row [{"signedIntVal":1}, {"stringVal":"v1"}]
31+
set global rocksdb_enable_ttl_read_filtering=on;
32+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where":[{"column":"id","op":0,"value":{"signedIntVal":1}}]}';
33+
TYPE VALUE
34+
header [{"type":5,"name":"id"}, {"type":5,"name":"val"}]
35+
36+
point query single get multi keys
37+
38+
set global rocksdb_enable_ttl_read_filtering=off;
39+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":4}]}]}';
40+
TYPE VALUE
41+
header [{"type":2,"name":"id"}, {"type":4,"name":"val"}]
42+
row [{"signedIntVal":1}, {"stringVal":"v1"}]
43+
row [{"signedIntVal":4}, {"stringVal":"v4"}]
44+
set global rocksdb_enable_ttl_read_filtering=on;
45+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":4}]}]}';
46+
TYPE VALUE
47+
header [{"type":2,"name":"id"}, {"type":4,"name":"val"}]
48+
row [{"signedIntVal":4}, {"stringVal":"v4"}]
49+
50+
set global rocksdb_pause_ttl_compaction_filter=default;
51+
set global rocksdb_enable_ttl_read_filtering=default;
52+
set global rocksdb_select_bypass_multiget_min=default;
53+
DROP TABLE t1;
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
source ../include/init_thrift_server_plugin.inc;
2+
3+
disable_query_log;
4+
disable_warnings;
5+
DROP TABLE IF EXISTS t1;
6+
enable_warnings;
7+
enable_query_log;
8+
9+
# disable compaction ttl deletion
10+
set global rocksdb_pause_ttl_compaction_filter=on;
11+
set global rocksdb_select_bypass_multiget_min=2;
12+
13+
CREATE TABLE t1 (
14+
id BIGINT NOT NULL, val VARCHAR(64), ts BIGINT UNSIGNED NOT NULL, PRIMARY KEY (id)
15+
) ENGINE=ROCKSDB COMMENT='ttl_duration=1;ttl_col=ts;';
16+
17+
# first 3 records are expired
18+
insert into t1 values (1, 'v1', 42), (2, 'v2', 42), (3, 'v3', 42), (4, 'v4', UNIX_TIMESTAMP()+360000);
19+
20+
echo;
21+
echo point query multi get;
22+
echo;
23+
# where id in (1,2,3,4,5)
24+
set global rocksdb_enable_ttl_read_filtering=off;
25+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":2}, {"signedIntVal":3}, {"signedIntVal":4}, {"signedIntVal":5}]}]}';
26+
source ../include/get_thrift_server_plugin_output.inc;
27+
28+
# should filter out expired data
29+
set global rocksdb_enable_ttl_read_filtering=on;
30+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":2}, {"signedIntVal":3}, {"signedIntVal":4}, {"signedIntVal":5}]}]}';
31+
source ../include/get_thrift_server_plugin_output.inc;
32+
33+
echo;
34+
echo point query single get;
35+
echo;
36+
set global rocksdb_enable_ttl_read_filtering=off;
37+
# where id = 1
38+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where":[{"column":"id","op":0,"value":{"signedIntVal":1}}]}';
39+
source ../include/get_thrift_server_plugin_output.inc;
40+
41+
set global rocksdb_enable_ttl_read_filtering=on;
42+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where":[{"column":"id","op":0,"value":{"signedIntVal":1}}]}';
43+
source ../include/get_thrift_server_plugin_output.inc;
44+
45+
echo;
46+
echo point query single get multi keys;
47+
echo;
48+
set global rocksdb_enable_ttl_read_filtering=off;
49+
# where id in (1,4)
50+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":4}]}]}';
51+
source ../include/get_thrift_server_plugin_output.inc;
52+
set global rocksdb_enable_ttl_read_filtering=on;
53+
SET GLOBAL THRIFT_SERVER_TESTER_INPUT='bypass: {"db_name":"test","table_name":"t1","columns":["id", "val"],"where_in":[{"column":"id","op":0,"values":[{"signedIntVal":1}, {"signedIntVal":4}]}]}';
54+
source ../include/get_thrift_server_plugin_output.inc;
55+
56+
echo;
57+
set global rocksdb_pause_ttl_compaction_filter=default;
58+
set global rocksdb_enable_ttl_read_filtering=default;
59+
set global rocksdb_select_bypass_multiget_min=default;
60+
DROP TABLE t1;
61+
source ../include/uninit_thrift_server_plugin.inc;

storage/rocksdb/nosql_access.cc

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,14 +1522,6 @@ class select_exec {
15221522
return rdb_tx_get(m_tx, cf, key_slice, value_slice, m_table_type);
15231523
}
15241524

1525-
void multi_get(rocksdb::ColumnFamilyHandle *cf, size_t size,
1526-
bool sorted_input, const rocksdb::Slice *key_slices,
1527-
rocksdb::PinnableSlice *value_slices,
1528-
rocksdb::Status *statuses) {
1529-
rdb_tx_multi_get(m_tx, cf, size, key_slices, value_slices, m_table_type,
1530-
statuses, sorted_input);
1531-
}
1532-
15331525
void report_error(rocksdb::Status s) {
15341526
if (s.IsIOError() || s.IsCorruption()) {
15351527
rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL);
@@ -1578,7 +1570,7 @@ class select_exec {
15781570
bool unpack_for_pk(const rocksdb::Slice &rkey, const rocksdb::Slice &rvalue);
15791571
bool eval_cond();
15801572
int eval_and_send();
1581-
bool run_pk_point_query(txn_wrapper *txn);
1573+
bool run_pk_point_query();
15821574
bool run_sk_point_query(txn_wrapper *txn);
15831575
bool pack_index_tuple(uint key_part_no, Rdb_string_writer *writer,
15841576
const Field *field, const nosql_cond_value &value);
@@ -2335,7 +2327,7 @@ bool INLINE_ATTR select_exec::run_query() {
23352327
}
23362328

23372329
if (is_pk_point_query) {
2338-
ret = run_pk_point_query(&txn);
2330+
ret = run_pk_point_query();
23392331
} else {
23402332
m_pk_tuple_buf.resize(m_pk_def->max_storage_fmt_length());
23412333

@@ -2356,15 +2348,12 @@ bool INLINE_ATTR select_exec::run_query() {
23562348
return ret;
23572349
}
23582350

2359-
bool INLINE_ATTR select_exec::run_pk_point_query(txn_wrapper *txn) {
2360-
auto cf = m_key_def->get_cf();
2361-
2351+
bool INLINE_ATTR select_exec::run_pk_point_query() {
23622352
if (m_key_index_tuples.size() > get_select_bypass_multiget_min()) {
23632353
size_t size = m_key_index_tuples.size();
23642354
std::vector<rocksdb::Slice> key_slices;
23652355
key_slices.reserve(size);
23662356
std::vector<rocksdb::PinnableSlice> value_slices(size);
2367-
std::vector<rocksdb::Status> statuses(size);
23682357

23692358
for (auto &writer : m_key_index_tuples) {
23702359
key_slices.push_back(writer.get_key_slice());
@@ -2375,19 +2364,18 @@ bool INLINE_ATTR select_exec::run_pk_point_query(txn_wrapper *txn) {
23752364

23762365
bool sorted_input =
23772366
(m_key_def->m_is_reverse_cf == m_parser.is_order_desc());
2378-
2379-
txn->multi_get(m_key_def->get_cf(), size, sorted_input, key_slices.data(),
2380-
value_slices.data(), statuses.data());
2367+
std::vector<int> rtn_codes(size);
2368+
m_iterator->multi_get(key_slices, value_slices, rtn_codes, sorted_input);
23812369

23822370
for (size_t i = 0; i < size; ++i) {
23832371
if (unlikely(handle_killed())) {
23842372
return true;
23852373
}
2386-
2387-
if (statuses[i].IsNotFound()) {
2374+
int rc = rtn_codes[i];
2375+
if (rc == HA_ERR_KEY_NOT_FOUND) {
23882376
continue;
2389-
} else if (!statuses[i].ok()) {
2390-
txn->report_error(statuses[i]);
2377+
} else if (rc) {
2378+
m_handler->print_error(rc, 0);
23912379
return true;
23922380
}
23932381

@@ -2413,11 +2401,13 @@ bool INLINE_ATTR select_exec::run_pk_point_query(txn_wrapper *txn) {
24132401
value_slice.Reset();
24142402

24152403
rocksdb::Slice key_slice = writer.get_key_slice();
2416-
rocksdb::Status s = txn->get(cf, key_slice, &value_slice);
2417-
if (s.IsNotFound()) {
2404+
auto rc = m_iterator->get(&key_slice, &value_slice, RDB_LOCK_NONE);
2405+
if (rc == HA_ERR_KEY_NOT_FOUND) {
24182406
continue;
2419-
} else if (!s.ok()) {
2420-
txn->report_error(s);
2407+
}
2408+
2409+
if (rc) {
2410+
m_handler->print_error(rc, 0);
24212411
return true;
24222412
}
24232413

storage/rocksdb/rdb_iterator.cc

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#include "./rdb_iterator.h"
1818

1919
#include <algorithm>
20+
#include <cstddef>
2021

2122
/* MySQL includes */
23+
#include "rdb_utils.h"
2224
#include "scope_guard.h"
2325
#include "sql/sql_class.h"
2426
#include "sql/thr_malloc.h"
@@ -377,10 +379,29 @@ int Rdb_iterator_base::seek(enum ha_rkey_function find_flag,
377379
return rc;
378380
}
379381

382+
int Rdb_iterator_base::convert_get_status(myrocks::Rdb_transaction *tx,
383+
const rocksdb::Status &s,
384+
rocksdb::PinnableSlice *value,
385+
bool skip_ttl_check) {
386+
int rc = HA_EXIT_SUCCESS;
387+
if (!s.IsNotFound() && !s.ok()) {
388+
return rdb_tx_set_status_error(tx, s, *m_kd, m_tbl_def);
389+
}
390+
391+
const bool hide_ttl_rec =
392+
!skip_ttl_check && m_kd->has_ttl() &&
393+
rdb_should_hide_ttl_rec(*m_kd, s.IsNotFound() ? nullptr : value, tx);
394+
395+
if (hide_ttl_rec || s.IsNotFound()) {
396+
return HA_ERR_KEY_NOT_FOUND;
397+
}
398+
399+
return rc;
400+
}
401+
380402
int Rdb_iterator_base::get(const rocksdb::Slice *key,
381403
rocksdb::PinnableSlice *value, Rdb_lock_type type,
382404
bool skip_ttl_check, bool skip_wait) {
383-
int rc = HA_EXIT_SUCCESS;
384405
m_valid = false;
385406
Rdb_transaction *const tx = get_tx_from_thd(m_thd);
386407
rocksdb::Status s;
@@ -397,19 +418,23 @@ int Rdb_iterator_base::get(const rocksdb::Slice *key,
397418
s = rocksdb::Status::Corruption();
398419
});
399420

400-
if (!s.IsNotFound() && !s.ok()) {
401-
return rdb_tx_set_status_error(tx, s, *m_kd, m_tbl_def);
402-
}
403-
404-
const bool hide_ttl_rec =
405-
!skip_ttl_check && m_kd->has_ttl() &&
406-
rdb_should_hide_ttl_rec(*m_kd, s.IsNotFound() ? nullptr : value, tx);
421+
return convert_get_status(tx, s, value, skip_ttl_check);
422+
}
407423

408-
if (hide_ttl_rec || s.IsNotFound()) {
409-
return HA_ERR_KEY_NOT_FOUND;
424+
void Rdb_iterator_base::multi_get(
425+
const std::vector<rocksdb::Slice> &key_slices,
426+
std::vector<rocksdb::PinnableSlice> &value_slices,
427+
std::vector<int> &rtn_codes, bool sorted_input) {
428+
auto tx = get_tx_from_thd(m_thd);
429+
const auto size = key_slices.size();
430+
std::vector<rocksdb::Status> statuses(size);
431+
rdb_tx_multi_get(tx, m_kd->get_cf(), size, key_slices.data(),
432+
value_slices.data(), m_table_type, statuses.data(),
433+
sorted_input);
434+
for (std::size_t i = 0; i < size; i++) {
435+
rtn_codes[i] = convert_get_status(tx, statuses[i], &value_slices[i],
436+
/*skip_ttl_check*/ false);
410437
}
411-
412-
return rc;
413438
}
414439

415440
Rdb_iterator_partial::Rdb_iterator_partial(
@@ -994,11 +1019,10 @@ int Rdb_iterator_partial::seek(enum ha_rkey_function find_flag,
9941019
return rc;
9951020
}
9961021

997-
int Rdb_iterator_partial::get(const rocksdb::Slice *key,
998-
rocksdb::PinnableSlice *value, Rdb_lock_type type,
999-
bool skip_ttl_check, bool skip_wait) {
1000-
int rc = Rdb_iterator_base::get(key, value, type, skip_ttl_check, skip_wait);
1001-
1022+
int Rdb_iterator_partial::handle_get_result(
1023+
int get_rtn_code, const rocksdb::Slice *key, rocksdb::PinnableSlice *value,
1024+
Rdb_lock_type type, bool skip_ttl_check, bool skip_wait) {
1025+
int rc = get_rtn_code;
10021026
if (rc == HA_ERR_KEY_NOT_FOUND) {
10031027
const uint size =
10041028
m_kd->get_primary_key_tuple(*m_pkd, key, m_sk_packed_tuple);
@@ -1023,13 +1047,35 @@ int Rdb_iterator_partial::get(const rocksdb::Slice *key,
10231047

10241048
value->PinSelf(
10251049
rocksdb::Slice((const char *)m_sk_packed_tuple, sk_packed_size));
1026-
rc = 0;
1050+
rc = HA_EXIT_SUCCESS;
10271051
}
1052+
return rc;
1053+
}
10281054

1055+
int Rdb_iterator_partial::get(const rocksdb::Slice *key,
1056+
rocksdb::PinnableSlice *value, Rdb_lock_type type,
1057+
bool skip_ttl_check, bool skip_wait) {
1058+
int rc = Rdb_iterator_base::get(key, value, type, skip_ttl_check, skip_wait);
1059+
rc = handle_get_result(rc, key, value, type, skip_ttl_check, skip_wait);
10291060
m_partial_valid = false;
10301061
return rc;
10311062
}
10321063

1064+
void Rdb_iterator_partial::multi_get(
1065+
const std::vector<rocksdb::Slice> &key_slices,
1066+
std::vector<rocksdb::PinnableSlice> &value_slices,
1067+
std::vector<int> &rtn_codes, bool sorted_input) {
1068+
Rdb_iterator_base::multi_get(key_slices, value_slices, rtn_codes,
1069+
sorted_input);
1070+
1071+
for (std::size_t i = 0; i < rtn_codes.size(); i++) {
1072+
rtn_codes[i] = handle_get_result(
1073+
rtn_codes[i], &key_slices[i], &value_slices[i], RDB_LOCK_NONE,
1074+
/*skip_ttl_check*/ false, /*skip_wait*/ false);
1075+
}
1076+
m_partial_valid = false;
1077+
}
1078+
10331079
int Rdb_iterator_partial::next_with_direction_in_group(bool direction) {
10341080
uint tmp;
10351081
int rc = HA_EXIT_SUCCESS;

storage/rocksdb/rdb_iterator.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class Rdb_iterator {
6464
virtual int get(const rocksdb::Slice *key, rocksdb::PinnableSlice *value,
6565
Rdb_lock_type type, bool skip_ttl_check = false,
6666
bool skip_wait = false) = 0;
67+
virtual void multi_get(const std::vector<rocksdb::Slice> &key_slices,
68+
std::vector<rocksdb::PinnableSlice> &value_slices,
69+
std::vector<int> &rtn_codes, bool sorted_input) = 0;
6770
virtual int next() = 0;
6871
virtual int prev() = 0;
6972
virtual rocksdb::Slice key() = 0;
@@ -85,6 +88,9 @@ class Rdb_iterator_base : public Rdb_iterator {
8588
const int bytes_changed_by_succ,
8689
const rocksdb::Slice &end_key);
8790
int next_with_direction(bool move_forward, bool skip_next);
91+
int convert_get_status(myrocks::Rdb_transaction *tx,
92+
const rocksdb::Status &status,
93+
rocksdb::PinnableSlice *value, bool skip_ttl_check);
8894

8995
public:
9096
Rdb_iterator_base(THD *thd, ha_rocksdb *rocksdb_handler,
@@ -100,6 +106,9 @@ class Rdb_iterator_base : public Rdb_iterator {
100106
int get(const rocksdb::Slice *key, rocksdb::PinnableSlice *value,
101107
Rdb_lock_type type, bool skip_ttl_check = false,
102108
bool skip_wait = false) override;
109+
void multi_get(const std::vector<rocksdb::Slice> &key_slices,
110+
std::vector<rocksdb::PinnableSlice> &value_slices,
111+
std::vector<int> &rtn_codes, bool sorted_input) override;
103112

104113
int next() override { return next_with_direction(true, false); }
105114

@@ -198,6 +207,9 @@ class Rdb_iterator_partial : public Rdb_iterator_base {
198207
int read_prefix_from_pk();
199208
int next_with_direction_in_group(bool direction);
200209
int next_with_direction(bool direction);
210+
int handle_get_result(int rtn_code, const rocksdb::Slice *key,
211+
rocksdb::PinnableSlice *value, Rdb_lock_type type,
212+
bool skip_ttl_check, bool skip_wait);
201213

202214
using Slice_pair = std::pair<rocksdb::Slice, rocksdb::Slice>;
203215
using Records = std::vector<Slice_pair>;
@@ -234,6 +246,9 @@ class Rdb_iterator_partial : public Rdb_iterator_base {
234246
int get(const rocksdb::Slice *key, rocksdb::PinnableSlice *value,
235247
Rdb_lock_type type, bool skip_ttl_check = false,
236248
bool skip_wait = false) override;
249+
void multi_get(const std::vector<rocksdb::Slice> &key_slices,
250+
std::vector<rocksdb::PinnableSlice> &value_slices,
251+
std::vector<int> &rtn_codes, bool sorted_input) override;
237252
int next() override;
238253
int prev() override;
239254
rocksdb::Slice key() override;

0 commit comments

Comments
 (0)