From 70f2bcd777b0fb58b72e9dbe5e5477e34248d7dd Mon Sep 17 00:00:00 2001 From: Artem Danilov Date: Tue, 10 Oct 2017 08:18:24 -0700 Subject: [PATCH] Don't decode/unpack keys unless requested Summary: This branch gets rid of expensive decoding/unpacking of the key fields unless they are requested by higher layers. The method can be further optimized to decode key fields partially when not all fields of the key are requested, but this branch doesn't tackle that, as it would take more effort and gives improvement in far fewer cases. I've found one more implicit demand for unpacking/decoding the key in addition to merge, which is the update of the auto-increment value. Test Plan: Run a count select on a 15G table with utf8_bin collation to assess the performance gain. Observed a 16% time reduction for the count query and a 13% time reduction for the SUM query using a full table scan and extracting a non-key column. mysql> select count(count) from assoc_count; +--------------+ | count(count) | +--------------+ | 320435881 | +--------------+ 1 row in set (2 min 59.28 sec) <-- before change 1 row in set (2 min 30.80 sec) <-- after change mysql> select sum(count) from assoc_count; +--------------+ | sum(count) | +--------------+ | 619505329429 | +--------------+ 1 row in set (3 min 13.79 sec) <-- before change 1 row in set (2 min 48.82 sec) <-- after change The table definition: CREATE TABLE `assoc_count` ( `id` bigint(20) unsigned NOT NULL DEFAULT '0', `id_type` int(10) unsigned NOT NULL DEFAULT '0', `assoc_type` bigint(20) unsigned NOT NULL DEFAULT '0', `count` int(10) unsigned NOT NULL DEFAULT '0', `time` int(10) unsigned NOT NULL DEFAULT '0', `version` bigint(20) unsigned NOT NULL DEFAULT '0', PRIMARY KEY (`id`,`assoc_type`) COMMENT 'cf_assoc_count' ) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin ROW_FORMAT=COMPACT; Reviewers: mung, herman Reviewed By: herman Subscribers: webscalesql-eng@fb.com, phabricatorlinter Differential Revision: https://phabricator.intern.facebook.com/D5909811 Tasks: 
T21813614 Signature: 5909811:1507610696:b2b2d2a92d5e8c4c9f7f9f58f758cf4245559f12 --- storage/rocksdb/ha_rocksdb.cc | 36 ++++++++++++++++++++++++----------- storage/rocksdb/ha_rocksdb.h | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 9240d707cb96..247bf08e498a 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -4423,6 +4423,7 @@ void ha_rocksdb::load_auto_incr_value() { index_last() creates a snapshot. When a snapshot is created here, it has to be released as well. (GitHub issue#189) */ + m_key_requested = true; // we need to unpack the key to load the value Rdb_transaction *const tx = get_or_create_tx(table->in_use); const bool is_new_snapshot = !tx->has_snapshot(); @@ -4962,12 +4963,12 @@ int ha_rocksdb::convert_record_to_storage_format( Setup which fields will be unpacked when reading rows @detail - Two special cases when we still unpack all fields: + Three special cases when we still unpack all fields: - When this table is being updated (m_lock_rows==RDB_LOCK_WRITE). - When @@rocksdb_verify_row_debug_checksums is ON (In this mode, we need to - read all - fields to find whether there is a row checksum at the end. We could skip - the fields instead of decoding them, but currently we do decoding.) + read all fields to find whether there is a row checksum at the end. We could + skip the fields instead of decoding them, but currently we do decoding.) 
+ - On index merge as bitmap is cleared during that operation @seealso ha_rocksdb::setup_field_converters() @@ -4975,20 +4976,29 @@ int ha_rocksdb::convert_record_to_storage_format( */ void ha_rocksdb::setup_read_decoders() { m_decoders_vect.clear(); + m_key_requested = false; int last_useful = 0; int skip_size = 0; for (uint i = 0; i < table->s->fields; i++) { + // bitmap is cleared on index merge, but it still needs to decode columns + const bool field_requested = + m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || + bitmap_is_clear_all(table->read_set) || + bitmap_is_set(table->read_set, table->field[i]->field_index); + // We only need the decoder if the whole record is stored. if (m_encoder_arr[i].m_storage_type != Rdb_field_encoder::STORE_ALL) { + // the field potentially needs unpacking + if (field_requested) { + // the field is in the read set + m_key_requested = true; + } continue; } - // bitmap is cleared on index merge, but it still needs to decode columns - if (m_lock_rows == RDB_LOCK_WRITE || m_verify_row_debug_checksums || - bitmap_is_clear_all(table->read_set) || - bitmap_is_set(table->read_set, table->field[i]->field_index)) { + if (field_requested) { // We will need to decode this field m_decoders_vect.push_back({&m_encoder_arr[i], true, skip_size}); last_useful = m_decoders_vect.size(); @@ -5229,9 +5239,13 @@ int ha_rocksdb::convert_record_from_storage_format( Rdb_key_def::get_unpack_header_size(unpack_info[0])); } - int err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, - unpack_info ? &unpack_slice : nullptr, - false /* verify_checksum */); + int err = HA_EXIT_SUCCESS; + if (m_key_requested) { + err = m_pk_descr->unpack_record(table, buf, &rowkey_slice, + unpack_info ? 
&unpack_slice : nullptr, + false /* verify_checksum */); + } + if (err != HA_EXIT_SUCCESS) { return err; } diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index f04ee5752ad8..f1b2d1a001ae 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -681,6 +681,12 @@ class ha_rocksdb : public my_core::handler { */ std::vector m_decoders_vect; + /* + This tells if any field which is part of the key needs to be unpacked and + decoded. + */ + bool m_key_requested = false; + /* Setup field_decoders based on type of scan and table->read_set */ void setup_read_decoders();