Skip to content

Commit

Permalink
Faster value decoding
Browse files Browse the repository at this point in the history
Upstream commit ID : fb-mysql-5.6.35/0e45b20a6a7e205f230314e1c6d1d91b752ac372
PS-7731 : Merge percona-202102

Summary:
Similar to integer key decoding improvement, but for values:
* Remove dependency to field* and cache those information in Rdb_field_encoder
* No more Field::move_field - just write to buffer directly

We were originally planning to use Rdb_protocol_value_decoder for MySQL bypass project to eliminate overhead of decoding to integer then convert to string, but in practice it probably makes little difference and it is not going to be needed when we go with bypass RPC (which is thrift binary protocol). So this changes removes Rdb_protocol_value_decoder completely.

One thing that worth mentioning is I've unified pack_length and pack_length_in_rec because they are essentially identical in our scenarios and in the case they could be different (more packed BITS format that borrows bits from null byte when storage engine supports it) we don't really support it at all, so unifying it to avoid confusion and assert it.

With this MyRocks is now either faster or on par with InnoDB when it comes to SELECT integer columns stored in value.

NOTE:FDO data needs to be updated for this diff in order to take full advantage of this change.

Reference Patch: facebook/mysql-5.6@f4cefba

Reviewed By: Pushapgl

Differential Revision: D25880543

fbshipit-source-id: 19faf536554
  • Loading branch information
yizhang82 authored and VarunNagaraju committed Jun 6, 2024
1 parent 6c08ee7 commit 5a6258d
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 90 deletions.
147 changes: 72 additions & 75 deletions storage/rocksdb/rdb_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,56 +59,43 @@ void dbug_modify_key_varchar8(String *on_disk_rec) {
0 OK
other HA_ERR error code (can be SE-specific)
*/
int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, uint *offset,
TABLE *table,
my_core::Field *field,
int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, TABLE *table,
Rdb_field_encoder *field_dec,
Rdb_string_reader *reader,
bool decode, bool is_null) {
DBUG_TRACE;
int err = HA_EXIT_SUCCESS;

uint field_offset = field->field_ptr() - table->record[0];
*offset = field_offset;
uint null_offset = field->null_offset();
bool maybe_null = field->is_nullable();
field->move_field(buf + field_offset,
maybe_null ? buf + null_offset : nullptr, field->null_bit);

auto ptr = buf + field_dec->m_field_offset;
if (is_null) {
if (decode) {
if (decode && field_dec->maybe_null()) {
// This sets the NULL-bit of this record
field->set_null();
buf[field_dec->m_field_null_offset] |= field_dec->m_field_null_mask;

/*
Besides that, set the field value to default value. CHECKSUM TABLE
depends on this.
*/
memcpy(field->field_ptr(), table->s->default_values + field_offset,
field->pack_length());
memcpy(ptr, table->s->default_values + field_dec->m_field_offset,
field_dec->m_field_pack_length);
}
} else {
if (decode) {
if (decode && field_dec->maybe_null()) {
// sets non-null bits for this record
field->set_notnull();
buf[field_dec->m_field_null_offset] &= ~(field_dec->m_field_null_mask);
}

if (!field->is_virtual_gcol()) {
if (!field_dec->m_is_virtual_gcol) {
if (field_dec->m_field_type == MYSQL_TYPE_BLOB ||
field_dec->m_field_type == MYSQL_TYPE_JSON) {
err = decode_blob(table, field, reader, decode);
err = decode_blob(table, ptr, field_dec, reader, decode);
} else if (field_dec->m_field_type == MYSQL_TYPE_VARCHAR) {
err = decode_varchar(field, reader, decode);
err = decode_varchar(ptr, field_dec, reader, decode);
} else {
err = decode_fixed_length_field(field, field_dec, reader, decode);
err = decode_fixed_length_field(ptr, field_dec, reader, decode);
}
}
}

// Restore field->ptr and field->null_ptr
field->move_field(table->record[0] + field_offset,
maybe_null ? table->record[0] + null_offset : nullptr,
field->null_bit);

return err;
}

Expand All @@ -122,21 +109,20 @@ int Rdb_convert_to_record_value_decoder::decode(uchar *const buf, uint *offset,
0 OK
other HA_ERR error code (can be SE-specific)
*/
int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field,
Rdb_string_reader *reader,
bool decode) {
my_core::Field_blob *blob = (my_core::Field_blob *)field;

int Rdb_convert_to_record_value_decoder::decode_blob(
TABLE *table, uchar *const buf, Rdb_field_encoder *field_dec,
Rdb_string_reader *reader, bool decode) {
// Get the number of bytes needed to store length
const uint length_bytes = blob->pack_length() - portable_sizeof_char_ptr;
const uint length_bytes =
field_dec->m_field_pack_length - portable_sizeof_char_ptr;

const char *data_len_str;
if (!(data_len_str = reader->read(length_bytes))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}

memcpy(blob->field_ptr(), data_len_str, length_bytes);
uint32 data_len = blob->get_length(
memcpy(buf, data_len_str, length_bytes);
uint32 data_len = Field_blob::get_length(
reinterpret_cast<const uchar *>(data_len_str), length_bytes);
const char *blob_ptr;
if (!(blob_ptr = reader->read(data_len))) {
Expand All @@ -146,8 +132,8 @@ int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field,
if (decode) {
// set 8-byte pointer to 0, like innodb does (relevant for 32-bit
// platforms)
memset(blob->field_ptr() + length_bytes, 0, 8);
memcpy(blob->field_ptr() + length_bytes, &blob_ptr, sizeof(uchar **));
memset(buf + length_bytes, 0, 8);
memcpy(buf + length_bytes, &blob_ptr, sizeof(uchar **));
}

return HA_EXIT_SUCCESS;
Expand All @@ -165,17 +151,17 @@ int Rdb_convert_to_record_value_decoder::decode_blob(TABLE *table, Field *field,
other HA_ERR error code (can be SE-specific)
*/
int Rdb_convert_to_record_value_decoder::decode_fixed_length_field(
my_core::Field *const field, Rdb_field_encoder *field_dec,
uchar *const buf, Rdb_field_encoder *field_dec,
Rdb_string_reader *const reader, bool decode) {
uint len = field_dec->m_pack_length_in_rec;
uint len = field_dec->m_field_pack_length;
if (len > 0) {
const char *data_bytes;
if ((data_bytes = reader->read(len)) == nullptr) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}

if (decode) {
memcpy(field->field_ptr(), data_bytes, len);
memcpy(buf, data_bytes, len);
}
}

Expand All @@ -193,24 +179,23 @@ int Rdb_convert_to_record_value_decoder::decode_fixed_length_field(
other HA_ERR error code (can be SE-specific)
*/
int Rdb_convert_to_record_value_decoder::decode_varchar(
Field *field, Rdb_string_reader *const reader, bool decode) {
my_core::Field_varstring *const field_var = (my_core::Field_varstring *)field;

uchar *const buf, Rdb_field_encoder *field_dec,
Rdb_string_reader *const reader, bool decode) {
const char *data_len_str;
if (!(data_len_str = reader->read(field_var->get_length_bytes()))) {
if (!(data_len_str = reader->read(field_dec->m_field_length_bytes))) {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}

uint data_len;
// field_var->length_bytes is 1 or 2
if (field_var->get_length_bytes() == 1) {
// field_dec->length_bytes is 1 or 2
if (field_dec->m_field_length_bytes == 1) {
data_len = (uchar)data_len_str[0];
} else {
DBUG_ASSERT(field_var->get_length_bytes() == 2);
DBUG_ASSERT(field_dec->m_field_length_bytes == 2);
data_len = uint2korr(data_len_str);
}

if (data_len > field_var->field_length) {
if (data_len > field_dec->m_field_length) {
// The data on disk is longer than table DDL allows?
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}
Expand All @@ -220,8 +205,7 @@ int Rdb_convert_to_record_value_decoder::decode_varchar(
}

if (decode) {
memcpy(field_var->field_ptr(), data_len_str,
field_var->get_length_bytes() + data_len);
memcpy(buf, data_len_str, field_dec->m_field_length_bytes + data_len);
}

return HA_EXIT_SUCCESS;
Expand All @@ -241,7 +225,6 @@ Rdb_value_field_iterator<value_field_decoder>::Rdb_value_field_iterator(
m_field_iter = fields->begin();
m_field_end = fields->end();
m_null_bytes = rdb_converter->get_null_bytes();
m_offset = 0;
}

// Iterate each requested field and decode one by one
Expand All @@ -262,11 +245,9 @@ int Rdb_value_field_iterator<value_field_decoder>::next() {
return HA_ERR_ROCKSDB_CORRUPT_DATA;
}

m_field = m_table->field[m_field_dec->m_field_index];
// Decode each field
err = value_field_decoder::decode(m_buf, &m_offset, m_table, m_field,
m_field_dec, m_value_slice_reader, decode,
m_is_null);
err = value_field_decoder::decode(m_buf, m_table, m_field_dec,
m_value_slice_reader, decode, m_is_null);
if (err != HA_EXIT_SUCCESS) {
return err;
}
Expand All @@ -284,18 +265,6 @@ bool Rdb_value_field_iterator<value_field_decoder>::end_of_fields() const {
return m_field_iter == m_field_end;
}

template <typename value_field_decoder>
Field *Rdb_value_field_iterator<value_field_decoder>::get_field() const {
DBUG_ASSERT(m_field != nullptr);
return m_field;
}

template <typename value_field_decoder>
void *Rdb_value_field_iterator<value_field_decoder>::get_dst() const {
DBUG_ASSERT(m_buf != nullptr);
return m_buf + m_offset;
}

template <typename value_field_decoder>
int Rdb_value_field_iterator<value_field_decoder>::get_field_index() const {
DBUG_ASSERT(m_field_dec != nullptr);
Expand All @@ -311,7 +280,6 @@ enum_field_types Rdb_value_field_iterator<value_field_decoder>::get_field_type()

template <typename value_field_decoder>
bool Rdb_value_field_iterator<value_field_decoder>::is_null() const {
DBUG_ASSERT(m_field != nullptr);
return m_is_null;
}

Expand Down Expand Up @@ -437,7 +405,7 @@ void Rdb_converter::setup_field_decoders(const MY_BITMAP *field_map,
} else {
// Fixed-width field can be skipped without looking at it.
// Add appropriate skip_size to the next field.
skip_size += m_encoder_arr[i].m_pack_length_in_rec;
skip_size += m_encoder_arr[i].m_field_pack_length;
}
}
}
Expand Down Expand Up @@ -489,20 +457,49 @@ void Rdb_converter::setup_field_encoders() {
}
}

m_encoder_arr[i].m_field_type = field->real_type();
/*
The difference between pack_length and pack_length_in_rec is fairly
subtle. The only difference is in Field_bit case where it borrows some
bits in null bytes in memory to store the 'uneven' high bits, therefore
the pack_length is the length of remaining bits while the
pack_length_in_rec is the full length of all bits when you store it on
disk. Only MyIsam and archive supports it, indicating by
HA_CAN_BIT_FIELD. We don't handle this case today at all (nor do we need
to), and we use pack_length everywhere, so just assert it and move on.
*/
DBUG_ASSERT(field->pack_length() == field->pack_length_in_rec());

auto field_type = field->real_type();
m_encoder_arr[i].m_field_type = field_type;
m_encoder_arr[i].m_field_index = i;
m_encoder_arr[i].m_pack_length_in_rec = field->pack_length_in_rec();
m_encoder_arr[i].m_field_pack_length = field->pack_length();
m_encoder_arr[i].m_field_offset = field->field_ptr() - m_table->record[0];
m_encoder_arr[i].m_field_type = field_type;
m_encoder_arr[i].m_is_virtual_gcol = field->is_virtual_gcol();

if (field_type == MYSQL_TYPE_VARCHAR) {
auto varchar = reinterpret_cast<const Field_varstring *>(field);
m_encoder_arr[i].m_field_length = varchar->field_length;
m_encoder_arr[i].m_field_length_bytes = varchar->get_length_bytes();
} else {
m_encoder_arr[i].m_field_length = -1;
m_encoder_arr[i].m_field_length_bytes = -1;
}

if (field->is_nullable()) {
auto maybe_null = field->is_nullable();
if (maybe_null) {
m_encoder_arr[i].m_null_mask = cur_null_mask;
m_encoder_arr[i].m_null_offset = null_bytes_length;
m_encoder_arr[i].m_field_null_offset = field->null_offset();
m_encoder_arr[i].m_field_null_mask = field->null_bit;
if (cur_null_mask == 0x80) {
cur_null_mask = 0x1;
null_bytes_length++;
} else {
cur_null_mask = cur_null_mask << 1;
}
} else {
m_encoder_arr[i].m_null_offset = 0;
m_encoder_arr[i].m_null_mask = 0;
}
}
Expand Down Expand Up @@ -626,9 +623,9 @@ int Rdb_converter::convert_record_from_storage_format(
err = pk_def->unpack_record(m_table, dst, key_slice,
!unpack_slice.empty() ? &unpack_slice : nullptr,
false /* verify_checksum */);
}
if (err != HA_EXIT_SUCCESS) {
return err;
if (err != HA_EXIT_SUCCESS) {
return err;
}
}

if (!decode_value || get_decode_fields()->size() == 0) {
Expand Down Expand Up @@ -845,7 +842,7 @@ int Rdb_converter::encode_value_slice(
field_var->get_length_bytes() + data_len);
} else {
/* Copy the field data */
const uint len = field->pack_length_in_rec();
const uint len = field->pack_length();
m_storage_record.append(reinterpret_cast<char *>(field->field_ptr()),
len);
}
Expand Down
19 changes: 9 additions & 10 deletions storage/rocksdb/rdb_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,21 @@ class Rdb_convert_to_record_value_decoder {
Rdb_convert_to_record_value_decoder &operator=(
const Rdb_convert_to_record_value_decoder &decoder) = delete;

static int decode(uchar *const buf, uint *offset, TABLE *table,
my_core::Field *field, Rdb_field_encoder *field_dec,
Rdb_string_reader *reader, bool decode, bool is_null);
static int decode(uchar *const buf, TABLE *table,
Rdb_field_encoder *field_dec, Rdb_string_reader *reader,
bool decode, bool is_null);

private:
static int decode_blob(TABLE *table, Field *field, Rdb_string_reader *reader,
bool decode);
static int decode_fixed_length_field(Field *const field,
static int decode_blob(TABLE *table, uchar *const buf,
Rdb_field_encoder *field_dec,
Rdb_string_reader *reader, bool decode);
static int decode_fixed_length_field(uchar *const buf,
Rdb_field_encoder *field_dec,
Rdb_string_reader *const reader,
bool decode);

static int decode_varchar(Field *const field, Rdb_string_reader *const reader,
bool decode);
static int decode_varchar(uchar *const buf, Rdb_field_encoder *field_dec,
Rdb_string_reader *const reader, bool decode);
};

/**
Expand Down Expand Up @@ -118,8 +119,6 @@ class Rdb_value_field_iterator {
int get_field_index() const;
// get current field type
enum_field_types get_field_type() const;
// get current field
Field *get_field() const;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion storage/rocksdb/rdb_datadic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2117,7 +2117,7 @@ int Rdb_key_def::unpack_record(TABLE *const table, uchar *const buf,
has_covered_bitmap ? &covered_bitmap : nullptr, buf);
while (iter.has_next()) {
err = iter.next();
if (err) {
if (unlikely(err)) {
return err;
}
}
Expand Down
15 changes: 11 additions & 4 deletions storage/rocksdb/rdb_datadic.h
Original file line number Diff line number Diff line change
Expand Up @@ -1251,13 +1251,20 @@ class Rdb_field_encoder {
STORAGE_TYPE m_storage_type;

uint m_null_offset;
uint16 m_field_index;

uchar m_null_mask; // 0 means the field cannot be null

/*
Cached field information
*/
my_core::enum_field_types m_field_type;

uint m_pack_length_in_rec;
uchar m_field_null_mask;
uint16 m_field_index;
uint m_field_pack_length;
uint m_field_length_bytes;
uint m_field_length;
ptrdiff_t m_field_null_offset;
ptrdiff_t m_field_offset;
bool m_is_virtual_gcol;

bool maybe_null() const { return m_null_mask != 0; }

Expand Down

0 comments on commit 5a6258d

Please sign in to comment.