Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

os/bluestore: space efficient int encoding #9728

Merged
merged 7 commits into from Jun 21, 2016
Merged
1 change: 1 addition & 0 deletions src/include/Makefile.am
Expand Up @@ -96,6 +96,7 @@ noinst_HEADERS += \
include/rangeset.h \
include/rados.h \
include/rbd_types.h \
include/small_encoding.h \
include/statlite.h \
include/str_list.h \
include/str_map.h \
Expand Down
277 changes: 277 additions & 0 deletions src/include/small_encoding.h
@@ -0,0 +1,277 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#ifndef CEPH_SMALL_ENCODING_H
#define CEPH_SMALL_ENCODING_H

#include "include/buffer.h"
#include "include/int_types.h"

// varint encoding
//
// high bit of every byte indicates whether another byte follows.
// Append v to bl as a base-128 varint: seven payload bits per byte, least
// significant group first, with 0x80 set on every byte except the last.
// The loop ends only once v has shifted down to zero, so v is presumably
// expected to be non-negative.
template<typename T>
inline void small_encode_varint(T v, bufferlist& bl) {
  for (;;) {
    uint8_t out = v & 0x7f;
    v >>= 7;
    if (!v) {
      // last group: continuation bit stays clear
      ::encode(out, bl);
      return;
    }
    out |= 0x80;
    ::encode(out, bl);
  }
}

// Read a base-128 varint from p into v.  Each byte contributes its low seven
// bits, least significant group first; a set 0x80 bit means another byte
// follows.
template<typename T>
inline void small_decode_varint(T& v, bufferlist::iterator& p)
{
  uint8_t cur;
  ::decode(cur, p);
  v = cur & 0x7f;
  // bit = position in v where the next 7-bit group lands
  for (int bit = 7; cur & 0x80; bit += 7) {
    ::decode(cur, p);
    v |= static_cast<T>(cur & 0x7f) << bit;
  }
}

// signed varint encoding
//
// low bit = 1 = negative, 0 = positive
// high bit of every byte indicates whether another byte follows.
// Append v to bl as a sign+magnitude varint.
//
// First byte: bit 0 = sign (1 = negative), bits 1-6 = low six bits of |v|,
// bit 7 = another byte follows.  Every later byte carries seven more bits of
// |v| plus the continuation bit.
//
// The magnitude is computed in unsigned 64-bit arithmetic so that the most
// negative value of a signed T does not overflow when negated (negating it
// in T is undefined behaviour and could leave the loop below spinning on a
// value that never reaches zero).
template<typename T>
inline void small_encode_signed_varint(T v, bufferlist& bl) {
  uint8_t byte = 0;
  // conversion is modulo 2^64, so a negative v becomes 2^64 - |v|
  uint64_t mag = v;
  if (v < 0) {
    mag = 0 - mag;  // == |v|, well defined for every value of T
    byte = 1;
  }
  byte |= (mag & 0x3f) << 1;
  mag >>= 6;
  while (mag) {
    byte |= 0x80;
    ::encode(byte, bl);
    byte = (mag & 0x7f);
    mag >>= 7;
  }
  ::encode(byte, bl);
}

// Read a sign+magnitude varint from p into v.  Bit 0 of the first byte is the
// sign flag, bits 1-6 are the low six magnitude bits, and each following byte
// adds seven more; 0x80 on a byte means another byte follows.
template<typename T>
inline void small_decode_signed_varint(T& v, bufferlist::iterator& p)
{
  uint8_t cur;
  ::decode(cur, p);
  const bool is_negative = (cur & 1) != 0;
  v = (cur & 0x7e) >> 1;
  // the first byte supplied six magnitude bits, so the next group starts at 6
  for (int bit = 6; cur & 0x80; bit += 7) {
    ::decode(cur, p);
    v |= static_cast<T>(cur & 0x7f) << bit;
  }
  if (is_negative)
    v = -v;
}

// varint + lowz encoding
//
// first(low) 2 bits = how many low zero bits (nibbles)
// high bit of each byte = another byte follows
// (so, 5 bits data in first byte, 7 bits data thereafter)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also make 00 = no low zeros, 01 = 6 low zeros, 02 = 10 low zeros, 03 = 14 low zeros, or something like that if we think the <6 or even <12 low zero bits is less common to optimize for. Remember block_size is 4096 = 12 bits.

// Append v to bl as a varint with low-zero-nibble compression.
//
// First byte: bits 0-1 = number of low zero nibbles stripped from v (0-3),
// bits 2-6 = low five bits of the stripped value, bit 7 = another byte
// follows.  Later bytes carry seven bits each plus the continuation bit.
template<typename T>
inline void small_encode_varint_lowz(T v, bufferlist& bl) {
  // count whole zero nibbles at the bottom of v, capped at what two bits hold
  int nibbles = 0;
  if (v) {
    nibbles = ctz(v) / 4;
    if (nibbles > 3)
      nibbles = 3;
  }
  v >>= nibbles * 4;

  uint8_t out = nibbles;
  out |= (static_cast<uint8_t>(v) << 2) & 0x7c;
  for (v >>= 5; v; v >>= 7) {
    out |= 0x80;
    ::encode(out, bl);
    out = v & 0x7f;
  }
  ::encode(out, bl);
}

// Read a low-zero-nibble varint from p into v.  The low two bits of the first
// byte give the number of zero nibbles to restore below the value, bits 2-6
// hold its low five bits, and each following byte adds seven more; 0x80 on a
// byte means another byte follows.
template<typename T>
inline void small_decode_varint_lowz(T& v, bufferlist::iterator& p)
{
  uint8_t cur;
  ::decode(cur, p);
  const int zero_bits = (cur & 3) * 4;
  v = ((cur >> 2) & 0x1f) << zero_bits;
  // five payload bits consumed so far, sitting above the restored zero bits
  for (int bit = zero_bits + 5; cur & 0x80; bit += 7) {
    ::decode(cur, p);
    v |= static_cast<T>(cur & 0x7f) << bit;
  }
}

// signed varint + lowz encoding
//
// first low bit = 1 for negative, 0 for positive
// next 2 bits = how many low zero bits (nibbles)
// high bit of each byte = another byte follows
// (so, 4 bits data in first byte, 7 bits data thereafter)
template<typename T>
inline void small_encode_signed_varint_lowz(T v, bufferlist& bl) {
uint8_t byte = 0;
if (v < 0) {
v = -v;
byte = 1;
}
int lowz = v ? (ctz(v) / 4) : 0;
lowz = std::min(lowz, 3);
byte |= lowz << 1;
v >>= lowz * 4;
byte |= (((uint8_t)v << 3) & 0x78);
v >>= 4;
while (v) {
byte |= 0x80;
::encode(byte, bl);
byte = (v & 0x7f);
v >>= 7;
}
::encode(byte, bl);
}

// Read a sign+magnitude low-zero-nibble varint from p into v.  In the first
// byte, bit 0 is the sign flag, bits 1-2 give the number of zero nibbles to
// restore below the magnitude and bits 3-6 hold its low four bits; each
// following byte adds seven more.  0x80 on a byte means another byte follows.
template<typename T>
inline void small_decode_signed_varint_lowz(T& v, bufferlist::iterator& p)
{
  uint8_t cur;
  ::decode(cur, p);
  const bool is_negative = (cur & 1) != 0;
  // nibble count lives in bits 1-2; four bits per nibble
  const int zero_bits = ((cur >> 1) & 3) * 4;
  v = ((cur >> 3) & 0xf) << zero_bits;
  for (int bit = zero_bits + 4; cur & 0x80; bit += 7) {
    ::decode(cur, p);
    v |= static_cast<T>(cur & 0x7f) << bit;
  }
  if (is_negative)
    v = -v;
}


// LBA
//
// first 1-3 bits = how many low zero bits
// *0 = 12 (common 4 K alignment case)
// *01 = 16
// *011 = 20
// *111 = 0 (byte aligned; no low zero bits are dropped)
// then 28-30 bits of data
// then last bit = another byte follows
// high bit of each subsequent byte = another byte follows
// Append v to bl in the LBA encoding: one 32-bit word, optionally followed
// by varint-style continuation bytes.
//
// The low 1-3 bits of the word are a tag saying how many low zero bits were
// dropped from v before it was stored:
//   *0   = 12 bits dropped (30 data bits in the word)
//   *01  = 16 bits dropped (29 data bits)
//   *011 = 20 bits dropped (28 data bits)
//   *111 = nothing dropped (28 data bits)
// Bit 31 of the word is set when more bytes follow; each following byte holds
// seven more bits with 0x80 as its own continuation flag.
inline void small_encode_lba(uint64_t v, bufferlist& bl) {
  int low_zero_nibbles = v ? (int)(ctz(v) / 4) : 0;
  int pos;        // width of the tag == position of the first data bit
  uint32_t word;  // starts out holding just the tag
  int t = low_zero_nibbles - 3;
  if (t < 0) {
    // fewer than 12 low zero bits: store the value unshifted
    pos = 3;
    word = 0x7;
  } else {
    // 12, 16 or 20 low zero bits are dropped.  Values with even more low
    // zeros still use the 20-bit tag, so exactly 20 bits must be dropped for
    // them too -- the decoder shifts back by the amount the tag names.
    if (t > 2)
      t = 2;
    v >>= (t + 3) * 4;
    pos = t + 1;
    word = (1 << t) - 1;
  }
  word |= (v << pos) & 0x7fffffff;
  v >>= 31 - pos;
  if (!v) {
    ::encode(word, bl);
    return;
  }
  word |= 0x80000000;
  ::encode(word, bl);
  // remaining bits use the same byte layout as small_encode_varint
  uint8_t byte = v & 0x7f;
  v >>= 7;
  while (v) {
    byte |= 0x80;
    ::encode(byte, bl);
    byte = (v & 0x7f);
    v >>= 7;
  }
  ::encode(byte, bl);
}

// Read an LBA-encoded value from p into v.  The low bits of the leading
// 32-bit word are a tag selecting how far the stored data bits are shifted
// left (12, 16, 20 or 0); bit 31 of the word, and 0x80 of every following
// byte, signal that another byte with seven more bits follows.
inline void small_decode_lba(uint64_t& v, bufferlist::iterator& p) {
  uint32_t word;
  ::decode(word, p);
  int next_bit;  // where the first continuation byte's bits land in v
  if ((word & 1) == 0) {
    // tag *0: 30 data bits starting at word bit 1, value bit 12
    v = static_cast<uint64_t>(word & 0x7ffffffe) << 11;
    next_bit = 42;
  } else if ((word & 2) == 0) {
    // tag *01: 29 data bits starting at word bit 2, value bit 16
    v = static_cast<uint64_t>(word & 0x7ffffffc) << 14;
    next_bit = 45;
  } else if ((word & 4) == 0) {
    // tag *011: 28 data bits starting at word bit 3, value bit 20
    v = static_cast<uint64_t>(word & 0x7ffffff8) << 17;
    next_bit = 48;
  } else {
    // tag *111: 28 data bits, value stored unshifted
    v = static_cast<uint64_t>(word & 0x7ffffff8) >> 3;
    next_bit = 28;
  }
  // top byte of the word: its 0x80 bit is the word's continuation flag
  uint8_t cur = word >> 24;
  for (; cur & 0x80; next_bit += 7) {
    ::decode(cur, p);
    v |= static_cast<uint64_t>(cur & 0x7f) << next_bit;
  }
}


// short bufferptrs, bufferlists, strings
// Append bp to bl, prefixed with its length in the low-zero-nibble varint
// encoding.
template<typename T>
inline void small_encode_buf_lowz(const T& bp, bufferlist& bl) {
  const size_t len = bp.length();
  small_encode_varint_lowz(len, bl);  // length prefix first
  bl.append(bp);                      // then the payload itself
}
template<typename T>
inline void small_decode_buf_lowz(T& bp, bufferlist::iterator& p) {
size_t l;
small_decode_varint_lowz(l, p);
p.copy(l, bp);
}

// STL containers

// Append a vector to bl: the element count as a varint, followed by each
// element written through its own encode(bufferlist&) member, in order.
template<typename T>
inline void small_encode_obj(const std::vector<T>& v, bufferlist& bl) {
  const size_t count = v.size();
  small_encode_varint(count, bl);
  for (const auto& item : v) {
    item.encode(bl);
  }
}
// Replace the contents of v with a vector read from p: a varint element
// count, then that many elements, each default-constructed and filled in by
// its own decode(bufferlist::iterator&) member.
template<typename T>
inline void small_decode_obj(std::vector<T>& v, bufferlist::iterator& p) {
  size_t remaining;
  small_decode_varint(remaining, p);
  v.clear();
  for (; remaining != 0; --remaining) {
    v.push_back(T());
    v.back().decode(p);
  }
}

#endif
13 changes: 7 additions & 6 deletions src/os/bluestore/BlueStore.cc
Expand Up @@ -2807,7 +2807,7 @@ int BlueStore::fsck()
}
dout(20) << __func__ << " wal " << wt.seq
<< " ops " << wt.ops.size()
<< " released " << wt.released << dendl;
<< " released 0x" << std::hex << wt.released << std::dec << dendl;
used_blocks.insert(wt.released);
}
}
Expand All @@ -2824,17 +2824,18 @@ int BlueStore::fsck()
interval_set<uint64_t> free, overlap;
free.insert(offset, length);
overlap.intersection_of(free, used_blocks);
derr << __func__ << " overlap: " << overlap << dendl;
derr << __func__ << " overlap: 0x" << std::hex << overlap
<< std::dec << dendl;
++errors;
continue;
}
used_blocks.insert(offset, length);
}
if (!used_blocks.contains(0, bdev->get_size())) {
derr << __func__ << " leaked some space; free+used = "
derr << __func__ << " leaked some space; free+used = 0x" << std::hex
<< used_blocks
<< " != expected 0~" << bdev->get_size()
<< dendl;
<< " != expected 0x0~" << bdev->get_size()
<< std::dec << dendl;
++errors;
}
}
Expand Down Expand Up @@ -5226,7 +5227,7 @@ void BlueStore::_dump_blob_map(BlobMap &bm, int log_level)
{
for (auto& b : bm.blob_map) {
dout(log_level) << __func__ << " " << b << dendl;
if (b.blob.has_csum_data()) {
if (b.blob.has_csum()) {
vector<uint64_t> v;
unsigned n = b.blob.get_csum_count();
for (unsigned i = 0; i < n; ++i)
Expand Down