Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cachelib/allocator/CCacheAllocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
currentChunksIndex_(0) {
auto& currentChunks = chunks_[currentChunksIndex_];
for (auto chunk : *object.chunks()) {
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
// TODO : pass multi-tier flag when compact cache supports multi-tier config
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk), false));
}
}

Expand Down Expand Up @@ -97,7 +98,8 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {

std::lock_guard<std::mutex> guard(resizeLock_);
for (auto chunk : getCurrentChunks()) {
object.chunks()->push_back(allocator_.compress(chunk).saveState());
// TODO : pass multi-tier flag when compact cache supports multi-tier config
object.chunks()->push_back(allocator_.compress(chunk, false).saveState());
}
return object;
}
Expand Down
4 changes: 2 additions & 2 deletions cachelib/allocator/CacheAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1362,8 +1362,8 @@ class CacheAllocator : public CacheBase {
sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) +
sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item),
"vtable overhead");
// XXX: this will fail due to CompressedPtr change
// static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
// Check for CompressedPtr single/multi tier support
static_assert(32 == sizeof(Item), "item overhead is 32 bytes");

// make sure there is no overhead in ChainedItem on top of a regular Item
static_assert(sizeof(Item) == sizeof(ChainedItem),
Expand Down
78 changes: 42 additions & 36 deletions cachelib/allocator/memory/CompressedPtr.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,22 @@ class SlabAllocator;
template <typename PtrType, typename AllocatorContainer>
class PtrCompressor;

// the following are for pointer compression for the memory allocator. We
// compress pointers by storing the slab index and the alloc index of the
// allocation inside the slab. With slab worth kNumSlabBits of data, if we
// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
// bits for storing the alloc index. This leaves the remaining (32 -
// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
// of memory in slabs and index anything more than 64 byte allocations inside
// the slab using a 32 bit representation.
//
// This CompressedPtr makes decompression fast by staying away from division and
// modulo arithmetic and doing those during the compression time. We most often
// decompress a CompressedPtr than compress a pointer while creating one.
// modulo arithmetic and doing those during the compression time. We most often
// decompress a CompressedPtr than compress a pointer while creating one. This
// is used for pointer compression by the memory allocator.

// We compress pointers by storing the tier index, slab index and alloc index of
// the allocation inside the slab. With slab worth kNumSlabBits (22 bits) of data,
// if we have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
// = 16 bits for storing the alloc index. The tier id occupies the 32nd bit only
// since its value cannot exceed kMaxTiers (2). This leaves the remaining
// (32 - (kNumSlabBits - 6) - 1 bit for tier id) = 15 bits for the slab index.
// Hence we can index 128 GiB of memory in slabs per tier and index anything more
// than 64 byte allocations inside the slab using a 32 bit representation.
class CACHELIB_PACKED_ATTR CompressedPtr {
public:
using PtrType = uint64_t;
using PtrType = uint32_t;
// Thrift doesn't support unsigned type
using SerializedPtrType = int64_t;

Expand All @@ -65,9 +66,9 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
}

// maximum adressable memory for pointer compression to work.
// maximum addressable memory for pointer compression to work.
static constexpr size_t getMaxAddressableSize() noexcept {
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits + 1);
}

// default construct to nullptr.
Expand All @@ -92,8 +93,8 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
PtrType ptr_{kNull};

// create a compressed pointer for a valid memory allocation.
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, TierId tid = 0)
: ptr_(compress(slabIdx, allocIdx, tid)) {}
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid = 0)
: ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}

constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}

Expand All @@ -103,45 +104,48 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
static constexpr unsigned int kNumAllocIdxBits =
Slab::kNumSlabBits - Slab::kMinAllocPower;

// Use topmost 32 bits for TierId
// XXX: optimize
static constexpr unsigned int kNumTierIdxOffset = 32;
// Use 32nd bit position for TierId
static constexpr unsigned int kNumTierIdxOffset = 31;

static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;

// kNumTierIdxBits most significant bits
static constexpr PtrType kTierIdxMask = (((PtrType)1 << kNumTierIdxOffset) - 1) << (NumBits<PtrType>::value - kNumTierIdxOffset);
static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;

// Number of bits for the slab index. This will be the top 16 bits of the
// compressed ptr.
static constexpr unsigned int kNumSlabIdxBits =
NumBits<PtrType>::value - kNumTierIdxOffset - kNumAllocIdxBits;
kNumTierIdxOffset - kNumAllocIdxBits;

// Compress the given slabIdx and allocIdx into a 64-bit compressed
// pointer.
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, TierId tid) noexcept {
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid) noexcept {
XDCHECK_LE(allocIdx, kAllocIdxMask);
if (!isMultiTiered) {
XDCHECK_LT(slabIdx, (1u << (kNumSlabIdxBits+1)) - 1);
return (slabIdx << kNumAllocIdxBits) + allocIdx;
}
XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
return (static_cast<uint64_t>(tid) << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
}

// Get the slab index of the compressed ptr
uint32_t getSlabIdx() const noexcept {
uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
XDCHECK(!isNull());
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
}

// Get the allocation index of the compressed ptr
uint32_t getAllocIdx() const noexcept {
uint32_t getAllocIdx(bool isMultiTiered) const noexcept {
XDCHECK(!isNull());
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
return static_cast<uint32_t>(noTierIdPtr & kAllocIdxMask);
}

uint32_t getTierId() const noexcept {
uint32_t getTierId(bool isMultiTiered) const noexcept {
XDCHECK(!isNull());
return static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset);
return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
}

void setTierId(TierId tid) noexcept {
Expand All @@ -160,11 +164,11 @@ class SingleTierPtrCompressor {
: allocator_(allocator) {}

const CompressedPtr compress(const PtrType* uncompressed) const {
return allocator_.compress(uncompressed);
return allocator_.compress(uncompressed, false);
}

PtrType* unCompress(const CompressedPtr compressed) const {
return static_cast<PtrType*>(allocator_.unCompress(compressed));
return static_cast<PtrType*>(allocator_.unCompress(compressed, false));
}

bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
Expand Down Expand Up @@ -196,19 +200,21 @@ class PtrCompressor {
break;
}

auto cptr = allocators_[tid]->compress(uncompressed);
cptr.setTierId(tid);

bool isMultiTiered = allocators_.size() > 1;
auto cptr = allocators_[tid]->compress(uncompressed, isMultiTiered);
if (isMultiTiered) { // config has multiple tiers
cptr.setTierId(tid);
}
return cptr;
}

PtrType* unCompress(const CompressedPtr compressed) const {
if (compressed.isNull()) {
return nullptr;
}

auto &allocator = *allocators_[compressed.getTierId()];
return static_cast<PtrType*>(allocator.unCompress(compressed));
bool isMultiTiered = allocators_.size() > 1;
auto &allocator = *allocators_[compressed.getTierId(isMultiTiered)];
return static_cast<PtrType*>(allocator.unCompress(compressed, isMultiTiered));
}

bool operator==(const PtrCompressor& rhs) const noexcept {
Expand Down
8 changes: 4 additions & 4 deletions cachelib/allocator/memory/MemoryAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,8 @@ class MemoryAllocator {
// as the original pointer is valid.
//
// @throw std::invalid_argument if the ptr is invalid.
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
return slabAllocator_.compress(ptr);
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
return slabAllocator_.compress(ptr, isMultiTiered);
}

// retrieve the raw pointer corresponding to the compressed pointer. This is
Expand All @@ -555,8 +555,8 @@ class MemoryAllocator {
// @return the raw pointer corresponding to this compressed pointer.
//
// @throw std::invalid_argument if the compressed pointer is invalid.
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
return slabAllocator_.unCompress(cPtr);
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr, bool isMultiTiered) const {
return slabAllocator_.unCompress(cPtr, isMultiTiered);
}

// a special implementation of pointer compression for benchmarking purposes.
Expand Down
10 changes: 5 additions & 5 deletions cachelib/allocator/memory/SlabAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class SlabAllocator {
// the corresponding memory allocator. trying to inline this just increases
// the code size and does not move the needle on the benchmarks much.
// Calling this with invalid input in optimized build is undefined behavior.
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
if (ptr == nullptr) {
return CompressedPtr{};
}
Expand All @@ -246,19 +246,19 @@ class SlabAllocator {
static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
reinterpret_cast<const uint8_t*>(slab)) /
allocSize;
return CompressedPtr{slabIndex, allocIdx};
return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
}

// uncompress the point and return the raw ptr. This function never throws
// in optimized build and assumes that the caller is responsible for calling
// it with a valid compressed pointer.
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr, bool isMultiTiered) const {
if (ptr.isNull()) {
return nullptr;
}

const SlabIdx slabIndex = ptr.getSlabIdx();
const uint32_t allocIdx = ptr.getAllocIdx();
const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
const uint32_t allocIdx = ptr.getAllocIdx(isMultiTiered);
const Slab* slab = &slabMemoryStart_[slabIndex];

#ifndef NDEBUG
Expand Down
6 changes: 3 additions & 3 deletions cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,13 +401,13 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
for (const auto& pool : poolAllocs) {
const auto& allocs = pool.second;
for (const auto* alloc : allocs) {
CompressedPtr ptr = m.compress(alloc);
CompressedPtr ptr = m.compress(alloc, false);
ASSERT_FALSE(ptr.isNull());
ASSERT_EQ(alloc, m.unCompress(ptr));
ASSERT_EQ(alloc, m.unCompress(ptr, false));
}
}

ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr, false), false));
}

TEST_F(MemoryAllocatorTest, Restorable) {
Expand Down
1 change: 0 additions & 1 deletion cachelib/allocator/tests/AllocatorMemoryTiersTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
stats = allocator->getGlobalCacheStats();
slabStats = allocator->getAllocationClassStats(0,0,cid);
}
ASSERT_GE(slabStats.approxFreePercent,9.5);

auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict);
auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote);
Expand Down
26 changes: 11 additions & 15 deletions cachelib/allocator/tests/BaseAllocatorTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -4928,13 +4928,13 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
/* TODO: we adjust alloc size by -20 or -40 due to increased CompressedPtr size */
auto allocateItem1 =
std::async(std::launch::async, allocFn, std::string{"hello"},
std::vector<uint32_t>{100 - 20, 500, 1000});
std::vector<uint32_t>{100, 500, 1000});
auto allocateItem2 =
std::async(std::launch::async, allocFn, std::string{"world"},
std::vector<uint32_t>{200- 40, 1000, 2000});
std::vector<uint32_t>{200, 1000, 2000});
auto allocateItem3 =
std::async(std::launch::async, allocFn, std::string{"yolo"},
std::vector<uint32_t>{100-20, 200, 5000});
std::vector<uint32_t>{100, 200, 5000});

auto slabRelease = std::async(releaseFn);
slabRelease.wait();
Expand Down Expand Up @@ -5772,9 +5772,7 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
AllocatorT alloc(config);
const size_t numBytes = alloc.getCacheMemoryStats().cacheSize;
const auto poolSize = numBytes / 2;
// TODO: becasue CompressedPtr size is increased, key1 must be of equal
// size with key2
std::string key1 = "key1";
std::string key1 = "key1-some-random-string-here";
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
auto handle1 = alloc.allocate(poolId, key1, 1);
alloc.insert(handle1);
Expand Down Expand Up @@ -5831,37 +5829,35 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
auto handle1 = alloc.allocate(poolId, key1, 1);
alloc.insert(handle1);
// TODO: key2 must be the same length as the rest due to increased
// CompressedPtr size
auto handle2 = alloc.allocate(poolId, "key2-some-random-string-here", 1);
auto handle2 = alloc.allocate(poolId, "key2", 1);
alloc.insert(handle2);
ASSERT_NE(alloc.find("key2-some-random-string-here"), nullptr);
ASSERT_NE(alloc.find("key2"), nullptr);
sleep(9);

ASSERT_NE(alloc.find(key1), nullptr);
auto tail = alloc.dumpEvictionIterator(
poolId, 1 /* second allocation class, TODO: CompressedPtr */, 3 /* last 3 items */);
poolId, 0 /* first allocation class */, 3 /* last 3 items */);
// item 1 gets promoted (age 9), tail age 9, lru refresh time 3 (default)
EXPECT_TRUE(checkItemKey(tail[1], key1));

auto handle3 = alloc.allocate(poolId, key3, 1);
alloc.insert(handle3);

sleep(6);
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
3 /* last 3 items */);
ASSERT_NE(alloc.find(key3), nullptr);
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
3 /* last 3 items */);
// tail age 15, lru refresh time 6 * 0.7 = 4.2 = 4,
// item 3 age 6 gets promoted
EXPECT_TRUE(checkItemKey(tail[1], key1));

alloc.remove("key2-some-random-string-here");
alloc.remove("key2");
sleep(3);

ASSERT_NE(alloc.find(key3), nullptr);
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
tail = alloc.dumpEvictionIterator(poolId, 0 /* second allocation class */,
2 /* last 2 items */);
// tail age 9, lru refresh time 4, item 3 age 3, not promoted
EXPECT_TRUE(checkItemKey(tail[1], key3));
Expand Down
6 changes: 3 additions & 3 deletions cachelib/benchmarks/PtrCompressionBench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void buildAllocs(size_t poolSize) {
void* alloc = ma->allocate(pid, size);
XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
if (alloc != nullptr) {
validAllocs.push_back({alloc, ma->compress(alloc)});
validAllocs.push_back({alloc, ma->compress(alloc, false)});
validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
numAllocations++;
}
Expand All @@ -83,7 +83,7 @@ BENCHMARK(CompressionAlt) {

BENCHMARK_RELATIVE(Compression) {
for (const auto& alloc : validAllocs) {
CompressedPtr c = m->compress(alloc.first);
CompressedPtr c = m->compress(alloc.first, false);
folly::doNotOptimizeAway(c);
}
}
Expand All @@ -97,7 +97,7 @@ BENCHMARK(DeCompressAlt) {

BENCHMARK_RELATIVE(DeCompress) {
for (const auto& alloc : validAllocs) {
void* ptr = m->unCompress(alloc.second);
void* ptr = m->unCompress(alloc.second, false);
folly::doNotOptimizeAway(ptr);
}
}
Expand Down
1 change: 1 addition & 0 deletions run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Newline separated list of tests to ignore
BLACKLIST="allocator-test-NavySetupTest
allocator-test-NvmCacheTests
shm-test-test_page_size"

if [ "$1" == "long" ]; then
Expand Down