Skip to content

Commit 0ec1d07

Browse files
committed
added ability for compressed pointer to use full 32 bits for addressing in single tier mode and use 31 bits for addressing in multi-tier mode
1 parent 9bb4db8 commit 0ec1d07

File tree

9 files changed

+75
-70
lines changed

9 files changed

+75
-70
lines changed

cachelib/allocator/CCacheAllocator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
3636
currentChunksIndex_(0) {
3737
auto& currentChunks = chunks_[currentChunksIndex_];
3838
for (auto chunk : *object.chunks()) {
39-
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
39+
// TODO: pass multi-tier flag when compact cache supports multi-tier config
40+
currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk), false));
4041
}
4142
}
4243

@@ -97,7 +98,8 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {
9798

9899
std::lock_guard<std::mutex> guard(resizeLock_);
99100
for (auto chunk : getCurrentChunks()) {
100-
object.chunks()->push_back(allocator_.compress(chunk).saveState());
101+
// TODO: pass multi-tier flag when compact cache supports multi-tier config
102+
object.chunks()->push_back(allocator_.compress(chunk, false).saveState());
101103
}
102104
return object;
103105
}

cachelib/allocator/CacheAllocator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,8 +1362,8 @@ class CacheAllocator : public CacheBase {
13621362
sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) +
13631363
sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item),
13641364
"vtable overhead");
1365-
// XXX: this will fail due to CompressedPtr change
1366-
// static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
1365+
// Check for CompressedPtr single/multi tier support
1366+
static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
13671367

13681368
// make sure there is no overhead in ChainedItem on top of a regular Item
13691369
static_assert(sizeof(Item) == sizeof(ChainedItem),

cachelib/allocator/memory/CompressedPtr.h

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,22 @@ class SlabAllocator;
3030
template <typename PtrType, typename AllocatorContainer>
3131
class PtrCompressor;
3232

33-
// the following are for pointer compression for the memory allocator. We
34-
// compress pointers by storing the slab index and the alloc index of the
35-
// allocation inside the slab. With slab worth kNumSlabBits of data, if we
36-
// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
37-
// bits for storing the alloc index. This leaves the remaining (32 -
38-
// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
39-
// of memory in slabs and index anything more than 64 byte allocations inside
40-
// the slab using a 32 bit representation.
41-
//
4233
// This CompressedPtr makes decompression fast by staying away from division and
43-
// modulo arithmetic and doing those during the compression time. We most often
44-
// decompress a CompressedPtr than compress a pointer while creating one.
34+
// modulo arithmetic and doing those during the compression time. We most often
35+
// decompress a CompressedPtr than compress a pointer while creating one. This
36+
// is used for pointer compression by the memory allocator.
37+
38+
// We compress pointers by storing the tier index, slab index and alloc index of
39+
// the allocation inside the slab. With slab worth kNumSlabBits (22 bits) of data,
40+
// if we have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
41+
// = 16 bits for storing the alloc index. The tier id occupies the 32nd bit only
42+
// since its value cannot exceed kMaxTiers (2). This leaves the remaining
43+
// (32 - (kNumSlabBits - 6) - 1 bit for tier id) = 15 bits for the slab index.
44+
// Hence we can index 128 GiB of memory in slabs per tier and index anything more
45+
// than 64 byte allocations inside the slab using a 32 bit representation.
4546
class CACHELIB_PACKED_ATTR CompressedPtr {
4647
public:
47-
using PtrType = uint64_t;
48+
using PtrType = uint32_t;
4849
// Thrift doesn't support unsigned type
4950
using SerializedPtrType = int64_t;
5051

@@ -65,9 +66,9 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
6566
return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
6667
}
6768

68-
// maximum adressable memory for pointer compression to work.
69+
// maximum addressable memory for pointer compression to work.
6970
static constexpr size_t getMaxAddressableSize() noexcept {
70-
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
71+
return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits + 1);
7172
}
7273

7374
// default construct to nullptr.
@@ -92,8 +93,8 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
9293
PtrType ptr_{kNull};
9394

9495
// create a compressed pointer for a valid memory allocation.
95-
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, TierId tid = 0)
96-
: ptr_(compress(slabIdx, allocIdx, tid)) {}
96+
CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid = 0)
97+
: ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}
9798

9899
constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
99100

@@ -103,45 +104,48 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
103104
static constexpr unsigned int kNumAllocIdxBits =
104105
Slab::kNumSlabBits - Slab::kMinAllocPower;
105106

106-
// Use topmost 32 bits for TierId
107-
// XXX: optimize
108-
static constexpr unsigned int kNumTierIdxOffset = 32;
107+
// Use 32nd bit position for TierId
108+
static constexpr unsigned int kNumTierIdxOffset = 31;
109109

110110
static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;
111111

112112
// kNumTierIdxBits most significant bits
113-
static constexpr PtrType kTierIdxMask = (((PtrType)1 << kNumTierIdxOffset) - 1) << (NumBits<PtrType>::value - kNumTierIdxOffset);
113+
static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
114114

115115
// Number of bits for the slab index. This will be the top 16 bits of the
116116
// compressed ptr.
117117
static constexpr unsigned int kNumSlabIdxBits =
118-
NumBits<PtrType>::value - kNumTierIdxOffset - kNumAllocIdxBits;
118+
kNumTierIdxOffset - kNumAllocIdxBits;
119119

120120
// Compress the given slabIdx and allocIdx into a 64-bit compressed
121121
// pointer.
122-
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, TierId tid) noexcept {
122+
static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid) noexcept {
123123
XDCHECK_LE(allocIdx, kAllocIdxMask);
124+
if (!isMultiTiered) {
125+
XDCHECK_LT(slabIdx, (1u << (kNumSlabIdxBits+1)) - 1);
126+
return (slabIdx << kNumAllocIdxBits) + allocIdx;
127+
}
124128
XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
125129
return (static_cast<uint64_t>(tid) << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
126130
}
127131

128132
// Get the slab index of the compressed ptr
129-
uint32_t getSlabIdx() const noexcept {
133+
uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
130134
XDCHECK(!isNull());
131-
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
135+
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
132136
return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
133137
}
134138

135139
// Get the allocation index of the compressed ptr
136-
uint32_t getAllocIdx() const noexcept {
140+
uint32_t getAllocIdx(bool isMultiTiered) const noexcept {
137141
XDCHECK(!isNull());
138-
auto noTierIdPtr = ptr_ & ~kTierIdxMask;
142+
auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
139143
return static_cast<uint32_t>(noTierIdPtr & kAllocIdxMask);
140144
}
141145

142-
uint32_t getTierId() const noexcept {
146+
uint32_t getTierId(bool isMultiTiered) const noexcept {
143147
XDCHECK(!isNull());
144-
return static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset);
148+
return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
145149
}
146150

147151
void setTierId(TierId tid) noexcept {
@@ -160,11 +164,11 @@ class SingleTierPtrCompressor {
160164
: allocator_(allocator) {}
161165

162166
const CompressedPtr compress(const PtrType* uncompressed) const {
163-
return allocator_.compress(uncompressed);
167+
return allocator_.compress(uncompressed, false);
164168
}
165169

166170
PtrType* unCompress(const CompressedPtr compressed) const {
167-
return static_cast<PtrType*>(allocator_.unCompress(compressed));
171+
return static_cast<PtrType*>(allocator_.unCompress(compressed, false));
168172
}
169173

170174
bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
@@ -196,19 +200,21 @@ class PtrCompressor {
196200
break;
197201
}
198202

199-
auto cptr = allocators_[tid]->compress(uncompressed);
200-
cptr.setTierId(tid);
201-
203+
bool isMultiTiered = allocators_.size() > 1;
204+
auto cptr = allocators_[tid]->compress(uncompressed, isMultiTiered);
205+
if (isMultiTiered) { // config has multiple tiers
206+
cptr.setTierId(tid);
207+
}
202208
return cptr;
203209
}
204210

205211
PtrType* unCompress(const CompressedPtr compressed) const {
206212
if (compressed.isNull()) {
207213
return nullptr;
208214
}
209-
210-
auto &allocator = *allocators_[compressed.getTierId()];
211-
return static_cast<PtrType*>(allocator.unCompress(compressed));
215+
bool isMultiTiered = allocators_.size() > 1;
216+
auto &allocator = *allocators_[compressed.getTierId(isMultiTiered)];
217+
return static_cast<PtrType*>(allocator.unCompress(compressed, isMultiTiered));
212218
}
213219

214220
bool operator==(const PtrCompressor& rhs) const noexcept {

cachelib/allocator/memory/MemoryAllocator.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,8 @@ class MemoryAllocator {
543543
// as the original pointer is valid.
544544
//
545545
// @throw std::invalid_argument if the ptr is invalid.
546-
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
547-
return slabAllocator_.compress(ptr);
546+
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
547+
return slabAllocator_.compress(ptr, isMultiTiered);
548548
}
549549

550550
// retrieve the raw pointer corresponding to the compressed pointer. This is
@@ -555,8 +555,8 @@ class MemoryAllocator {
555555
// @return the raw pointer corresponding to this compressed pointer.
556556
//
557557
// @throw std::invalid_argument if the compressed pointer is invalid.
558-
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
559-
return slabAllocator_.unCompress(cPtr);
558+
void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr, bool isMultiTiered) const {
559+
return slabAllocator_.unCompress(cPtr, isMultiTiered);
560560
}
561561

562562
// a special implementation of pointer compression for benchmarking purposes.

cachelib/allocator/memory/SlabAllocator.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class SlabAllocator {
225225
// the corresponding memory allocator. trying to inline this just increases
226226
// the code size and does not move the needle on the benchmarks much.
227227
// Calling this with invalid input in optimized build is undefined behavior.
228-
CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
228+
CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
229229
if (ptr == nullptr) {
230230
return CompressedPtr{};
231231
}
@@ -246,19 +246,19 @@ class SlabAllocator {
246246
static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
247247
reinterpret_cast<const uint8_t*>(slab)) /
248248
allocSize;
249-
return CompressedPtr{slabIndex, allocIdx};
249+
return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
250250
}
251251

252252
// uncompress the point and return the raw ptr. This function never throws
253253
// in optimized build and assumes that the caller is responsible for calling
254254
// it with a valid compressed pointer.
255-
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
255+
void* CACHELIB_INLINE unCompress(const CompressedPtr ptr, bool isMultiTiered) const {
256256
if (ptr.isNull()) {
257257
return nullptr;
258258
}
259259

260-
const SlabIdx slabIndex = ptr.getSlabIdx();
261-
const uint32_t allocIdx = ptr.getAllocIdx();
260+
const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
261+
const uint32_t allocIdx = ptr.getAllocIdx(isMultiTiered);
262262
const Slab* slab = &slabMemoryStart_[slabIndex];
263263

264264
#ifndef NDEBUG

cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,13 +401,13 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
401401
for (const auto& pool : poolAllocs) {
402402
const auto& allocs = pool.second;
403403
for (const auto* alloc : allocs) {
404-
CompressedPtr ptr = m.compress(alloc);
404+
CompressedPtr ptr = m.compress(alloc, false);
405405
ASSERT_FALSE(ptr.isNull());
406-
ASSERT_EQ(alloc, m.unCompress(ptr));
406+
ASSERT_EQ(alloc, m.unCompress(ptr, false));
407407
}
408408
}
409409

410-
ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
410+
ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr, false), false));
411411
}
412412

413413
TEST_F(MemoryAllocatorTest, Restorable) {

cachelib/allocator/tests/BaseAllocatorTest.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4928,13 +4928,13 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
49284928
/* TODO: we adjust alloc size by -20 or -40 due to increased CompressedPtr size */
49294929
auto allocateItem1 =
49304930
std::async(std::launch::async, allocFn, std::string{"hello"},
4931-
std::vector<uint32_t>{100 - 20, 500, 1000});
4931+
std::vector<uint32_t>{100, 500, 1000});
49324932
auto allocateItem2 =
49334933
std::async(std::launch::async, allocFn, std::string{"world"},
4934-
std::vector<uint32_t>{200- 40, 1000, 2000});
4934+
std::vector<uint32_t>{200, 1000, 2000});
49354935
auto allocateItem3 =
49364936
std::async(std::launch::async, allocFn, std::string{"yolo"},
4937-
std::vector<uint32_t>{100-20, 200, 5000});
4937+
std::vector<uint32_t>{100, 200, 5000});
49384938

49394939
auto slabRelease = std::async(releaseFn);
49404940
slabRelease.wait();
@@ -5772,9 +5772,7 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
57725772
AllocatorT alloc(config);
57735773
const size_t numBytes = alloc.getCacheMemoryStats().cacheSize;
57745774
const auto poolSize = numBytes / 2;
5775-
// TODO: becasue CompressedPtr size is increased, key1 must be of equal
5776-
// size with key2
5777-
std::string key1 = "key1";
5775+
std::string key1 = "key1-some-random-string-here";
57785776
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
57795777
auto handle1 = alloc.allocate(poolId, key1, 1);
57805778
alloc.insert(handle1);
@@ -5831,37 +5829,35 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
58315829
auto poolId = alloc.addPool("one", poolSize, {} /* allocSizes */, mmConfig);
58325830
auto handle1 = alloc.allocate(poolId, key1, 1);
58335831
alloc.insert(handle1);
5834-
// TODO: key2 must be the same length as the rest due to increased
5835-
// CompressedPtr size
5836-
auto handle2 = alloc.allocate(poolId, "key2-some-random-string-here", 1);
5832+
auto handle2 = alloc.allocate(poolId, "key2", 1);
58375833
alloc.insert(handle2);
5838-
ASSERT_NE(alloc.find("key2-some-random-string-here"), nullptr);
5834+
ASSERT_NE(alloc.find("key2"), nullptr);
58395835
sleep(9);
58405836

58415837
ASSERT_NE(alloc.find(key1), nullptr);
58425838
auto tail = alloc.dumpEvictionIterator(
5843-
poolId, 1 /* second allocation class, TODO: CompressedPtr */, 3 /* last 3 items */);
5839+
poolId, 0 /* first allocation class */, 3 /* last 3 items */);
58445840
// item 1 gets promoted (age 9), tail age 9, lru refresh time 3 (default)
58455841
EXPECT_TRUE(checkItemKey(tail[1], key1));
58465842

58475843
auto handle3 = alloc.allocate(poolId, key3, 1);
58485844
alloc.insert(handle3);
58495845

58505846
sleep(6);
5851-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5847+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58525848
3 /* last 3 items */);
58535849
ASSERT_NE(alloc.find(key3), nullptr);
5854-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5850+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58555851
3 /* last 3 items */);
58565852
// tail age 15, lru refresh time 6 * 0.7 = 4.2 = 4,
58575853
// item 3 age 6 gets promoted
58585854
EXPECT_TRUE(checkItemKey(tail[1], key1));
58595855

5860-
alloc.remove("key2-some-random-string-here");
5856+
alloc.remove("key2");
58615857
sleep(3);
58625858

58635859
ASSERT_NE(alloc.find(key3), nullptr);
5864-
tail = alloc.dumpEvictionIterator(poolId, 1 /* second allocation class, TODO: CompressedPtr */,
5860+
tail = alloc.dumpEvictionIterator(poolId, 0 /* first allocation class */,
58655861
2 /* last 2 items */);
58665862
// tail age 9, lru refresh time 4, item 3 age 3, not promoted
58675863
EXPECT_TRUE(checkItemKey(tail[1], key3));

cachelib/benchmarks/PtrCompressionBench.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ void buildAllocs(size_t poolSize) {
6161
void* alloc = ma->allocate(pid, size);
6262
XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
6363
if (alloc != nullptr) {
64-
validAllocs.push_back({alloc, ma->compress(alloc)});
64+
validAllocs.push_back({alloc, ma->compress(alloc, false)});
6565
validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
6666
numAllocations++;
6767
}
@@ -83,7 +83,7 @@ BENCHMARK(CompressionAlt) {
8383

8484
BENCHMARK_RELATIVE(Compression) {
8585
for (const auto& alloc : validAllocs) {
86-
CompressedPtr c = m->compress(alloc.first);
86+
CompressedPtr c = m->compress(alloc.first, false);
8787
folly::doNotOptimizeAway(c);
8888
}
8989
}
@@ -97,7 +97,7 @@ BENCHMARK(DeCompressAlt) {
9797

9898
BENCHMARK_RELATIVE(DeCompress) {
9999
for (const auto& alloc : validAllocs) {
100-
void* ptr = m->unCompress(alloc.second);
100+
void* ptr = m->unCompress(alloc.second, false);
101101
folly::doNotOptimizeAway(ptr);
102102
}
103103
}

run_tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
# Newline separated list of tests to ignore
44
BLACKLIST="allocator-test-NavySetupTest
5+
allocator-test-NvmCacheTests
56
shm-test-test_page_size"
67

78
if [ "$1" == "long" ]; then

0 commit comments

Comments
 (0)