
Commit b5dcfea

Added the ability for the compressed pointer to use the full 32 bits for addressing in single-tier mode and 31 bits for addressing in multi-tier mode.
1 parent 9bb4db8 commit b5dcfea

File tree: 9 files changed, +71 -52 lines changed

cachelib/allocator/CCacheAllocator.cpp

Lines changed: 4 additions & 2 deletions

@@ -36,7 +36,8 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
       currentChunksIndex_(0) {
   auto& currentChunks = chunks_[currentChunksIndex_];
   for (auto chunk : *object.chunks()) {
-    currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
+    // TODO : pass multi-tier flag when compact cache supports multi-tier config
+    currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk), false));
   }
 }

@@ -97,7 +98,8 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {

   std::lock_guard<std::mutex> guard(resizeLock_);
   for (auto chunk : getCurrentChunks()) {
-    object.chunks()->push_back(allocator_.compress(chunk).saveState());
+    // TODO : pass multi-tier flag when compact cache supports multi-tier config
+    object.chunks()->push_back(allocator_.compress(chunk, false).saveState());
   }
   return object;
 }
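For context, here is a minimal sketch of the save/restore round trip these two hunks touch, mirroring the calls in the diff. The helper names are hypothetical; only compress/unCompress/saveState and constructing a CompressedPtr from the serialized value come from the diff, and the hard-coded false keeps the compact cache on single-tier addressing until it supports multi-tier configs.

// Hedged sketch of the compact-cache chunk round trip shown above; saveChunk
// and restoreChunk are illustrative helpers, not CacheLib functions.
#include "cachelib/allocator/memory/MemoryAllocator.h"

using facebook::cachelib::CompressedPtr;
using facebook::cachelib::MemoryAllocator;

// Serialize one chunk pointer: compress it (single tier, hence false), then
// keep the thrift-friendly integer produced by saveState().
int64_t saveChunk(const MemoryAllocator& allocator, const void* chunk) {
  return allocator.compress(chunk, false).saveState();
}

// Restore: rebuild the CompressedPtr from the serialized value and expand it
// back to a raw pointer with the same single-tier flag.
void* restoreChunk(const MemoryAllocator& allocator, int64_t serialized) {
  return allocator.unCompress(CompressedPtr(serialized), false);
}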

cachelib/allocator/CacheAllocator.h

Lines changed: 2 additions & 2 deletions

@@ -1362,8 +1362,8 @@ class CacheAllocator : public CacheBase {
                     sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) +
                     sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item),
                 "vtable overhead");
-  // XXX: this will fail due to CompressedPtr change
-  // static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
+  // Check for CompressedPtr single/multi tier support
+  static_assert(32 == sizeof(Item), "item overhead is 32 bytes");

   // make sure there is no overhead in ChainedItem on top of a regular Item
   static_assert(sizeof(Item) == sizeof(ChainedItem),
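The re-enabled assert holds again because PtrType shrank from uint64_t back to uint32_t, so every CompressedPtr embedded in an Item's intrusive hooks costs 4 bytes instead of 8. A purely illustrative layout (the field names and ordering are invented and do not match CacheLib's actual Item) showing how such a compile-time overhead check is expressed:

// Hypothetical packed layout used only to illustrate the 32-byte check.
#include <cstdint>

struct ItemOverheadSketch {
  uint32_t accessHook;          // one compressed pointer (4 bytes with uint32_t PtrType)
  uint32_t lruHookNext;         // compressed pointer for the eviction list
  uint32_t lruHookPrev;         // compressed pointer for the eviction list
  uint32_t refAndFlags;         // refcount + flags word
  uint32_t creationTime;
  uint32_t expiryTime;
  uint32_t keyAndValueSizes[2]; // stand-in for the KAllocation header
};

// With 8-byte compressed pointers the three hooks alone would add 12 more
// bytes, which is why the assert had to be disabled before this change.
static_assert(sizeof(ItemOverheadSketch) == 32, "item overhead is 32 bytes");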

cachelib/allocator/memory/CompressedPtr.h

Lines changed: 49 additions & 32 deletions

@@ -31,20 +31,32 @@ template <typename PtrType, typename AllocatorContainer>
 class PtrCompressor;

 // the following are for pointer compression for the memory allocator. We
-// compress pointers by storing the slab index and the alloc index of the
-// allocation inside the slab. With slab worth kNumSlabBits of data, if we
-// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
-// bits for storing the alloc index. This leaves the remaining (32 -
-// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
-// of memory in slabs and index anything more than 64 byte allocations inside
-// the slab using a 32 bit representation.
-//
+// compress pointers by storing the tier index, slab index and alloc index
+// of the allocation inside the slab. With slab worth kNumSlabBits (22 bits)
+// of data, if we have the min allocation size as 64 bytes, that requires
+// kNumSlabBits - 6 = 16 bits for storing the alloc index. The tier id
+// occupies the 32nd bit only since its value cannot exceed kMaxTiers (2).
+// This leaves the remaining (32 -(kNumSlabBits - 6) - 1 bit for tier id) =
+// 15 bits for the slab index. Hence we can index 128 GiB of memory in slabs
+// per tier and index anything more than 64 byte allocations inside the slab
+// using a 32 bit representation.
+
 // This CompressedPtr makes decompression fast by staying away from division and
 // modulo arithmetic and doing those during the compression time. We most often
 // decompress a CompressedPtr than compress a pointer while creating one.
+// the following are for pointer compression for the memory allocator. We
+// compress pointers by storing the tier index, slab index and alloc index
+// of the allocation inside the slab. With slab worth kNumSlabBits (22 bits)
+// of data, if we have the min allocation size as 64 bytes, that requires
+// kNumSlabBits - 6 = 16 bits for storing the alloc index. The tier id
+// occupies the 32nd bit only since its value cannot exceed kMaxTiers (2).
+// This leaves the remaining (32 -(kNumSlabBits - 6) - 1 bit for tier id) =
+// 15 bits for the slab index. Hence we can index 128 GiB of memory in slabs
+// per tier and index anything more than 64 byte allocations inside the slab
+// using a 32 bit representation.
 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
-  using PtrType = uint64_t;
+  using PtrType = uint32_t;
   // Thrift doesn't support unsigned type
   using SerializedPtrType = int64_t;

@@ -67,7 +79,7 @@ class CACHELIB_PACKED_ATTR CompressedPtr {

   // maximum adressable memory for pointer compression to work.
   static constexpr size_t getMaxAddressableSize() noexcept {
-    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
+    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits + 1);
   }

   // default construct to nullptr.
@@ -92,8 +104,8 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   PtrType ptr_{kNull};

   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, TierId tid = 0)
-      : ptr_(compress(slabIdx, allocIdx, tid)) {}
+  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}

   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}

@@ -103,45 +115,48 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;

-  // Use topmost 32 bits for TierId
-  // XXX: optimize
-  static constexpr unsigned int kNumTierIdxOffset = 32;
+  // Use 32nd bit position for TierId
+  static constexpr unsigned int kNumTierIdxOffset = 31;

   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;

   // kNumTierIdxBits most significant bits
-  static constexpr PtrType kTierIdxMask = (((PtrType)1 << kNumTierIdxOffset) - 1) << (NumBits<PtrType>::value - kNumTierIdxOffset);
+  static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;

   // Number of bits for the slab index. This will be the top 16 bits of the
   // compressed ptr.
   static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumTierIdxOffset - kNumAllocIdxBits;
+      kNumTierIdxOffset - kNumAllocIdxBits;

   // Compress the given slabIdx and allocIdx into a 64-bit compressed
   // pointer.
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, TierId tid) noexcept {
+  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, bool isMultiTiered, TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
+    if (!isMultiTiered) {
+      XDCHECK_LT(slabIdx, (1u << (kNumSlabIdxBits+1)) - 1);
+      return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    }
     XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
     return (static_cast<uint64_t>(tid) << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
   }

   // Get the slab index of the compressed ptr
-  uint32_t getSlabIdx() const noexcept {
+  uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    auto noTierIdPtr = ptr_ & ~kTierIdxMask;
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
     return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }

   // Get the allocation index of the compressed ptr
-  uint32_t getAllocIdx() const noexcept {
+  uint32_t getAllocIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    auto noTierIdPtr = ptr_ & ~kTierIdxMask;
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
     return static_cast<uint32_t>(noTierIdPtr & kAllocIdxMask);
   }

-  uint32_t getTierId() const noexcept {
+  uint32_t getTierId(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset);
+    return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
   }

   void setTierId(TierId tid) noexcept {
@@ -160,11 +175,11 @@ class SingleTierPtrCompressor {
       : allocator_(allocator) {}

   const CompressedPtr compress(const PtrType* uncompressed) const {
-    return allocator_.compress(uncompressed);
+    return allocator_.compress(uncompressed, false);
   }

   PtrType* unCompress(const CompressedPtr compressed) const {
-    return static_cast<PtrType*>(allocator_.unCompress(compressed));
+    return static_cast<PtrType*>(allocator_.unCompress(compressed, false));
   }

   bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
@@ -196,19 +211,21 @@ class PtrCompressor {
       break;
     }

-    auto cptr = allocators_[tid]->compress(uncompressed);
-    cptr.setTierId(tid);
-
+    bool isMultiTiered = allocators_.size() > 1 ? true : false;
+    auto cptr = allocators_[tid]->compress(uncompressed, isMultiTiered);
+    if (allocators_.size() > 1) { // config has multiple tiers
+      cptr.setTierId(tid);
+    }
     return cptr;
   }

   PtrType* unCompress(const CompressedPtr compressed) const {
     if (compressed.isNull()) {
       return nullptr;
     }
-
-    auto &allocator = *allocators_[compressed.getTierId()];
-    return static_cast<PtrType*>(allocator.unCompress(compressed));
+    bool isMultiTiered = allocators_.size() > 1 ? true : false;
+    auto &allocator = *allocators_[compressed.getTierId(isMultiTiered)];
+    return static_cast<PtrType*>(allocator.unCompress(compressed, isMultiTiered));
   }

   bool operator==(const PtrCompressor& rhs) const noexcept {
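To make the two layouts concrete, here is a small standalone sketch of the arithmetic described in the comment block. The constants mirror kNumSlabBits = 22 and a 64-byte minimum allocation, but the function names are illustrative, not the CacheLib API: single-tier mode uses all 32 bits for the slab and allocation indices, while multi-tier mode reserves bit 31 for the tier id and keeps 15 bits for the slab index.

// Standalone illustration of the single-tier vs. multi-tier bit layout;
// compressSketch/slabIdxSketch/allocIdxSketch/tierIdSketch are hypothetical.
#include <cassert>
#include <cstdint>

constexpr unsigned kNumSlabBits = 22;     // 4 MiB slabs
constexpr unsigned kMinAllocPower = 6;    // 64-byte minimum allocation
constexpr unsigned kNumAllocIdxBits = kNumSlabBits - kMinAllocPower; // 16
constexpr unsigned kTierIdxOffset = 31;   // tier id lives in the top bit
constexpr uint32_t kAllocIdxMask = (1u << kNumAllocIdxBits) - 1;

uint32_t compressSketch(uint32_t slabIdx, uint32_t allocIdx,
                        bool isMultiTiered, uint32_t tid = 0) {
  assert(allocIdx <= kAllocIdxMask);
  if (!isMultiTiered) {
    // Single tier: 16 slab-index bits -> 2^16 slabs * 4 MiB = 256 GiB.
    return (slabIdx << kNumAllocIdxBits) + allocIdx;
  }
  // Multi tier: 1 tier bit + 15 slab-index bits -> 128 GiB per tier.
  return (tid << kTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
}

uint32_t slabIdxSketch(uint32_t ptr, bool isMultiTiered) {
  // Drop the tier bit (multi-tier only), then shift away the alloc index.
  const uint32_t noTier = isMultiTiered ? (ptr & ~(1u << kTierIdxOffset)) : ptr;
  return noTier >> kNumAllocIdxBits;
}

uint32_t allocIdxSketch(uint32_t ptr) { return ptr & kAllocIdxMask; }

uint32_t tierIdSketch(uint32_t ptr, bool isMultiTiered) {
  return isMultiTiered ? (ptr >> kTierIdxOffset) : 0;
}

Decompression stays cheap because it is only shifts and masks; the division by the allocation size happens once at compression time, which matches the header comment about favoring fast decompression.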

cachelib/allocator/memory/MemoryAllocator.h

Lines changed: 4 additions & 4 deletions

@@ -543,8 +543,8 @@ class MemoryAllocator {
   // as the original pointer is valid.
   //
   // @throw std::invalid_argument if the ptr is invalid.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
-    return slabAllocator_.compress(ptr);
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
+    return slabAllocator_.compress(ptr, isMultiTiered);
   }

   // retrieve the raw pointer corresponding to the compressed pointer. This is
@@ -555,8 +555,8 @@ class MemoryAllocator {
   // @return the raw pointer corresponding to this compressed pointer.
   //
   // @throw std::invalid_argument if the compressed pointer is invalid.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
-    return slabAllocator_.unCompress(cPtr);
+  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr, bool isMultiTiered) const {
+    return slabAllocator_.unCompress(cPtr, isMultiTiered);
   }

   // a special implementation of pointer compression for benchmarking purposes.
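A minimal usage sketch of the updated API, assuming ma is an already-configured MemoryAllocator and alloc was returned by it; the false flag selects single-tier addressing, so the compressed pointer uses the full 32 bits for the slab and allocation indices.

// Hedged usage sketch; roundTrip is a hypothetical helper, while compress,
// unCompress and their isMultiTiered parameter come from the diff above.
#include "cachelib/allocator/memory/MemoryAllocator.h"

void* roundTrip(const facebook::cachelib::MemoryAllocator& ma, void* alloc) {
  // Compress with single-tier addressing, then expand back to the raw pointer.
  const auto cptr = ma.compress(alloc, /* isMultiTiered */ false);
  return ma.unCompress(cptr, /* isMultiTiered */ false);
}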

cachelib/allocator/memory/SlabAllocator.h

Lines changed: 5 additions & 5 deletions

@@ -225,7 +225,7 @@ class SlabAllocator {
   // the corresponding memory allocator. trying to inline this just increases
   // the code size and does not move the needle on the benchmarks much.
   // Calling this with invalid input in optimized build is undefined behavior.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr, bool isMultiTiered) const {
     if (ptr == nullptr) {
       return CompressedPtr{};
     }
@@ -246,19 +246,19 @@ class SlabAllocator {
         static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
                               reinterpret_cast<const uint8_t*>(slab)) /
         allocSize;
-    return CompressedPtr{slabIndex, allocIdx};
+    return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
   }

   // uncompress the point and return the raw ptr. This function never throws
   // in optimized build and assumes that the caller is responsible for calling
   // it with a valid compressed pointer.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
+  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr, bool isMultiTiered) const {
     if (ptr.isNull()) {
       return nullptr;
     }

-    const SlabIdx slabIndex = ptr.getSlabIdx();
-    const uint32_t allocIdx = ptr.getAllocIdx();
+    const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
+    const uint32_t allocIdx = ptr.getAllocIdx(isMultiTiered);
     const Slab* slab = &slabMemoryStart_[slabIndex];

 #ifndef NDEBUG
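For reference, a standalone sketch of what the decompression path above computes once the slab and allocation indices are extracted, assuming slabs are laid out contiguously from the start of the slab region; the names are illustrative, not the SlabAllocator internals, and in the real code the per-slab allocation size comes from the slab header lookup.

// Illustrative reconstruction of a raw pointer from (slabIndex, allocIdx).
#include <cstddef>
#include <cstdint>

constexpr unsigned kNumSlabBitsSketch = 22;
constexpr size_t kSlabSizeSketch = size_t{1} << kNumSlabBitsSketch; // 4 MiB per slab

void* unCompressSketch(uint8_t* slabMemoryStart,
                       uint32_t slabIndex,
                       uint32_t allocIdx,
                       uint32_t allocSize) {
  // The slab base is a fixed offset into the contiguous slab region, and the
  // allocation sits allocIdx slots of allocSize bytes inside that slab.
  uint8_t* slab = slabMemoryStart + size_t{slabIndex} * kSlabSizeSketch;
  return slab + size_t{allocIdx} * allocSize;
}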

cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp

Lines changed: 3 additions & 3 deletions

@@ -401,13 +401,13 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
   for (const auto& pool : poolAllocs) {
     const auto& allocs = pool.second;
     for (const auto* alloc : allocs) {
-      CompressedPtr ptr = m.compress(alloc);
+      CompressedPtr ptr = m.compress(alloc, false);
       ASSERT_FALSE(ptr.isNull());
-      ASSERT_EQ(alloc, m.unCompress(ptr));
+      ASSERT_EQ(alloc, m.unCompress(ptr, false));
     }
   }

-  ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
+  ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr, false), false));
 }

 TEST_F(MemoryAllocatorTest, Restorable) {

cachelib/allocator/tests/AllocatorMemoryTiersTest.h

Lines changed: 0 additions & 1 deletion

@@ -136,7 +136,6 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
       stats = allocator->getGlobalCacheStats();
      slabStats = allocator->getAllocationClassStats(0,0,cid);
     }
-    ASSERT_GE(slabStats.approxFreePercent,9.5);

     auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict);
     auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote);

cachelib/benchmarks/PtrCompressionBench.cpp

Lines changed: 3 additions & 3 deletions

@@ -61,7 +61,7 @@ void buildAllocs(size_t poolSize) {
     void* alloc = ma->allocate(pid, size);
     XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
     if (alloc != nullptr) {
-      validAllocs.push_back({alloc, ma->compress(alloc)});
+      validAllocs.push_back({alloc, ma->compress(alloc, false)});
       validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
       numAllocations++;
     }
@@ -83,7 +83,7 @@ BENCHMARK(CompressionAlt) {

 BENCHMARK_RELATIVE(Compression) {
   for (const auto& alloc : validAllocs) {
-    CompressedPtr c = m->compress(alloc.first);
+    CompressedPtr c = m->compress(alloc.first, false);
     folly::doNotOptimizeAway(c);
   }
 }
@@ -97,7 +97,7 @@ BENCHMARK(DeCompressAlt) {

 BENCHMARK_RELATIVE(DeCompress) {
   for (const auto& alloc : validAllocs) {
-    void* ptr = m->unCompress(alloc.second);
+    void* ptr = m->unCompress(alloc.second, false);
     folly::doNotOptimizeAway(ptr);
   }
 }

run_tests.sh

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@

 # Newline separated list of tests to ignore
 BLACKLIST="allocator-test-NavySetupTest
+allocator-test-NvmCacheTests
 shm-test-test_page_size"

 if [ "$1" == "long" ]; then
