smart_malloc hphp/vector/zend_array

New smart_malloc/free API for variable-size smart allocation.  Memory
allocated with smart_malloc can be freed with smart_free, but is
otherwise swept at request-end.  Allocations up to kMaxSmartSize (2K)
are bump-allocated from slabs with an 8-byte header; larger allocations
are routed to malloc with a 16-byte header (doubly-linked list).

ZendArray, VectorArray, HphpArray refactored to use smart_malloc instead
of a bunch of storage-segregated size classes.
commit 77cd177e3c0ccf720e3e6e4c6851214ed2162275 (1 parent: f62a1b2)
@edwinsmith authored, joelpob committed
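As a quick aside (not part of the commit): a minimal usage sketch of the new
entry points, based on the description above and the declarations added to
memory_manager.h below. The wrapper function and the sizes are hypothetical.

  #include "runtime/base/memory/memory_manager.h"

  void example() {
    using namespace HPHP;
    // Small request: bump-allocated from the current slab (8-byte header).
    int* small = (int*) smart_malloc(64 * sizeof(int));
    // Large request: routed to malloc and linked into the request's
    // sweep list via a 16-byte doubly-linked header.
    char* big = (char*) smart_malloc(64 * 1024);
    big = (char*) smart_realloc(big, 128 * 1024);  // may move the block
    smart_free(small);  // explicit free goes back on a freelist
    // 'big' is deliberately not freed; the request-end sweep reclaims it.
  }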
183 src/runtime/base/array/hphp_array.cpp
@@ -45,111 +45,28 @@ static const Trace::Module TRACEMOD = Trace::runtime;
/*
* Allocation of HphpArray buffers works like this: the smallest buffer
- * size is allocated inline in HphpArray. The next group of ^2 sizes is
- * SmartAllocated, and big buffers are malloc'd. HphpArray::m_allocMode
- * tracks the state as it progresses from
+ * size is allocated inline in HphpArray. Larger buffer sizes are smart
+ * allocated or malloc-allocated depending on whether the array itself
+ * was smart-allocated or not. (nonSmartCopy() is used to create static
+ * arrays). HphpArray::m_allocMode tracks the state as it progresses:
*
- * kInline -> kSmart -> kMalloc
+ * kInline -> kSmart, or
+ * -> kMalloc
*
* Hashtables never shrink, so the allocMode never goes backwards.
* If an array is pre-sized, we might skip directly to kSmart or kMalloc.
* If an array is created via nonSmartCopy(), we skip kSmart.
+ * Since kMalloc is only used for static arrays, and static arrays are
+ * never swept, we don't need any sweep method.
*
- * For kInline, we use space in HphpArray defined as InlineSlots.
- * The next couple size classes are declared below, using SlotsImpl as
- * a helper. Each concrete class just needs to instantiate the smart
- * allocator members.
- *
- * Since size reallocations always follow a known sequence, each concrete
- * Slots class's alloc() method takes care of copying data from the next
- * size-class down and freeing it without any indirection.
- *
- * Finally we have allocSlots() and freeSlots() which take care of
- * using the proper concrete class, with minimum fuss and boilerplate.
- *
- * SlotsImpl declares space for both the Elm slots and the ElmInd
- * hashtable. For small and medium-sized tables, the hashtable
- * still fits in-line in HphpArray even when the slots don't.
- * We handle that in the template by declaring hash[0], and
- * HphpArray::allocData/reallocData point m_hash to the inline space
- * instead of the space in the Slots class.
- *
- * For larger smart-allocated tables, m_hash will point to the hash[]
- * table declared here, just like we do for malloc.
+ * For kInline, we use space in HphpArray defined as InlineSlots, which
+ * has enough room for slots and the hashtable. The next few larger array
+ * sizes use the inline space for just the hashtable, with slots allocated
+ * separately. Even larger tables allocate the hashtable and slots
+ * contiguously.
*/
+IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS(HphpArray);
-typedef HphpArray::Elm Elm;
-typedef HphpArray::ElmInd ElmInd;
-typedef HphpArray::InlineSlots InlineSlots;
-
-/*
- * This is the implementation guts for each smart-allocated buffer;
- * size is compile-time constant.
- */
-template <int Size, class Self, class Half>
-struct SlotsImpl {
- static const uint HashCap = Size * sizeof(ElmInd) <= sizeof(InlineSlots) ?
- 0 : Size;
- static const uint Cap = Size - Size / HphpArray::LoadScale;
- Elm slots[Cap];
- ElmInd hash[HashCap];
- void dump() {}
-
- // allocate an instance of Self, copy data from the given instance
- // of Half, then free Half if necessary.
- static Elm* alloc(Elm* old_data) {
- Elm* data = (NEW(Self)())->slots;
- if (old_data) {
- memcpy(data, old_data, sizeof(Half::slots));
- Half::rel(old_data);
- }
- return data;
- }
-
- // Free an instance, given a pointer to its interior slots[] array.
- static void rel(Elm* data) {
- Self* p = (Self*)(uintptr_t(data) - offsetof(Self, slots));
- DELETE(Self)(p);
- }
-};
-
-struct Slots8: SlotsImpl<8, Slots8, InlineSlots> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Slots8);
-};
-struct Slots16: SlotsImpl<16, Slots16, Slots8> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Slots16);
-};
-struct Slots32: SlotsImpl<32, Slots32, Slots16> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Slots32);
-};
-struct Slots64: SlotsImpl<64, Slots64, Slots32> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Slots64);
-};
-const uint MaxSmartCap = Slots64::Cap;
-
-Elm* allocSlots(uint cap, Elm* data) {
- ASSERT(cap <= MaxSmartCap);
- return cap <= Slots8::Cap ? Slots8::alloc(data) :
- cap <= Slots16::Cap ? Slots16::alloc(data) :
- cap <= Slots32::Cap ? Slots32::alloc(data) :
- Slots64::alloc(data);
-}
-
-void freeSlots(uint mask, Elm* data) {
- ASSERT(mask >= 7 && mask <= 63);
- switch (mask) {
- case 7: Slots8::rel(data); break;
- case 15: Slots16::rel(data); break;
- case 31: Slots32::rel(data); break;
- default: Slots64::rel(data); break;
- }
-}
-
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS(Slots8);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS(Slots16);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS(Slots32);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS(Slots64);
-IMPLEMENT_SMART_ALLOCATION(HphpArray, SmartAllocatorImpl::NeedSweep);
//=============================================================================
// Static members.
@@ -171,17 +88,6 @@ static inline size_t computeDataSize(uint32 tableMask) {
computeMaxElms(tableMask) * sizeof(HphpArray::Elm);
}
-static inline void adjustUsageStats(size_t delta, bool refresh = false) {
- MemoryManager* mm = MemoryManager::TheMemoryManager();
- MemoryUsageStats& stats = mm->getStats();
- stats.alloc += delta;
- stats.usage += delta;
- JEMALLOC_STATS_ADJUST(&stats, delta);
- if (refresh) {
- mm->refreshStats();
- }
-}
-
static inline size_t computeMaskFromNumElms(uint32 numElms) {
ASSERT(numElms <= 0x7fffffffU);
size_t lgSize = HphpArray::MinLgTableSize;
@@ -270,10 +176,9 @@ HphpArray::~HphpArray() {
}
}
if (m_allocMode == kSmart) {
- freeSlots(m_tableMask, m_data);
+ smart_free(m_data);
} else if (m_allocMode == kMalloc) {
free(m_data);
- adjustUsageStats(-computeDataSize(m_tableMask));
}
}
@@ -877,17 +782,14 @@ void HphpArray::allocData(size_t maxElms, size_t tableSize) {
}
size_t hashSize = tableSize * sizeof(ElmInd);
size_t dataSize = maxElms * sizeof(Elm);
- if (maxElms <= MaxSmartCap && !m_nonsmart) {
- m_data = allocSlots(maxElms, 0);
+ size_t allocSize = hashSize <= sizeof(m_inline_hash) ? dataSize :
+ dataSize + hashSize;
+ if (!m_nonsmart) {
+ m_data = (Elm*) smart_malloc(allocSize);
m_allocMode = kSmart;
} else {
- size_t allocSize = hashSize <= sizeof(m_inline_hash) ? dataSize :
- dataSize + hashSize;
- void* block = malloc(allocSize);
- if (!block) throw OutOfMemoryException(allocSize);
- m_data = (Elm*) block;
+ m_data = (Elm*) Util::safe_malloc(allocSize);
m_allocMode = kMalloc;
- adjustUsageStats(allocSize);
}
m_hash = hashSize <= sizeof(m_inline_hash) ? m_inline_hash :
(ElmInd*)(uintptr_t(m_data) + dataSize);
@@ -897,29 +799,26 @@ void HphpArray::reallocData(size_t maxElms, size_t tableSize, uint oldMask) {
ASSERT(m_data && oldMask > 0 && maxElms > SmallSize);
size_t hashSize = tableSize * sizeof(ElmInd);
size_t dataSize = maxElms * sizeof(Elm);
- if (maxElms <= MaxSmartCap && !m_nonsmart) {
- m_data = allocSlots(maxElms, m_data);
- m_allocMode = kSmart;
+ size_t allocSize = hashSize <= sizeof(m_inline_hash) ? dataSize :
+ dataSize + hashSize;
+ size_t oldDataSize = computeMaxElms(oldMask) * sizeof(Elm); // slots only.
+ if (!m_nonsmart) {
+ ASSERT(m_allocMode == kInline || m_allocMode == kSmart);
+ if (m_allocMode == kInline) {
+ m_data = (Elm*) smart_malloc(allocSize);
+ memcpy(m_data, m_inline_data.slots, oldDataSize);
+ m_allocMode = kSmart;
+ } else {
+ m_data = (Elm*) smart_realloc(m_data, allocSize);
+ }
} else {
- size_t allocSize = hashSize <= sizeof(m_inline_hash) ? dataSize :
- dataSize + hashSize;
- size_t oldDataSize = computeMaxElms(oldMask) * sizeof(Elm); // slots only.
- if (m_allocMode != kMalloc) {
- void* block = malloc(allocSize);
- if (block == NULL) throw OutOfMemoryException(allocSize);
- memcpy(block, m_data, oldDataSize);
- if (m_allocMode == kSmart) freeSlots(oldMask, m_data);
- m_data = (Elm*) block;
+ ASSERT(m_allocMode == kInline || m_allocMode == kMalloc);
+ if (m_allocMode == kInline) {
+ m_data = (Elm*) Util::safe_malloc(allocSize);
+ memcpy(m_data, m_inline_data.slots, oldDataSize);
m_allocMode = kMalloc;
- adjustUsageStats(allocSize);
} else {
- void* block = realloc(m_data, allocSize);
- if (block == NULL) throw OutOfMemoryException(allocSize);
- m_data = (Elm*) block;
- size_t oldHashSize = computeTableSize(oldMask) * sizeof(ElmInd);
- size_t oldAllocSize = oldHashSize <= sizeof(m_inline_hash) ? oldDataSize :
- oldDataSize + oldHashSize;
- adjustUsageStats(allocSize - oldAllocSize, true);
+ m_data = (Elm*) Util::safe_realloc(m_data, allocSize);
}
}
m_hash = hashSize <= sizeof(m_inline_hash) ? m_inline_hash :
@@ -2223,14 +2122,6 @@ CVarRef HphpArray::endRef() {
}
//=============================================================================
-// Memory allocator methods.
-
-void HphpArray::sweep() {
- if (m_allocMode == kMalloc) free(m_data);
- // Its okay to skip calling adjustUsageStats() in the sweep phase.
-}
-
-//=============================================================================
// VM runtime support functions.
namespace VM {
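The allocData()/reallocData() changes above turn on a single layout rule: if
the hashtable still fits in HphpArray's inline space, only the Elm slots are
heap-allocated; otherwise slots and hashtable share one contiguous block, hash
after slots. A standalone sketch of that rule, with illustrative stand-ins for
the real types and the inline-hash capacity:

  #include <cstdint>
  #include <cstddef>

  typedef int32_t ElmInd;                  // stand-in for HphpArray::ElmInd
  struct Elm { int64_t data[3]; };         // stand-in for HphpArray::Elm
  static const size_t kInlineHashBytes = 8 * sizeof(ElmInd);  // assumed size

  // Mirrors the allocSize expression in allocData()/reallocData().
  size_t allocSizeFor(size_t maxElms, size_t tableSize) {
    size_t hashSize = tableSize * sizeof(ElmInd);
    size_t dataSize = maxElms * sizeof(Elm);
    return hashSize <= kInlineHashBytes ? dataSize        // hash stays inline
                                        : dataSize + hashSize;  // one block
  }

m_hash then points either at the inline space or at uintptr_t(m_data) +
dataSize, exactly as in the diff.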
4 src/runtime/base/array/hphp_array.h
@@ -264,7 +264,6 @@ class HphpArray : public ArrayData {
struct InlineSlots {
Elm slots[SmallSize];
ElmInd hash[SmallHashSize];
- static void rel(Elm*) { /* nop */ };
};
private:
@@ -399,8 +398,7 @@ class HphpArray : public ArrayData {
void resizeIfNeeded();
// Memory allocator methods.
- DECLARE_SMART_ALLOCATION(HphpArray, SmartAllocatorImpl::NeedSweep);
- void sweep();
+ DECLARE_SMART_ALLOCATION_NOCALLBACKS(HphpArray);
private:
enum EmptyMode { StaticEmptyArray };
93 src/runtime/base/array/vector_array.cpp
@@ -25,55 +25,7 @@ namespace HPHP {
StaticEmptyVectorArray StaticEmptyVectorArray::s_theEmptyVectorArray;
-template <class Elm, int Size, class Self>
-struct BufImpl {
- Elm slots[Size];
- void dump() {}
- static Elm* alloc() {
- return (NEW(Self)())->slots;
- }
- static void rel(Elm* data) {
- Self* p = (Self*)(uintptr_t(data) - offsetof(Self, slots));
- DELETE(Self)(p);
- }
-};
-
-struct Buf8: BufImpl<TypedValue, 8, Buf8> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buf8);
-};
-struct Buf16: BufImpl<TypedValue, 16, Buf16> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buf16);
-};
-struct Buf32: BufImpl<TypedValue, 32, Buf32> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buf32);
-};
-struct Buf64: BufImpl<TypedValue, 64, Buf64> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buf64);
-};
-static const uint MaxSmartCap = 64;
-
-TypedValue* VectorArray::smartAlloc(uint cap) {
- ASSERT(cap <= MaxSmartCap);
- return cap <= 8 ? Buf8::alloc() :
- cap <= 16 ? Buf16::alloc() :
- cap <= 32 ? Buf32::alloc() :
- Buf64::alloc();
-}
-void VectorArray::smartFree(TypedValue* data, uint cap) {
- ASSERT(cap == Util::nextPower2(cap) && cap <= MaxSmartCap);
- switch (cap) {
- case 8: Buf8::rel(data); break;
- case 16: Buf16::rel(data); break;
- case 32: Buf32::rel(data); break;
- default: Buf64::rel(data); break;
- }
-}
-
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buf8);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buf16);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buf32);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buf64);
-IMPLEMENT_SMART_ALLOCATION_HOT(VectorArray, SmartAllocatorImpl::NeedSweep);
+IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(VectorArray);
#ifdef DEBUGGING_SMART_ALLOCATOR
#define DECLARE_ALLOCATOR(a, T, I)
@@ -190,13 +142,13 @@ void VectorArray::alloc(uint size) {
}
uint cap = Util::nextPower2(size);
m_capacity = cap;
- if (cap <= MaxSmartCap && !m_nonsmart) {
- m_elems = smartAlloc(cap);
+ if (!m_nonsmart) {
+ m_elems = (TypedValue*) smart_malloc(cap * sizeof(TypedValue));
m_allocMode = kSmart;
- return;
+ } else {
+ m_elems = (TypedValue*) malloc(cap * sizeof(TypedValue));
+ m_allocMode = kMalloc;
}
- m_elems = (TypedValue*) malloc(cap * sizeof(TypedValue));
- m_allocMode = kMalloc;
}
HOT_FUNC_HPHP
@@ -244,7 +196,7 @@ VectorArray::~VectorArray() {
tvAsVariant(&m_elems[i]).~Variant();
}
if (m_allocMode == kSmart) {
- smartFree(m_elems, m_capacity);
+ smart_free(m_elems);
} else if (m_allocMode == kMalloc) {
free(m_elems);
}
@@ -277,26 +229,23 @@ VectorArray::VectorArray(const VectorArray *src, bool sma /* ignored */) :
void VectorArray::grow(uint newSize) {
ASSERT(newSize > FixedSize);
- uint old_capacity = m_capacity;
m_capacity = Util::nextPower2(newSize);
- if (m_capacity <= MaxSmartCap && !m_nonsmart) {
- TypedValue* elems = smartAlloc(m_capacity);
- memcpy(elems, m_elems, m_size * sizeof(TypedValue));
- if (m_allocMode == kSmart) {
- smartFree(m_elems, old_capacity);
- } else {
+ if (!m_nonsmart) {
+ ASSERT(m_allocMode == kInline || m_allocMode == kSmart);
+ if (m_allocMode == kInline) {
+ m_elems = (TypedValue*)smart_malloc(m_capacity * sizeof(TypedValue));
+ memcpy(m_elems, m_fixed, m_size * sizeof(TypedValue));
m_allocMode = kSmart;
+ } else {
+ m_elems = (TypedValue*)smart_realloc(m_elems,
+ m_capacity * sizeof(TypedValue));
}
- m_elems = elems;
- } else if (m_allocMode != kMalloc) {
- TypedValue* elems = (TypedValue*)malloc(m_capacity * sizeof(TypedValue));
- memcpy(elems, m_elems, m_size * sizeof(TypedValue));
- if (m_allocMode == kSmart) {
- smartFree(m_elems, old_capacity);
- }
- m_elems = elems;
+ } else if (m_allocMode == kInline) {
+ m_elems = (TypedValue*)malloc(m_capacity * sizeof(TypedValue));
+ memcpy(m_elems, m_fixed, m_size * sizeof(TypedValue));
m_allocMode = kMalloc;
} else {
+ ASSERT(m_allocMode == kMalloc);
m_elems = (TypedValue*)realloc(m_elems, m_capacity * sizeof(TypedValue));
}
}
@@ -405,10 +354,6 @@ ssize_t VectorArray::getIndex(CVarRef k) const {
return ArrayData::invalid_index;
}
-void VectorArray::sweep() {
- if (m_allocMode == kMalloc) free(m_elems);
-}
-
ZendArray *VectorArray::escalateToNonEmptyZendArray() const {
ASSERT(m_size);
ZendArray *ret;
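The rewritten grow() above is the same three-state progression now shared by
HphpArray and ZendArray: kInline escapes with one smart_malloc plus a memcpy,
kSmart just calls smart_realloc, and the non-smart path mirrors both with
malloc/realloc. A condensed, self-contained sketch with toy types; the smart_*
calls are stubbed with malloc so it compiles on its own:

  #include <cstdlib>
  #include <cstring>

  // Stand-ins so the sketch links; the real functions are request-local
  // and live in MemoryManager.
  static void* smart_malloc(size_t n) { return std::malloc(n); }
  static void* smart_realloc(void* p, size_t n) { return std::realloc(p, n); }

  enum AllocMode { kInline, kSmart, kMalloc };

  struct ToyVector {
    static const unsigned FixedSize = 8;
    long m_fixed[FixedSize];   // inline storage, like VectorArray::m_fixed
    long* m_elems;
    unsigned m_size;
    AllocMode m_allocMode;
    bool m_nonsmart;

    void growTo(size_t cap) {
      size_t bytes = cap * sizeof(long);
      if (!m_nonsmart) {
        if (m_allocMode == kInline) {
          // Escape inline storage: one allocation plus one copy.
          m_elems = (long*) smart_malloc(bytes);
          std::memcpy(m_elems, m_fixed, m_size * sizeof(long));
          m_allocMode = kSmart;
        } else {
          m_elems = (long*) smart_realloc(m_elems, bytes);  // realloc copies
        }
      } else if (m_allocMode == kInline) {
        m_elems = (long*) std::malloc(bytes);
        std::memcpy(m_elems, m_fixed, m_size * sizeof(long));
        m_allocMode = kMalloc;
      } else {
        m_elems = (long*) std::realloc(m_elems, bytes);
      }
    }
  };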
5 src/runtime/base/array/vector_array.h
@@ -126,9 +126,8 @@ class VectorArray : public ArrayData {
virtual CVarRef endRef();
virtual ArrayData *escalate(bool mutableIteration = false) const;
- DECLARE_SMART_ALLOCATION(VectorArray, SmartAllocatorImpl::NeedSweep);
+ DECLARE_SMART_ALLOCATION_NOCALLBACKS(VectorArray);
- void sweep();
private:
enum AllocMode { kInline, kSmart, kMalloc };
TypedValue m_fixed[FixedSize];
@@ -144,8 +143,6 @@ class VectorArray : public ArrayData {
void grow(uint newSize) NEVER_INLINE;
void checkSize(uint n = 1);
void checkInsertIterator(ssize_t pos);
- static TypedValue* smartAlloc(uint cap);
- static void smartFree(TypedValue* data, uint cap);
};
class StaticEmptyVectorArray : public VectorArray {
66 src/runtime/base/array/zend_array.cpp
@@ -31,50 +31,8 @@ namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
-template <class Elm, int Size, class Self>
-struct BucketsImpl {
- Elm slots[Size];
- void dump() {}
- static Elm* alloc() {
- return (NEW(Self)())->slots;
- }
- static void rel(Elm* data) {
- Self* p = (Self*)(uintptr_t(data) - offsetof(Self, slots));
- DELETE(Self)(p);
- }
-};
-
-struct Buckets16: BucketsImpl<ZendArray::Bucket*, 16, Buckets16> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buckets16);
-};
-struct Buckets32: BucketsImpl<ZendArray::Bucket*, 32, Buckets32> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buckets32);
-};
-struct Buckets64: BucketsImpl<ZendArray::Bucket*, 64, Buckets64> {
- DECLARE_SMART_ALLOCATION_NOCALLBACKS(Buckets64);
-};
-static const uint MaxSmartSize = 64;
-
-ZendArray::Bucket** ZendArray::smartAlloc(uint cap) {
- ASSERT(cap <= 64);
- return cap <= 16 ? Buckets16::alloc() :
- cap <= 32 ? Buckets32::alloc() :
- Buckets64::alloc();
-}
-void ZendArray::smartFree(Bucket** data, uint cap) {
- ASSERT(cap == Util::nextPower2(cap) && cap <= MaxSmartSize);
- switch (cap) {
- case 16: Buckets16::rel(data); break;
- case 32: Buckets32::rel(data); break;
- default: Buckets64::rel(data); break;
- }
-}
-
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buckets16);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buckets32);
-IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(Buckets64);
IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_CLS(ZendArray, Bucket);
-IMPLEMENT_SMART_ALLOCATION_HOT(ZendArray, SmartAllocatorImpl::NeedSweep);
+IMPLEMENT_SMART_ALLOCATION_NOCALLBACKS_HOT(ZendArray);
// append/insert/update
@@ -146,9 +104,8 @@ void ZendArray::init(uint nSize) {
m_arBuckets = m_inlineBuckets;
memset(m_inlineBuckets, 0, MinSize * sizeof(Bucket*));
m_allocMode = kInline;
- } else if (size <= MaxSmartSize && !m_nonsmart) {
- m_arBuckets = smartAlloc(size);
- memset(m_arBuckets, 0, size * sizeof(Bucket*));
+ } else if (!m_nonsmart) {
+ m_arBuckets = (Bucket**) smart_calloc(size, sizeof(Bucket*));
m_allocMode = kSmart;
} else {
m_arBuckets = (Bucket **)calloc(size, sizeof(Bucket*));
@@ -189,7 +146,7 @@ ZendArray::~ZendArray() {
DELETE(Bucket)(q);
}
if (m_allocMode == kSmart) {
- smartFree(m_arBuckets, tableSize());
+ smart_free(m_arBuckets);
} else if (m_allocMode == kMalloc) {
free(m_arBuckets);
}
@@ -603,12 +560,12 @@ void ZendArray::resize() {
// For large size classes, it might not move, but since we don't need
// memcpy, why take the chance.
if (m_allocMode == kSmart) {
- smartFree(m_arBuckets, oldSize);
+ smart_free(m_arBuckets);
} else if (m_allocMode == kMalloc) {
free(m_arBuckets);
}
- if (newSize <= MaxSmartSize && !m_nonsmart) {
- m_arBuckets = smartAlloc(newSize);
+ if (!m_nonsmart) {
+ m_arBuckets = (Bucket**) smart_malloc(newSize * sizeof(Bucket*));
m_allocMode = kSmart;
} else {
m_arBuckets = (Bucket**) malloc(newSize * sizeof(Bucket*));
@@ -1584,15 +1541,6 @@ CVarRef ZendArray::endRef() {
}
///////////////////////////////////////////////////////////////////////////////
-// memory allocator methods.
-
-void ZendArray::sweep() {
- if (m_allocMode == kMalloc) {
- free(m_arBuckets);
- }
-}
-
-///////////////////////////////////////////////////////////////////////////////
// class Bucket
HOT_FUNC_HPHP
5 src/runtime/base/array/zend_array.h
@@ -275,14 +275,11 @@ class ZendArray : public ArrayData {
void init(uint nSize);
void resize();
void rehash();
- static Bucket** smartAlloc(uint cap);
- static void smartFree(Bucket**, uint cap);
/**
* Memory allocator methods.
*/
- DECLARE_SMART_ALLOCATION(ZendArray, SmartAllocatorImpl::NeedSweep);
- void sweep();
+ DECLARE_SMART_ALLOCATION_NOCALLBACKS(ZendArray);
};
class StaticEmptyZendArray : public ZendArray {
230 src/runtime/base/memory/memory_manager.cpp
@@ -158,15 +158,14 @@ void MemoryManager::AllocIterator::next() {
++m_it;
}
-MemoryManager::MemoryManager() : m_enabled(false) {
- if (RuntimeOption::EnableMemoryManager) {
- m_enabled = true;
- }
+MemoryManager::MemoryManager() : m_enabled(RuntimeOption::EnableMemoryManager) {
#ifdef USE_JEMALLOC
threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
#endif
resetStats();
m_stats.maxBytes = INT64_MAX;
+ m_front = m_limit = 0;
+ m_smartsweep = 0;
}
void MemoryManager::resetStats() {
@@ -186,6 +185,11 @@ void MemoryManager::resetStats() {
#endif
}
+NEVER_INLINE
+void MemoryManager::refreshStatsHelper() {
+ refreshStats();
+}
+
void MemoryManager::refreshStatsHelperExceeded() {
ThreadInfo* info = ThreadInfo::s_threadInfo.getNoCheck();
info->m_reqInjectionData.setMemExceededFlag();
@@ -219,10 +223,41 @@ void MemoryManager::sweepAll() {
#endif
}
+struct SmallNode {
+ size_t padbytes; // <= kMaxSmartSize means small block
+};
+
+struct SweepNode {
+ SweepNode* next;
+ union {
+ SweepNode* prev;
+ size_t padbytes;
+ };
+};
+
void MemoryManager::rollback() {
+ typedef std::vector<char*>::const_iterator SlabIter;
for (unsigned int i = 0; i < m_smartAllocators.size(); i++) {
m_smartAllocators[i]->rollbackObjects();
}
+ // free smart-malloc slabs
+ for (SlabIter i = m_slabs.begin(), end = m_slabs.end(); i != end; ++i) {
+ free(*i);
+ }
+ m_slabs.clear();
+ // free large allocation blocks
+ if (SweepNode* n = m_smartsweep) {
+ for (SweepNode *next = 0; next != m_smartsweep; n = next) {
+ next = n->next;
+ free(n);
+ }
+ m_smartsweep = 0;
+ }
+ // zero out freelists
+ for (unsigned i = 0; i < kNumSizes; i++) {
+ m_smartfree[i].clear();
+ }
+ m_front = m_limit = 0;
}
void MemoryManager::logStats() {
@@ -238,10 +273,197 @@ void MemoryManager::checkMemory(bool detailed) {
printf("Peak Usage: %lld bytes\t", m_stats.peakUsage);
printf("Peak Alloc: %lld bytes\n", m_stats.peakAlloc);
+ printf("Slabs: %lu KiB\n", m_slabs.size() * SLAB_SIZE / 1024);
+
for (unsigned int i = 0; i < m_smartAllocators.size(); i++) {
m_smartAllocators[i]->checkMemory(detailed);
}
}
+//
+// smart_malloc implementation notes
+//
+// These functions allocate all small blocks from a single slab,
+// and defer larger allocations directly to malloc. When small blocks
+// are freed they're placed on the appropriate size-segregated freelist
+// (m_smartfree[i]). Small blocks have an 8-byte SmallNode and
+// are swept en masse when slabs are freed.
+//
+// Medium blocks use a 16-byte SweepNode header to maintain a doubly-linked
+// list of blocks to free at request end. smart_free can distinguish
+// SmallNode and SweepNode because valid next/prev pointers must be
+// larger than kMaxSmartSize.
+//
+
+inline void* MemoryManager::smartMalloc(size_t nbytes) {
+ ASSERT(nbytes > 0);
+ if (LIKELY(nbytes <= kMaxSmartSize)) {
+ // we round up before adding header-padding, so at least some
+ // allocations will be 16-byte aligned or greater. If we included
+ // padding before rounding, every allocation would be 8-aligned.
+ // We can change this as the common-cases evolve over time.
+ size_t padbytes = (nbytes + kMask) & ~kMask; // not counting header
+ size_t allbytes = padbytes + sizeof(SmallNode);
+ m_stats.usage += allbytes;
+ unsigned i = (padbytes - 1) >> kLgSizeQuantum;
+ ASSERT(i < kNumSizes);
+ void* p = m_smartfree[i].maybePop();
+ if (LIKELY(p != 0)) return p;
+ char* mem = m_front;
+ if (LIKELY(mem + allbytes <= m_limit)) {
+ m_front = mem + allbytes;
+ SmallNode* n = (SmallNode*) mem;
+ n->padbytes = padbytes;
+ return n + 1;
+ }
+ return smartMallocSlab(padbytes);
+ }
+ return smartMallocBig(nbytes);
+}
+
+inline void MemoryManager::smartFree(void* ptr) {
+ ASSERT(ptr != 0);
+ SweepNode* n = ((SweepNode*)ptr) - 1;
+ size_t padbytes = n->padbytes;
+ if (LIKELY(padbytes <= kMaxSmartSize)) {
+ ASSERT(memset(ptr, kSmartFreeFill, padbytes));
+ unsigned i = (padbytes - 1) >> kLgSizeQuantum;
+ ASSERT(i < kNumSizes);
+ m_smartfree[i].push(ptr);
+ m_stats.usage -= padbytes + sizeof(SmallNode);
+ return;
+ }
+ smartFreeBig(n);
+}
+
+// quick-and-dirty realloc implementation. We could do better if the block
+// is malloc'd, by deferring to the underlying realloc.
+inline void* MemoryManager::smartRealloc(void* ptr, size_t nbytes) {
+ ASSERT(ptr != 0 && nbytes > 0);
+ SweepNode* n = ((SweepNode*)ptr) - 1;
+ size_t old_padbytes = n->padbytes;
+ if (LIKELY(old_padbytes <= kMaxSmartSize)) {
+ void* newmem = smartMalloc(nbytes);
+ memcpy(newmem, ptr, std::min(old_padbytes, nbytes));
+ smartFree(ptr);
+ return newmem;
+ }
+ SweepNode* next = n->next;
+ SweepNode* prev = n->prev;
+ SweepNode* n2 = (SweepNode*) realloc(n, nbytes + sizeof(SweepNode));
+ if (n2 != n) {
+ // block moved; must re-link to sweeplist
+ if (next != n) {
+ next->prev = prev->next = n2;
+ } else {
+ n2->next = n2->prev = n2;
+ }
+ if (m_smartsweep == n) m_smartsweep = n2;
+ }
+ return n2 + 1;
+}
+
+NEVER_INLINE char* MemoryManager::newSlab() {
+ if (hhvm && UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
+ refreshStatsHelper();
+ }
+ char* slab = (char*) Util::safe_malloc(SLAB_SIZE);
+ JEMALLOC_STATS_ADJUST(&m_stats, SLAB_SIZE);
+ m_stats.alloc += SLAB_SIZE;
+ if (m_stats.alloc > m_stats.peakAlloc) {
+ m_stats.peakAlloc = m_stats.alloc;
+ }
+ m_slabs.push_back(slab);
+ return slab;
+}
+
+NEVER_INLINE
+void* MemoryManager::smartMallocSlab(size_t padbytes) {
+ char* slab = newSlab();
+ size_t allbytes = padbytes + sizeof(SmallNode);
+ m_front = slab + allbytes;
+ m_limit = slab + SLAB_SIZE;
+ SmallNode* n = (SmallNode*) slab;
+ n->padbytes = padbytes;
+ return n + 1;
+}
+
+inline void* MemoryManager::smartEnlist(SweepNode* n) {
+ if (hhvm && UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
+ refreshStatsHelper();
+ }
+ SweepNode* next = m_smartsweep;
+ if (next) {
+ SweepNode* prev = next->prev;
+ n->next = next;
+ n->prev = prev;
+ next->prev = prev->next = n;
+ } else {
+ n->next = n->prev = n;
+ }
+ m_smartsweep = n;
+ ASSERT(n->padbytes > kMaxSmartSize);
+ return n + 1;
+}
+
+NEVER_INLINE
+void* MemoryManager::smartMallocBig(size_t nbytes) {
+ ASSERT(nbytes > 0);
+ SweepNode* n = (SweepNode*) Util::safe_malloc(nbytes + sizeof(SweepNode));
+ return smartEnlist(n);
+}
+
+NEVER_INLINE
+void* MemoryManager::smartCallocBig(size_t totalbytes) {
+ ASSERT(totalbytes > 0);
+ SweepNode* n = (SweepNode*)Util::safe_calloc(totalbytes + sizeof(SweepNode),
+ 1);
+ return smartEnlist(n);
+}
+
+NEVER_INLINE
+void MemoryManager::smartFreeBig(SweepNode* n) {
+ SweepNode* next = n->next;
+ SweepNode* prev = n->prev;
+ next->prev = prev;
+ prev->next = next;
+ if (UNLIKELY(n == m_smartsweep)) {
+ m_smartsweep = (next != n) ? next : 0;
+ }
+ free(n);
+}
+
+static inline MemoryManager& MM() {
+ return *MemoryManager::TheMemoryManager();
+}
+
+// smart_malloc api entry points, with support for malloc/free corner cases.
+
+HOT_FUNC
+void* smart_malloc(size_t nbytes) {
+ return MM().smartMalloc(std::max(nbytes, size_t(1)));
+}
+
+HOT_FUNC
+void* smart_calloc(size_t count, size_t nbytes) {
+ size_t totalbytes = std::max(nbytes * count, size_t(1));
+ if (totalbytes <= MemoryManager::kMaxSmartSize) {
+ return memset(MM().smartMalloc(totalbytes), 0, totalbytes);
+ }
+ return MM().smartCallocBig(totalbytes);
+}
+
+HOT_FUNC
+void* smart_realloc(void* ptr, size_t nbytes) {
+ if (!ptr) return MM().smartMalloc(std::max(nbytes, size_t(1)));
+ if (!nbytes) return ptr ? MM().smartFree(ptr), (void*)0 : (void*)0;
+ return MM().smartRealloc(ptr, nbytes);
+}
+
+HOT_FUNC
+void smart_free(void* ptr) {
+ if (ptr) MM().smartFree(ptr);
+}
+
///////////////////////////////////////////////////////////////////////////////
}
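Worth spelling out: smartFree() above distinguishes the two header kinds
purely from the word stored just below the user pointer. For a small block
that word is SmallNode::padbytes (<= kMaxSmartSize); for a big block it is
SweepNode::prev, and any valid heap address is numerically far larger than
2048. A toy model of that discrimination, with simplified allocators standing
in for the slab and sweep-list machinery:

  #include <cassert>
  #include <cstddef>
  #include <cstdlib>

  static const size_t kMaxSmartSize = 2048;

  struct SmallNode { size_t padbytes; };    // 8-byte header, small blocks
  struct SweepNode {                        // 16-byte header, big blocks
    SweepNode* next;
    union { SweepNode* prev; size_t padbytes; };
  };

  // Toy allocators; the real code bump-allocates small blocks from slabs
  // and keeps big blocks on a circular doubly-linked sweep list.
  void* toySmallAlloc(size_t padbytes) {
    assert(padbytes <= kMaxSmartSize);
    SmallNode* n = (SmallNode*) std::malloc(sizeof(SmallNode) + padbytes);
    n->padbytes = padbytes;
    return n + 1;
  }

  void* toyBigAlloc(size_t nbytes) {
    SweepNode* n = (SweepNode*) std::malloc(sizeof(SweepNode) + nbytes);
    n->next = n->prev = n;  // a sweep list of one
    return n + 1;
  }

  // The overlay trick from smartFree: 'padbytes' reads the word at ptr - 8
  // either way. For a small block that's the padded size; for a big block
  // it's the prev pointer, a heap address well above 2048.
  bool isSmallBlock(void* ptr) {
    SweepNode* n = ((SweepNode*) ptr) - 1;
    return n->padbytes <= kMaxSmartSize;
  }

  int main() {
    void* s = toySmallAlloc(64);
    void* b = toyBigAlloc(1 << 16);
    assert(isSmallBlock(s) && !isSmallBlock(b));
    std::free((SmallNode*) s - 1);
    std::free((SweepNode*) b - 1);
  }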
153 src/runtime/base/memory/memory_manager.h
@@ -26,6 +26,107 @@ namespace HPHP {
class SmartAllocatorImpl;
+struct SmartNode;
+struct SweepNode;
+
+// jemalloc uses 0x5a but we use 0x6a so we can tell the difference
+// when debugging.
+const char kSmartFreeFill = 0x6a;
+
+/**
+ * A garbage list is a freelist of items that uses the space in the items
+ * to store a singly linked list.
+ */
+class GarbageList {
+public:
+ GarbageList() : ptr(NULL) {
+ }
+
+ // Pops an item, or returns NULL
+ void* maybePop() {
+ void** ret = ptr;
+ if (LIKELY(ret != NULL)) {
+ ptr = (void**)*ret;
+ }
+ return ret;
+ }
+
+ // Pushes an item on to the list. The item must be larger than
+ // sizeof(void*)
+ void push(void* val) {
+ void** convval = (void**)val;
+ *convval = ptr;
+ ptr = convval;
+ }
+
+ // Number of items on the list. We calculate this iteratively
+ // on the assumption we don't query this often, so iterating is
+ // faster than keeping a size field up-to-date.
+ int size() const {
+ int sz = 0;
+ for (Iterator it = begin(), e = end(); it != e; ++it, ++sz) {}
+ return sz;
+ }
+
+ bool empty() const {
+ return ptr == NULL;
+ }
+
+ // Remove all items from this list
+ void clear() {
+ ptr = NULL;
+ }
+
+ class Iterator {
+ public:
+ Iterator(const GarbageList& l) : curptr(l.ptr) {}
+
+ Iterator(const Iterator &other) : curptr(other.curptr) {}
+ Iterator() : curptr(NULL) {}
+
+ bool operator==(const Iterator &it) {
+ return curptr == it.curptr;
+ }
+
+ bool operator!=(const Iterator &it) {
+ return !operator==(it);
+ }
+
+ Iterator &operator++() {
+ if (curptr) {
+ curptr = (void**)*curptr;
+ }
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator ret(*this);
+ operator++();
+ return ret;
+ }
+
+ void* operator*() const {
+ return curptr;
+ }
+
+ private:
+ void** curptr;
+ };
+
+ Iterator begin() const {
+ return Iterator(*this);
+ }
+
+ Iterator end() const {
+ return Iterator();
+ }
+
+ typedef Iterator iterator;
+
+private:
+ void** ptr;
+};
+
/**
* MemoryManager categorizes memory usage into 3 categories and maintain some
* of them with different strategy:
@@ -118,6 +219,11 @@ class MemoryManager : boost::noncopyable {
*/
void resetStats();
+ /**
+ * Out-of-line version of refresh stats
+ */
+ void refreshStatsHelper();
+
void refreshStats() {
refreshStats<true>(m_stats);
}
@@ -208,17 +314,38 @@ class MemoryManager : boost::noncopyable {
}
};
+ void* smartMalloc(size_t nbytes);
+ void* smartRealloc(void* ptr, size_t nbytes);
+ void* smartCallocBig(size_t totalbytes);
+ void smartFree(void* ptr);
+ static const size_t kMaxSmartSize = 2048;
+
private:
+ char* newSlab();
+ void* smartEnlist(SweepNode*);
+ void* smartMallocSlab(size_t padbytes);
+ void* smartMallocBig(size_t nbytes);
+ void smartFreeBig(SweepNode*);
void refreshStatsHelperExceeded();
#ifdef USE_JEMALLOC
void refreshStatsHelperStop();
#endif
+private:
+ static const unsigned kLgSizeQuantum = 6; // 64 bytes
+ static const unsigned kNumSizes = kMaxSmartSize >> kLgSizeQuantum;
+ static const size_t kMask = (1 << kLgSizeQuantum) - 1;
+
+private:
+ char *m_front, *m_limit;
+ GarbageList m_smartfree[kNumSizes];
+ SweepNode* m_smartsweep;
+ MemoryUsageStats m_stats;
bool m_enabled;
std::vector<SmartAllocatorImpl*> m_smartAllocators;
+ std::vector<char*> m_slabs;
- MemoryUsageStats m_stats;
#ifdef USE_JEMALLOC
uint64* m_allocated;
uint64* m_deallocated;
@@ -233,6 +360,30 @@ class MemoryManager : boost::noncopyable {
#endif
};
+//
+// smart_malloc api for request-scoped memory
+//
+// These functions behave like malloc, but get memory from the current
+// thread's MemoryManager instance. At request-end, any un-freed memory
+// is explicitly freed and garbage filled. If any pointers to this memory
+// survive beyond a request, they'll be dangling pointers.
+//
+// Block sizes <= MemoryManager::kMaxSmartSize are region-allocated
+// and are only guaranteed to be 8-byte aligned. Larger blocks are
+// directly malloc'd (with a header) and are 16-byte aligned.
+//
+// Clients must not mix/match calls between smart_malloc and malloc:
+// - these blocks have a header that malloc wouldn't grok
+// - memory is auto-freed at request-end, unlike malloc
+// - all bookkeeping is thread local; freeing a smart_malloc block
+// from a different thread than it was malloc'd from, even while
+// the original request is still running, will just crash and burn.
+//
+void* smart_malloc(size_t nbytes);
+void* smart_calloc(size_t count, size_t bytes);
+void* smart_realloc(void* ptr, size_t nbytes);
+void smart_free(void* ptr);
+
///////////////////////////////////////////////////////////////////////////////
}
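The constants added above encode the small-size classes: a 64-byte quantum
(kLgSizeQuantum = 6), so 2048/64 = 32 freelists. A quick self-check of the
indexing arithmetic smartMalloc/smartFree use, with the constants copied from
the diff:

  #include <cassert>
  #include <cstddef>

  static const size_t kMaxSmartSize = 2048;
  static const unsigned kLgSizeQuantum = 6;                    // 64-byte quantum
  static const unsigned kNumSizes = kMaxSmartSize >> kLgSizeQuantum;  // 32 lists
  static const size_t kMask = (1 << kLgSizeQuantum) - 1;       // 63

  // Mirrors smartMalloc/smartFree: round up to the quantum (not counting
  // the header), then index the per-size freelist.
  unsigned sizeClass(size_t nbytes) {
    size_t padbytes = (nbytes + kMask) & ~kMask;
    unsigned i = (padbytes - 1) >> kLgSizeQuantum;
    assert(i < kNumSizes);
    return i;
  }

  int main() {
    assert(sizeClass(1) == 0 && sizeClass(64) == 0);     // first class: 1..64
    assert(sizeClass(65) == 1 && sizeClass(128) == 1);   // second: 65..128
    assert(sizeClass(kMaxSmartSize) == kNumSizes - 1);   // last: 1985..2048
    return 0;
  }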
11 src/runtime/base/memory/smart_allocator.core.inc
@@ -3,21 +3,10 @@ SMART_ALLOCATOR_ENTRY(StringData)
SMART_ALLOCATOR_ENTRY(Array)
SMART_ALLOCATOR_ENTRY(SharedMap)
SMART_ALLOCATOR_ENTRY(VectorArray)
-SMART_ALLOCATOR_ENTRY(Buf8) // for VectorArray
-SMART_ALLOCATOR_ENTRY(Buf16) // for VectorArray
-SMART_ALLOCATOR_ENTRY(Buf32) // for VectorArray
-SMART_ALLOCATOR_ENTRY(Buf64) // for VectorArray
SMART_ALLOCATOR_ENTRY(Variant)
SMART_ALLOCATOR_ENTRY(Bucket)
SMART_ALLOCATOR_ENTRY(ZendArray)
-SMART_ALLOCATOR_ENTRY(Buckets16) // for ZendArray
-SMART_ALLOCATOR_ENTRY(Buckets32) // for ZendArray
-SMART_ALLOCATOR_ENTRY(Buckets64) // for ZendArray
SMART_ALLOCATOR_ENTRY(HphpArray)
-SMART_ALLOCATOR_ENTRY(Slots8) // for HphpArray
-SMART_ALLOCATOR_ENTRY(Slots16) // for HphpArray
-SMART_ALLOCATOR_ENTRY(Slots32) // for HphpArray
-SMART_ALLOCATOR_ENTRY(Slots64) // for HphpArray
SMART_ALLOCATOR_ENTRY(ObjectData)
SMART_ALLOCATOR_ENTRY(GlobalVariables)
SMART_ALLOCATOR_ENTRY(TaintTraceNode)
14 src/runtime/base/memory/smart_allocator.cpp
@@ -166,16 +166,15 @@ HOT_FUNC
void *SmartAllocatorImpl::alloc(size_t nbytes) {
ASSERT(nbytes == size_t(m_itemSize));
ASSERT(m_next && m_next <= m_limit);
- MemoryUsageStats* stats = &MemoryManager::TheMemoryManager()->getStats();
// Just update the usage, while the peakUsage is maintained by
// FrameInjection.
- int64 usage = stats->usage + nbytes;
- stats->usage = usage;
+ MemoryUsageStats* stats = &MemoryManager::TheMemoryManager()->getStats();
+ int64 usage = (stats->usage += nbytes);
if (hhvm && UNLIKELY(usage > stats->maxBytes)) {
// It's possible that this simplified check will trip later than
// it should in a perfect world but it's cheaper than a full call
// to refreshStats on every alloc().
- statsHelper();
+ MemoryManager::TheMemoryManager()->refreshStatsHelper();
}
#ifndef SMART_ALLOCATOR_DEBUG_FREE
void* freelist_value = m_freelist.maybePop();
@@ -223,13 +222,6 @@ void *SmartAllocatorImpl::allocHelper() {
return p;
}
-// cold-path helper function, only called when request memory overflow
-// is likely.
-void SmartAllocatorImpl::statsHelper() {
- ASSERT(MemoryManager::TheMemoryManager()->getStats().maxBytes > 0);
- MemoryManager::TheMemoryManager()->refreshStats();
-}
-
bool SmartAllocatorImpl::assertValidHelper(void *obj) const {
if (obj) {
#ifdef DETECT_DOUBLE_FREE
95 src/runtime/base/memory/smart_allocator.h
@@ -157,98 +157,6 @@ typedef hphp_hash_map<int64, int, int64_hash> BlockIndexMap;
typedef boost::dynamic_bitset<unsigned long long> FreeMap;
/**
- * A garbage list is a freelist of items that uses the space in the items
- * to store a singly linked list.
- */
-class GarbageList {
-public:
- GarbageList() : ptr(NULL) {
- }
-
- // Pops an item, or returns NULL
- void* maybePop() {
- void** ret = ptr;
- if (LIKELY(ret != NULL)) {
- ptr = (void**)*ret;
- }
- return ret;
- }
-
- // Pushes an item on to the list. The item must be larger than
- // sizeof(void*)
- void push(void* val) {
- void** convval = (void**)val;
- *convval = ptr;
- ptr = convval;
- }
-
- // Number of items on the list.
- int size() const {
- int sz = 0;
- for (Iterator it = begin(), e = end(); it != e; ++it, ++sz) {}
- return sz;
- }
-
- bool empty() const {
- return ptr == NULL;
- }
-
- // Remove all items from this list
- void clear() {
- ptr = NULL;
- }
-
- class Iterator {
- public:
- Iterator(const GarbageList& l) : curptr(l.ptr) {}
-
- Iterator(const Iterator &other) : curptr(other.curptr) {}
- Iterator() : curptr(NULL) {}
-
- bool operator==(const Iterator &it) {
- return curptr == it.curptr;
- }
-
- bool operator!=(const Iterator &it) {
- return !operator==(it);
- }
-
- Iterator &operator++() {
- if (curptr) {
- curptr = (void**)*curptr;
- }
- return *this;
- }
-
- Iterator operator++(int) {
- Iterator ret(*this);
- operator++();
- return ret;
- }
-
- void* operator*() const {
- return curptr;
- }
-
- private:
- void** curptr;
- };
-
- Iterator begin() const {
- return Iterator(*this);
- }
-
- Iterator end() const {
- return Iterator();
- }
-
- typedef Iterator iterator;
-
-private:
- void** ptr;
-};
-
-/**
* Just a simple free-list based memory allocator.
*/
class SmartAllocatorImpl : boost::noncopyable {
@@ -287,7 +195,7 @@ class SmartAllocatorImpl : boost::noncopyable {
void* alloc(size_t size);
void dealloc(void *obj) {
ASSERT(assertValidHelper(obj));
- ASSERT(memset(obj, 0x6a, m_itemSize));
+ ASSERT(memset(obj, kSmartFreeFill, m_itemSize));
m_freelist.push(obj);
if (hhvm) {
*((int*)(uintptr_t(obj) + FAST_REFCOUNT_OFFSET)) = RefCountTombstoneValue;
@@ -315,7 +223,6 @@ class SmartAllocatorImpl : boost::noncopyable {
private:
void* allocHelper() NEVER_INLINE;
- void statsHelper() NEVER_INLINE;
bool assertValidHelper(void *obj) const;
// keep these frequently used fields together.
16 src/util/alloc.cpp
@@ -25,22 +25,6 @@
namespace HPHP { namespace Util {
///////////////////////////////////////////////////////////////////////////////
-void* safe_malloc(size_t size) {
- void *ptr = std::malloc(size);
- if (ptr == NULL) throw OutOfMemoryException(size);
- return ptr;
-}
-
-void* safe_realloc(void *ptr, size_t size) {
- ptr = std::realloc(ptr, size);
- if (!ptr && size != 0) throw OutOfMemoryException(size);
- return ptr;
-}
-
-void safe_free(void *ptr) {
- free(ptr); // standard free() allows ptr == 0
-}
-
void flush_thread_caches() {
#ifndef NO_JEMALLOC
if (mallctl) {
26 src/util/alloc.h
@@ -72,9 +72,27 @@ namespace Util {
/**
* Safe memory allocation.
*/
-void* safe_malloc(size_t size);
-void* safe_realloc(void* ptr, size_t size);
-void safe_free(void* ptr);
+inline void* safe_malloc(size_t size) {
+ void* p = malloc(size);
+ if (!p) throw OutOfMemoryException(size);
+ return p;
+}
+
+inline void* safe_calloc(size_t count, size_t size) {
+ void* p = calloc(count, size);
+ if (!p) throw OutOfMemoryException(size);
+ return p;
+}
+
+inline void* safe_realloc(void* ptr, size_t size) {
+ ptr = realloc(ptr, size);
+ if (!ptr && size > 0) throw OutOfMemoryException(size);
+ return ptr;
+}
+
+inline void safe_free(void* ptr) {
+ return free(ptr);
+}
/**
* Instruct low level memory allocator to free memory back to system. Called
@@ -94,7 +112,7 @@ void flush_thread_stack();
*/
class ScopedMem {
ScopedMem(const ScopedMem&); // disable copying
- ScopedMem& operator=(const ScopedMem&);
+ ScopedMem& operator=(const ScopedMem&);
public:
ScopedMem(void* ptr) : m_ptr(ptr) {}
~ScopedMem() { free(m_ptr); }
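Since safe_malloc now throws instead of returning NULL, pairing it with
ScopedMem (above) gives simple exception-safe temporaries. A possible usage
sketch; the namespace placement of ScopedMem is assumed, so both directives
are used to cover either case:

  #include "util/alloc.h"
  using namespace HPHP;
  using namespace HPHP::Util;

  void parseChunk(size_t n) {
    void* buf = safe_malloc(n);  // throws OutOfMemoryException, never NULL
    ScopedMem guard(buf);        // frees buf on scope exit, even on throw
    // ... fill and consume buf ...
  }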